from functools import partial

import re
import luigi
import yt.wrapper as yt

from lib.luigi import yt_luigi
from rtcconf import config
from utils import mr_utils as mr
from utils import utils
from v2.soup import soup_config
from v2.soup.soup_tables import SoupDumpTable


# Accepts date strings laid out as %Y-%m-%d whose year starts with "20"
# (i.e. 2000-2099). Only the digit layout is checked: month and day values
# are not range-validated by this pattern.
DATE_REGEXP = re.compile(r'^20\d{2}(?:-\d{2}){2}$')


def map_yandex_tickets(rec):
    """ Parse one tickets row and emit id-pair records for the temporary table.

    For every combination of a non-empty left id (puid, yandexuid) and a
    non-empty right id (email, phone) one record is yielded with the keys
    ``left_col``, ``left_id``, ``right_col``, ``right_id``, ``id_date`` and
    ``dates``:

    * ``id_date`` - date string built from ``service_info.created``;
    * ``dates``   - distinct date strings built from the ``created``,
      ``sessionInfo.sessionDate`` and ``updated`` timestamps that match
      ``DATE_REGEXP``.

    Timestamps are divided by 1000 before conversion (the input is in
    milliseconds).

    Missing or null ``service_info`` / ``userInfo`` / ``sessionInfo``
    sections are treated as empty instead of raising ``AttributeError``.
    A row whose ``created`` timestamp is missing or not numeric yields
    nothing, because ``id_date`` cannot be derived for it.
    """
    # Nested sections may be absent or null; fall back to empty dicts so that
    # a single malformed row does not crash the whole map stage.
    service_info = rec.get('service_info') or {}
    user_info = service_info.get('userInfo') or {}
    session_info = service_info.get('sessionInfo') or {}

    created_ts = service_info.get('created')
    try:
        id_date = utils.ts_to_date_str(created_ts / 1000)  # originally in milliseconds
    except TypeError:
        # Same failure mode the dates loop below already tolerates
        # (None or non-numeric timestamp): skip the row.
        return

    # Falsy timestamps (None, 0, '') are ignored.
    old_dates = filter(None, (
        created_ts,
        session_info.get('sessionDate'),
        service_info.get('updated')
    ))

    new_dates = set()
    for date in old_dates:
        try:
            new_date = utils.ts_to_date_str(date / 1000)  # originally in milliseconds
            if DATE_REGEXP.match(new_date):
                new_dates.add(new_date)
        except TypeError:
            continue

    left_ids = [
        (rec.get('user_puid'), config.ID_TYPE_PUID),
        (rec.get('user_yandexuid'), config.ID_TYPE_YUID),
    ]
    right_ids = [
        (user_info.get('email'), config.ID_TYPE_EMAIL),
        (user_info.get('phone'), config.ID_TYPE_PHONE),
    ]

    for left_id, left_col in left_ids:
        for right_id, right_col in right_ids:
            if left_id and right_id:
                yield {
                    'left_col': left_col, 'left_id': str(left_id),
                    'right_col': right_col, 'right_id': str(right_id),
                    'id_date': id_date, 'dates': list(new_dates)}


def reduce_yandex_tickets(key, recs, soup_indexes, table_indexes):
    """ Collapse all mapped records of one (left id, right id) pair.

    Always yields one aggregated record (pair columns, unique ``id_date``
    values, unique ``dates`` and the number of records seen). When the
    number of records reaches ``config.TICKETS_MIN_COUNT`` it additionally
    yields:

    * one record built by ``SoupDumpTable.make_rec`` using
      ``soup_indexes[(left_col, right_col)]``;
    * one dictionary record per unique ``id_date`` carrying
      ``'@table_index'`` taken from ``table_indexes[(left_col, right_col)]``.

    :param key: reduce key with ``left_col``, ``left_id``, ``right_col``
        and ``right_id``.
    :param recs: iterable of mapped records sharing ``key``.
    :param soup_indexes: mapping ``(left_col, right_col)`` -> index passed
        to ``SoupDumpTable.make_rec``.
    :param table_indexes: mapping ``(left_col, right_col)`` -> value written
        to ``'@table_index'`` of the dictionary records.
    """
    left_col, left_id = key['left_col'], key['left_id']
    right_col, right_id = key['right_col'], key['right_id']

    seen_dates = set()
    seen_id_dates = set()
    total = 0
    for row in recs:
        # collect unique dates and count how many records the pair has
        seen_dates.update(row['dates'])
        seen_id_dates.add(row['id_date'])
        total += 1

    # aggregated record of the pair (carries no explicit '@table_index')
    yield {
        'left_col': left_col, 'left_id': left_id,
        'right_col': right_col, 'right_id': right_id,
        'id_date': list(seen_id_dates), 'dates': list(seen_dates),
        'count': total,
    }

    # pairs seen fewer than TICKETS_MIN_COUNT times produce nothing else
    if total < config.TICKETS_MIN_COUNT:
        return

    # soup record: the soup_config attribute is named "<left>_<right>_yatickets"
    soup_entry = vars(soup_config)['_'.join((left_col, right_col, 'yatickets'))]
    yield SoupDumpTable.make_rec(
        left_id, right_id, soup_entry,
        list(seen_dates), soup_indexes[left_col, right_col])

    # one dictionary record per unique id_date
    for id_date in seen_id_dates:
        yield {
            left_col: left_id,
            'id_value': right_id,
            'id_type': right_col,
            'id_date': id_date,
            'source_type': config.ID_SOURCE_TYPE_TICKETS,
            'id_count': total,
            '@table_index': table_indexes[left_col, right_col],
        }


class ImportYandexTicketsDump(yt_luigi.BaseYtTask):
    """ Luigi task that map-reduces the tickets table into pair tables.

    A single map-reduce over ``ticket_orders_aletuid`` writes, in this
    output-table order:

    * index 0    - a temporary aggregate table (dropped after the run);
    * index 1..4 - the four soup dump tables;
    * index 5..8 - one "<left>_with_<right>_<source>" dictionary table per
      entry of ``self.matching_types``.
    """
    date = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super(ImportYandexTicketsDump, self).__init__(*args, **kwargs)

        # (left id type, right id type); the order here fixes the order of
        # the dictionary output tables and therefore their table indexes
        self.matching_types = [
            (config.ID_TYPE_PUID, config.ID_TYPE_PHONE),
            (config.ID_TYPE_PUID, config.ID_TYPE_EMAIL),
            (config.ID_TYPE_YUID, config.ID_TYPE_PHONE),
            (config.ID_TYPE_YUID, config.ID_TYPE_EMAIL)
        ]
        self.puid_email_yatickets = SoupDumpTable(soup_config.puid_email_yatickets, self.date)
        self.puid_phone_yatickets = SoupDumpTable(soup_config.puid_phone_yatickets, self.date)
        self.yuid_email_yatickets = SoupDumpTable(soup_config.yuid_email_yatickets, self.date)
        self.yuid_phone_yatickets = SoupDumpTable(soup_config.yuid_phone_yatickets, self.date)

    def input_folders(self):
        return {'tickets': '//home/afisha/alet/user_actions/production/'}

    def output_folders(self):
        return {'yuid_raw': config.GRAPH_YT_DICTS_FOLDER + 'yuid_raw/'}

    def _pair_table_path(self, left_type, right_type):
        """ Path of the dictionary table for one (left, right) id type pair """
        return (self.out_f('yuid_raw') + left_type + '_with_'
                + right_type + '_' + config.ID_SOURCE_TYPE_TICKETS)

    def _soup_dumps(self):
        """ Soup dump tables in the order of their output table indexes 1..4 """
        return [
            self.puid_email_yatickets,
            self.puid_phone_yatickets,
            self.yuid_email_yatickets,
            self.yuid_phone_yatickets,
        ]

    def requires(self):
        tickets_table = self.in_f('tickets') + 'ticket_orders_aletuid'
        return [yt_luigi.ExternalInput(tickets_table)]

    def run(self):
        with yt.Transaction() as tr:
            mr.mkdir(self.out_f('yuid_raw'))

            tmp_table = self.out_f('yuid_raw') + 'temporary_table_tickets'

            # dictionary tables follow the temporary table (0) and the four
            # soup tables (1..4), hence numbering starts at 5
            table_indexes = {
                pair: position
                for position, pair in enumerate(self.matching_types, start=5)
            }
            out_tables = [
                self._pair_table_path(left_type, right_type)
                for left_type, right_type in self.matching_types
            ]

            soup_tables = [dump.create(tr) for dump in self._soup_dumps()]
            # must mirror the order of _soup_dumps()
            soup_indexes = {
                (config.ID_TYPE_PUID, config.ID_TYPE_EMAIL): 1,
                (config.ID_TYPE_PUID, config.ID_TYPE_PHONE): 2,
                (config.ID_TYPE_YUID, config.ID_TYPE_EMAIL): 3,
                (config.ID_TYPE_YUID, config.ID_TYPE_PHONE): 4,
            }

            reducer = partial(
                reduce_yandex_tickets,
                soup_indexes=soup_indexes, table_indexes=table_indexes)
            yt.run_map_reduce(
                map_yandex_tickets,
                reducer,
                self.in_f('tickets') + 'ticket_orders_aletuid',
                [tmp_table] + soup_tables + out_tables,
                reduce_by=['left_col', 'left_id', 'right_col', 'right_id']
            )

            SoupDumpTable.finalize_all(self._soup_dumps(), tr)

            mr.drop(tmp_table)

            for left_type, right_type in self.matching_types:
                mr.set_generate_date(
                    self._pair_table_path(left_type, right_type), self.date)

    def output(self):
        soup_targets = [dump.as_target() for dump in self._soup_dumps()]
        dict_targets = [
            yt_luigi.YtDateTarget(
                self._pair_table_path(left_type, right_type), self.date)
            for left_type, right_type in self.matching_types
        ]
        return dict_targets + soup_targets


if __name__ == '__main__':
    # Script entry point: point the YT client at the configured proxy, then
    # build the task for a fixed date via the scheduler on port 8083.
    yt.config.set_proxy(config.MR_SERVER)

    tasks = [ImportYandexTicketsDump('2016-09-10')]
    luigi.build(tasks, scheduler_port=8083)
