import os
import re
from functools import partial

import luigi
import yt.wrapper as yt

from data_imports.day_aggregate import reduce_device_log_events_day, finalize_device_yuid_day_tables
from lib.luigi.yt_luigi import BaseYtTask, ExternalInput, YtTarget
from rtcconf import config
from utils import mr_utils as mr
from v2.soup import soup_config
from v2.soup.soup_tables import SoupDailyLogTable

# Finds `<name>=<value>` pairs whose name is one of idfa-gps-adid, googlead-id
# or advertising_id. Group 1 captures the name, group 2 the value (a run of
# ASCII letters, digits and hyphens). get_device_ids applies it to the
# 'ExtPostBack' field of a log row.
additional_gaid_regex = re.compile(r'(idfa-gps-adid|googlead-id|advertising_id)=([a-zA-Z0-9-]+)')


def get_device_ids(rec):
    """Yield ``(orig_id_name, device_id)`` pairs found in a postclick log row.

    The dedicated 'IDFA' and 'GoogleAdID' fields are reported first, labelled
    'idfa' and 'gaid'. Only when both of them are empty is the 'ExtPostBack'
    field scanned with ``additional_gaid_regex``; each match is then yielded
    under the parameter name it was found with.

    :param rec: mapping-like log row (only ``.get`` is used).
    """
    has_direct_id = False
    for label, column in (('idfa', 'IDFA'), ('gaid', 'GoogleAdID')):
        value = rec.get(column)
        if value:
            has_direct_id = True
            yield label, value

    postback = rec.get('ExtPostBack')
    if has_direct_id or not postback:
        # Either an explicit id was already reported or there is nothing to parse.
        return

    for param_name, param_value in additional_gaid_regex.findall(postback):
        yield param_name, param_value


def map_postclick_log(rec):
    """Map one postclick log row to output rows routed by '@table_index'.

    For every device id returned by ``get_device_ids`` and a non-empty
    'UserID' the mapper emits:

    * index 0 -- a devid/yuid pair row carrying the source type, the original
      id name, 'MatchType' and the negated event time;
    * index 1 -- a soup row built by ``SoupDailyLogTable.make_rec``: edge type
      ``yuid_gaid_postclick`` for device type 'android', ``yuid_idfa_postclick``
      for device type 'ios' (compared case-insensitively), nothing for any
      other device type.

    Special cases:

    * index 2 -- when 'UserID' equals 'CryptaID' the original row is emitted
      once and processing of the row stops;
    * index 3 -- when anything raises, the original row is emitted with the
      exception text stored under 'error'. Rows already yielded before the
      failure are not retracted.

    ImportPostclicksDailyTask.run wires these indexes to its output tables in
    the same order.

    :param rec: mutable mapping for one log row; it is modified in place when
        routed to index 2 or 3.
    """
    try:
        for orig_id_name, devid in get_device_ids(rec):
            # Parsed inside the loop on purpose: a row without any device id
            # must be skipped silently, not reported as a bad 'EventTime'.
            ts = int(rec['EventTime'])
            yuid = rec.get('UserID')
            crypta_id = rec.get('CryptaID')
            # `or ''` also covers an explicit None value, which would otherwise
            # raise AttributeError and push a valid row into the error table.
            device_os = (rec.get('DetailedDeviceType') or '').lower()

            if not yuid:
                continue

            if yuid == crypta_id:
                rec['@table_index'] = 2
                yield rec
                return

            yield {
                'source_type': config.ID_SOURCE_TYPE_POSTCLICK,
                config.ID_TYPE_DEVID: devid,
                config.ID_TYPE_YUID: yuid,
                'orig_id_name': orig_id_name,
                'match_type': rec.get('MatchType'),
                # NOTE(review): negated presumably so an ascending sort puts
                # the newest event first -- confirm against the reducer.
                'ts': -ts,
                '@table_index': 0
            }

            # device_os is lowercased above, so both comparisons must use
            # lowercase literals (the former 'iOS' literal could never match).
            if device_os == 'android':
                yield SoupDailyLogTable.make_rec(
                    yuid, devid, soup_config.yuid_gaid_postclick,
                    ts=-ts, table_index=1, )
            elif device_os == 'ios':
                yield SoupDailyLogTable.make_rec(
                    yuid, devid, soup_config.yuid_idfa_postclick,
                    ts=-ts, table_index=1, )

    except Exception as e:
        # Best-effort: keep the job running and park the offending row.
        rec['error'] = str(e)
        rec['@table_index'] = 3
        yield rec


class ImportPostclicksDailyTask(BaseYtTask):
    """Luigi task importing one day of the postclick log.

    The task maps the day's postclick log table with ``map_postclick_log``,
    asks the soup daily log to prepare its daily tables, then sorts and
    reduces the devid/yuid pairs into a per-day table under the 'devid_raw'
    output folder.

    NOTE(review): ``in_f`` / ``out_f`` come from ``BaseYtTask`` and presumably
    resolve the keys of ``input_folders()`` / ``output_folders()`` -- confirm.
    """

    # Day of the log to import; used as a path component of input and output.
    date = luigi.Parameter()
    # Day the pipeline is run for; soup daily tables are expected as outputs
    # only when it equals `date` (see output()).
    run_date = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super(ImportPostclicksDailyTask, self).__init__(*args, **kwargs)
        self.soup_log = SoupDailyLogTable(soup_config.LOG_SOURCE_POSTCLICK, self.date)

    def input_folders(self):
        """Return the named input folders of the task."""
        return {
            'postclick': config.LOG_FOLDERS['postclick']
        }

    def workdir(self):
        """Return the folder (with trailing slash) for intermediate tables."""
        return os.path.join(config.INDEVICE_YT_FOLDER, self.date, 'postclick') + '/'

    def output_folders(self):
        """Return the named output folders of the task (with trailing slash)."""
        return {
            'devid_raw': os.path.join(config.INDEVICE_YT_FOLDER, self.date, 'perfect', 'devid_raw_day') + '/'
        }

    def _postclick_log_table(self):
        """Return the path of the input postclick log table for ``self.date``."""
        return os.path.join(self.in_f('postclick'), self.date)

    def _devid_yuid_table(self):
        """Return the full path of the final devid/yuid postclick day table."""
        return self.out_f('devid_raw') + '%s_%s_%s' % (
            config.ID_TYPE_DEVID, config.ID_TYPE_YUID, config.ID_SOURCE_TYPE_POSTCLICK
        )

    def requires(self):
        return [ExternalInput(self._postclick_log_table())]

    def run(self):
        workdir = self.workdir()

        mr.mkdir(workdir)
        mr.mkdir(self.out_f('devid_raw'))
        self.soup_log.ensure_dir()

        postclick_log = self._postclick_log_table()
        out_tmp_t = workdir + 'postback_tmp'

        # The position of each output table must match the '@table_index'
        # values (0..3) emitted by map_postclick_log.
        yt.run_map(map_postclick_log,
                   postclick_log,
                   [out_tmp_t,
                    self.soup_log.create(),
                    workdir + 'crypta_id_postclicks',
                    workdir + 'bad_postclicks'
                    ])

        self.soup_log.prepare_daily_tables_from_log()

        yt.run_sort(out_tmp_t, sort_by=[config.ID_TYPE_DEVID, config.ID_TYPE_YUID, 'ts'])

        out_t = self._devid_yuid_table()
        yt.run_reduce(partial(reduce_device_log_events_day,
                              dt=self.date,
                              source_type=config.ID_SOURCE_TYPE_POSTCLICK),
                      out_tmp_t,
                      out_t,
                      sort_by=[config.ID_TYPE_DEVID, config.ID_TYPE_YUID, 'ts'],
                      reduce_by=[config.ID_TYPE_DEVID, config.ID_TYPE_YUID])

        finalize_device_yuid_day_tables([out_t])

    def output(self):
        """Return the targets that mark the task as complete.

        The devid/yuid day table is always expected (it may be empty); the
        soup daily tables are expected only when ``date == run_date``.
        """
        if self.date == self.run_date:
            soup_out_tables = self.soup_log.daily_tables_targets()
        else:
            soup_out_tables = []

        # The helper already returns the complete table path. The previous
        # extra os.path.join with the folder only produced the right result
        # because os.path.join discards earlier components when a later one
        # is absolute.
        return [YtTarget(self._devid_yuid_table(), allow_empty=True)] + soup_out_tables


if __name__ == '__main__':
    # Ad-hoc manual run: redirect both output roots to a personal test folder
    # and build the task for a single fixed day with the local scheduler.
    test_folder = '//home/crypta/team/artembelov/postclick_test/'
    test_day = '2017-11-15'

    yt.config.set_proxy('hahn.yt.yandex.net')
    # NOTE(review): process_table_index=True is presumably what lets the mapper
    # route rows through the '@table_index' field -- confirm against YT docs.
    yt.config["tabular_data_format"] = yt.YsonFormat(process_table_index=True)

    config.YT_OUTPUT_FOLDER = test_folder
    config.INDEVICE_YT_FOLDER = test_folder

    task = ImportPostclicksDailyTask(test_day, test_day)
    luigi.build([task], local_scheduler=True)
