import sys
from functools import partial
import re
import urlparse
import luigi
import urllib
import yt.wrapper as yt

from data_imports.day_aggregate import reduce_device_log_events_day, finalize_device_yuid_day_tables
from lib.luigi import yt_luigi
from rtcconf import config
from utils import mr_utils as mr
from v2.soup import soup_config
from v2.soup.soup_tables import SoupDailyLogTable


def extract_uuid(referer):
    """Pull the ``uuid`` query parameter out of a referer URL.

    Returns a ``(uuid, error)`` pair:

    * ``uuid`` is the first ``uuid`` query value with every character
      outside ``a-z0-9`` removed, or ``''`` when the parameter is absent
      or the URL could not be parsed;
    * ``error`` is the exception raised while parsing, or ``''`` when
      parsing succeeded.
    """
    try:
        query = urlparse.urlparse(referer).query
        values = urlparse.parse_qs(query).get('uuid', '')
    except Exception as exc:
        # Best effort: the caller decides what to do with unparsable referers.
        return '', exc

    if not isinstance(values, list):
        # No uuid parameter in the query: the '' default comes back as is.
        return values, ''

    # parse_qs returns a list per key; only the first value is used.
    return re.sub("[^a-z0-9]", "", values[0]), ''


def map_redir(rec):
    """Mapper over redir log records that pairs a uuid with a yandexuid.

    The uuid is taken from the ``HTTP_REFERER`` field of ``rec['value']``
    and is accepted only when it is exactly 32 characters long.

    Yields, depending on the record:

    * accepted uuid and non-empty yandexuid: a uuid/yuid row without an
      explicit ``@table_index`` (presumably the first output table --
      confirm against the ``yt.run_map`` call) plus a soup record built
      with ``table_index=1``;
    * accepted uuid but no yandexuid: nothing;
    * referer that failed to parse, or a non-empty uuid of another
      length: an error row with ``@table_index`` 2;
    * no uuid and no error: nothing.
    """
    value = rec['value']
    yuid = mr.get_field_value('yandexuid', value)
    http_referer = mr.get_field_value('HTTP_REFERER', value)
    uuid, extract_error = extract_uuid(http_referer)

    if uuid and len(uuid) == 32:
        if not yuid:
            return

        # The timestamp is stored negated in both records.
        negated_ts = -int(mr.get_field_value('unixtime', value))
        yield {
            'uuid': uuid,
            'yuid': yuid,
            'ts': negated_ts,
            'source': config.ID_SOURCE_TYPE_REDIR,
        }
        yield SoupDailyLogTable.make_rec(
            yuid,
            uuid,
            soup_config.yuid_uuid_redir,
            ts=negated_ts,
            table_index=1
        )
        return

    if extract_error:
        error_text = str(extract_error)
    elif uuid:
        error_text = "Error uuid format, length - " + str(len(uuid))
    else:
        # Referer simply carries no uuid: not an error, nothing to emit.
        return

    yield {
        'http_referer': http_referer,
        'extract_error': error_text,
        '@table_index': 2,
    }


def import_redir_log(redir_log, dt, workdir, devid_raw_f, soup_log):
    """Run the YT pipeline that turns a redir log table into uuid-yuid pairs.

    Steps, in order:

    1. create ``workdir`` / ``devid_raw_f`` and the soup log directory;
    2. map ``redir_log`` with ``map_redir`` into three outputs: a temporary
       pairs table, the soup log table and a table of extraction errors;
    3. let ``soup_log`` prepare its daily tables from the log;
    4. sort the temporary pairs table and reduce it with
       ``reduce_device_log_events_day`` into the per-source table under
       ``devid_raw_f``;
    5. finalize that table with ``finalize_device_yuid_day_tables``.

    ``workdir`` and ``devid_raw_f`` are used as plain string prefixes, so
    they are expected to end with ``/``.
    """
    mr.mkdir(workdir)
    mr.mkdir(devid_raw_f)
    soup_log.ensure_dir()

    pairs_tmp_table = workdir + 'uuid_yuid_redir_tmp'
    errors_table = workdir + 'error_extract_uuids'
    soup_table = soup_log.create()

    # Output order matters: map_redir addresses the soup and error tables
    # by index (1 and 2).
    yt.run_map(
        map_redir,
        redir_log,
        [pairs_tmp_table, soup_table, errors_table],
        spec=mr.DATA_SIZE_PER_JOB_2GB_SPEC
    )

    soup_log.prepare_daily_tables_from_log()

    id_keys = [config.ID_TYPE_UUID, config.ID_TYPE_YUID]
    yt.run_sort(pairs_tmp_table, sort_by=id_keys + ['ts'])

    uuid_yuid_redir = devid_raw_f + 'uuid_yuid_' + config.ID_SOURCE_TYPE_REDIR
    day_reducer = partial(
        reduce_device_log_events_day,
        dt=dt,
        source_type=config.ID_SOURCE_TYPE_REDIR
    )
    yt.run_reduce(
        day_reducer,
        pairs_tmp_table,
        uuid_yuid_redir,
        sort_by=id_keys + ['ts'],
        reduce_by=id_keys
    )

    finalize_device_yuid_day_tables([uuid_yuid_redir])


class ImportRedirLogsDayTask(yt_luigi.BaseYtTask):
    """Luigi task that imports the redir log of a single day.

    Parameters:
        date: day whose redir log table is imported; also used to build
            the output folder names.
        run_date: compared with ``date`` in ``output()``; the soup daily
            tables are declared as outputs only when the two are equal.
    """
    date = luigi.Parameter()
    run_date = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super(ImportRedirLogsDayTask, self).__init__(*args, **kwargs)
        self.soup_log = SoupDailyLogTable(soup_config.LOG_SOURCE_REDIR, self.date)

    def _redir_log_table(self):
        """Return the path of the redir log table for ``self.date``.

        The folder is normalized to end with exactly one slash, so that
        ``requires()`` and ``run()`` always address the same table whether
        or not the configured folder carries a trailing slash.
        """
        return self.in_f('statbox').rstrip('/') + '/' + self.date

    def input_folders(self):
        """Map logical input names to their configured YT folders."""
        return {
            'statbox': config.LOG_FOLDERS['redir'],
            'dict': config.GRAPH_YT_DICTS_FOLDER
        }

    def output_folders(self):
        """Map logical output names to the per-date YT folders."""
        return {
            'raw': config.INDEVICE_YT_FOLDER + self.date + '/perfect/devid_raw_day/',
            'logs': config.INDEVICE_YT_FOLDER + self.date + '/perfect/logs/',
        }

    def requires(self):
        """Depend on the externally produced redir log table for the day."""
        return [
            yt_luigi.ExternalInput(self._redir_log_table())
        ]

    def run(self):
        """Create the output folders and run the redir log import."""
        mr.mkdir(self.out_f('raw'))
        mr.mkdir(self.out_f('logs'))

        import_redir_log(
            # Same path that requires() checked, not a separately built one.
            redir_log=self._redir_log_table(),
            dt=self.date,
            workdir=self.out_f('logs'),
            devid_raw_f=self.out_f('raw'),
            soup_log=self.soup_log
        )

    def output(self):
        """Return the targets that mark this task as complete.

        The uuid-yuid table in the 'raw' folder is always an output; the
        soup daily tables are added only when ``date == run_date``.
        """
        if self.date == self.run_date:
            soup_out_tables = self.soup_log.daily_tables_targets()
        else:
            soup_out_tables = []

        raw_table = self.out_f('raw') + 'uuid_yuid_' + config.ID_SOURCE_TYPE_REDIR
        return [yt_luigi.YtTarget(raw_table)] + soup_out_tables


if __name__ == '__main__':
    # Manual entry point; the date to process is the first CLI argument.
    yt.config.set_proxy(config.MR_SERVER)

    target_date = sys.argv[1]

    # Imported here, inside the entry-point guard, rather than at module level.
    import smart_runner

    smart_runner.run_isolated(
        '//crypta/team/rodion/CRYPTAIS-911/',
        target_date,
        ImportRedirLogsDayTask,
        ImportRedirLogsDayTask,
        date=target_date,
        run_date=target_date
    )
