import re
import sys
import urlparse
from functools import partial

import luigi
import yt.wrapper as yt

from data_imports.day_aggregate import reduce_device_log_events_day, finalize_device_yuid_day_tables
from lib.luigi import yt_luigi
from rtcconf import config
from utils import mr_utils as mr
from v2.soup import soup_config
from v2.soup.soup_tables import SoupDailyLogTable

# Matches ``yandexuid=<digits>`` starting at a word boundary; group 1 captures the digits.
yandexuid_regex = re.compile(r'\byandexuid=(\d+)')


def extract_yuid(body):
    """Return the digits of the first ``yandexuid=<digits>`` pair found in *body*.

    :param body: text to search (the mapper passes the raw ``cookies`` field);
        ``None`` or an empty value is accepted and treated as "no yuid".
    :return: the captured digit string, or ``''`` when *body* is empty/None or
        contains no ``yandexuid=`` followed by at least one digit.
    """
    # A null field must not crash the caller with a TypeError on the ``in``
    # test below; it simply carries no yuid.
    if not body:
        return ''

    # Cheap substring check first so that records without the cookie skip the regex.
    if 'yandexuid=' not in body:
        return ''

    match = yandexuid_regex.search(body)
    return match.group(1) if match else ''


def map_mob_report(rec):
    """Map one mobile-report log row to uuid-yuid pairing rows.

    Output is produced only for requests whose path is
    ``/mobilesearch/config/searchapp``, that carry a non-empty ``uuid`` query
    parameter, and whose ``cookies`` field contains a ``yandexuid``.

    For such a row with a valid timestamp the mapper yields two records:

    * ``{'yuid', 'uuid', 'ts', 'source'}`` with no explicit table index;
    * a soup record built by ``SoupDailyLogTable.make_rec`` with
      ``table_index=2``.

    If the timestamp is missing, ``None`` or not parseable as an integer, the
    input row itself is yielded with ``'@table_index'`` set to ``1`` (the
    ``..._bad`` table in ``ImportMobReportLogsDayTask.run``) and nothing else
    is emitted.

    :param rec: dict-like log row; the fields read are ``request``,
        ``cookies``, ``_logfeller_timestamp`` and ``timestamp``.  The row is
        mutated (``'@table_index'`` is added) when it is routed to the bad table.
    """
    # ``or ''`` also covers a key that is present but holds None.
    request = rec.get('request') or ''
    parsed = urlparse.urlparse(request)
    if parsed.path != '/mobilesearch/config/searchapp':
        return

    uuid = urlparse.parse_qs(parsed.query).get('uuid', [''])[0]
    if not uuid:
        return

    yuid = extract_yuid(rec.get('cookies') or '')
    if not yuid:
        return

    # Only the conversion is guarded: an error raised while building the output
    # records must not cause the row to be emitted to both good and bad tables.
    try:
        # TODO: use _logfeller_timestamp once LOGFELLER-723 is closed
        # Prefers _logfeller_timestamp and falls back to timestamp when the key is absent.
        ts = int(rec.get('_logfeller_timestamp', rec.get('timestamp')))
    except (TypeError, ValueError):
        # TypeError: no timestamp at all (None); ValueError: non-numeric text.
        rec['@table_index'] = 1
        yield rec
        return

    # The timestamp is stored negated.
    # NOTE(review): presumably so that an ascending sort by 'ts' puts the newest
    # event first - confirm against reduce_device_log_events_day.
    # NOTE(review): 'source' is ID_SOURCE_TYPE_SDK here while the task's reduce
    # step is parameterised with ID_SOURCE_TYPE_MOB_REPORT - confirm this is intended.
    yield {'yuid': yuid, 'uuid': uuid, 'ts': -ts, 'source': config.ID_SOURCE_TYPE_SDK}
    yield SoupDailyLogTable.make_rec(
        yuid,
        uuid,
        soup_config.yuid_uuid_mobreport,
        ts=-ts,
        table_index=2
    )


class ImportMobReportLogsDayTask(yt_luigi.BaseYtTask):
    """Luigi task that processes one day of the statbox mobile-report log.

    ``run`` maps the day's log with ``map_mob_report`` into a uuid-yuid log
    table, a table of rejected rows and the soup log table, then sorts and
    reduces the uuid-yuid log into the ``devid_raw_day`` table.

    ``in_f`` / ``out_f`` are not defined here; they are presumably provided by
    ``yt_luigi.BaseYtTask`` and resolve the keys returned by
    ``input_folders`` / ``output_folders`` - confirm against the base class.
    """

    # Day being processed; it is appended to the input folder and embedded in
    # the output folder paths.
    date = luigi.Parameter()
    # Compared with ``date`` in ``output()`` to decide whether the soup daily
    # tables are declared as targets.
    run_date = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super(ImportMobReportLogsDayTask, self).__init__(*args, **kwargs)
        # Soup log table handle for this source and day; used by run() and output().
        self.soup_log = SoupDailyLogTable(soup_config.LOG_SOURCE_MOBILE_REPORT, self.date)

    def input_folders(self):
        """Return the mapping of input folder keys to YT paths."""
        return dict(statbox=config.STATBOX_MOB_REPORT_LOG_FOLDER)

    def output_folders(self):
        """Return the mapping of output folder keys to per-day YT paths."""
        day_root = config.INDEVICE_YT_FOLDER + self.date + '/perfect/'
        return {
            'raw': day_root + 'devid_raw_day/',
            'logs': day_root + 'logs/',
        }

    def requires(self):
        """Depend on the statbox log table for ``date`` being present."""
        day_log = self.in_f('statbox') + self.date
        return [yt_luigi.ExternalInput(day_log)]

    def run(self):
        """Map the day's log, fill the soup log, then sort, reduce and finalize."""
        raw_dir = self.out_f('raw')
        mr.mkdir(raw_dir)
        logs_dir = self.out_f('logs')
        mr.mkdir(logs_dir)
        self.soup_log.ensure_dir()

        soup_tmp_table = self.soup_log.create()

        pairs_log = logs_dir + 'uuid_yuid_mob_report'
        # The position in this list is the table index used by map_mob_report:
        # 0 - uuid-yuid rows, 1 - rejected rows, 2 - soup rows.
        map_outputs = [
            pairs_log,
            logs_dir + 'uuid_yuid_mob_report_bad',
            soup_tmp_table,
        ]
        yt.run_map(map_mob_report, self.in_f('statbox') + self.date, map_outputs)

        self.soup_log.prepare_daily_tables_from_log()

        id_keys = [config.ID_TYPE_UUID, config.ID_TYPE_YUID]
        yt.run_sort(pairs_log, sort_by=id_keys + ['ts'])

        out_table = raw_dir + 'uuid_yuid_' + config.ID_SOURCE_TYPE_MOB_REPORT
        day_reducer = partial(
            reduce_device_log_events_day,
            dt=self.date,
            source_type=config.ID_SOURCE_TYPE_MOB_REPORT,
        )
        yt.run_reduce(
            day_reducer,
            pairs_log,
            out_table,
            sort_by=id_keys + ['ts'],
            reduce_by=id_keys,
        )

        finalize_device_yuid_day_tables([out_table])

    def output(self):
        """Return the raw uuid-yuid table target plus, when ``date`` equals
        ``run_date``, the soup daily table targets."""
        soup_daily_targets = self.soup_log.daily_tables_targets() if self.date == self.run_date else []
        raw_table = self.out_f('raw') + 'uuid_yuid_' + config.ID_SOURCE_TYPE_MOB_REPORT
        return [yt_luigi.YtTarget(raw_table)] + soup_daily_targets


if __name__ == '__main__':
    # Manual entry point; the first command-line argument is used as both
    # ``date`` and ``run_date`` of the task.
    yt.config.set_proxy(config.MR_SERVER)

    day = sys.argv[1]

    # Imported here rather than at module top, so it is only needed when the
    # file is executed as a script.
    import smart_runner

    # NOTE(review): hard-coded path that looks like a personal/ticket sandbox -
    # confirm before reusing this entry point.
    isolated_root = '//home/crypta/team/rodion/CRYPTAIS-1073-pp/'
    smart_runner.run_isolated(
        isolated_root,
        day,
        ImportMobReportLogsDayTask,
        ImportMobReportLogsDayTask,
        date=day,
        run_date=day,
    )
