import urllib
from functools import partial
from urlparse import urlparse

import luigi
import yt.wrapper as yt

from data_imports.day_aggregate import reduce_yuid_log_events_day, reduce_device_log_events_day, \
    reduce_yuid_log_events_days, finalize_yuid_with_x_day_tables, finalize_device_yuid_day_tables
from lib.luigi import base_luigi_task
from lib.luigi import yt_luigi
from rtcconf import config
from utils import mr_utils as mr
from utils import utils
from v2.soup import soup_config
from v2.soup.soup_tables import SoupDailyLogTable

# NOTE(review): not referenced anywhere in the visible part of this module;
# presumably a leftover limit -- confirm it is unused elsewhere before removing.
OOM_LIMIT = 100


def map_bar_log(rec, ajax_parsers):
    """Map one bar-navig-log record to rows for the indexed output tables.

    Records whose ``yandexuid`` is missing, empty or ``'-'``, or which have
    no ``unixtime``, produce nothing.  For the rest, ``http_params`` is
    inspected and rows are emitted with these ``@table_index`` values:

    * 0 -- yuid / normalised ``ui`` pair (``ui`` present, ``uuid`` absent);
    * 1 -- yuid / ``r1`` pair;
    * 2 -- yuid / id pair reported by ``parse_ajax`` for ``decoded_ajax``;
    * 3 -- yuid / ``uuid`` pair (``uuid`` present, ``ui`` absent);
    * 4 -- the input record itself when both ``ui`` and ``uuid`` are present;
    * 5 -- ``ui`` values that ``parse_uuid`` rejected;
    * 6 -- soup rows built by ``SoupDailyLogTable.make_rec``.

    Soup rows and table 3 rows carry the negated timestamp; tables 0 and 1
    carry it as is.

    :param rec: input row (dict); mutated and re-emitted for table 4.
    :param ajax_parsers: parser mapping handed to ``parse_ajax`` unchanged.
    :raises ValueError: if ``unixtime`` cannot be converted with ``int``.
    :raises Exception: if ``parse_ajax`` reports an id type other than
        VKCOM, OKRU or EMAIL (the table 2 row for it is already emitted).
    """
    yuid = rec.get('yandexuid')
    ts_str = rec.get('unixtime')
    if not yuid or yuid == '-' or not ts_str:
        return

    ts = int(ts_str)
    http_params = rec.get('http_params', '')

    def http_param(name):
        # every field is read from the same '&'-separated parameter string
        return mr.get_field_value(name, http_params, '&')

    def soup_row(id_value, edge_type):
        # all soup rows of this record share yuid, negated ts and table 6
        return SoupDailyLogTable.make_rec(
            yuid,
            id_value,
            edge_type,
            ts=-ts,
            table_index=6
        )

    def ajax_edge_type(id_type):
        if id_type == config.ID_TYPE_VKCOM:
            return soup_config.yuid_vkcom_barnavig
        if id_type == config.ID_TYPE_OKRU:
            return soup_config.yuid_okru_barnavig
        if id_type == config.ID_TYPE_EMAIL:
            return soup_config.yuid_mailru_barnavig
        raise Exception('Not implemented yet')

    orig_ui = http_param('ui')
    r1 = http_param('r1') or ''
    uuid = http_param('uuid')
    yasoft = http_param('yasoft')
    decoded_ajax = http_param('decoded_ajax')

    # parse ajax requests
    # NOTE(review): a ValueError here ends the whole record -- rows already
    # yielded stay, but the r1 / ui / uuid rows below are never produced.
    try:
        for id_value, id_type in parse_ajax(decoded_ajax, ajax_parsers):
            yield {
                'id_value': id_value,
                'id_type': id_type,
                'yuid': yuid,
                '@table_index': 2,
            }
            yield soup_row(id_value, ajax_edge_type(id_type))
    except ValueError:
        return

    if r1:
        yield {
            'id_value': r1,
            'id_type': config.ID_TYPE_BAR_R1,
            'source': config.ID_SOURCE_TYPE_BARLOG,
            'yuid': yuid,
            'ts': ts,
            'yasoft': yasoft,
            '@table_index': 1,
        }
        yield soup_row(r1, soup_config.yuid_r1_barnavig)

    if orig_ui:
        if uuid:
            # both identifiers present: keep the raw record for inspection
            rec['@table_index'] = 4
            yield rec
        else:  # desktop browser
            ui = parse_uuid(orig_ui)
            if not ui:  # to track parsing errors
                yield {'orig_ui': orig_ui, 'yuid': yuid, '@table_index': 5}
            else:
                yield {
                    'id_value': ui,
                    'orig_id': orig_ui,
                    'id_type': config.ID_TYPE_BAR_UI,
                    'source': config.ID_SOURCE_TYPE_BARLOG,
                    'yuid': yuid,
                    'ts': ts,
                    'yasoft': yasoft,
                    '@table_index': 0,
                }
                yield soup_row(ui, soup_config.yuid_ui_barnavig)
    elif uuid:
        yield {
            'uuid': uuid,
            'yuid': yuid,
            'ts': -ts,
            'source_type': config.ID_SOURCE_TYPE_YABROWSER_ANDROID,
            '@table_index': 3,
        }
        yield soup_row(uuid, soup_config.yuid_uuid_barnavig)


def parse_uuid(ui):
    """Normalise a raw ``ui`` value to an upper-case 36-character id.

    A leading ``%7B`` (with the matching three trailing characters) and a
    leading ``{`` (with the matching trailing character) are stripped, and a
    32-character value gets dashes inserted in the 8-4-4-4-12 layout.

    :param ui: raw value taken from the log.
    :return: the upper-cased 36-character id, or ``''`` when the value is
        shorter than 32 characters, contains ``%12`` or does not end up
        36 characters long.
    """
    # '%12' is some frequent specific case that is always rejected
    if len(ui) < 32 or '%12' in ui:
        return ''

    candidate = ui
    if candidate.startswith('%7B'):  # has encoded brace
        candidate = candidate[3:-3]
    if candidate.startswith('{'):
        candidate = candidate[1:-1]

    if len(candidate) == 32:  # format without dashes
        candidate = '-'.join((
            candidate[:8],
            candidate[8:12],
            candidate[12:16],
            candidate[16:20],
            candidate[20:],
        ))

    return candidate.upper() if len(candidate) == 36 else ''


def parse_mailru_social_ajax(ajax, resource, network='vk.com', id_prefix='vk'):
    """Extract a social-network id from an ajax request sent to r3.mail.ru.

    The id is taken from the first ``<id_prefix>_id=`` query parameter of
    ``ajax``, and only when the page host of ``resource`` is exactly
    ``network`` and the ajax host is exactly ``r3.mail.ru``.

    :param ajax: URL of the ajax request.
    :param resource: URL of the page that issued the request.
    :param network: host the page must belong to.
    :param id_prefix: prefix of the query parameter name (``vk`` -> ``vk_id``).
    :return: one-element list with the id value (possibly an empty string),
        or an empty list when nothing matches.
    """
    if urlparse(resource).netloc != network:
        return []

    ajax_query = urlparse(ajax)
    if ajax_query.netloc != 'r3.mail.ru':
        return []

    param_prefix = id_prefix + '_id='
    for qparam in ajax_query.query.split('&'):
        if qparam.startswith(param_prefix):
            # take everything after the first '=' so that a value which
            # itself contains '=' is not truncated
            return [qparam[len(param_prefix):]]
    return []


def parse_mailru_email_ajax(ajax, resource):
    """Extract the ``user`` value from an ajax request sent to filin.mail.ru.

    The value is taken from the first ``user=`` parameter of the unquoted
    query string of ``ajax``, and only when the page host of ``resource`` is
    ``mail.ru`` or one of its subdomains and the ajax host is exactly
    ``filin.mail.ru``.

    :param ajax: URL of the ajax request.
    :param resource: URL of the page that issued the request.
    :return: one-element list with the value (possibly an empty string),
        or an empty list when nothing matches.
    """
    source = urlparse(resource).netloc
    if not (source == 'mail.ru' or source.endswith('.mail.ru')):
        return []

    ajax_query = urlparse(ajax)
    if ajax_query.netloc != 'filin.mail.ru':
        return []

    param_prefix = 'user='
    # the query is unquoted before splitting, as in the original logic
    for qparam in urllib.unquote(ajax_query.query).split('&'):
        if qparam.startswith(param_prefix):
            # take everything after the first '=': unquoting can turn '%3D'
            # into '=', which must not truncate the value
            return [qparam[len(param_prefix):]]
    return []


def parse_ajax(decoded_ajax, parsers):
    """Yield ``(id_value, id_type)`` pairs found in a ``decoded_ajax`` value.

    The value is unquoted, its outer brackets are stripped and it is split
    on ``'],['`` into per-site chunks.  Each chunk is split on ``','``: the
    first item is the page URL, the remaining items are ajax request URLs
    (surrounding double quotes are removed from every item).  A parser is
    applied to every ajax request of a chunk when the page host ends with
    the parser's ``social_resource``.

    :param decoded_ajax: raw parameter value; ``None`` or an empty string
        yields nothing.
    :param parsers: mapping ``id_type -> (social_resource, parser)`` where
        ``parser(ajax_request, resource)`` returns an iterable of id values.
    :return: generator of ``(id_value, id_type)`` tuples; falsy id values
        are skipped.

    Exceptions raised by ``urlparse`` or by a parser propagate to the caller.
    """
    if not decoded_ajax:
        # a missing parameter would otherwise crash inside urllib.unquote
        return

    ajax_series = urllib.unquote(decoded_ajax)
    # WARNING [[]]: the series is split textually, so a URL that itself
    # contains ',' or '],[' is cut into pieces
    for site_ajax in ajax_series.strip('[]').split('],['):
        items = site_ajax.split(',')
        resource = items[0].strip('"')
        netloc = urlparse(resource).netloc
        for id_type, (social_resource, parser) in parsers.items():
            if not netloc.endswith(social_resource):
                continue
            for ajax_request in items[1:]:
                for id_value in parser(ajax_request.strip('"'), resource):
                    if id_value:
                        yield (id_value, id_type)


def run_import(dt, statbox_source, workdir, yuid_raw_folder, devid_raw_folder, soup_log):
    """
    Imports UI(for desktop) or uuid(for mobile) from bar-navig-log for a single date.
    UI is unique identifier of desktop Yandex.Browser installation (previously it was used as Yandex.Bar identifier)
    deviceid is android device id - identifier of a device.
    It helps to track YB requests across sessions and make in-device matching.

    Steps: map the source log with ``map_bar_log`` into intermediate tables
    under ``workdir`` plus the soup log table, sort the intermediate tables,
    reduce them into per-day tables and finalize those.

    :param dt: date handed to the day reducers.
    :param statbox_source: path of the input bar-navig-log table.
    :param workdir: path prefix for intermediate tables; table names are
        appended directly, so it is expected to end with '/'.
    :param yuid_raw_folder: path prefix for the ``yuid_with_*`` result tables.
    :param devid_raw_folder: path prefix for the ``uuid_yuid_*`` result table.
    :param soup_log: soup daily log table object; ``ensure_dir()``,
        ``create()`` and ``prepare_daily_tables_from_log()`` are called on it.
    """
    mr.mkdir(workdir)
    mr.mkdir(yuid_raw_folder)
    mr.mkdir(devid_raw_folder)
    soup_log.ensure_dir()

    # intermediate tables; their order in run_map matches the table indexes
    # emitted by map_bar_log
    ui_yuid_table = workdir + 'ui_yuid_desktop_yasoft'
    r1_yuid_table = workdir + 'r1_yuid_desktop_yasoft'
    ajax_id_table = workdir + 'id_yuid_yabrowser_ajax'
    uuid_yuid_table = workdir + 'uuid_yuid_' + config.ID_SOURCE_TYPE_YABROWSER_ANDROID
    ambiguity_table = workdir + 'desktop_mobile_ambiguity'
    ui_parsing_err_table = workdir + 'ui_parsing_err'

    ajax_parsers = {
        config.ID_TYPE_VKCOM: ('vk.com',
                               partial(parse_mailru_social_ajax,
                                       network='vk.com',
                                       id_prefix='vk')),
        config.ID_TYPE_OKRU: ('ok.ru',
                              partial(parse_mailru_social_ajax,
                                      network='ok.ru',
                                      id_prefix='ok')),
        config.ID_TYPE_EMAIL: ('mail.ru', parse_mailru_email_ajax),
    }

    yt.run_map(partial(map_bar_log, ajax_parsers=ajax_parsers),
               statbox_source,
               [ui_yuid_table,
                r1_yuid_table,
                ajax_id_table,
                uuid_yuid_table,
                ambiguity_table,
                ui_parsing_err_table,
                soup_log.create(),
                ],
               spec=mr.DATA_SIZE_PER_JOB_2GB_SPEC)

    soup_log.prepare_daily_tables_from_log()

    # prepare day data
    utils.wait_all([
        yt.run_sort(ui_yuid_table,
                    sort_by=config.ID_TYPE_YUID, sync=False),
        yt.run_sort(r1_yuid_table,
                    sort_by=config.ID_TYPE_YUID, sync=False),
        yt.run_sort(ajax_id_table,
                    sort_by=[config.ID_TYPE_YUID, 'id_type'], sync=False),
        yt.run_sort(uuid_yuid_table,
                    sort_by=[config.ID_TYPE_UUID, config.ID_TYPE_YUID, 'ts'], sync=False),
        # started asynchronously like the other sorts, so that wait_all is
        # what waits for it
        yt.run_sort(ui_parsing_err_table, sort_by=['orig_ui'], sync=False),
    ])

    yuid_with_ui = yuid_raw_folder + 'yuid_with_' + config.ID_TYPE_BAR_UI
    yuid_with_r1 = yuid_raw_folder + 'yuid_with_' + config.ID_TYPE_BAR_R1
    yuid_with_vk = yuid_raw_folder + 'yuid_with_' + config.ID_TYPE_VKCOM + '_' + config.ID_SOURCE_TYPE_BARLOG
    yuid_with_ok = yuid_raw_folder + 'yuid_with_' + config.ID_TYPE_OKRU + '_' + config.ID_SOURCE_TYPE_BARLOG
    yuid_with_email = yuid_raw_folder + 'yuid_with_' + config.ID_TYPE_EMAIL + '_' + config.ID_SOURCE_TYPE_BARLOG
    uuid_yuid_yabro_android = devid_raw_folder + 'uuid_yuid_' + config.ID_SOURCE_TYPE_YABROWSER_ANDROID

    utils.wait_all([
        yt.run_reduce(partial(reduce_yuid_log_events_day, dt=dt,
                              id_type=config.ID_TYPE_BAR_UI,
                              source_type=config.ID_SOURCE_TYPE_BARLOG),
                      ui_yuid_table,
                      yuid_with_ui,
                      reduce_by='yuid', sync=False),

        yt.run_reduce(partial(reduce_yuid_log_events_day, dt=dt,
                              id_type=config.ID_TYPE_BAR_R1,
                              source_type=config.ID_SOURCE_TYPE_BARLOG),
                      r1_yuid_table,
                      yuid_with_r1,
                      reduce_by='yuid', sync=False),

        # one reducer fans the ajax ids out to three tables by id type
        yt.run_reduce(partial(reduce_yuid_log_events_days, dt=dt,
                              source_type=config.ID_SOURCE_TYPE_BARLOG,
                              id_type_to_idx={config.ID_TYPE_VKCOM: 0,
                                              config.ID_TYPE_OKRU: 1,
                                              config.ID_TYPE_EMAIL: 2}),
                      ajax_id_table,
                      [yuid_with_vk,
                       yuid_with_ok,
                       yuid_with_email],
                      reduce_by=['yuid', 'id_type'], sync=False),

        yt.run_reduce(partial(reduce_device_log_events_day,
                              dt=dt,
                              source_type=config.ID_SOURCE_TYPE_YABROWSER_ANDROID),
                      uuid_yuid_table,
                      uuid_yuid_yabro_android,
                      sort_by=[config.ID_TYPE_UUID, config.ID_TYPE_YUID, 'ts'],
                      reduce_by=[config.ID_TYPE_UUID, config.ID_TYPE_YUID], sync=False),
    ])

    finalize_yuid_with_x_day_tables([
        yuid_with_ui,
        yuid_with_r1,
        yuid_with_vk,
        yuid_with_ok,
        yuid_with_email,
    ])

    finalize_device_yuid_day_tables([
        uuid_yuid_yabro_android
    ])

    # only these two intermediate tables are removed; the others are kept
    mr.drop(ui_yuid_table)
    mr.drop(r1_yuid_table)


class ImportBarNavigDayTask(base_luigi_task.BaseTask):
    """Luigi task that runs ``run_import`` for the bar-navig-log of ``date``."""

    date = luigi.Parameter()
    run_date = luigi.Parameter()

    resources = {'import_bar_navig_lock': 1}

    def __init__(self, *args, **kwargs):
        super(ImportBarNavigDayTask, self).__init__(*args, **kwargs)
        self.soup_log = SoupDailyLogTable(soup_config.LOG_SOURCE_BAR_NAVIG_LOG, self.date)

    def requires(self):
        """Depend on the source log table of ``date``."""
        source_table = config.STATBOX_BAR_LOG_FOLDER + self.date
        return yt_luigi.ExternalInput(source_table)

    def run(self):
        """Import the day into the graph and in-device folders of ``date``."""
        graph_root = config.YT_OUTPUT_FOLDER + self.date + '/'
        device_root = config.INDEVICE_YT_FOLDER + self.date + '/'

        run_import(
            self.date,
            config.STATBOX_BAR_LOG_FOLDER + self.date,
            graph_root + 'barnavig/',
            graph_root + 'yuid_raw/',
            device_root + 'perfect/devid_raw_day/',
            self.soup_log,
        )

    def output(self):
        """Return the day tables, plus soup tables when ``date == run_date``."""
        soup_out_tables = self.soup_log.daily_tables_targets() if self.date == self.run_date else []

        yuid_with = config.YT_OUTPUT_FOLDER + self.date + '/yuid_raw/yuid_with_'
        barlog_suffix = '_' + config.ID_SOURCE_TYPE_BARLOG
        uuid_yuid = (config.INDEVICE_YT_FOLDER + self.date +
                     '/perfect/devid_raw_day/uuid_yuid_' + config.ID_SOURCE_TYPE_YABROWSER_ANDROID)

        table_paths = [
            yuid_with + config.ID_TYPE_BAR_UI,
            yuid_with + config.ID_TYPE_BAR_R1,
            uuid_yuid,
            yuid_with + config.ID_TYPE_VKCOM + barlog_suffix,
            yuid_with + config.ID_TYPE_OKRU + barlog_suffix,
            yuid_with + config.ID_TYPE_EMAIL + barlog_suffix,
        ]
        return [yt_luigi.YtTarget(path) for path in table_paths] + soup_out_tables


if __name__ == '__main__':
    # Ad-hoc run of the map stage only, against a fixed source table and a
    # personal work directory.
    yt.config.set_proxy(config.MR_SERVER)

    workdir = '//home/crypta/team/artembelov/barlog/'
    mr.mkdir(workdir)

    yt.run_map(partial(map_bar_log,
                       ajax_parsers={
                           config.ID_TYPE_VKCOM: ('vk.com',
                                                  partial(parse_mailru_social_ajax,
                                                          network='vk.com',
                                                          id_prefix='vk')),
                           config.ID_TYPE_OKRU: ('ok.ru',
                                                 partial(parse_mailru_social_ajax,
                                                         network='ok.ru',
                                                         id_prefix='ok')),
                           config.ID_TYPE_EMAIL: ('mail.ru', parse_mailru_email_ajax),
                       }),
               '//statbox/bar-navig-log/2016-10-27',
               [workdir + 'ui_yuid_desktop_yasoft',
                workdir + 'r1_yuid_desktop_yasoft',
                workdir + 'id_yuid_yabrowser_ajax',
                workdir + 'uuid_yuid_' + config.ID_SOURCE_TYPE_YABROWSER_ANDROID,
                workdir + 'desktop_mobile_ambiguity',
                workdir + 'ui_parsing_err',
                # map_bar_log builds soup rows with table_index=6, so a seventh
                # output table is needed (run_import passes the soup log table
                # in this position).
                # NOTE(review): assumes make_rec routes rows by that index -- confirm.
                workdir + 'soup_daily_log',
                ],
               spec=mr.DATA_SIZE_PER_JOB_2GB_SPEC)

