# -*- coding: utf-8 -*-
from __future__ import print_function, absolute_import, division

import re
import ast
import urlparse
import logging

from nile.api.v1 import Record
from qb2.api.v1 import (
    extractors as qe,
    filters as qf
)

from ..utils.tools import (
    safe_dict_get,
    safe_list_get,
    parse_key_value_string
)
from ..utils.json import safe_decode_json


class KeyBuilder(object):
    """Builds fixed-width, digits-only join keys from request/show ids.

    A key is valid only when it is exactly ``KEY_SIZE`` characters long and
    consists of digits; every constructor returns ``None`` otherwise.
    """

    KEY_SIZE = 22     # required length of a key
    SHOWID_SIZE = 8   # number of leading showid characters skipped before the key

    @classmethod
    def _validated(cls, candidate):
        """Return ``candidate`` if it is a well-formed key, else ``None``."""
        well_formed = len(candidate) == cls.KEY_SIZE and candidate.isdigit()
        return candidate if well_formed else None

    @classmethod
    def from_reqid(cls, reqid):
        """Build a key from the first ``KEY_SIZE`` digit characters of ``reqid``.

        Non-digit characters are dropped before truncation. Returns ``None``
        when ``reqid`` is ``None`` or contains fewer than ``KEY_SIZE`` digits.
        """
        if reqid is None:
            return None
        digits = ''.join([char for char in reqid if char.isdigit()])
        return cls._validated(digits[:cls.KEY_SIZE])

    @classmethod
    def from_showid(cls, showid):
        """Build a key from the ``KEY_SIZE`` characters following the showid prefix.

        Returns ``None`` when ``showid`` is ``None`` or the extracted slice is
        not exactly ``KEY_SIZE`` digits.
        """
        if showid is None:
            return None
        start = cls.SHOWID_SIZE
        return cls._validated(showid[start:start + cls.KEY_SIZE])


class RedirMapper(object):
    """Mapper that flattens redir-log rows into unified event records.

    Every input record is copied into a mutable ``context`` dict, enriched
    in place by the private ``__add_*`` steps and re-emitted as a ``Record``
    with a fixed set of columns and ``origin='redir_log'``.
    """

    def __init__(self, config):
        """Store ``config`` and pre-compile ``config['serp_regex']``."""
        super(RedirMapper, self).__init__()
        self.config = config
        self.serp_regex = re.compile(self.config['serp_regex'])

    def __call__(self, records):
        """Yield one output ``Record`` for each record in ``records``."""
        # Order matters: later steps read keys written by earlier ones
        # (e.g. ``__add_key`` needs ``reqid``, ``__add_reqid`` needs ``ref_reqid``).
        steps = (
            self.__add_actual_referer,
            self.__add_from_actual_referer,
            self.__add_distr_obj,
            self.__add_reqid,
            self.__add_showid,
            self.__add_from_parsed_vars,
            self.__add_lr,
            self.__add_key,
            self.__add_origin,
        )
        # Columns copied to the output under the same name.
        same_name_columns = (
            'key', 'distr_obj', 'eventtype', 'lr', 'bannerid', 'score',
            'showid', 'device', 'product', 'yandexuid', 'reqid', 'adata',
            'type', 'origin',
        )
        for record in records:
            context = record.to_dict()
            for step in steps:
                step(context)
            columns = dict((name, context[name]) for name in same_name_columns)
            # Two columns are renamed on the way out.
            columns['unixtime'] = context['timestamp']
            columns['referer'] = context['actual_referer']
            yield Record(**columns)

    def __add_actual_referer(self, context):
        """Pick ``url`` when it matches the SERP regex, otherwise ``referer``."""
        url = context['url']
        if url and self.serp_regex.match(url):
            context['actual_referer'] = url
        else:
            context['actual_referer'] = context['referer']

    def __add_from_actual_referer(self, context):
        """Extract selected query parameters of ``actual_referer`` as ``ref_*`` keys."""
        try:
            query = urlparse.urlparse(context['actual_referer']).query
            parameters = urlparse.parse_qs(query)
        except Exception:
            # Best effort: an unparsable referer yields no parameters.
            parameters = None
        # NOTE(review): the 'banerid' spelling is kept exactly as in the
        # original code — confirm it matches the real query parameter name.
        targets = (
            ('ref_host', 'host'),
            ('ref_lr', 'lr'),
            ('ref_reqid', 'reqid'),
            ('ref_banerid', 'banerid'),
        )
        for context_key, parameter in targets:
            context[context_key] = safe_list_get(safe_dict_get(parameters, parameter), 0)

    def __add_distr_obj(self, context):
        """Derive ``distr_obj`` from the path (prefix stripped) or the referer host."""
        path = context['path']
        if path:
            distr_obj = path[len(self.config['atom_path_prefix']):]
        elif context['actual_referer']:
            distr_obj = context['ref_host']
        else:
            distr_obj = None
        in_training = safe_dict_get(context['parsed_vars'], '-collectpoolmode') == '1'
        if in_training and distr_obj is not None:
            distr_obj += '.training'
        context['distr_obj'] = distr_obj

    def __add_reqid(self, context):
        """Take ``-reqid`` from parsed vars, falling back to the referer's reqid."""
        own_reqid = safe_dict_get(context['parsed_vars'], '-reqid')
        context['reqid'] = own_reqid if own_reqid else context['ref_reqid']

    def __add_showid(self, context):
        """Take ``-showid`` from parsed vars, falling back to ``ref_banerid``."""
        own_showid = safe_dict_get(context['parsed_vars'], '-showid')
        context['showid'] = own_showid if own_showid else context['ref_banerid']

    def __add_from_parsed_vars(self, context):
        """Copy dash-prefixed parsed vars into same-named context keys."""
        parsed_vars = context['parsed_vars']
        names = ('eventtype', 'bannerid', 'score', 'device', 'product', 'adata', 'type')
        for name in names:
            context[name] = safe_dict_get(parsed_vars, '-{}'.format(name))

    def __add_lr(self, context):
        """Take ``-lr`` from parsed vars, falling back to the referer's lr."""
        own_lr = safe_dict_get(context['parsed_vars'], '-lr')
        context['lr'] = own_lr if own_lr else context['ref_lr']

    def __add_key(self, context):
        """Compute the join key from ``reqid`` (may be ``None``)."""
        context['key'] = KeyBuilder.from_reqid(context['reqid'])

    def __add_origin(self, context):
        """Tag the row with its source log."""
        context['origin'] = 'redir_log'


class MetrikaMobileMapper(object):
    """Mapper that flattens metrika-mobile-log rows into unified event records.

    Every input record is copied into a mutable ``context`` dict, enriched
    in place by the private ``__add_*`` steps and re-emitted as a ``Record``
    with a fixed set of columns and ``origin='metrika_mobile_log'``.
    """

    def __init__(self, config):
        """Store the per-source ``config`` (reads ``atom_vars_trait`` and ``namings``)."""
        super(MetrikaMobileMapper, self).__init__()
        self.config = config

    def __call__(self, records):
        """Yield one output ``Record`` for each record in ``records``."""
        for record in records:
            context = record.to_dict()
            # Order matters: ``__add_key`` needs the ``reqid`` extracted by
            # ``__add_from_campaign_id``.
            self.__add_from_campaign_id(context)
            self.__add_device_id(context)
            self.__add_distr_obj(context)
            self.__add_eventtype(context)
            self.__add_testids(context)
            self.__add_service(context)
            self.__add_key(context)
            self.__add_origin(context)
            yield Record(
                key=context['key'],
                unixtime=context['unixtime'],
                distr_obj=context['distr_obj'],
                eventtype=context['eventtype'],
                bannerid=context['bannerid'],
                lr=context['lr'],
                reqid=context['reqid'],
                showid=context['showid'],
                score=context['score'],
                device=context['device'],
                device_id=context['device_id'],
                uuid=context['uuid'],
                adata=context['adata'],
                service=context['service'],
                os=context['app_platform'],  # app_platform
                product=context['product'],
                origin=context['origin'],
                testids=context['testids']
            )

    @staticmethod
    def _compose_distr_obj(host, is_training):
        """Return the distribution object name for ``host``.

        ``None`` is returned when ``host`` is empty or missing. Otherwise the
        host is returned, suffixed with ``'.training'`` when ``is_training``
        is truthy. The suffix is never applied to a missing host, which
        previously raised ``TypeError`` (``None += '.training'``).
        """
        if not host:
            return None
        if is_training:
            return host + '.training'
        return host

    def __add_from_campaign_id(self, context):
        """Parse ``campaign_id`` (trait prefix stripped) and copy known vars."""
        campaign_id = context['campaign_id'][len(self.config['atom_vars_trait']):]
        parsed_vars = parse_key_value_string(campaign_id, delimeter='=', separator=';')
        context['parsed_vars'] = parsed_vars
        for key in ['reqid', 'bannerid', 'showid', 'score', 'device', 'product', 'lr', 'adata']:
            context[key] = safe_dict_get(parsed_vars, key)

    def __add_device_id(self, context):
        """Upper-case ``device_id`` when it is present."""
        if context['device_id'] is not None:
            context['device_id'] = context['device_id'].upper()

    def __add_distr_obj(self, context):
        """Derive ``distr_obj`` from ``VARS_host`` / ``collectpoolmode``.

        The *unstripped* ``campaign_id`` is parsed here, as in the original
        code, so the looked-up keys differ from ``context['parsed_vars']``.
        """
        parsed_vars = parse_key_value_string(
            context['campaign_id'], delimeter='=', separator=';'
        )
        context['distr_obj'] = self._compose_distr_obj(
            safe_dict_get(parsed_vars, 'VARS_host'),
            safe_dict_get(parsed_vars, 'collectpoolmode')
        )

    def __add_eventtype(self, context):
        """Map ``display`` events to ``'show'``; otherwise use the reaction."""
        context['eventtype'] = (
            'show'
            if context['event_name'] == 'display' else
            context['reaction']
        )

    def __add_testids(self, context):
        """Collect test ids from ``event_value.extras.geo_test_buckets``.

        The value is split on ``';'`` and the part before the first ``','``
        of every non-empty item is kept. Defaults to an empty list.
        """
        testids = []
        extras = safe_dict_get(context['event_value'], 'extras')
        if extras is not None and 'geo_test_buckets' in extras:
            testids = [
                testid_info.split(',')[0]
                for testid_info in extras['geo_test_buckets'].split(';')
                if testid_info
            ]
        context['testids'] = testids

    def __add_service(self, context):
        """Resolve the service alias by API key, falling back to app id."""
        namings = self.config['namings']
        service_by_api_key = namings['api_keys'].get(context['api_key'])
        service_by_app_id = namings['app_ids'].get(context['app_id'])
        context['service'] = namings['aliases'].get(service_by_api_key or service_by_app_id)

    def __add_key(self, context):
        """Compute the join key from ``reqid`` (may be ``None``)."""
        context['key'] = KeyBuilder.from_reqid(context['reqid'])

    def __add_origin(self, context):
        """Tag the row with its source log."""
        context['origin'] = 'metrika_mobile_log'


class MobileTrackingMapper(object):
    """Mapper that turns mobile-tracking-log rows into ``trueinstall`` records.

    Every input record is copied into a mutable ``context`` dict, enriched
    in place by the private ``__add_*`` steps and re-emitted as a ``Record``
    with ``eventtype='trueinstall'`` and ``origin='mobile_tracking_log'``.
    """

    def __init__(self, config):
        """Store the per-source ``config`` (reads ``showid_params`` / ``reqid_params``)."""
        super(MobileTrackingMapper, self).__init__()
        self.config = config

    def __call__(self, records):
        """Yield one output ``Record`` for each record in ``records``."""
        for record in records:
            context = record.to_dict()
            # Order matters: ids need the parsed parameters, the key needs the ids.
            self.__add_parameters(context)
            self.__add_showid(context)
            self.__add_reqid(context)
            self.__add_key(context)
            self.__add_eventtype(context)
            self.__add_origin(context)
            yield Record(
                key=context['key'],
                unixtime=context['unixtime'],
                eventtype=context['eventtype'],
                origin=context['origin'],
                showid=context['showid'],
                reqid=context['reqid']
            )

    @staticmethod
    def _first_truthy(values):
        """Return the first truthy item of ``values``.

        When no item is truthy the last item is returned, which is what
        folding with ``lhs or rhs`` produces. An empty iterable yields
        ``None`` instead of raising ``TypeError`` as ``reduce`` without an
        initializer does.
        """
        chosen = None
        for chosen in values:
            if chosen:
                break
        return chosen

    def __pick_parameter(self, context, names):
        """Look up ``names`` in the parsed URL parameters, in priority order."""
        return self._first_truthy(
            safe_dict_get(context['parameters'], name)
            for name in names
        )

    def __add_parameters(self, context):
        """Rebuild the URL parameter dict from its serialized key/value lists.

        ``raw_url_keys`` and ``raw_url_values`` are evaluated as Python
        literals and zipped into a dict. Any failure is deliberately
        swallowed and results in ``parameters = None``.
        """
        try:
            keys = ast.literal_eval(context['raw_url_keys'])
            values = ast.literal_eval(context['raw_url_values'])
            parameters = dict(zip(keys, values))
        except Exception:
            # Best effort: malformed rows simply carry no parameters.
            parameters = None
        context['parameters'] = parameters

    def __add_showid(self, context):
        """Set ``showid`` from the first configured parameter that has a value."""
        context['showid'] = self.__pick_parameter(context, self.config['showid_params'])

    def __add_reqid(self, context):
        """Set ``reqid`` from the first configured parameter that has a value."""
        context['reqid'] = self.__pick_parameter(context, self.config['reqid_params'])

    def __add_key(self, context):
        """Compute the join key from ``reqid``, falling back to ``showid``."""
        key_by_reqid = KeyBuilder.from_reqid(context['reqid'])
        key_by_showid = KeyBuilder.from_showid(context['showid'])
        context['key'] = key_by_reqid or key_by_showid

    def __add_eventtype(self, context):
        """Every row of this log is reported as a ``trueinstall`` event."""
        context['eventtype'] = 'trueinstall'

    def __add_origin(self, context):
        """Tag the row with its source log."""
        context['origin'] = 'mobile_tracking_log'


class AtomCubePastReducer(object):
    """Reducer keeping ``show``/``click`` rows of groups without a ``trueinstall``.

    A group that contains at least one ``trueinstall`` row produces no output.
    """

    def __init__(self, config):
        """Store ``config``; it is not read by the reducer itself."""
        super(AtomCubePastReducer, self).__init__()
        self.config = config

    def __call__(self, groups):
        """Yield a ``Record`` for every row that survives group filtering."""
        for key, records in groups:
            for fields in self.__group_filterer(key, records):
                yield Record(**fields)

    def __group_filterer(self, key, records):
        """Yield the ``show``/``click`` row dicts of one group.

        The whole group is materialized first because the presence of a
        ``trueinstall`` row anywhere in it suppresses all output.
        """
        rows = [record.to_dict() for record in records]
        eventtypes = [row['eventtype'] for row in rows]
        if 'trueinstall' in eventtypes:
            return
        allowed_events = set(['show', 'click'])
        for row, eventtype in zip(rows, eventtypes):
            if eventtype in allowed_events:
                yield row


class AtomDistributionProvider(object):
    """Builds the extraction pipeline for each supported input source.

    ``get(name, source)`` selects the private ``__get_<name>_extract`` method
    for ``name`` and calls it with ``source`` and the ``config[name]`` section.
    ``source`` must expose a ``qb2(...)`` method; the exact stream type is
    defined outside this file.
    """

    def __init__(self, config):
        # ``config`` is indexed by source name in ``get``.
        super(AtomDistributionProvider, self).__init__()
        self.config = config
        self.logger = logging.getLogger(__name__)

    def get(self, name, source):
        """Return the extraction pipeline for source ``name`` built on ``source``.

        Raises ``KeyError`` when ``name`` is not one of the known sources or
        has no section in ``self.config``.
        """
        self.logger.info('Extracting data for source {}'.format(name))
        # Dispatch table: source name -> pipeline builder.
        extractors = {
            'redir_log': self.__get_redir_log_extract,
            'metrika_mobile_log': self.__get_metrika_mobile_log_extract,
            'export_access_log': self.__get_export_access_log_extract,
            'mobile_tracking_log': self.__get_mobile_tracking_log_extract,
            'bs_watch_log': self.__get_bs_watch_log_extract,
            'id_to_generalized_id': self.__get_id_to_generalized_id_extract,
            'atom_cube_past': self.__get_atom_cube_past_extract
        }
        extractor = extractors[name]
        return extractor(source, self.config[name])

    def __get_redir_log_extract(self, source, config):
        """Read redir-log rows carrying the atom trait and map them with ``RedirMapper``.

        Uses ``config['atom_vars_trait']`` and ``config['atom_path_prefix']``
        for filtering; the same ``config`` is handed to the mapper.
        """
        return source.qb2(
            log='redir-log',
            fields=[
                'yandexuid',
                'timestamp',
                'referer',
                'url',
                'path',
                'parsed_vars'
            ],
            filters=[
                qf.default_filtering('redir-log'),
                qf.defined('yandexuid', 'timestamp', 'parsed_vars'),
                qf.contains('vars', config['atom_vars_trait']),
                # RedirMapper strips this prefix from ``path`` by length.
                qf.startswith('path', config['atom_path_prefix']),
            ],
            mode='yamr_lines',
            intensity='large_data'
        ).map(
            RedirMapper(config),
            intensity='large_data'
        )

    def __get_metrika_mobile_log_extract(self, source, config):
        """Read metrika-mobile-log display/report events and map them with ``MetrikaMobileMapper``.

        Uses ``config['atom_vars_trait']`` and ``config['atom_event_value_trait']``
        for filtering; the same ``config`` is handed to the mapper.
        """
        def reaction_extractor(raw_reaction):
            # A dict reaction is reduced to its first key; anything else is
            # passed through unchanged.
            return (
                list(raw_reaction.keys())[0]
                if isinstance(raw_reaction, dict) else
                raw_reaction
            )
        return source.qb2(
            log='metrika-mobile-log',
            fields=[
                'event_name',
                'app_platform',
                'uuid',
                'device_id',
                'api_key',
                qe.custom('event_value', safe_decode_json, 'raw_event_value'),
                qe.dictitem('reaction', from_='event_value').rename('raw_reaction').hide(),
                qe.custom('reaction', reaction_extractor, 'raw_reaction'),
                qe.dictitem('campaign_id', 'event_value'),
                qe.integer_dictitem('SendTimestamp', 'parsed_log_line').rename('unixtime'),
                qe.dictitem('AppID', 'parsed_log_line').rename('app_id'),
            ],
            filters=[
                qf.default_filtering('metrika-mobile-log'),
                qf.defined('event_value', 'campaign_id', 'unixtime'),
                # Keep displays, and reports whose reaction is click or close.
                qf.or_(
                    qf.equals('event_name', 'display'),
                    qf.and_(
                        qf.equals('event_name', 'report'),
                        qf.one_of('reaction', {'click', 'close'})
                    )
                ),
                # MetrikaMobileMapper strips this prefix from ``campaign_id`` by length.
                qf.startswith('campaign_id', config['atom_vars_trait']),
                qf.contains('raw_event_value', config['atom_event_value_trait']),
                qf.contains('raw_event_value', 'campaign_id')
            ],
            intensity='large_data'
        ).map(
            MetrikaMobileMapper(config),
            intensity='large_data'
        )

    def __get_export_access_log_extract(self, source, config):
        """Read export-access-log rows with a whitelisted stat and emit ``trueinstall`` rows.

        Uses ``config['stat_white_list']``. The second ``qb2`` stage derives
        ``showid`` from ``banner_id`` and ``key`` from ``showid``, dropping
        rows for which no key could be built.
        """
        return source.qb2(
            log='export-access-log',
            fields=[
                'parsed_parameters',
                'timestamp',
                'banner_id',
                qe.dictitem('stat', from_='parsed_parameters').hide(),
                qe.item('stat_item', index=0, from_='stat').hide()
            ],
            filters=[
                qf.default_filtering('export-access-log'),
                qf.defined('stat_item', 'banner_id', 'timestamp'),
                qf.one_of('stat_item', set(config['stat_white_list']))
            ],
            intensity='large_data'
        ).qb2(
            log='generic-yson-log',
            fields=[
                qe.custom('key', KeyBuilder.from_showid, 'showid'),
                qe.log_field('timestamp').rename('unixtime'),
                qe.log_field('banner_id').hide(),
                qe.const('eventtype', 'trueinstall'),
                # showid is the part of banner_id before the first ':'.
                qe.custom('showid', lambda banner_id: banner_id.split(':')[0], 'banner_id'),
                qe.const('origin', 'export_access_log')
            ],
            filters=[
                qf.defined('key')
            ],
            intensity='large_data'
        )

    def __get_mobile_tracking_log_extract(self, source, config):
        """Read mobile-tracking-log rows of whitelisted event types and map them.

        Uses ``config['events_white_list']``; the same ``config`` is handed
        to ``MobileTrackingMapper``.
        """
        return source.qb2(
            log='mobile-tracking-log',
            fields=[
                qe.dictitem('SendTimestamp', 'parsed_log_line').rename('unixtime'),
                qe.dictitem('EventType', 'parsed_log_line').rename('event_type').hide(),
                # Serialized lists; MobileTrackingMapper evaluates them with
                # ast.literal_eval and zips them into a dict.
                qe.dictitem('UrlParameters_Keys', 'parsed_log_line').rename('raw_url_keys'),
                qe.dictitem('UrlParameters_Values', 'parsed_log_line').rename('raw_url_values')
            ],
            filters=[
                qf.defined('raw_url_keys', 'raw_url_values', 'unixtime', 'event_type'),
                qf.one_of('event_type', set(config['events_white_list'])),
            ],
            intensity='large_data'
        ).map(
            MobileTrackingMapper(config)
        )

    def __get_bs_watch_log_extract(self, source, config):
        """Read bs-watch-log rows matching the configured clids/pages and emit ``trueinstall`` rows.

        Uses ``config['white_list_apple_clids']`` and ``config['serp_page_trait']``
        (an iterable of substrings, any of which may match ``page``).
        """
        return source.qb2(
            log='bs-watch-log',
            fields=[
                'url',
                'yandexuid',
                'timestamp',
            ],
            filters=[
                qf.default_filtering('bs-watch-log'),
                qf.defined('url', 'timestamp', 'yandexuid', 'raw_clid_vid'),
                qf.contains('canonized_vhost', 'yandex'),
                qf.one_of('raw_clid_vid', set(config['white_list_apple_clids'])),
                qf.or_(*[
                    qf.contains('page', pattern)
                    for pattern in config['serp_page_trait']
                ])
            ],
            intensity='large_data'
        ).qb2(
            log='generic-yson-log',
            fields=[
                qe.log_field('timestamp').rename('unixtime'),
                qe.log_field('url').rename('referer'),
                qe.log_field('yandexuid'),
                qe.const('eventtype', 'trueinstall'),
                qe.const('product', 'default_search_mobilesafari_ios'),
                qe.const('origin', 'bs_watch_log')
            ],
            intensity='large_data'
        )

    def __get_id_to_generalized_id_extract(self, source, config):
        """Build (yandexuid, device_id) pairs from a key/value id table.

        ``yandexuid`` is ``key`` with every ``config['id_regex']`` match
        removed, lower-cased. ``value`` is JSON-decoded and its ``'d'`` item
        is taken as ``device_ids``; the second stage emits one upper-cased
        ``device_id`` per element.
        """
        id_regex = re.compile(config['id_regex'])
        return source.qb2(
            log='generic-yson-log',
            fields=[
                qe.log_field('key').hide(),
                qe.log_field('value').hide(),
                qe.custom('yandexuid', lambda key: id_regex.sub('', key).lower(), 'key'),
                qe.custom('ids', safe_decode_json, 'value').hide(),
                qe.dictitem('d', 'ids').rename('device_ids')
            ],
            filters=[
                qf.defined('yandexuid', 'ids', 'device_ids'),
                qf.nonzero('yandexuid', 'device_ids'),
            ],
            intensity='data'
        ).qb2(
            log='generic-yson-log',
            fields=[
                qe.log_field('yandexuid'),
                qe.log_field('device_ids').hide(),
                # NOTE(review): no source field is passed explicitly here;
                # presumably the dependency on ``raw_device_id`` is taken from
                # the lambda's argument name — confirm before renaming it.
                qe.custom('device_id', lambda raw_device_id: raw_device_id.upper()),
                qe.unfold('raw_device_id', sequence='device_ids').hide(),
                qe.const('origin', 'id_to_generalized_id')
            ],
            filters=[
                qf.nonzero('device_id')
            ],
            intensity='data'
        )

    def __get_atom_cube_past_extract(self, source, config):
        """Re-read previously stored cube rows and mark them as past data.

        Rows must define ``past`` and ``eventtype`` and are kept only when
        ``qf.nonzero('past')`` does not hold. They are then stamped with
        ``past=1`` and ``origin='atom_cube_past'``, grouped by ``key`` and
        filtered by ``AtomCubePastReducer``.
        """
        return source.qb2(
            log='generic-yson-log',
            fields=[
                qe.all(),
                qe.log_field('past').hide(),
                qe.log_field('eventtype').hide()
            ],
            filters=[
                qf.defined('past', 'eventtype'),
                qf.not_(qf.nonzero('past')),
            ],
        ).qb2(
            log='generic-yson-log',
            fields=[
                qe.all(),
                qe.const('past', 1),
                qe.const('origin', 'atom_cube_past')
            ]
        ).groupby(
            'key'
        ).reduce(
            # NOTE(review): the whole ``self.config`` is passed here, whereas
            # the other extractors pass the per-source ``config``. The reducer
            # in this file does not read its config, so this has no visible
            # effect — confirm which one is intended.
            AtomCubePastReducer(self.config),
            memory_limit=16184
        )
