#!/usr/bin/env python
# -*- coding: utf-8 -*-
import itertools

from crypta.lib.python import time_utils
from crypta.profile.lib import date_helpers
from crypta.profile.runners.interests.lib.data.reqans import reqans_lemmas_to_interests_mapping
from crypta.profile.runners.interests.lib.data.phones import get_phones_segments_lemmas
from crypta.profile.runners.interests.lib.log_processors.__base__ import InterestsLogProcessor
from crypta.profile.utils.config import config
from crypta.profile.utils.interests_helpers import LabSegmentsInfo
from crypta.profile.utils.luigi_utils import ExternalInput


class ReqansProcessor(InterestsLogProcessor):
    """Turn a reqans log table into interest rows keyed by yandexuid.

    The pipeline implemented here is:

    1. a YQL query extracts (yandexuid, timestamp, lemmas) from the log;
    2. a map operation emits one row for every interest lemma set that is
       fully contained in the lemmas of a query;
    3. a reduce operation by ``yandexuid`` collects, per shortterm interest
       id, the latest timestamp among sufficiently recent rows.
    """

    task_group = 'reqans_for_interests'

    def requires(self):
        """Return the external input tables: the log itself and phone lemmas."""
        return {
            'log': ExternalInput(
                table=self.log_path,
            ),
            'phone_lemmas': ExternalInput(
                table=config.PHONE_MODELS_LEMMAS_BY_PRICE_SEGMENT,
            ),
        }

    def fill_processed_table(self, transaction):
        """Build the processed table and the to-bigb table from the log.

        :param transaction: transaction handle forwarded to the YQL query
            that parses the log.
        """
        # The parsed log is only an intermediate result, so it lives in a
        # temporary table that exists for the duration of this block.
        with self.yt.TempTable() as parsed_log_table:
            self.parse_log(parsed_log_table, transaction)
            self.yt.run_map(
                self.make_mapper(),
                source_table=parsed_log_table,
                destination_table=self.processed_table,
            )
            self.sort_processed_table()
            self.create_empty_to_bigb_raw_table()
            self.yt.run_reduce(
                self.make_to_bigb_reducer(),
                self.processed_table,
                self.to_bigb_raw_table,
                reduce_by=['yandexuid'],
            )
            self.move_to_bigb_table()

    def parse_log(self, destination_table, transaction):
        """Run the log parsing YQL query, writing into ``destination_table``.

        :param destination_table: table that receives the parsed rows
            (it is truncated by the query).
        :param transaction: transaction handle passed to the YQL client.
        """
        self.yql.query(
            query_string=self.log_parser_query.format(
                input_table=self.log_path,
                destination_table=destination_table,
            ),
            transaction=transaction,
        )

    @property
    def log_parser_query(self):
        """YQL template with ``{input_table}`` / ``{destination_table}`` slots.

        The query keeps rows with a non-zero numeric yandexuid and lemmatizes
        either the corrected query (when ``Msp.Relev >= 8000``) or the
        original query; requests of 65536 bytes or longer get NULL lemmas.
        Literal braces of the YQL lambda are doubled because the template is
        rendered with ``str.format``.
        """
        return """
PRAGMA yt.DefaultMemoryLimit = '2048M';
PRAGMA yson.DisableStrict;

$req = ($msp, $query) -> {{
    RETURN CASE
        WHEN $msp IS NOT NULL AND $msp.Relev >= 8000
        THEN $msp.CorrectedQuery
        ELSE $query
    END
}};


INSERT INTO `{destination_table}` WITH TRUNCATE
SELECT
    CAST(UserId.YandexUid AS Uint64) AS yandexuid,
    _logfeller_timestamp AS `timestamp`,
    CASE
        WHEN $req(Msp, Query) IS NOT NULL AND LENGTH($req(Msp, Query)) < 65536
        THEN Crypta::GetLemmas(SearchRequest::NormalizeSimple(CAST($req(Msp, Query) AS Utf8)), UiLanguage)
        ELSE NULL
    END AS lemmas,
FROM `{input_table}`
WHERE CAST(UserId.YandexUid AS Uint64) IS NOT NULL AND CAST(UserId.YandexUid AS Uint64) != 0
        """

    def make_mapper(self):
        """Build the mapper that matches query lemmas against interest lemmas.

        :return: a generator function taking a parsed log row and yielding
            one output row per interest lemma set that is a subset of the
            row's lemmas.
        """
        all_lemmas, lemmas_by_interest_lab_id = self.get_lemmas_mapping()
        log_name = self.log_name

        def mapper(row):
            raw_lemmas = row['lemmas']
            query_lemmas = set(raw_lemmas) if raw_lemmas else set()

            # Cheap pre-filter: a query sharing no lemma with any interest
            # cannot match, so skip it before doing any date formatting.
            # isdisjoint() is also true for an empty query_lemmas.
            if query_lemmas.isdisjoint(all_lemmas):
                return

            timestamp = row['timestamp']
            # Start of the 30-minute bin that contains the event.
            bin_start = timestamp - timestamp % (30 * 60)
            template_row = {
                'yandexuid': row['yandexuid'],
                'timestamp': timestamp,
                'date': date_helpers.to_date_string(date_helpers.from_timestamp_to_moscow_datetime(timestamp)),
                'datetime_bin': date_helpers.to_date_string(
                    date_helpers.from_timestamp_to_moscow_datetime(bin_start),
                    date_format=date_helpers.DATETIME_FORMAT,
                ),
                'log_name': log_name,
                'data_source': 'reqans',
                'data_type': 'lemmas',
                'raw_data': raw_lemmas,
            }

            # .items() instead of .iteritems() keeps the mapper runnable on
            # both Python 2 and Python 3.
            for interest_lab_id, all_interest_lemmas in lemmas_by_interest_lab_id.items():
                for interest_lemmas in all_interest_lemmas:
                    # Every lemma of the interest entry must occur in the query.
                    if interest_lemmas <= query_lemmas:
                        yield dict(
                            template_row,
                            interest_lab_id=interest_lab_id,
                            data=list(interest_lemmas),
                        )

        return mapper

    def get_lemmas_mapping(self):
        """Collect interest lemmas from the static mapping and phone segments.

        :return: tuple ``(all_lemmas, lemmas_by_segment_id)`` where
            ``all_lemmas`` is the set of every lemma seen and
            ``lemmas_by_segment_id`` maps ``segment_id`` to that mapping's
            ``data`` collection.
        """
        # NOTE(review): the mapper compares each entry of mapping['data']
        # with `<=` against a set, so entries are presumably sets or
        # frozensets of lemmas -- confirm against the data modules.
        all_lemmas = set()
        lemmas_by_segment_id = dict()

        sources = itertools.chain(
            reqans_lemmas_to_interests_mapping['mapping'],
            get_phones_segments_lemmas(self.yt),
        )
        for mapping in sources:
            lemma_sets = mapping['data']
            for lemmas in lemma_sets:
                all_lemmas.update(lemmas)
            # A segment_id met again later in the chain replaces the
            # earlier entry.
            lemmas_by_segment_id[mapping['segment_id']] = lemma_sets

        return all_lemmas, lemmas_by_segment_id

    def make_to_bigb_reducer(self):
        """Build the reducer that aggregates shortterm interests per user.

        :return: a generator function ``reducer(key, rows)`` yielding at
            most one row per ``yandexuid`` with a ``shortterm_interests``
            dict mapping shortterm interest id (as a string) to the latest
            timestamp seen for it.
        """
        shortterm_ids = LabSegmentsInfo().lab_segment_id_to_shortterm_interest_id
        # Captured once, when the reducer is built, so that every row is
        # aged against the same reference moment.
        current_timestamp = time_utils.get_current_time()

        def reducer(key, rows):
            # Loop-invariant: convert the reference time once per key
            # instead of once per row.
            now_moscow = date_helpers.from_timestamp_to_moscow_datetime(current_timestamp)
            shortterm_interests = dict()

            for row in rows:
                row_age = now_moscow - date_helpers.from_timestamp_to_moscow_datetime(row['timestamp'])
                # timedelta.days is the whole-day component, so rows less
                # than four full days old pass this check.
                if row_age.days <= 3:
                    shortterm_id = str(shortterm_ids[row['interest_lab_id']])
                    shortterm_interests[shortterm_id] = max(row['timestamp'], shortterm_interests.get(shortterm_id, 0))

            if shortterm_interests:
                yield {
                    'yandexuid': key['yandexuid'],
                    'shortterm_interests': shortterm_interests,
                }

        return reducer
