#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os

from cached_property import cached_property
import luigi
import yt.yson as yson

from crypta.profile.lib import date_helpers
from crypta.profile.lib.frozen_dict import FrozenDict

from crypta.profile.utils import (
    loggers,
    luigi_utils,
    yt_utils,
)
from crypta.profile.utils.config import config
from crypta.profile.utils.utils import bb_keyword_id_to_field_name


# Column name -> type name for profile records: the 'id' / 'id_type' string
# columns plus one 'any'-typed column per segment group.
# NOTE(review): the type names look like YT column types; this mapping is not
# referenced in the visible part of the file - confirm against its consumers.
profile_schema = {
    'id': 'string',
    'id_type': 'string',

    'longterm_interests': 'any',
    'audience_segments': 'any',
    'heuristic_segments': 'any',
    'marketing_segments': 'any',

    'heuristic_internal': 'any',
    'heuristic_private': 'any',
    'heuristic_common': 'any',

    'lal_common': 'any',
    'lal_private': 'any',
    'lal_internal': 'any',
}

# Column descriptions (name / type / required) with the same three columns that
# apply_days_of_activity_threshold_template writes: id, id_type, rule_lab_ids.
# NOTE(review): 'id' is declared optional while 'id_type' is required; not
# referenced in the visible part of the file - confirm this is intentional.
aggregated_schema = [
    {'name': 'id', 'type': 'string', 'required': False},
    {'name': 'id_type', 'type': 'string', 'required': True},
    {'name': 'rule_lab_ids', 'type': 'any', 'required': False},
]

# Column name -> type name for the two uint64 columns (yandexuid, rule_id) that
# apply_days_of_activity_threshold_template selects from the daily tables.
# NOTE(review): presumably the schema of those daily tables, where 'rule_id'
# actually holds a rule revision id (see the comment inside the template) -
# confirm against the tasks that write them.
daily_schema = {
    'yandexuid': 'uint64',
    'rule_id': 'uint64',
}


# Per-rule "lal" segment settings, keyed by rule id ('rule-<8 hex chars>').
# Each entry is a dict with:
#   'coverage'      - integer size value for the segment
#                     (presumably the target audience size - TODO confirm with the consumer);
#   'export'        - (field name, segment id) pair; the field names used here are
#                     'lal_internal' and 'audience_segments';
#   'include_input' - optional boolean; several entries omit it, so the consumer
#                     (not visible in this file) must supply the default.
# Trailing comments name the segment or the ticket that requested it.
lal_segments_config = {
    'rule-9a3b0646': {'coverage': 13000000, 'export': ('lal_internal', 1907)},  # avia_goals
    'rule-6b0c80ea': {  # music_goals
        'coverage': 10000000,
        'export': ('lal_internal', 1155),
        'include_input': False,
    },
    'rule-b935006a': {'coverage': 3000000, 'export': ('lal_internal', 1119)},  # tutor_users
    'rule-961b6c67': {'coverage': 10000000, 'export': ('audience_segments', 4778287)},  # cat owners
    'rule-9a530d96': {'coverage': 7000000, 'export': ('audience_segments', 4778317)},  # dog owners
    'rule-2a42178a': {'coverage': 15000000, 'export': ('lal_internal', 1767)},  # interested in ofd
    'rule-3b472d4b': {'coverage': 15000000, 'export': ('lal_internal', 1769)},  # business owners
    'rule-2576d663': {  # want_to_adopt_pet
        'coverage': 2000000,
        'export': ('audience_segments', 4778185),
        'include_input': True
    },
    'rule-db7abc84': {  # yandex drive
        'coverage': 2000000,
        'export': ('lal_internal', 1218),
        'include_input': False,
    },
    'rule-bb12ea82': {  # yandex taxi
        'coverage': 7000000,
        'export': ('lal_internal', 1223),
        'include_input': False,
    },
    'rule-b29e5591': {  # taxi lal
        'coverage': 20000000,
        'export': ('audience_segments', 11612491),
        'include_input': False,
    },
    'rule-09798c11': {  # podcast lal
        'coverage': 3000000,
        'export': ('lal_internal', 1312),
        'include_input': True,
    },
    'rule-11c216e9': {  # courier job lal for toloka
        'coverage': 3000000,
        'export': ('lal_internal', 1379),
        'include_input': True,
    },
    'rule-82096251': {  # solvent gamers lal for Wargaming CRYPTAUP-1756
        'coverage': 1000000,
        'export': ('audience_segments', 12150007),
        'include_input': True,
    },
    'rule-ba033062': {  # CRYPTAUP-1989
        'coverage': 1000000,
        'export': ('audience_segments', 13865149),
        'include_input': True,
    },
    'rule-20423152': {  # CRYPTAUP-1989
        'coverage': 1000000,
        'export': ('audience_segments', 13865161),
        'include_input': True,
    },
    'rule-20764093': {  # CRYPTAUP-1989
        'coverage': 1000000,
        'export': ('audience_segments', 13865170),
        'include_input': True,
    },
    'rule-4dc363be': {
        'coverage': 7000000,
        'export': ('lal_internal', 1252),
        'include_input': False,
    },
    'rule-59a1956e': {
        'coverage': 1000000,
        'export': ('lal_internal', 1251),
        'include_input': False,
    },
    'rule-9a18b332': {
        'coverage': 3000000,
        'export': ('audience_segments', 16368178),
        'include_input': False,
    },
    'rule-c300e4c9': {
        'coverage': 3000000,
        'export': ('audience_segments', 16368208),
        'include_input': False,
    },
    'rule-8cee4324': {  # CRYPTA-13000
        'coverage': 3000000,
        'export': ('audience_segments', 18114985),
        'include_input': False,
    },
    'rule-6ab0c583': {  # CRYPTA-13000
        'coverage': 1000000,
        'export': ('audience_segments', 18115000),
        'include_input': False,
    },
    'rule-94d69e93': {  # CRYPTA-13000
        'coverage': 3000000,
        'export': ('audience_segments', 18115006),
        'include_input': False,
    },
    'rule-64654919': {  # CRYPTA-13000
        'coverage': 1000000,
        'export': ('audience_segments', 18115015),
        'include_input': False,
    },
    'rule-b7750e8d': {  # CRYPTA-13000
        'coverage': 3000000,
        'export': ('audience_segments', 18115036),
        'include_input': False,
    },
    'rule-c95c6508': {  # CRYPTA-13000
        'coverage': 3000000,
        'export': ('audience_segments', 18115039),
        'include_input': False,
    },
    'rule-48753747': {  # CRYPTA-13068
        'coverage': 3000000,
        'export': ('audience_segments', 18173683),
        'include_input': False,
    },
    'rule-058503ce': {  # CRYPTA-13019
        'coverage': 3000000,
        'export': ('audience_segments', 18179326),
        'include_input': False,
    },
    'rule-aa971244': {  # CRYPTA-13175
        'coverage': 3000000,
        'export': ('audience_segments', 19026661),
        'include_input': False,
    },
    'rule-2b273877': {  # CRYPTA-13175
        'coverage': 3000000,
        'export': ('audience_segments', 19026733),
        'include_input': False,
    },
    'rule-ac7e278e': {  # CRYPTA-13175
        'coverage': 1000000,
        'export': ('audience_segments', 19026742),
        'include_input': False,
    },
    'rule-109ee171': {  # CRYPTA-13175
        'coverage': 1000000,
        'export': ('audience_segments', 18825262),
        'include_input': False,
    },
    'rule-0db9ab79': {  # CRYPTA-14024
        'coverage': 1000000,
        'export': ('audience_segments', 19524622),
        'include_input': False,
    },
    'rule-d15d73ed': {  # CRYPTA-14125
        'coverage': 1000000,
        'export': ('lal_internal', 2218),
        'include_input': False,
    },
    'rule-0e4e1be0': {  # CRYPTA-14394
        'coverage': 150000,
        'export': ('lal_internal', 2240),
        'include_input': False,
    },
    'rule-50b8091e': {  # CRYPTA-14382
        'coverage': 1000000,
        'export': ('lal_internal', 2245),
        'include_input': True,
    },
    'rule-a01501dc': {  # CRYPTA-13350
        'coverage': 20000000,
        'export': ('audience_segments', 18769423),
        'include_input': True,
    },
    'rule-eb639466': {  # CRYPTA-13350
        'coverage': 10000000,
        'export': ('audience_segments', 19070356),
        'include_input': True,
    },
    'rule-d756d624': {  # CRYPTA-13350
        'coverage': 5000000,
        'export': ('audience_segments', 19221322),
        'include_input': True,
    },
    'rule-e9e7b199': {  # CRYPTA-13350
        'coverage': 5000000,
        'export': ('audience_segments', 19221388),
        'include_input': True,
    },
    'rule-d81658b0': {  # CRYPTA-13350
        'coverage': 5000000,
        'export': ('audience_segments', 19221436),
        'include_input': True,
    },
    'rule-4aabc731': {  # CRYPTA-13350
        'coverage': 5000000,
        'export': ('audience_segments', 18778810),
        'include_input': True,
    },
    'rule-b182ecae': {  # CRYPTA-13350
        'coverage': 5000000,
        'export': ('audience_segments', 18770356),
        'include_input': True,
    },
    'rule-8a2614b3': {  # CRYPTA-13350
        'coverage': 5000000,
        'export': ('audience_segments', 19222048),
        'include_input': True,
    },
    'rule-96ae217b': {  # CRYPTA-13350
        'coverage': 5000000,
        'export': ('audience_segments', 19221808),
        'include_input': True,
    },
    'rule-14233baa': {  # CRYPTA-13350
        'coverage': 15000000,
        'export': ('lal_internal', 2249),
        'include_input': True,
    },
    'rule-db151c17': {  # CRYPTA-13350
        'coverage': 10000000,
        'export': ('audience_segments', 19819275),
        'include_input': True,
    },
    'rule-644b3370': {  # CRYPTA-13350
        'coverage': 5000000,
        'export': ('audience_segments', 19261993),
        'include_input': True,
    },
    'rule-8633c189': {  # CRYPTA-13350
        'coverage': 20000000,
        'export': ('audience_segments', 19262083),
        'include_input': True,
    },
    'rule-7623454b': {  # CRYPTA-13350
        'coverage': 5000000,
        'export': ('audience_segments', 19080322),
        'include_input': True,
    },
    'rule-6bd78eb2': {  # CRYPTA-13350
        'coverage': 10000000,
        'export': ('audience_segments', 19047703),
        'include_input': True,
    },
    'rule-17d67574': {  # CRYPTA-13350
        'coverage': 10000000,
        'export': ('audience_segments', 18748870),
        'include_input': True,
    },
    'rule-6905e40e': {  # CRYPTA-13350
        'coverage': 5000000,
        'export': ('audience_segments', 18837913),
        'include_input': True,
    },
    'rule-4e505b96': {  # CRYPTA-13350
        'coverage': 20000000,
        'export': ('audience_segments', 19262278),
        'include_input': True,
    },
    'rule-0366d411': {  # CRYPTA-13350
        'coverage': 5000000,
        'export': ('audience_segments', 18757417),
        'include_input': True,
    },
    'rule-d7189b52': {  # CRYPTA-13350
        'coverage': 5000000,
        'export': ('audience_segments', 18768664),
        'include_input': True,
    },
    'rule-80099426': {  # CRYPTA-13350
        'coverage': 15000000,
        'export': ('audience_segments', 17965138),
        'include_input': True,
    },
    'rule-4168a0d5': {  # CRYPTA-13350
        'coverage': 15000000,
        'export': ('audience_segments', 19848140),
        'include_input': True,
    },
    'rule-9be76cc3': {  # CRYPTA-13350
        'coverage': 10000000,
        'export': ('audience_segments', 19237117),
        'include_input': True,
    },
    'rule-3b83807a': {  # CRYPTA-14748
        'coverage': 8000000,
        'export': ('audience_segments', 19964178),
        'include_input': False,
    },
    'rule-40473c87': {  # CRYPTA-14922
        'coverage': 18000,
        'export': ('audience_segments', 20132349),
        'include_input': True,
    },
    'rule-77180e45': {  # CRYPTA-14944
        'coverage': 46000000,
        'export': ('audience_segments', 20185807),
        'include_input': True,
    },
    'rule-17637459': {  # CRYPTA-14944
        'coverage': 38000000,
        'export': ('audience_segments', 20185911),
        'include_input': True,
    },
    'rule-81148002': {  # CRYPTA-14944
        'coverage': 44000000,
        'export': ('audience_segments', 20186107),
        'include_input': True,
    },
    'rule-0e4ab70d': {  # CRYPTA-14944
        'coverage': 34000000,
        'export': ('audience_segments', 20186131),
        'include_input': True,
    },
    'rule-09368e37': {  # CRYPTA-14944
        'coverage': 32000000,
        'export': ('audience_segments', 20186141),
        'include_input': True,
    },
    'rule-6333841a': {  # CRYPTA-14944
        'coverage': 26000000,
        'export': ('audience_segments', 20186155),
        'include_input': True,
    },
    'rule-18abcecb': {  # CRYPTA-14944
        'coverage': 44000000,
        'export': ('audience_segments', 20186174),
        'include_input': True,
    },
    'rule-77356514': {  # CRYPTA-14944
        'coverage': 32000000,
        'export': ('audience_segments', 20186189),
        'include_input': True,
    },
    'rule-6342cbb0': {  # CRYPTA-14944
        'coverage': 38000000,
        'export': ('audience_segments', 20186198),
        'include_input': True,
    },
    'rule-925937ba': {  # CRYPTA-14944
        'coverage': 32000000,
        'export': ('audience_segments', 20186210),
        'include_input': True,
    },
    'rule-6752e685': {  # CRYPTA-14944
        'coverage': 42000000,
        'export': ('audience_segments', 20186211),
        'include_input': True,
    },
    'rule-86a68798': {  # CRYPTA-15027
        'coverage': 300000,
        'export': ('audience_segments', 20384033),
        'include_input': False,
    },
    'rule-9321ce9c': {  # CRYPTA-15027
        'coverage': 300000,
        'export': ('audience_segments', 20384032),
        'include_input': False,
    },
}


def add_segment_to_record(output_record, keyword_id, segment_id):
    """Record one exported segment in ``output_record`` (modified in place).

    The target field name is looked up in ``bb_keyword_id_to_field_name`` by
    ``keyword_id``; the lookup happens for every keyword id, so an id missing
    from that mapping fails before anything is written.

    How the segment is stored depends on the keyword id:

    * 601, 557, 547, 548, 549 - the field is a list and the segment id is
      appended to it as ``yson.YsonUint64``;
    * 216 - the field is a dict, ``str(segment_id)`` maps to ``yson.YsonUint64(1)``;
    * 281, 544, 545, 546 - the field is a dict, ``str(segment_id)`` maps to ``1.0``;
    * any other keyword id - the record is left untouched.

    Nothing is returned.
    """
    field_name = bb_keyword_id_to_field_name[keyword_id]

    # List-valued fields: plain collection of segment ids.
    if keyword_id in (601, 557, 547, 548, 549):
        output_record.setdefault(field_name, list()).append(yson.YsonUint64(segment_id))
        return

    # Dict-valued fields: segment id (as string) -> fixed value.
    if keyword_id == 216:
        segment_value = yson.YsonUint64(1)
    elif keyword_id in (281, 544, 545, 546):
        segment_value = 1.0
    else:
        return

    output_record.setdefault(field_name, dict())[str(segment_id)] = segment_value


# YQL query template, filled in with str.format() by DailyLogsAggregator.run().
# Doubled braces ({{ and }}) are literal braces of the YQL lambdas, escaped for
# str.format(). Placeholders:
#   {rule_revision_ids_to_rule_ids}         - AsDict() entries: rule revision id -> rule id
#   {min_date_by_rule_id}                   - AsDict() entries: rule id -> earliest date to count
#   {min_active_days_thresholds_by_rule_id} - AsDict() entries: rule id -> required number of days
#   {daily_processed_tables}                - comma-separated, backtick-quoted daily tables for CONCAT()
#   {output_table}                          - table that is overwritten with the result
# The query:
#   1. maps every daily row's revision id to its rule id and drops rows whose
#      revision id is not in the mapping; a missing `date` column value falls
#      back to the table name;
#   2. keeps (yandexuid, rule_id) pairs whose number of distinct dates (after the
#      optional per-rule minimum date filter) reaches the per-rule threshold,
#      or 2 when the rule has no threshold;
#   3. writes one row per yandexuid: id (yandexuid as a string),
#      id_type = 'yandexuid' and the list of distinct rule ids as rule_lab_ids.
apply_days_of_activity_threshold_template = """
PRAGMA yt.MaxRowWeight="32M";

$rule_revision_ids_to_rule_ids = AsDict(
    {rule_revision_ids_to_rule_ids}
);

-- do not check days before specified date if max_days id specified for rule id
$min_date_by_rule_id = AsDict(
    {min_date_by_rule_id}
);

$min_active_days_thresholds_by_rule_id = AsDict(
    {min_active_days_thresholds_by_rule_id}
);

$apply_min_max_days_thresholds = ($rule_id, $days) -> {{
    $filtered_days = CASE
        WHEN DictContains($min_date_by_rule_id, $rule_id)
        THEN ListFilter($days, ($date) -> {{RETURN $date >= $min_date_by_rule_id[$rule_id]}})
        ELSE $days
    END;
    $number_of_days = ListLength($filtered_days);
    $threshold = CASE
        WHEN $min_active_days_thresholds_by_rule_id[$rule_id] IS NOT NULL
            THEN $min_active_days_thresholds_by_rule_id[$rule_id]
            ELSE 2
        END;
    RETURN $number_of_days >= $threshold;
}};

$rule_ids_table = (
    SELECT DISTINCT
        yandexuid,
        $rule_revision_ids_to_rule_ids[rule_id] AS rule_id, -- here rule_id column initially contains rule_revision_id
        `date` ?? TableName() AS `date`
    FROM CONCAT(
        {daily_processed_tables}
    )
    WHERE DictContains($rule_revision_ids_to_rule_ids, rule_id)
);

$rule_ids_table_with_thresholds = (
    SELECT yandexuid, rule_id
    FROM $rule_ids_table
    GROUP BY yandexuid, rule_id
    HAVING $apply_min_max_days_thresholds(rule_id, AGGREGATE_LIST_DISTINCT(`date`))
);

INSERT INTO `{output_table}` WITH TRUNCATE
SELECT
    CAST(yandexuid AS String) AS id,
    'yandexuid' AS id_type,
    AGGREGATE_LIST_DISTINCT(rule_id) AS rule_lab_ids
FROM $rule_ids_table_with_thresholds
GROUP BY yandexuid
ORDER BY id, id_type;
"""


def convert_to_exports(key, rows, rule_lab_id_to_segment_export, rule_ids_to_exclude_from_aggregated_table):
    """Turn the rule ids collected for one (id, id_type) key into an export record.

    Args:
        key: mapping with the 'id' and 'id_type' of the group.
        rows: iterable of rows of that group; each row's 'rule_lab_ids' is an
            iterable of rule ids.
        rule_lab_id_to_segment_export: mapping rule id -> (keyword_id, segment_id).
        rule_ids_to_exclude_from_aggregated_table: iterable of rule ids to ignore.

    Yields:
        Exactly one dict with 'id', 'id_type' and whatever segment fields
        ``add_segment_to_record`` added for the exported rules. Rule ids that
        are falsy, excluded or absent from the export mapping add nothing.
    """
    collected_ids = {rule_id for row in rows for rule_id in row['rule_lab_ids']}
    exportable_ids = collected_ids.difference(rule_ids_to_exclude_from_aggregated_table)

    record = {'id': key['id'], 'id_type': key['id_type']}
    for rule_id in exportable_ids:
        # temporary hack for rule_revision_id == 0 (falsy ids are skipped),
        # and rules without an export definition are ignored as well
        if not rule_id or rule_id not in rule_lab_id_to_segment_export:
            continue

        keyword_id, segment_id = rule_lab_id_to_segment_export[rule_id]
        add_segment_to_record(record, keyword_id, segment_id)

    yield record


class DailyRulesTarget(luigi.Target):
    """Luigi target for one daily rules table.

    The table lives at ``<config.DAILY_STANDARD_HEURISTIC_DIRECTORY>/<class_name>/<date>``.
    The target counts as existing only when every requested rule revision id is
    already listed in the table's 'rule_ids' attribute.
    """

    def __init__(self, date, class_name, rule_revision_ids, yt_client=None):
        """Remember the requested revisions and build the table path.

        Args:
            date: date string, used as the last path component.
            class_name: name of the producing task class, used as a path component.
            rule_revision_ids: iterable of rule revision ids the table must contain.
            yt_client: optional YT client to use instead of creating one on demand.
        """
        self.date = date
        self.rule_revision_ids = rule_revision_ids
        self._external_yt_client = yt_client
        self.target_table = os.path.join(
            config.DAILY_STANDARD_HEURISTIC_DIRECTORY,
            class_name,
            date,
        )

    @property
    def yt(self):
        """YT client: the injected one if given, otherwise a new one from yt_utils on each access."""
        injected_client = self._external_yt_client
        if injected_client is None:
            return yt_utils.get_yt_client()

        return injected_client

    def get_rule_ids(self):
        """Return the set stored in the table's 'rule_ids' attribute (empty if the table or attribute is missing)."""
        if not self.yt.exists(self.target_table):
            return set()

        stored_rule_ids = self.yt.get_attribute(self.target_table, 'rule_ids', default=[])
        return set(stored_rule_ids)

    @property
    def rule_revision_ids_to_be_prepared(self):
        """Frozenset of requested rule revision ids that the table does not list yet."""
        requested_ids = set(self.rule_revision_ids)
        return frozenset(requested_ids.difference(self.get_rule_ids()))

    @cached_property
    def table(self):
        """Path of the target table."""
        return self.target_table

    def exists(self):
        """Return True when no requested rule revision id is left to prepare."""
        pending_ids = self.rule_revision_ids_to_be_prepared
        return not pending_ids


class DailyRulesProcessor(luigi_utils.BaseYtTask):
    """Base task that produces the daily rules table for one date.

    Subclasses must implement ``compute`` and may override ``compute_pre_tx``.
    The output is a ``DailyRulesTarget`` named after the concrete class, so the
    task is considered complete only when the output table's 'rule_ids'
    attribute already lists every id in ``rule_revision_ids``.
    """
    priority = 100
    task_group = 'constructor_segments'
    date = luigi.Parameter()
    # Not significant: does not take part in the luigi task identity.
    rule_revision_ids = luigi.Parameter(significant=False)

    def __init__(self, *args, **kwargs):
        """Create the task and the single output target shared by all ``output()`` calls."""
        super(DailyRulesProcessor, self).__init__(*args, **kwargs)
        self.output_target = DailyRulesTarget(self.date, self.__class__.__name__, self.rule_revision_ids)

    def output(self):
        """Return the ``DailyRulesTarget`` created in ``__init__``."""
        return self.output_target

    @property
    def rule_revision_ids_to_be_prepared(self):
        """Rule revision ids not yet listed on the output table (delegates to the target)."""
        return self.output_target.rule_revision_ids_to_be_prepared

    def run(self):
        """Compute rows into a temporary table and merge or move them into the output table.

        Afterwards the output table's 'rule_ids' attribute is set to the sorted
        ``rule_revision_ids`` and 'generate_date' to ``date``.
        """
        # Each access to this property re-reads the table state via the target.
        if self.rule_revision_ids_to_be_prepared:
            self.logger.info('self.rule_revision_ids_to_be_prepared: {}'.format(self.rule_revision_ids_to_be_prepared))

        with loggers.TimeTracker(self.__class__.__name__):
            with self.yt.TempTable() as tmp_table:
                # Optional preparation hook, called before the transaction is opened.
                # NOTE(review): self.input() presumably comes from requires() of a subclass - confirm.
                self.compute_pre_tx(self.input().table, tmp_table)
                with self.yt.Transaction() as transaction:
                    self.compute(self.input().table, tmp_table, transaction)

                    # Keep rows of an already existing output table by merging the new rows
                    # into it; otherwise the temporary table simply becomes the output.
                    if self.yt.exists(self.output().table):
                        self.yt.run_merge([tmp_table, self.output().table], self.output().table)
                    else:
                        self.yt.move(tmp_table, self.output().table, recursive=True)

                    # 'rule_ids' is the attribute DailyRulesTarget.get_rule_ids() reads
                    # to decide whether the target exists.
                    self.yt.set_attribute(self.output().table, 'rule_ids', sorted(self.rule_revision_ids))
                    self.yt.set_attribute(self.output().table, 'generate_date', self.date)

    def compute_pre_tx(self, input_table, output_table):
        """Hook called before the transaction starts; does nothing by default."""
        pass

    def compute(self, input_table, output_table, tx):
        """Fill ``output_table`` from ``input_table`` within transaction ``tx``; must be overridden.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError


class DailyLogsAggregator(luigi_utils.BaseYtTask):
    """Aggregate several daily rule tables into one table of rule ids per yandexuid.

    For each task type in ``tasks`` and each date produced by
    ``date_helpers.generate_back_dates(date, n_daily_logs_to_take)`` a daily
    task is required. Their output tables are then combined by the
    ``apply_days_of_activity_threshold_template`` YQL query.

    Parameters:
        date: date of the aggregation.
        tasks: a task class, or a tuple of task classes, to run for every day.
        n_daily_logs_to_take: passed to ``generate_back_dates`` to pick the days.
        min_days_thresholds_by_rule_id: rule id -> required number of active days.
        max_days_thresholds_by_rule_id: rule id -> number of days to look back.
        rule_revision_id_to_rule_id: rule revision id -> rule id.
        task_params: extra keyword arguments for the daily tasks: a single
            mapping, or a tuple with one mapping per entry of ``tasks``.
            When omitted or empty, every daily task gets only its ``date``.
    """
    task_group = 'constructor_segments'
    date = luigi.Parameter()

    tasks = luigi.Parameter(significant=False)
    n_daily_logs_to_take = luigi.IntParameter(significant=False)
    min_days_thresholds_by_rule_id = luigi.Parameter(significant=False)
    max_days_thresholds_by_rule_id = luigi.Parameter(significant=False)
    rule_revision_id_to_rule_id = luigi.Parameter(significant=False)
    task_params = luigi.Parameter(default=None, significant=False)

    def __init__(self, *args, **kwargs):
        """Normalize ``tasks`` / ``task_params`` and instantiate the daily tasks to require."""
        super(DailyLogsAggregator, self).__init__(*args, **kwargs)

        # support for multiple task types in one aggregator (example: aggregate urls + hosts simultaneously)
        if not isinstance(self.tasks, tuple):
            self.tasks = [self.tasks]

        if not self.task_params:
            # No extra parameters: one empty mapping per task type. The previous
            # code turned the default None into [[{}]], and dict.update([{}])
            # then raised ValueError while building the task arguments.
            self.task_params = [{} for _ in self.tasks]
        elif not isinstance(self.task_params, tuple):
            self.task_params = [self.task_params]

        self.tasks_to_run = []
        for task_index, task_type in enumerate(self.tasks):
            for date in date_helpers.generate_back_dates(self.date, self.n_daily_logs_to_take):
                task_arguments = {'date': date}
                task_arguments.update(self.task_params[task_index])
                self.tasks_to_run.append(task_type(**FrozenDict(task_arguments)))

    def requires(self):
        """Require every daily task created in ``__init__``."""
        return {
            'DayProcessorTasks': self.tasks_to_run,
        }

    def output(self):
        """Target for the aggregated table of this class.

        The target is built with the expected 'generate_date' and 'rule_ids'
        attribute values, which are the same values ``run`` writes on the table.
        """
        return luigi_utils.YtTableMultipleAttributeTarget(
            os.path.join(
                config.AGGREGATED_STANDARD_HEURISTIC_DIRECTORY,
                self.__class__.__name__,
            ),
            {
                'generate_date': self.date,
                'rule_ids': sorted(self.rule_revision_id_to_rule_id.keys()),
            }
        )

    def run(self):
        """Run the aggregation query over all daily tables and stamp the output table.

        Raises:
            ValueError: if one of the daily tables has no ``rule_id`` column.
        """
        input_tables = []
        for target in self.input()['DayProcessorTasks']:
            if 'rule_id' not in self.yt.get_yt_schema_dict_from_table(target.target_table):
                raise ValueError('Table {} does not has rule_id field'.format(target.target_table))
            input_tables.append('`{}`'.format(target.target_table))

        min_days_thresholds_by_rule_id = dict(self.min_days_thresholds_by_rule_id)
        self.logger.info('min_days_thresholds_by_rule_id = {}'.format(min_days_thresholds_by_rule_id))

        with self.yt.Transaction() as transaction:
            # The three strings below are the bodies of the AsDict(...) calls in the query template.
            thresholds = '\n'.join(
                ['("{}", {}),'.format(key, value) for key, value in min_days_thresholds_by_rule_id.iteritems()]
            )
            rule_revision_ids_to_rule_ids = '\n'.join(
                ['({}ul, "{}"),'.format(key, value) for key, value in self.rule_revision_id_to_rule_id.iteritems()]
            )

            min_date_by_rule_id = []
            for rule_id, max_days in self.max_days_thresholds_by_rule_id.iteritems():
                # skip default (35) and unsupported (>35) values
                if max_days >= config.NUMBER_OF_DAYS_TO_CALCULATE_RULES:
                    continue

                # NOTE(review): presumably the date max_days before self.date - confirm in date_helpers.
                date_string = date_helpers.get_date_from_past(self.date, max_days)
                min_date_by_rule_id.append('("{}", "{}"),'.format(rule_id, date_string))
            min_date_by_rule_id = '\n'.join(min_date_by_rule_id)

            self.yql.query(
                apply_days_of_activity_threshold_template.format(
                    output_table=self.output().table,
                    daily_processed_tables=',\n'.join(input_tables),
                    min_active_days_thresholds_by_rule_id=thresholds,
                    rule_revision_ids_to_rule_ids=rule_revision_ids_to_rule_ids,
                    min_date_by_rule_id=min_date_by_rule_id,
                ),
                transaction=transaction,
            )

            # Stamp the table with the attribute values output() is built with.
            self.yt.set_attribute(
                self.output().table,
                'generate_date',
                self.date,
            )

            self.yt.set_attribute(
                self.output().table,
                'rule_ids',
                sorted(self.rule_revision_id_to_rule_id.keys()),
            )
