from collections import defaultdict
import functools
import json

from cached_property import cached_property
import luigi
import yt.yson as yson

from crypta.lab.proto import constructor_pb2
from crypta.profile.lib import date_helpers
from crypta.profile.lib.frozen_dict import FrozenDict
from crypta.profile.runners.segments.lib.constructor_segments.common.utils import (
    DailyLogsAggregator,
    profile_schema,
    convert_to_exports,
    lal_segments_config,
)
from crypta.profile.runners.segments.lib.constructor_segments.daily_rule_processors.browser import (
    GetStandardSegmentsByBrowserUrlsAndHostsDayProcessor,
    GetStandardSegmentsByBrowserTitlesDayProcessor,
)
from crypta.profile.runners.segments.lib.constructor_segments.daily_rule_processors.catalogia.catalogia import GetStandardSegmentsByCatalogiaDailyProcessor
from crypta.profile.runners.segments.lib.constructor_segments.daily_rule_processors.efir.efir import GetStandardSegmentsByEfirData
from crypta.profile.runners.segments.lib.constructor_segments.daily_rule_processors.mobile_apps.apps import GetStandardSegmentsByMobileApp
from crypta.profile.runners.segments.lib.constructor_segments.daily_rule_processors.music import GetStandardSegmentsByMusicLikes
from crypta.profile.runners.segments.lib.constructor_segments.daily_rule_processors.precalculated_tables.precalculated_tables import GetStandardSegmentsByPrecalculatedTables
from crypta.profile.runners.segments.lib.constructor_segments.daily_rule_processors.metrica import (
    GetStandardSegmentsByMetricaUrlsAndHostsDayProcessor,
    GetStandardSegmentsByMetricaTitlesDayProcessor,
    GetStandardSegmentsByMetricaCountersAndGoalsDailyProcessor,
)
from crypta.profile.runners.segments.lib.constructor_segments.daily_rule_processors.search_requests import (
    GetStandardSegmentsBySearchRequestsDayProcessor,
    GetStandardSegmentsBySearchResultsDayProcessor,
)
from crypta.profile.runners.segments.lib.constructor_segments.daily_rule_processors.yandex_referrer import GetStandardSegmentsByYandexReferrerUrlsAndHostsDayProcessor
from crypta.profile.utils import (
    luigi_utils,
    utils,
)
from crypta.profile.utils.api import get_api
from crypta.profile.utils.config import config

# Daily rule processors and the RuleCondition.Source values each of them handles.
#
# ConstructorSegmentsConfig.prepare_rules() iterates this mapping: for every processor
# class it collects the approved rule conditions of the listed sources and, if there are
# any, passes them to the class's own prepare_rules(). The resulting per-class parameters
# are handed to DailyLogsAggregator in BuildConstructorSegments.requires().
#
# A source may be listed under several processors (SITES, WORDS, PUBLIC_SITES,
# PUBLIC_WORDS); its rule conditions are then given to each of those processors.
DAILY_TASK_SOURCES = {
    GetStandardSegmentsByMetricaUrlsAndHostsDayProcessor: [
        constructor_pb2.RuleCondition.Source.METRICA_SITES,
        constructor_pb2.RuleCondition.Source.SITES
    ],
    GetStandardSegmentsByBrowserUrlsAndHostsDayProcessor: [
        constructor_pb2.RuleCondition.Source.BROWSER_SITES,
        constructor_pb2.RuleCondition.Source.PUBLIC_SITES,
        constructor_pb2.RuleCondition.Source.SITES
    ],
    GetStandardSegmentsByYandexReferrerUrlsAndHostsDayProcessor: [
        constructor_pb2.RuleCondition.Source.YANDEX_REFERRER,
        constructor_pb2.RuleCondition.Source.PUBLIC_SITES
    ],
    GetStandardSegmentsByMetricaTitlesDayProcessor: [
        constructor_pb2.RuleCondition.Source.METRICA_TITLES,
        constructor_pb2.RuleCondition.Source.WORDS
    ],
    GetStandardSegmentsByBrowserTitlesDayProcessor: [
        constructor_pb2.RuleCondition.Source.BROWSER_TITLES,
        constructor_pb2.RuleCondition.Source.WORDS,
        constructor_pb2.RuleCondition.Source.PUBLIC_WORDS
    ],
    GetStandardSegmentsBySearchRequestsDayProcessor: [
        constructor_pb2.RuleCondition.Source.SEARCH_REQUESTS,
        constructor_pb2.RuleCondition.Source.WORDS,
        constructor_pb2.RuleCondition.Source.PUBLIC_WORDS
    ],
    GetStandardSegmentsBySearchResultsDayProcessor: [constructor_pb2.RuleCondition.Source.SEARCH_RESULTS_HOSTS],
    GetStandardSegmentsByCatalogiaDailyProcessor: [constructor_pb2.RuleCondition.Source.CATALOGIA],
    GetStandardSegmentsByEfirData: [
        constructor_pb2.RuleCondition.Source.EFIR_CHANNELS,
        constructor_pb2.RuleCondition.Source.EFIR_PROGRAMS
    ],
    GetStandardSegmentsByMetricaCountersAndGoalsDailyProcessor: [constructor_pb2.RuleCondition.Source.METRICA_COUNTERS_AND_GOALS],
}

# Processors that BuildConstructorSegments.requires() instantiates directly as its own
# requirements (unlike the daily processors, they are not passed to DailyLogsAggregator),
# together with the RuleCondition.Source values each of them handles.
# Processed by ConstructorSegmentsConfig.prepare_rules() the same way as DAILY_TASK_SOURCES.
AGGREGATE_TASK_SOURCES = {
    GetStandardSegmentsByMobileApp: [constructor_pb2.RuleCondition.Source.APPS],
    GetStandardSegmentsByPrecalculatedTables: [constructor_pb2.RuleCondition.Source.PRECALCULATED_TABLES],
    GetStandardSegmentsByMusicLikes: [constructor_pb2.RuleCondition.Source.MUSIC_LIKES],
}

# Rule condition state that is taken into account: compared with `rule_condition.state`
# and passed as `state` to the API's getRuleCondition call.
APPROVED = 'APPROVED'


class ConstructorSegmentsConfig(object):
    """Collects constructor rules, their exports and LaL settings from the Lab API.

    The instance is filled in by `read()` and `get_lal_exports()`; afterwards
    `prepare_rules()` turns the collected rule conditions into parameters of the
    processor classes listed in DAILY_TASK_SOURCES and AGGREGATE_TASK_SOURCES.

    Args:
        logger: logger-like object (`debug` and `info` are used).
        yt: YT client used to read the helper tables.
        api: API client; only its `lab` section is used.
    """

    def __init__(self, logger, yt, api):
        self.logger = logger
        self.yt = yt
        self.api = api

        # revision of an approved rule condition (YsonUint64) -> id of the owning rule
        self.rule_revision_id_to_rule_id = {}
        # `rule_condition.source` -> approved rule conditions with that source
        self.source_types_to_rule_conditions = defaultdict(list)

        # rule id -> `rule.minDays` / `rule.days`, filled in by read()
        self.min_days_thresholds_by_rule_id = dict()
        self.max_days_thresholds_by_rule_id = dict()

        # both are filled in by get_lal_exports()
        self.lal_config = list()
        self.rule_ids_to_exclude_from_aggregated_table = set()

    @cached_property
    def existing_hosts(self):
        """Return {site: (metrica_visitors_count, bar_visitors_count)} read from YT (cached)."""
        return {
            row['site']: (row['metrica_visitors_count'], row['bar_visitors_count'])
            for row in self.yt.read_table(config.YANDEXUID_METRICA_BROWSER_COUNTER_TABLE)
        }

    @cached_property
    def existing_apps(self):
        """Return the set of `app` values found in config.APP_IDF_TABLE (cached)."""
        return {row['app'] for row in self.yt.read_table(config.APP_IDF_TABLE)}

    @cached_property
    def rules(self):
        """Return all rules reported by the Lab API (fetched once and cached)."""
        rules = self.api.lab.getAllRules().result()
        # TODO(unretrofied): CRYPTA-16120 debug logging
        self.logger.debug("Get rules from API: %s", rules)
        return rules

    @cached_property
    def rule_id_to_segment_export(self):
        """Return {rule id: (keyword id, segment id)} for the exports that are in use.

        Exports in the 'disabled' or 'deleted' state, exports of the 'shortterm' type
        and exports with keyword id 602 are skipped. If several exports refer to the
        same rule, the last one returned by the API wins.
        """
        exports_by_rule_id = {}
        for export in self.api.lab.getExportsWithRuleId().result():
            if export.state.lower() in ('disabled', 'deleted'):
                continue
            if export.type.lower() == 'shortterm' or export.keywordId == 602:
                continue
            exports_by_rule_id[export.ruleId] = (export.keywordId, export.segmentId)
        return exports_by_rule_id

    def get_lal_exports(self):
        """Collect LaL settings of all segment exports into `self.lal_config`.

        Every export with a non-empty `lal` field is expected to carry a JSON object like
        {"includeInput": True, "exponent": 1, "maxCoverage": 0, "ruleId": "rule-65217c0e"}.
        A config that is not valid JSON, is not a JSON object, misses a required key or
        refers to an unknown keyword id is logged as broken and skipped.

        Entries are appended, so calling the method twice duplicates them.

        Raises:
            TypeError: if `includeInput` of some config is not a bool.
        """
        for segment in self.api.lab.getAllSegments().result():
            for export in segment.exports.exports:
                if not export.lal:
                    continue

                try:
                    data = json.loads(export.lal)
                    if not isinstance(data, dict):
                        # Valid JSON of a wrong shape (list, null, number, ...) used to escape
                        # the handler below as an uncaught TypeError; report it as broken instead.
                        raise ValueError('lal config is not a JSON object')

                    lal = {
                        'rule_id': data['ruleId'],
                        'include_input': data['includeInput'],
                        'segment_type': utils.bb_keyword_id_to_field_name[export.keywordId],
                        'segment_id': export.segmentId,
                    }

                    include_input = lal['include_input']

                    # Deliberately not handled below: a wrong type here must fail loudly.
                    if not isinstance(include_input, bool):
                        raise TypeError('includeInput is not of type bool, include_input: {}, type: {}, export_id: {}'.format(include_input, type(include_input), export.id))

                    # A non-zero maxCoverage takes precedence over exponent.
                    if data['maxCoverage'] != 0:
                        lal['max_coverage'] = data['maxCoverage']
                    else:
                        lal['exponent'] = data['exponent']
                    self.lal_config.append(lal)

                    # There are rules that only used as lal cores (having param includeInput == False). They should not be included in aggregated standard segments table.
                    if export.ruleId == data['ruleId'] and not data['includeInput']:
                        self.rule_ids_to_exclude_from_aggregated_table.add(export.ruleId)

                except (ValueError, KeyError):
                    self.logger.info('Broken lal config for export: {}, config: {}'.format(export.id, export.lal))

    def read(self):
        """Register thresholds and approved conditions of every rule that is in use.

        A rule is in use when it has an active export (see `rule_id_to_segment_export`)
        or is listed in `lal_segments_config`.
        """
        for rule in self.rules:
            if rule.id not in self.rule_id_to_segment_export and rule.id not in lal_segments_config:
                # do not process rules that do not have active export and are not used for lal creation
                continue

            # NOTE(review): the default values mentioned in the two comments below are not
            # set in this file; presumably they are already applied to minDays / days.
            # default threshold of 2 to remove noise, but leave enough users above threshold
            self.min_days_thresholds_by_rule_id[rule.id] = rule.minDays

            # 35 is a 7-divided month to avoid noise from different number of workdays and weekends
            self.max_days_thresholds_by_rule_id[rule.id] = rule.days
            self.add_rule_conditions(rule.conditions, rule.id)

    def add_rule_conditions(self, rule_conditions, rule_id):
        """Index the approved conditions of a rule by revision and by source.

        Args:
            rule_conditions: conditions of the rule; non-approved ones are ignored.
            rule_id: id of the rule the conditions belong to.
        """
        for rule_condition in rule_conditions:
            if rule_condition.state != APPROVED:
                continue

            rule_revision_id = yson.YsonUint64(rule_condition.revision)
            self.rule_revision_id_to_rule_id[rule_revision_id] = rule_id
            self.source_types_to_rule_conditions[rule_condition.source].append(rule_condition)

    def get_rule_conditions(self, sources):
        """Return a new list with the approved rule conditions of the given sources.

        Args:
            sources: iterable of RuleCondition.Source enum values; the conditions are
                returned in the order of the sources.
        """
        source_name = constructor_pb2.RuleCondition.Source.Name
        # Flatten in a single pass; `sum(lists, [])` copies the accumulated list on every step.
        return [
            rule_condition
            for source in sources
            for rule_condition in self.source_types_to_rule_conditions[source_name(source)]
        ]

    def get_and_put_rule_condition(self, rule_lab_id, source_name):
        """Fetch the approved condition of a rule for `source_name` and put its values back via the API."""
        rule_condition = self.api.lab.getRuleCondition(id=rule_lab_id, source=source_name, state=APPROVED).result()
        self.api.lab.putRuleCondition(id=rule_lab_id, source=source_name, values=rule_condition.values).result()

    def prepare_rules(self, update_rejected=True):
        """Build parameters for the daily and the aggregate processor classes.

        For every class of DAILY_TASK_SOURCES and AGGREGATE_TASK_SOURCES that has at
        least one rule condition, `cls.prepare_rules(rule_conditions, self)` is called.
        It is expected to return a pair (task parameters, rejected rule conditions);
        the revisions of all passed conditions are added to the parameters under
        "rule_revision_ids".

        Args:
            update_rejected: when true, every rejected rule condition is re-submitted
                through `get_and_put_rule_condition` (this makes API calls).

        Returns:
            Tuple (daily_tasks, aggregate_tasks) of {processor class: task parameters}.
        """
        daily_tasks = {}
        aggregate_tasks = {}

        for tasks, cls_to_sources in (
            (daily_tasks, DAILY_TASK_SOURCES),
            (aggregate_tasks, AGGREGATE_TASK_SOURCES),
        ):
            for cls, sources in cls_to_sources.items():
                rule_conditions = self.get_rule_conditions(sources)
                if not rule_conditions:
                    continue

                tasks[cls], rejected_rule_conditions = cls.prepare_rules(rule_conditions, self)
                tasks[cls]["rule_revision_ids"] = frozenset(rule_condition.revision for rule_condition in rule_conditions)

                if update_rejected:
                    for rule_condition in rejected_rule_conditions:
                        self.get_and_put_rule_condition(self.rule_revision_id_to_rule_id[rule_condition.revision], rule_condition.source)

        return daily_tasks, aggregate_tasks


class BuildConstructorSegments(luigi_utils.BaseYtTask):
    """Builds the aggregated table of constructor (standard heuristic) segments.

    The task reduces the outputs of its requirements (see `requires`) into
    config.STANDARD_HEURISTIC_RESULT_TABLE and stamps the table with the attributes
    from `_output_attributes`, which `output()` uses as the completeness criteria.
    """

    date = luigi.Parameter()
    number_of_days = config.NUMBER_OF_DAYS_TO_CALCULATE_RULES
    task_group = 'constructor_segments'

    def __init__(self, *args, **kwargs):
        super(BuildConstructorSegments, self).__init__(*args, **kwargs)
        # The configuration is loaded from the API at construction time because
        # output() and requires() both depend on it.
        self.segments_config = ConstructorSegmentsConfig(self.logger, self.yt, get_api())
        self.segments_config.read()
        self.segments_config.get_lal_exports()

    def _output_attributes(self):
        """Return the attributes of the output table.

        Shared by output() and run() so that the values the target is checked against
        and the values written to the table cannot diverge.
        """
        return {
            'generate_date': str(self.date),
            'rule_ids': sorted(self.segments_config.rule_revision_id_to_rule_id.keys()),
            'lal_config': self.segments_config.lal_config,
        }

    def output(self):
        """Return the result table target together with its expected attributes."""
        return luigi_utils.YtTableMultipleAttributeTarget(
            config.STANDARD_HEURISTIC_RESULT_TABLE,
            self._output_attributes(),
        )

    def run(self):
        """Reduce the required tables into the result table and drop outdated daily nodes."""
        self.logger.info('rule_id_to_segment_export={}'.format(
            dict(self.segments_config.rule_id_to_segment_export)
        ))

        # requires() omits a source that has no rule conditions, so only the inputs
        # that are actually present may be read; the original order is kept.
        # input() is resolved once: every call re-evaluates requires().
        inputs = self.input()
        source_tables = [
            inputs[key].table
            for key in ('yandexuids', 'apps', 'music_likes', 'precalculated_tables')
            if key in inputs
        ]
        output_table = self.output().table

        with self.yt.Transaction():
            self.yt.create_empty_table(
                output_table,
                schema=profile_schema,
            )
            self.yt.run_reduce(
                functools.partial(
                    convert_to_exports,
                    rule_lab_id_to_segment_export=self.segments_config.rule_id_to_segment_export,
                    rule_ids_to_exclude_from_aggregated_table=self.segments_config.rule_ids_to_exclude_from_aggregated_table,
                ),
                source_tables,
                output_table,
                reduce_by=('id', 'id_type')
            )
            self.yt.run_sort(
                output_table,
                sort_by='id'
            )
            for name, value in self._output_attributes().items():
                self.yt.set_attribute(output_table, name, value)

        # Clean up nodes older than `number_of_days` days in every daily directory.
        for directory in self.yt.list(config.DAILY_STANDARD_HEURISTIC_DIRECTORY, absolute=True):
            old_nodes = luigi_utils.get_old_nodes(
                yt=self.yt,
                folder=directory,
                last_date=date_helpers.get_date_from_past(self.date, int(self.number_of_days)),
            )
            luigi_utils.remove_old_nodes(self.yt, self.logger, old_nodes)

    def requires(self):
        """Return the required tasks keyed by the name run() reads them under.

        DailyLogsAggregator ('yandexuids') is always required; the other tasks are
        required only when prepare_rules() produced parameters for them.
        """
        # lots of frozendicts to prevent luigi error "param is not hashable: dict"
        daily_tasks, aggregate_tasks = self.segments_config.prepare_rules()
        aggregate_tasks[DailyLogsAggregator] = {
            "tasks": daily_tasks.keys(),
            "n_daily_logs_to_take": self.number_of_days,
            "min_days_thresholds_by_rule_id": FrozenDict(self.segments_config.min_days_thresholds_by_rule_id),
            "max_days_thresholds_by_rule_id": FrozenDict(self.segments_config.max_days_thresholds_by_rule_id),
            "rule_revision_id_to_rule_id": FrozenDict(self.segments_config.rule_revision_id_to_rule_id),
            "task_params": daily_tasks.values(),
        }

        return {
            key: cls(date=self.date, **aggregate_tasks[cls])
            for key, cls in {
                'apps': GetStandardSegmentsByMobileApp,
                'music_likes': GetStandardSegmentsByMusicLikes,
                'precalculated_tables': GetStandardSegmentsByPrecalculatedTables,
                'yandexuids': DailyLogsAggregator,
            }.items()
            if cls in aggregate_tasks
        }
