import collections

import luigi
import yt.yson as yson

from crypta.profile.lib.frozen_dict import FrozenDict
from crypta.profile.lib.segments import condition_helpers
from crypta.profile.runners.segments.lib.constructor_segments.common import utils
from crypta.profile.runners.segments.lib.constructor_segments.daily_rule_processors.urls import url_rules
from crypta.profile.runners.segments.lib.constructor_segments.daily_rule_processors.words import word_rules
from crypta.profile.utils.clients.clickhouse import ClickhouseClient
from crypta.profile.utils.config import config


class GetStandardSegmentsByMetricaUrlsAndHostsDayProcessor(url_rules.DailyUrlRulesProcessor):
    """Daily URL rules processor configured for the "metrica" source.

    Only class-level settings are overridden here; all processing logic is
    inherited from ``url_rules.DailyUrlRulesProcessor``.
    """
    # Source label; how it is consumed is defined by the base class.
    source = "metrica"
    # Input location taken from config.METRICS_PARSED_DIR
    # (presumably the parsed Metrica logs - confirm against config).
    input_dir = config.METRICS_PARSED_DIR
    # Index location taken from config.METRICS_URLS_INDEX_DIR.
    index_dir = config.METRICS_URLS_INDEX_DIR
    # Flag read by the base class; exact effect is not visible in this file.
    is_metrica = True


class GetStandardSegmentsByMetricaTitlesDayProcessor(word_rules.DailyWordRulesProcessor):
    """Daily word rules processor configured with the Metrica directories.

    Only class-level settings are overridden here; all processing logic is
    inherited from ``word_rules.DailyWordRulesProcessor``.
    """
    # Input location taken from config.METRICS_PARSED_DIR
    # (presumably the parsed Metrica logs - confirm against config).
    input_dir = config.METRICS_PARSED_DIR
    # Index location taken from config.METRICS_WORDS_INDEX_DIR.
    index_dir = config.METRICS_WORDS_INDEX_DIR
    # NOTE(review): looks like a per-job data size limit for the YQL word
    # filter step in the base class - confirm there.
    yql_word_filter_data_size_per_job = "1G"


# ClickHouse query template, filled in with str.format() by
# GetStandardSegmentsByMetricaCountersAndGoalsDailyProcessor.compute().
# Placeholders:
#   {counter_ids}           - comma-separated counter ids for the IN (...) list
#   {start_date}            - date string passed to toDate()
#   {additional_conditions} - extra predicate appended to the WHERE clause;
#                             either empty or " AND hasAny(Goals.ID, [...])"
# The GROUP BY yields one row per distinct UserID, exposed as `yandexuid`.
clickhouse_query_template = """
SELECT yandexuid
FROM visits_all
WHERE CounterID IN ({counter_ids}) AND StartDate == toDate('{start_date}') {additional_conditions}
GROUP BY UserID AS yandexuid
"""

# YQL statement template, filled in with str.format() by
# GetStandardSegmentsByMetricaCountersAndGoalsDailyProcessor.compute().
# Replaces the contents of {output_table} (WITH TRUNCATE) with the distinct
# (yandexuid, rule_id) pairs found in {input_table}.
output_template = """
INSERT INTO `{output_table}` WITH TRUNCATE
SELECT yandexuid, rule_id
FROM `{input_table}`
GROUP BY yandexuid, rule_id
"""


class FakeYtTarget(luigi.Target):
    """Placeholder luigi target that always reports itself as existing."""
    def __init__(self):
        # There is no real table behind this target.
        self.table = None

    def exists(self):
        """Return True unconditionally."""
        return True


class FakeInput(luigi.ExternalTask):
    """External task whose only output is a ``FakeYtTarget``.

    Returned from ``requires()`` of
    ``GetStandardSegmentsByMetricaCountersAndGoalsDailyProcessor``; because
    the target's ``exists()`` is always True, this dependency never has to
    be produced.
    """
    def output(self):
        """Return a fresh ``FakeYtTarget``."""
        return FakeYtTarget()


class GetStandardSegmentsByMetricaCountersAndGoalsDailyProcessor(utils.DailyRulesProcessor):
    """Daily processor for rules defined by Metrica counter ids and goal ids.

    For every rule revision it queries the ClickHouse ``visits_all`` table
    for users seen on the rule's counters (optionally restricted to the
    rule's goals) on ``self.date`` and stores the distinct
    ``(yandexuid, rule_id)`` pairs in the output table.
    """

    task_group = 'constructor_segments'

    # Mappings "rule revision id -> collection of ids rendered as strings",
    # in the shape built by prepare_rules().
    metrica_counter_ids_conditions = luigi.Parameter(significant=False)
    metrica_goal_ids_conditions = luigi.Parameter(significant=False)

    priority = 100

    def __init__(self, *args, **kwargs):
        super(GetStandardSegmentsByMetricaCountersAndGoalsDailyProcessor, self).__init__(*args, **kwargs)

    def requires(self):
        """Depend only on the always-present ``FakeInput`` stub."""
        return FakeInput()

    def compute(self, input_table, output_table, tx):
        """Collect matching users per rule from ClickHouse and write them out.

        Args:
            input_table: not used by this implementation.
            output_table: table overwritten with distinct
                ``(yandexuid, rule_id)`` rows.
            tx: transaction handed to the final YQL query.
        """
        clickhouse = ClickhouseClient(logger=self.logger)

        with self.yt.TempTable() as temp_table:
            self.yt.create_empty_table(temp_table, schema=utils.daily_schema)

            for rule_revision_id, counter_ids in self.metrica_counter_ids_conditions.iteritems():
                # A non-empty whitelist restricts processing to the listed revisions.
                if self.rule_revision_ids_to_be_prepared and \
                        rule_revision_id not in self.rule_revision_ids_to_be_prepared:
                    continue

                # Rules that also specify goals get an extra WHERE predicate.
                goal_clause = ''
                if rule_revision_id in self.metrica_goal_ids_conditions:
                    goal_clause = ' AND hasAny(Goals.ID, [{goal_ids}])'.format(
                        goal_ids=', '.join(self.metrica_goal_ids_conditions[rule_revision_id]),
                    )

                query = clickhouse_query_template.format(
                    start_date=self.date,
                    counter_ids=', '.join(counter_ids),
                    additional_conditions=goal_clause,
                )
                self.logger.info(query)

                result = clickhouse.make_query(query)
                if not result:
                    # Only reported; the result is still processed below.
                    self.logger.error('Empty result')

                # One yandexuid per response line; zero ids are dropped.
                parsed_ids = (int(line) for line in result.splitlines())
                records = [
                    {
                        'yandexuid': yson.YsonUint64(parsed_id),
                        'rule_id': yson.YsonUint64(rule_revision_id),
                    }
                    for parsed_id in parsed_ids
                    if parsed_id != 0
                ]

                self.yt.write_table(self.yt.TablePath(temp_table, append=True), records)

            # Deduplicate the accumulated rows into the output table.
            final_query = output_template.format(
                input_table=temp_table,
                output_table=output_table,
            )
            self.yql.query(final_query, transaction=tx)

    @classmethod
    def prepare_rules(cls, rule_conditions, segments_config):
        """Group counter ids and goal ids of the given conditions by revision.

        Args:
            rule_conditions: iterable of objects exposing ``values`` and
                ``revision``.
            segments_config: not used by this implementation.

        Returns:
            A 2-tuple: a dict with the ``metrica_counter_ids_conditions`` and
            ``metrica_goal_ids_conditions`` values (each a ``FrozenDict``
            mapping revision to a set of id strings), and an empty list.
        """
        counters_by_revision = collections.defaultdict(set)
        goals_by_revision = collections.defaultdict(set)

        for condition in rule_conditions:
            pairs = condition_helpers.normalize_counters_and_goals(condition.values)

            for counter_id, goal_id in pairs:
                counters_by_revision[condition.revision].add(str(counter_id))

                # A missing goal means the rule matches the whole counter.
                if goal_id is None:
                    continue
                goals_by_revision[condition.revision].add(str(goal_id))

        prepared = {
            "metrica_counter_ids_conditions": FrozenDict(counters_by_revision),
            "metrica_goal_ids_conditions": FrozenDict(goals_by_revision),
        }
        return prepared, []
