#!/usr/bin/env python
# -*- coding: utf-8 -*-
import datetime
import os

import luigi

from crypta.profile.lib import date_helpers

from crypta.profile.utils.config import config
from crypta.profile.utils.yt_utils import get_yt_client
from crypta.profile.utils.luigi_utils import BaseYtTask, AttributeExternalInput, YtTarget


# YQL template rendered with str.format() by BBDumpProcessor.run().
#
# Format fields:
#   {catalogia_category_to_rule_revision_ids} - comma-separated AsTuple(...) entries for AsDict
#   {rule_revision_id_to_segment_ids}         - comma-separated AsTuple(...) entries for AsDict
#   {input_table}                             - table the rows are read from
#   {processed_table}                         - table that is overwritten (WITH TRUNCATE)
# Doubled braces ({{ }}) are str.format escapes for the literal braces of the YQL lambdas.
#
# The query keeps rows with a non-zero yandexuid whose category is a key of the
# category dict, maps each category to rule revision ids and those to segment ids
# (interest_lab_ids), flattens the list, and for every (yandexuid, interest_lab_id)
# pair keeps the maximum timestamp together with its formatted date and the
# timestamp rounded down to a multiple of 30 * 60 (datetime_bin).
# NOTE(review): the rounding presumably assumes `timestamp` is in seconds - confirm.
process_bb_dump_query = """
$catalogia_category_to_rule_revision_ids = AsDict(
{catalogia_category_to_rule_revision_ids}
);

$rule_revision_id_to_segment_ids = AsDict(
{rule_revision_id_to_segment_ids}
);

$convert_to_interest_lab_id = ($rule_revision_id) -> {{
    RETURN $rule_revision_id_to_segment_ids[$rule_revision_id];
}};

$round_to_datetime_bin = ($timestamp) -> {{
    RETURN $timestamp - $timestamp % (30 * 60);
}};

$format_to_datetime = DateTime::Format("%Y-%m-%d %H:%M:%S");
$format_to_date = DateTime::Format("%Y-%m-%d");

$preprocessed_data = (
    SELECT
        yandexuid,
        `timestamp`,
        ListFlatMap(
            ListFlatMap($catalogia_category_to_rule_revision_ids[category], $convert_to_interest_lab_id),
            ($x) -> {{RETURN $x;}}
        ) AS interest_lab_ids,
    FROM `{input_table}`
    WHERE yandexuid != 0 AND DictContains($catalogia_category_to_rule_revision_ids, category)
);

INSERT INTO `{processed_table}` WITH TRUNCATE
SELECT
    yandexuid,
    interest_lab_id,
    MAX(`timestamp`) AS `timestamp`,
    $format_to_date(CAST(MAX(`timestamp`) AS Datetime)) AS `date`,
    $format_to_datetime(CAST(CAST($round_to_datetime_bin(MAX(`timestamp`)) AS Uint32) AS Datetime)) AS datetime_bin,
FROM (
    SELECT yandexuid, `timestamp`, interest_lab_id
    FROM $preprocessed_data
    FLATTEN LIST BY interest_lab_ids AS interest_lab_id
)
GROUP BY yandexuid, interest_lab_id
ORDER BY yandexuid, interest_lab_id, `date`, datetime_bin;
"""


class YtTargetWithSetAttribute(YtTarget):
    """YtTarget that additionally requires a table attribute to cover a set of ids.

    The target counts as existing only when the parent ``exists()`` check
    passes and every expected id is present in the attribute stored on the
    table. Both the expected and the stored values are compared as sets of
    ``int``.
    """

    def __init__(self, table, attribute_name, attribute_value, allow_empty=False):
        """Remember the attribute name and the expected ids.

        :param table: table path, forwarded to ``YtTarget``.
        :param attribute_name: name of the table attribute to inspect.
        :param attribute_value: iterable of values convertible with ``int()``.
        :param allow_empty: forwarded to ``YtTarget``.
        """
        super(YtTargetWithSetAttribute, self).__init__(
            table, allow_empty=allow_empty, yt_client=None, columns=None,
        )
        self.attribute_name = attribute_name
        self.attribute_value = {int(item) for item in attribute_value}

    def exists(self):
        """Return True when the table exists and its attribute holds all expected ids."""
        client = get_yt_client()

        table_is_ready = super(YtTargetWithSetAttribute, self).exists()
        if not table_is_ready:
            return False

        # None is passed as the fallback argument; a None result is treated
        # as "target not complete".
        stored_value = client.get_attribute(self.table, self.attribute_name, None)
        if stored_value is None:
            return False

        stored_ids = {int(item) for item in stored_value}
        return self.attribute_value.issubset(stored_ids)


class BBDumpProcessor(BaseYtTask):
    """Render ``process_bb_dump_query`` for one date and run it.

    Input is ``config.BB_PARSED_DIR/<date>`` (required to carry the attribute
    ``closed=True``); output is
    ``config.INTERESTS_PROCESSED_FOLDER/<date>/processed_bb_dump``. After the
    query the output table is stamped with ``generate_datetime`` and with the
    sorted rule revision ids, which is what ``output()`` checks for.
    """

    # Used as a path component of both the input and the output table.
    date = luigi.Parameter()
    # Mapping: category -> iterable of rule revision ids.
    catalogia_category_to_rule_revision_ids = luigi.Parameter(significant=False)
    # Mapping: rule revision id -> iterable of segment ids.
    rule_revision_id_to_segment_ids = luigi.Parameter(significant=False)

    task_group = 'bigb_dump_for_interests'

    def requires(self):
        """Depend on the parsed dump table for ``self.date`` with ``closed=True``."""
        return AttributeExternalInput(
            os.path.join(config.BB_PARSED_DIR, self.date),
            attribute_name='closed',
            attribute_value=True,
        )

    def output(self):
        """Return the target for the processed table, keyed on the rule revision ids."""
        return YtTargetWithSetAttribute(
            os.path.join(
                config.INTERESTS_PROCESSED_FOLDER,
                self.date,
                'processed_bb_dump',
            ),
            attribute_name='rule_revision_ids',
            attribute_value=self.rule_revision_id_to_segment_ids.keys(),
        )

    @staticmethod
    def _render_as_dict_entries(mapping, value_template):
        """Render ``mapping`` as the argument list of a YQL ``AsDict(...)`` call.

        Each item becomes ``AsTuple(<key>ul, AsList(<values>))`` where every
        value is formatted with ``value_template``; entries are joined with
        ``',\\n'``.

        ``.items()`` is used instead of ``.iteritems()`` so the code runs on
        both Python 2 and Python 3.
        """
        return ',\n'.join(
            'AsTuple({}ul, AsList({}))'.format(
                key,
                ', '.join([value_template.format(value) for value in values]),
            )
            for key, values in mapping.items()
        )

    def run(self):
        """Run the query, then set the bookkeeping attributes on the output table."""
        # Build the target once instead of re-creating it for every access.
        output_target = self.output()

        # Render the query before opening the transaction so that formatting
        # errors surface before any transaction is started.
        query = process_bb_dump_query.format(
            input_table=self.input().table,
            processed_table=output_target.table,
            # Segment ids are rendered as quoted string literals.
            rule_revision_id_to_segment_ids=self._render_as_dict_entries(
                self.rule_revision_id_to_segment_ids, '"{}"',
            ),
            # Rule revision ids are rendered as `ul`-suffixed integer literals.
            catalogia_category_to_rule_revision_ids=self._render_as_dict_entries(
                self.catalogia_category_to_rule_revision_ids, '{}ul',
            ),
        )

        # The attribute writes stay inside the transaction's with-block, as in
        # the original code.
        with self.yt.Transaction() as transaction:
            self.yql.query(query, transaction=transaction)

            self.yt.set_attribute(
                output_target.table,
                'generate_datetime',
                date_helpers.to_date_string(
                    datetime.datetime.now(),
                    date_format=date_helpers.YT_DATETIME_FORMAT,
                ),
            )

            self.yt.set_attribute(
                output_target.table,
                output_target.attribute_name,
                sorted(self.rule_revision_id_to_segment_ids.keys()),
            )
