#!/usr/bin/env python
# -*- coding: utf-8 -*-

from yt.wrapper import ypath

from crypta.profile.utils.utils import get_matching_table
from crypta.profile.utils.config import config
from crypta.profile.utils.segment_utils.processors import DayProcessor, LogProcessor
from crypta.profile.utils.segment_utils.builders import RegularSegmentBuilder
from crypta.profile.utils.luigi_utils import ExternalInput
from crypta.profile.runners.segments.lib.coded_segments.data.deep_visits import organizations_categories_to_segment_ids
from crypta.profile.runners.segments.lib.coded_segments.common.organization_visitors import OrganizationCategoryDictionary


def org_visits_deep_external_input(date):
    """Build the ExternalInput pointing at the deep org-visits table for ``date``.

    Dates that compare strictly less than "2022-05-05" are looked up under
    ``config.ORG_VISITS_DEEP``; that date and anything after it is looked up
    under ``config.ORG_VISITS_DEEP_V2``. ``date`` is compared against a string
    literal and used as the last path component, so it is presumably a
    "YYYY-MM-DD" string -- confirm with callers.
    """
    if date < "2022-05-05":
        base_dir = config.ORG_VISITS_DEEP
    else:
        base_dir = config.ORG_VISITS_DEEP_V2

    table_path = ypath.ypath_join(base_dir, date)
    return ExternalInput(table_path)


# YQL template used by DeepVisitsDayProcessor.process_day.
# Placeholders filled through str.format:
#   {input_table}  - table to read (devid, permalink) rows from;
#   {output_table} - table that is overwritten (WITH TRUNCATE) with the result.
# The result holds one row per devid with the list of its distinct permalinks.
day_processor_query = """
INSERT INTO `{output_table}` WITH TRUNCATE
SELECT devid, AGGREGATE_LIST(DISTINCT permalink) AS permalinks
FROM `{input_table}`
GROUP BY devid
"""


class DeepVisitsDayProcessor(DayProcessor):
    """Day processor that runs ``day_processor_query`` over one day of deep org visits."""

    def requires(self):
        """Depend on the external deep-visits table for ``self.date``."""
        return org_visits_deep_external_input(self.date)

    def process_day(self, inputs, output_path):
        """Execute the per-day query, reading ``inputs.table`` and writing ``output_path``.

        The query runs inside ``self.transaction``.
        """
        rendered_query = day_processor_query.format(
            input_table=inputs.table,
            output_table=output_path,
        )
        self.yql.query(rendered_query, transaction=self.transaction)


# YQL template used by DeepVisits.build_segment. It is rendered with str.format,
# which is why the literal braces of the YQL lambda below are doubled ({{ }}).
#
# Placeholders:
#   {merged_visits_table}     - table with devid and a Yson list of permalinks;
#   {idfa_crypta_id}          - matching table joined by id, provides target_id;
#   {gaid_crypta_id}          - matching table joined by id, provides target_id;
#   {organization_categories} - table joined by permalink, provides categories_with_parents;
#   {categories_to_segments}  - Python literal pasted into the UDF script as the
#                               organizations_categories_to_segment_ids mapping;
#   {output_table}            - table overwritten (WITH TRUNCATE) with the result.
#
# Stages:
#   $merged     - collapses all permalinks lists of a devid into one duplicate-free
#                 list, then flattens it to (id, permalink) rows;
#   $crypta_ids - joins $merged with both matching tables and unions the results,
#                 casting target_id to Uint64 as crypta_id;
#   $categories - attaches categories_with_parents to every (crypta_id, permalink);
#   $get_categories - Python2 UDF returning the stringified categories that are keys
#                 of the pasted mapping, or None when there is no intersection;
#   final SELECT - emits (id, id_type='crypta_id', segment_name), one row per
#                 distinct (crypta_id, segment_name) pair thanks to the GROUP BY.
#
# NOTE(review): $get_categories is written out twice (SELECT and WHERE) on the same
# expression; whether the engine evaluates it once is not visible from here.
get_segments_query_template = """
$merged = (
    SELECT devid as id, permalink
    FROM (
        SELECT devid, ListUniq(ListFlatMap(AGGREGATE_LIST(permalinks), ($x)->{{RETURN $x;}})) AS permalinks
        FROM (
            SELECT devid, Yson::ConvertToInt64List(permalinks) AS permalinks
            FROM `{merged_visits_table}`
        )
        GROUP BY devid
    )
    FLATTEN BY permalinks AS permalink
);

$crypta_ids = (
    SELECT
        CAST(matching.target_id as Uint64) as crypta_id,
        segment.permalink as permalink
    FROM `{idfa_crypta_id}` as matching
    INNER JOIN $merged as segment
    USING(id)

    UNION ALL

    SELECT
        CAST(matching.target_id as Uint64) as crypta_id,
        segment.permalink as permalink
    FROM `{gaid_crypta_id}` as matching
    INNER JOIN $merged as segment
    USING(id)
);


$categories = (
    SELECT crypta_id, organization_categories.categories_with_parents AS categories
    FROM $crypta_ids AS crypta_ids
    INNER JOIN `{organization_categories}` AS organization_categories
    USING (permalink)
);

$script = @@
organizations_categories_to_segment_ids = {categories_to_segments}
interesting_org_categories = set(organizations_categories_to_segment_ids.keys())

def get_categories(categories):
    categories_intersection = interesting_org_categories.intersection(categories)
    if categories_intersection:
        return [str(category) for category in categories_intersection]
    else:
        return None
@@;

$get_categories = Python2::get_categories(Callable<(List<Int64>?)->List<String>?>, $script);

INSERT INTO `{output_table}` WITH TRUNCATE
SELECT
    CAST(crypta_id AS String) AS id,
    'crypta_id' AS id_type,
    segment_name
FROM (
    SELECT
        crypta_id,
        segment_name
    FROM (
        SELECT
            crypta_id,
            $get_categories(Yson::ConvertToInt64List(categories)) AS segment_names
        FROM $categories
        WHERE $get_categories(Yson::ConvertToInt64List(categories)) IS NOT NULL
    )
    FLATTEN LIST BY segment_names AS segment_name
)
GROUP BY crypta_id, segment_name;
"""


class DeepVisits(RegularSegmentBuilder):
    """Segment builder that maps crypta_ids to segments by visited organization categories.

    Visits are taken from the last ``number_of_days`` days of
    DeepVisitsDayProcessor output and matched to organization categories via
    OrganizationCategoryDictionary; the work itself is done by the YQL in
    ``get_segments_query_template``.
    """

    # Keys are stringified categories because the query emits segment_name as
    # str(category). ``.items()`` (not ``.iteritems()``) keeps this class body
    # importable on both Python 2 and Python 3.
    name_segment_dict = {
        str(category): segment_id
        for category, segment_id in organizations_categories_to_segment_ids.items()
    }

    # Not read in this file; presumably consumed by RegularSegmentBuilder -- confirm.
    keyword = 547
    # Passed to LogProcessor in requires() as the number of days to process.
    number_of_days = 365

    def requires(self):
        """Return the upstream tasks keyed by the names build_segment() reads."""
        return {
            'OrgDictionary': OrganizationCategoryDictionary(self.date),
            'OrgVisits': LogProcessor(
                DeepVisitsDayProcessor,
                self.date,
                self.number_of_days,
            ),
        }

    def build_segment(self, inputs, output_path):
        """Render the segment query and run it inside ``self.transaction``.

        Args:
            inputs: mapping with 'OrgVisits' and 'OrgDictionary' entries, each
                exposing a ``table`` attribute (the keys match requires()).
            output_path: table the query overwrites with the result.
        """
        self.yql.query(
            query_string=get_segments_query_template.format(
                merged_visits_table=inputs['OrgVisits'].table,
                organization_categories=inputs['OrgDictionary'].table,
                # The mapping's repr is pasted into the UDF script as a Python literal.
                categories_to_segments=organizations_categories_to_segment_ids,
                idfa_crypta_id=get_matching_table('idfa', 'crypta_id'),
                gaid_crypta_id=get_matching_table('gaid', 'crypta_id'),
                output_table=output_path,
            ),
            transaction=self.transaction,
        )
