#!/usr/bin/env python
# -*- coding: utf-8 -*-

from cached_property import cached_property

from crypta.lib.python.custom_ml.classification import (
    CustomClassificationModelTrainHelper,
    CustomClassificationParams,
)
from crypta.lib.python.custom_ml.training_config import MAX_SAMPLE_SIZE
from crypta.lib.python.custom_ml.tools.training_utils import get_model_tag
from crypta.profile.runners.segments.lib.coded_segments.ml_tools.utils import resources
from crypta.profile.utils.config import config
from crypta.profile.utils.utils import revert_dict


# YQL template that builds the training sample for the Market RFM model.
#
# Placeholders filled in via str.format():
#   {input_table}              - table with email_md5 / phone_md5, target and
#                                last_blue_order_time columns
#   {indevice_yandexuid_table} - matching table with id, id_type and yandexuid
#   {user_data_table}          - user data table (read through `VIEW raw`) with
#                                yuid, CryptaID, Vectors, Attributes, Segments
#   {output_table}             - destination table, overwritten (WITH TRUNCATE)
#   {max_sample_size}          - numerator of the sampling percentage
#
# Stages of the query:
#   1. $union_indevice  - matches input rows to yandexuids twice, by email_md5
#      and by phone_md5, and concatenates the results with UNION ALL. Rows with
#      target == 1 are labelled 'positive', everything else 'negative';
#      retro_date is the first space-separated token of last_blue_order_time.
#   2. $sample_by_puid  - maps those yandexuids to ids with id_type == 'puid'
#      and, per (id, id_type), keeps the label attached to the greatest
#      retro_date (MAX_BY) together with that retro_date. retro_date is the
#      value produced by the split, so it is compared as a string.
#   3. $train_sample    - expands each (id, id_type) back to all of its
#      yandexuids and joins user data on yandexuid == CAST(yuid AS Uint64).
#      crypta_id is CryptaID cast to Uint64, falling back to RandomNumber()
#      when the cast yields NULL. The name is then rebound to a
#      SELECT DISTINCT over the same columns.
#   4. $yuids_cnt counts the deduplicated rows; the final INSERT writes a
#      Bernoulli sample of 100 * max_sample_size / $yuids_cnt percent,
#      ordered by yandexuid.
#
# NOTE(review): when $yuids_cnt is smaller than max_sample_size the computed
# percentage is above 100, and for an empty $train_sample the divisor is 0 -
# confirm how TABLESAMPLE BERNOULLI behaves in these cases.
make_market_rfm_train_sample_query_template = """
$union_indevice = (
    SELECT
        indevice_matching.yandexuid AS yandexuid,
        CASE
            WHEN market.target == 1 THEN 'positive'
            ELSE 'negative'
        END AS segment_name,
        String::SplitToList(market.last_blue_order_time, ' ')[0] AS retro_date
    FROM `{input_table}` AS market
    INNER JOIN `{indevice_yandexuid_table}` AS indevice_matching
    ON market.email_md5 == indevice_matching.id
    WHERE indevice_matching.id_type == 'email_md5'
UNION ALL
    SELECT
        indevice_matching.yandexuid AS yandexuid,
        CASE
            WHEN market.target == 1 THEN 'positive'
            ELSE 'negative'
        END AS segment_name,
        String::SplitToList(market.last_blue_order_time, ' ')[0] AS retro_date
    FROM `{input_table}` AS market
    INNER JOIN `{indevice_yandexuid_table}` AS indevice_matching
    ON market.phone_md5 == indevice_matching.id
    WHERE indevice_matching.id_type == 'phone_md5'
);

$sample_by_puid = (
    SELECT
        indevice_matching.id AS id,
        indevice_matching.id_type AS id_type,
        MAX_BY(market.segment_name, market.retro_date) AS segment_name,
        MAX(market.retro_date) AS retro_date
    FROM $union_indevice AS market
    INNER JOIN `{indevice_yandexuid_table}` AS indevice_matching
    USING(yandexuid)
    WHERE indevice_matching.id_type == 'puid'
    GROUP BY indevice_matching.id, indevice_matching.id_type
);

$train_sample = (
    SELECT
        train_sample.segment_name AS segment_name,
        indevice_yandexuid.yandexuid AS yandexuid,
        COALESCE(CAST(user_data.CryptaID AS Uint64), RandomNumber(user_data.CryptaID)) AS crypta_id,
        user_data.Vectors AS Vectors,
        user_data.Attributes AS Attributes,
        user_data.Segments AS Segments
    FROM $sample_by_puid AS train_sample
    INNER JOIN `{indevice_yandexuid_table}` AS indevice_yandexuid
    ON train_sample.id == indevice_yandexuid.id AND train_sample.id_type == indevice_yandexuid.id_type
    INNER JOIN `{user_data_table}` VIEW raw AS user_data
    ON indevice_yandexuid.yandexuid == CAST(user_data.yuid AS Uint64)
);

$train_sample = (
   SELECT DISTINCT
        yandexuid,
        segment_name,
        crypta_id,
        Vectors,
        Attributes,
        Segments
    FROM $train_sample
);

$yuids_cnt = (
    SELECT COUNT(*)
    FROM $train_sample
);

INSERT INTO `{output_table}`
WITH TRUNCATE

SELECT *
FROM $train_sample
TABLESAMPLE BERNOULLI(100. * {max_sample_size} / $yuids_cnt)
ORDER BY yandexuid;
"""


class MarketRfmModelTrainHelper(CustomClassificationModelTrainHelper):
    """Train helper that configures the ``market_rfm`` classification model.

    The class only assembles parameters: it remembers where the source
    sample and the resulting train sample live and exposes them, together
    with the rendered train-sample query, as ``model_params``.
    """

    def __init__(self, yt, logger, date, storage_sample_path, train_sample_path):
        """Initialise the base helper and remember the sample table paths.

        Args:
            yt: forwarded unchanged to the base class constructor.
            logger: forwarded unchanged to the base class constructor.
            date: forwarded unchanged to the base class constructor.
            storage_sample_path: table substituted as ``input_table`` into
                the train-sample query.
            train_sample_path: table substituted as ``output_table`` into
                the query and passed on as ``train_sample_path``.
        """
        super(MarketRfmModelTrainHelper, self).__init__(yt=yt, logger=logger, date=date)
        self.model_name = 'market_rfm'
        self.storage_sample_path = storage_sample_path
        self.train_sample_path = train_sample_path

    @cached_property
    def model_params(self):
        """Build the ``CustomClassificationParams`` for this model.

        The value is computed on first access and then cached on the
        instance by ``cached_property``.

        Returns:
            CustomClassificationParams with the resource type looked up in
            ``resources`` by model name, the segment mappings and the fully
            formatted train-sample query.
        """
        model_resource = resources[self.model_name]
        tag = get_model_tag(model_resource)
        metrics_group = self.get_metrics_group_name(self.model_name)

        # segment_id_to_name is not defined in this class; presumably it is
        # provided by the base helper - verify against the parent class.
        id_to_name = self.segment_id_to_name
        name_to_id = revert_dict(id_to_name)

        # Values for every placeholder of the YQL template.
        placeholders = {
            'input_table': self.storage_sample_path,
            'indevice_yandexuid_table': config.INDEVICE_YANDEXUID,
            'user_data_table': config.USER_DATA_TABLE,
            'output_table': self.train_sample_path,
            'max_sample_size': MAX_SAMPLE_SIZE,
        }
        train_sample_query = make_market_rfm_train_sample_query_template.format(**placeholders)

        return CustomClassificationParams(
            train_sample_path=self.train_sample_path,
            resource_type=model_resource,
            model_tag=tag,
            metrics_group_name=metrics_group,
            model_description_in_sandbox='Model for Market RFM segments classification',
            segment_id_to_name=id_to_name,
            segment_name_to_id=name_to_id,
            ordered_thresholds=None,
            make_train_sample_query=train_sample_query,
        )
