#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os

import luigi

from crypta.profile.runners.matching.lib.income.merge_auto_ru import MergeAutoRu
from crypta.profile.runners.matching.lib.income.merge_job_search import MergeJobSearch
from crypta.profile.utils.config import config
from crypta.profile.utils.luigi_utils import (
    BaseYtTask,
    ExternalInput,
    YtDailyRewritableTarget,
)

# Query template rendered with str.format() and passed to self.yql.query() by
# PrepareIncomeForSocdemStorage.run(). Because of str.format(), the literal
# braces of the lambda bodies are doubled ({{ and }}).
#
# Placeholders:
#   percentile_A, percentile_B1, percentile_B2, percentile_C1 - upper salary
#       bounds used by $income_segment to map a salary to 'A', 'B1', 'B2' or
#       'C1'; every other value falls into the ELSE branch, 'C2'.
#   *_table  - paths of the input tables.
#   *_output - paths of the output tables; each one is rewritten
#       (INSERT ... WITH TRUNCATE).
#
# Every output table gets the columns id (taken from puid), id_type (the
# constant 'puid'), update_time (the source string parsed with the '%Y-%m-%d'
# format and converted to seconds as Uint64), income_segment and source, and
# is ordered by (id, id_type).
#
# NOTE(review): a NULL salary matches none of the WHEN branches of
# $income_segment and therefore presumably ends up in 'C2' - confirm that
# this is intended.
prepare_income_query_template = """
$parse_date = DateTime::Parse('%Y-%m-%d');
$date_to_seconds = ($update_time) -> {{
    RETURN CAST(DateTime::ToSeconds(DateTime::MakeDatetime($parse_date($update_time))) AS Uint64)
}};

$percentile_A = {percentile_A};
$percentile_B1 = {percentile_B1};
$percentile_B2 = {percentile_B2};
$percentile_C1 = {percentile_C1};

$income_segment = ($salary) -> {{
    RETURN CASE
        WHEN $salary <= $percentile_A THEN 'A'
        WHEN $salary > $percentile_A AND $salary <= $percentile_B1 THEN 'B1'
        WHEN $salary > $percentile_B1 AND $salary <= $percentile_B2 THEN 'B2'
        WHEN $salary > $percentile_B2 AND $salary <= $percentile_C1 THEN 'C1'
        ELSE 'C2'
    END
}};

INSERT INTO `{auto_ru_output}` WITH TRUNCATE

SELECT
    puid AS id,
    'puid' AS id_type,
    $date_to_seconds(update_time) AS update_time,
    income_segment,
    CASE
        WHEN weight > 1 THEN 'auto_ru_for_training'
        ELSE 'auto_ru_for_voting'
    END AS source
FROM `{merged_auto_ru_table}`
ORDER BY id, id_type;

INSERT INTO `{job_search_output}` WITH TRUNCATE

SELECT
    puid AS id,
    'puid' AS id_type,
    $date_to_seconds(update_time) AS update_time,
    income_segment,
    CASE
        WHEN weight > 1 THEN 'job_search_for_training'
        ELSE 'job_search_for_voting'
    END AS source
FROM `{merged_job_search_table}`
ORDER BY id, id_type;

INSERT INTO `{beeline_output}` WITH TRUNCATE

SELECT
    puid AS id,
    'puid' AS id_type,
    $date_to_seconds(update_time) AS update_time,
    income_5_segment AS income_segment,
    CASE
        WHEN Digest::MurMurHash(puid) % 10 IN (2, 3, 4, 5, 6) THEN 'beeline_for_training'
        ELSE 'beeline_for_voting'
    END AS source
FROM `{beeline_table}`
ORDER BY id, id_type;

INSERT INTO `{vtb_output}` WITH TRUNCATE

SELECT
    puid AS id,
    'puid' AS id_type,
    $date_to_seconds(update_time) AS update_time,
    income_segment AS income_segment,
    'vtb' AS source
FROM `{vtb_table}`
ORDER BY id, id_type;

INSERT INTO `{delta_credit_output}` WITH TRUNCATE

SELECT
    puid AS id,
    'puid' AS id_type,
    $date_to_seconds(update_time) AS update_time,
    $income_segment(salary) AS income_segment,
    'delta_credit' AS source
FROM `{delta_credit_table}`
ORDER BY id, id_type;

INSERT INTO `{tcs_output}` WITH TRUNCATE

SELECT
    puid AS id,
    'puid' AS id_type,
    $date_to_seconds(update_time) AS update_time,
    $income_segment(salary) AS income_segment,
    'tcs' AS source
FROM `{tcs_table}`
ORDER BY id, id_type;

INSERT INTO `{rosbank_output}` WITH TRUNCATE

SELECT
    puid AS id,
    'puid' AS id_type,
    $date_to_seconds(update_time) AS update_time,
    $income_segment(salary) AS income_segment,
    'rosbank' AS source
FROM `{rosbank_table}`
ORDER BY id, id_type;

INSERT INTO `{migcredit_output}` WITH TRUNCATE

SELECT
    puid AS id,
    'puid' AS id_type,
    $date_to_seconds(update_time) AS update_time,
    $income_segment(salary) AS income_segment,
    'migcredit' AS source
FROM `{migcredit_table}`
ORDER BY id, id_type;

INSERT INTO `{hcb_output}` WITH TRUNCATE

SELECT
    puid AS id,
    'puid' AS id_type,
    $date_to_seconds(update_time) AS update_time,
    CASE
        WHEN salary_backet in (1, 2) THEN 'A'
        WHEN salary_backet in (3, 4, 5) THEN 'B1'
        WHEN salary_backet == 6 THEN 'B2'
        WHEN salary_backet in (7, 8, 9, 10, 11, 12) THEN 'C1'
        ELSE 'C2'
    END AS income_segment,
    'hcb' AS source
FROM `{hcb_table}`
ORDER BY id, id_type;
"""


class PrepareIncomeForSocdemStorage(BaseYtTask):
    """Convert income samples into per-source tables of the socdem storage.

    The task renders ``prepare_income_query_template`` with the input and
    output table paths plus the income thresholds, runs it as a single query
    and then stamps every output table with a ``generate_date`` attribute.
    """

    date = luigi.Parameter()
    juggler_host = config.CRYPTA_ML_JUGGLER_HOST
    task_group = 'import_socdem_data'

    def requires(self):
        """Return the two merge tasks and the external sample tables."""
        return {
            'merged_job_search': MergeJobSearch(self.date),
            'merged_auto_ru': MergeAutoRu(self.date),
            'beeline_sample': ExternalInput(os.path.join(config.TELECOMS_FOLDER, 'beeline_train')),
            'vtb_sample': ExternalInput(os.path.join(config.SCORING_FOLDER, 'vtb_2021-08-26')),
            'delta_credit_sample': ExternalInput(os.path.join(config.SCORING_FOLDER, 'delta_credit_2019-02-01_train')),
            'tcs_sample': ExternalInput(os.path.join(config.SCORING_FOLDER, 'tcs_2017-06-01')),
            'hcb_sample': ExternalInput(os.path.join(config.SCORING_FOLDER, 'hcb_2019-03-01')),
            'rosbank_sample': ExternalInput(os.path.join(config.SCORING_FOLDER, 'rosbank_2019-06-01')),
            'migcredit_sample': ExternalInput(os.path.join(config.SCORING_FOLDER, 'migcredit_2019-05-01')),
        }

    def output(self):
        """Return one daily rewritable target per source, keyed by source name.

        Each table lives at ``<SOCDEM_STORAGE_YT_DIR>/puid/<source>``.
        """
        sources = (
            'job_search',
            'auto_ru',
            'beeline',
            'vtb',
            'delta_credit',
            'tcs',
            'hcb',
            'rosbank',
            'migcredit',
        )
        return {
            source: YtDailyRewritableTarget(
                table=os.path.join(config.SOCDEM_STORAGE_YT_DIR, 'puid', source),
                date=self.date,
            )
            for source in sources
        }

    def run(self):
        """Run the preparation query and mark every output with the task date.

        The thresholds are taken from the first row of the ``thresholds`` table
        produced by the ``merged_job_search`` requirement; ``next()`` raises
        ``StopIteration`` if that table is empty.
        """
        with self.yt.Transaction() as transaction:
            # Resolve the targets once instead of rebuilding them for every
            # single format argument below.
            inputs = self.input()
            outputs = self.output()
            merged_job_search = inputs['merged_job_search']

            income_thresholds = next(self.yt.read_table(merged_job_search['thresholds'].table))

            self.yql.query(
                query_string=prepare_income_query_template.format(
                    percentile_A=int(income_thresholds['A_upper_threshold']),
                    percentile_B1=int(income_thresholds['B1_upper_threshold']),
                    percentile_B2=int(income_thresholds['B2_upper_threshold']),
                    percentile_C1=int(income_thresholds['C1_upper_threshold']),
                    merged_job_search_table=merged_job_search['merged_job_search'].table,
                    merged_auto_ru_table=inputs['merged_auto_ru'].table,
                    beeline_table=inputs['beeline_sample'].table,
                    vtb_table=inputs['vtb_sample'].table,
                    delta_credit_table=inputs['delta_credit_sample'].table,
                    tcs_table=inputs['tcs_sample'].table,
                    hcb_table=inputs['hcb_sample'].table,
                    rosbank_table=inputs['rosbank_sample'].table,
                    migcredit_table=inputs['migcredit_sample'].table,
                    job_search_output=outputs['job_search'].table,
                    auto_ru_output=outputs['auto_ru'].table,
                    beeline_output=outputs['beeline'].table,
                    vtb_output=outputs['vtb'].table,
                    delta_credit_output=outputs['delta_credit'].table,
                    tcs_output=outputs['tcs'].table,
                    hcb_output=outputs['hcb'].table,
                    rosbank_output=outputs['rosbank'].table,
                    migcredit_output=outputs['migcredit'].table,
                ),
                transaction=transaction,
            )

            # values() instead of iteritems(): the key is not needed, and
            # iteritems() does not exist on Python 3 dicts.
            for target in outputs.values():
                self.yt.set_attribute(
                    target.table,
                    'generate_date',
                    self.date,
                )
