#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os

import luigi

from crypta.profile.utils.config import config
from crypta.profile.utils.luigi_utils import OldNodesByNameCleaner, YtTarget, AttributeExternalInput, BaseYtTask

# YQL query template that extracts salary values from job-search site URLs.
#
# Placeholders (filled in with str.format):
#   {metrics_parsed_log}       - input table; yandexuid, timestamp, url and host
#                                columns are read from it
#   {output_table}             - destination table, written WITH TRUNCATE
#   {min_salary}, {max_salary} - inclusive bounds checked in the HAVING clause
#
# Stages:
#   1. $parsed_job_search keeps rows whose 2nd- or 3rd-level domain is one of
#      the listed job sites, parses the URL query string into a dict
#      (an empty dict when parsing yields Null) and normalizes host to that
#      domain.
#   2. $salaries_from_job_search unions one SELECT per site; each reads the
#      site-specific query parameter (salary, payment_value, sf, salary_from,
#      srzpmin, s) cast to Uint64 and drops rows where the cast is Null.
#      hh.ru and rabota.yandex.ru additionally require the currency parameter
#      to be absent or 'RUR'; rabota.ru requires `cu` to be absent or '2'
#      (presumably a currency code - TODO confirm).
#   3. The final SELECT groups by (yandexuid, timestamp), takes SOME() value
#      of every other column, derives `date` from the timestamp and keeps
#      groups whose salary lies within [{min_salary}, {max_salary}].
parse_job_search_query_template = """
$parsed_job_search = (
    SELECT yandexuid,
        `timestamp`,
        url,
        Coalesce(Dsv::Parse(Url::Parse(url).Query, '&'), DictCreate(String, String)) AS url_params,
        CASE
            WHEN Url::GetDomain(host, 3) in ('rabota.yandex.ru', 'ru.jooble.org') THEN Url::GetDomain(host, 3)
            ELSE Url::GetDomain(host, 2)
        END AS host
    FROM `{metrics_parsed_log}`
    WHERE Url::GetDomain(host, 2) in ('hh.ru', 'rabota.ru', 'zarplata.ru', 'superjob.ru', 'joblab.ru',
        'gorodrabot.ru') OR Url::GetDomain(host, 3) in ('rabota.yandex.ru', 'ru.jooble.org')
);

$salaries_from_job_search = (
    SELECT yandexuid,
        `timestamp`,
        host,
        url,
        Yson::Serialize(Yson::FromStringDict(url_params)) AS url_params,
        CAST(url_params['salary'] AS Uint64) AS salary
    FROM $parsed_job_search
    WHERE host == 'hh.ru' AND  CAST(url_params['salary'] AS Uint64) is not Null AND
        (url_params['currency_code'] is Null OR url_params['currency_code'] == 'RUR')
UNION ALL
    SELECT yandexuid,
        `timestamp`,
        host,
        url,
        Yson::Serialize(Yson::FromStringDict(url_params)) AS url_params,
        CAST(url_params['payment_value'] AS Uint64) AS salary
    FROM $parsed_job_search
    WHERE host == 'superjob.ru' AND  CAST(url_params['payment_value'] AS Uint64) is not Null
UNION ALL
    SELECT yandexuid,
        `timestamp`,
        host,
        url,
        Yson::Serialize(Yson::FromStringDict(url_params)) AS url_params,
        CASE
            WHEN CAST(url_params['salary'] AS Uint64) is not Null THEN CAST(url_params['salary'] AS Uint64)
            ELSE CAST(url_params['sf'] AS Uint64)
        END AS salary
    FROM $parsed_job_search
    WHERE host == 'rabota.ru' AND (Cast(url_params['salary'] AS Uint64) is not Null OR
        CAST(url_params['sf'] AS Uint64) is not Null) AND (url_params['cu'] is Null OR url_params['cu'] == '2')
UNION ALL
    SELECT yandexuid,
        `timestamp`,
        host,
        url,
        Yson::Serialize(Yson::FromStringDict(url_params)) AS url_params,
        CAST(url_params['salary_from'] AS Uint64) AS salary
    FROM $parsed_job_search
    WHERE host == 'rabota.yandex.ru' AND CAST(url_params['salary_from'] AS Uint64) is not Null AND
        (url_params['currency'] is Null OR url_params['currency'] == 'RUR')
UNION ALL
    SELECT yandexuid,
        `timestamp`,
        host,
        url,
        Yson::Serialize(Yson::FromStringDict(url_params)) AS url_params,
        CAST(url_params['salary'] AS Uint64) AS salary
    FROM $parsed_job_search
    WHERE host == 'zarplata.ru' AND CAST(url_params['salary'] AS Uint64) is not Null
UNION ALL
    SELECT yandexuid,
        `timestamp`,
        host,
        url,
        Yson::Serialize(Yson::FromStringDict(url_params)) AS url_params,
        CAST(url_params['srzpmin'] AS Uint64) AS salary
    FROM $parsed_job_search
    WHERE host == 'joblab.ru' AND CAST(url_params['srzpmin'] AS Uint64) is not Null
UNION ALL
    SELECT yandexuid,
        `timestamp`,
        host,
        url,
        Yson::Serialize(Yson::FromStringDict(url_params)) AS url_params,
        CAST(url_params['s'] AS Uint64) AS salary
    FROM $parsed_job_search
    WHERE host == 'gorodrabot.ru' AND CAST(url_params['s'] AS Uint64) is not Null
UNION ALL
    SELECT yandexuid,
        `timestamp`,
        host,
        url,
        Yson::Serialize(Yson::FromStringDict(url_params)) AS url_params,
        CAST(url_params['salary'] AS Uint64) AS salary
    FROM $parsed_job_search
    WHERE host == 'ru.jooble.org' AND CAST(url_params['salary'] AS Uint64) is not Null
);

INSERT INTO `{output_table}` WITH TRUNCATE

SELECT yandexuid,
    `timestamp`,
    CAST(DateTime::MakeDate(DateTime::FromSeconds(CAST(`timestamp` AS Uint32))) AS String) AS `date`,
    SOME(host) AS host,
    SOME(url) AS url,
    SOME(url_params) AS url_params,
    SOME(salary) AS salary
FROM $salaries_from_job_search
GROUP BY yandexuid, `timestamp`
HAVING SOME(salary) >= {min_salary} AND SOME(salary) <= {max_salary}
ORDER BY yandexuid, `timestamp`;
"""


class ParseJobSearch(BaseYtTask):
    """Task that extracts salary values from job-search URLs for one date.

    Reads the parsed metrics table named after ``date``, runs the YQL query
    built from ``parse_job_search_query_template`` and writes the result to
    the table named after ``date`` inside ``config.PARSED_JOB_SEARCH_FOLDER``.
    """

    date = luigi.Parameter()
    juggler_host = config.CRYPTA_ML_JUGGLER_HOST
    task_group = 'import_socdem_data'

    def requires(self):
        """Return the upstream dependencies keyed by role.

        ``cleaner`` is an ``OldNodesByNameCleaner`` over the output folder;
        ``parsed_metrics`` is the parsed metrics table for ``date``, required
        to have its ``closed`` attribute set to True.
        """
        old_tables_cleaner = OldNodesByNameCleaner(
            date=self.date,
            folder=config.PARSED_JOB_SEARCH_FOLDER,
            lifetime=config.NUMBER_OF_DAYS_TO_KEEP_PARSED_LOGS,
        )

        metrics_table_path = os.path.join(config.METRICS_PARSED_DIR, self.date)
        parsed_metrics_input = AttributeExternalInput(
            table=metrics_table_path,
            attribute_name='closed',
            attribute_value=True,
            # Only the columns the query template actually reads.
            columns=('yandexuid', 'host', 'url', 'timestamp'),
        )

        return {
            'cleaner': old_tables_cleaner,
            'parsed_metrics': parsed_metrics_input,
        }

    def output(self):
        """Return the YT target for this date's parsed job search table."""
        output_path = os.path.join(config.PARSED_JOB_SEARCH_FOLDER, self.date)
        return YtTarget(output_path)

    def run(self):
        """Fill in the query template and execute it within a YT transaction."""
        with self.yt.Transaction() as transaction:
            source_table = self.input()['parsed_metrics'].table
            destination_table = self.output().table
            query = parse_job_search_query_template.format(
                metrics_parsed_log=source_table,
                output_table=destination_table,
                min_salary=4000,
                max_salary=400000,
            )
            self.yql.query(query_string=query, transaction=transaction)
