#!/usr/bin/env python
# -*- coding: utf-8 -*-

from datetime import timedelta
import os

from crypta.lib.python import templater
from crypta.lib.python.yt import yt_helpers
from crypta.profile.lib import date_helpers
from crypta.profile.tasks.monitoring.__base__ import Monitoring
from crypta.profile.utils.config import config
from crypta.profile.utils.luigi_utils import (
    ExternalInput,
    YtTarget,
)


# YQL text of the daily socdem report. It is rendered with ``str.format``,
# which is why the braces of the YQL lambda bodies are doubled (``{{ ... }}``).
# Placeholders: chevent_cooked_log, adfox_log, profiles, start_date, end_date,
# date, stats_table, realtime_stats_table.
#
# The query overwrites (WITH TRUNCATE) two tables:
#   * stats_table - hit and user counts per age/gender/income/country/device/
#     traffic/ITP slice, for the chevent log (split into 'rsya' and 'search')
#     and for the Adfox log (labelled 'adfox');
#   * realtime_stats_table - for 'yandex-uid' chevent hits and users whose
#     joined offline profile has no value for an attribute, the counts per
#     value of that attribute taken from the log itself.
#
# NOTE(review): in $adfox_hits the device_type CASE used to test
# ``ya_device_type == 2`` for both 'phone' and 'tablet', which made the
# 'tablet' branch unreachable. The tablet code is assumed to be 3 (the next
# value after desk=1, phone=2) - confirm against the Adfox log schema.
process_chevent_log_query_template = """
PRAGMA AnsiInForEmptyOrNullableItemsCollections;

$country = ($region_id) -> {{RETURN Geo::RoundRegionById(CAST($region_id AS Int32), "country").en_name}};

$chevent_gender = ($gender) -> {{
RETURN CASE
    WHEN $gender == 0 THEN 'm'
    WHEN $gender == 1 THEN 'f'
    ELSE 'unknown'
END
}};

$chevent_age = ($age) -> {{
RETURN CASE
    WHEN $age == 0 THEN '0-17'
    WHEN $age == 1 THEN '18-24'
    WHEN $age == 2 THEN '25-34'
    WHEN $age == 3 THEN '35-44'
    WHEN $age == 5 THEN '45-54'
    WHEN $age == 6 THEN '55-99'
    ELSE 'unknown'
END
}};

$chevent_income = ($income) -> {{
RETURN CASE
    WHEN $income == 0 THEN 'A'
    WHEN $income == 1 THEN 'B1'
    WHEN $income == 2 THEN 'B2'
    WHEN $income == 3 THEN 'C1'
    WHEN $income == 4 THEN 'C2'
    ELSE 'unknown'
END
}};

$adfox_gender = ($gender) -> {{
RETURN CASE
    WHEN $gender == 1 THEN 'm'
    WHEN $gender == 2 THEN 'f'
    ELSE 'unknown'
END
}};

$adfox_age = ($age) -> {{
RETURN CASE
    WHEN $age == 1 THEN '0-17'
    WHEN $age == 2 THEN '18-24'
    WHEN $age == 3 THEN '25-34'
    WHEN $age == 4 THEN '35-44'
    WHEN $age == 6 THEN '45-54'
    WHEN $age == 7 THEN '55-99'
    ELSE 'unknown'
END
}};

$adfox_income = ($income) -> {{
RETURN CASE
    WHEN $income == 1 THEN 'A'
    WHEN $income == 2 THEN 'B'
    WHEN $income == 3 THEN 'C'
    ELSE 'unknown'
END
}};

$chevent_hits = (
    SELECT
        uniqid,
        uniqidsource,
        eventtime,
        $chevent_gender(gender) AS gender,
        $chevent_age(age) AS age,
        $chevent_income(income) AS income,
        CASE
            WHEN `options_flat-page` == True THEN 'rsya'
            ELSE 'search'
        END AS rsya_or_search,
        CASE
            WHEN devicetype < 4 THEN 'phone'
            WHEN devicetype == 4 THEN 'tablet'
            ELSE 'desk'
        END AS device_type,
        CASE
            WHEN $country(regionid) == 'Russia' THEN 'Russia'
            WHEN $country(regionid) == 'Kazakhstan' THEN 'Kazakhstan'
            WHEN $country(regionid) == 'Turkey' THEN 'Turkey'
            WHEN $country(regionid) == 'Belarus' THEN 'Belarus'
            WHEN $country(regionid) == 'Ukraine' THEN 'Ukraine'
            ELSE 'others'
        END AS country,
        CASE
            WHEN `impressionoptions_is-ssp` THEN 'ssp'
            WHEN `impressionoptions_in-app` THEN 'sdk'
            WHEN devicetype < 4 THEN 'mobile_web'
            ELSE 'desktop_web'
        END AS traffic_qualifier,
        options like '%itp%' AS is_itp,
    FROM `{chevent_cooked_log}`
    WHERE fraudbits == 0 AND placeid in (542, 1542)
);

$with_offline_socdem = (
    SELECT
        yandexuid,
        TableName() AS `date`,
        Yson::ConvertToStringDict(exact_socdem)['gender'] AS gender,
        Yson::ConvertToStringDict(exact_socdem)['age_segment'] AS age,
        Yson::ConvertToStringDict(exact_socdem)['income_5_segment'] AS income
    FROM RANGE(`{profiles}`, `{start_date}`, `{end_date}`)
);

$with_offline_socdem = (
    SELECT
        yandexuid,
        MAX_BY(gender, `date`) AS gender,
        MAX_BY(age, `date`) AS age,
        MAX_BY(income, `date`) AS income
    FROM $with_offline_socdem
    GROUP BY yandexuid
);

$hits_with_offline_data = (
    SELECT
        chevent.*,
        offline_socdem.gender AS offline_gender,
        offline_socdem.age AS offline_age,
        offline_socdem.income AS offline_income,
    FROM $chevent_hits AS chevent
    LEFT JOIN $with_offline_socdem AS offline_socdem
    ON chevent.uniqid == offline_socdem.yandexuid
);

$chevent_users = (
    SELECT
        uniqid,
        uniqidsource,
        rsya_or_search,
        MAX_BY(age, eventtime) AS age,
        MAX_BY(gender, eventtime) AS gender,
        MAX_BY(income, eventtime) AS income,
        SOME(offline_age) AS offline_age,
        SOME(offline_gender) AS offline_gender,
        SOME(offline_income) AS offline_income,
        MAX_BY(country, eventtime) AS country,
        MAX_BY(device_type, eventtime) AS device_type,
        MAX_BY(traffic_qualifier, eventtime) AS traffic_qualifier,
        MAX_BY(is_itp, eventtime) AS is_itp,
    FROM $hits_with_offline_data
    GROUP BY uniqid, uniqidsource, rsya_or_search
);

$adfox_hits = (
SELECT
    $adfox_gender(krypta_sex_value) AS gender,
    $adfox_age(krypta_age_value) AS age,
    $adfox_income(krypta_revenue_value) AS income,
    UserAgent::Parse(useragent).ITP AS is_itp,
    eff_uid,
    `timestamp`,
    CASE
        WHEN ya_device_type == 1 THEN 'desk'
        WHEN ya_device_type == 2 THEN 'phone'
        WHEN ya_device_type == 3 THEN 'tablet'
        ELSE 'other'
    END AS device_type,
    CASE
        WHEN $country(ya_geo_id) == 'Russia' THEN 'Russia'
        WHEN $country(ya_geo_id) == 'Kazakhstan' THEN 'Kazakhstan'
        WHEN $country(ya_geo_id) == 'Turkey' THEN 'Turkey'
        WHEN $country(ya_geo_id) == 'Belarus' THEN 'Belarus'
        WHEN $country(ya_geo_id) == 'Ukraine' THEN 'Ukraine'
        ELSE 'others'
    END AS country
    FROM `{adfox_log}`
    WHERE load=1
);

$adfox_users = (
    SELECT
        eff_uid,
        MAX_BY(gender, `timestamp`) AS gender,
        MAX_BY(age, `timestamp`) AS age,
        MAX_BY(income, `timestamp`) AS income,
        MAX_BY(country, `timestamp`) AS country,
        MAX_BY(device_type, `timestamp`) AS device_type,
        MAX_BY(is_itp, `timestamp`) AS is_itp
    FROM $adfox_hits
    GROUP BY eff_uid
);

$stats = (
    SELECT
        'hits' AS stats_type,
        rsya_or_search,
        age,
        gender,
        income,
        country,
        device_type,
        traffic_qualifier,
        is_itp,
        COUNT(*) AS `count`
    FROM $chevent_hits
    GROUP BY rsya_or_search, age, gender, income, country, device_type, traffic_qualifier, is_itp
UNION ALL
    SELECT
        'users' AS stats_type,
        rsya_or_search,
        age,
        gender,
        income,
        country,
        device_type,
        traffic_qualifier,
        is_itp,
        COUNT(*) AS `count`
    FROM $chevent_users
    GROUP BY rsya_or_search, age, gender, income, country, device_type, traffic_qualifier, is_itp
UNION ALL
    SELECT
        'hits' AS stats_type,
        'adfox' AS rsya_or_search,
        age,
        gender,
        income,
        country,
        device_type,
        'adfox' AS traffic_qualifier,
        is_itp,
        COUNT(*) AS `count`
    FROM $adfox_hits
    GROUP BY age, gender, income, country, device_type, is_itp
UNION ALL
    SELECT
        'users' AS stats_type,
        'adfox' AS rsya_or_search,
        age,
        gender,
        income,
        country,
        device_type,
        'adfox' AS traffic_qualifier,
        is_itp,
        COUNT(*) AS `count`
    FROM $adfox_users
    GROUP BY age, gender, income, country, device_type, is_itp
);

INSERT INTO `{stats_table}`
WITH TRUNCATE

SELECT
    stats.*,
    '{date}' AS `date`
FROM $stats AS stats
ORDER BY `date`, stats_type, rsya_or_search, country, device_type, traffic_qualifier, is_itp;

$realtime_stats = (
    SELECT
        'hits' AS stats_type,
        'gender' AS socdem_type,
        gender AS socdem_group,
        COUNT(*) AS `count`,
    FROM $hits_with_offline_data
    WHERE uniqidsource == 'yandex-uid' AND offline_gender is Null
    GROUP BY gender
UNION ALL
    SELECT
        'hits' AS stats_type,
        'age' AS socdem_type,
        age AS socdem_group,
        COUNT(*) AS `count`,
    FROM $hits_with_offline_data
    WHERE uniqidsource == 'yandex-uid' AND offline_age is Null
    GROUP BY age
UNION ALL
    SELECT
        'hits' AS stats_type,
        'income' AS socdem_type,
        income AS socdem_group,
        COUNT(*) AS `count`,
    FROM $hits_with_offline_data
    WHERE uniqidsource == 'yandex-uid' AND offline_income is Null
    GROUP BY income
UNION ALL
    SELECT
        'users' AS stats_type,
        'gender' AS socdem_type,
        gender AS socdem_group,
        COUNT(*) AS `count`,
    FROM $chevent_users
    WHERE uniqidsource == 'yandex-uid' AND offline_gender is Null
    GROUP BY gender
UNION ALL
    SELECT
        'users' AS stats_type,
        'age' AS socdem_type,
        age AS socdem_group,
        COUNT(*) AS `count`,
    FROM $chevent_users
    WHERE uniqidsource == 'yandex-uid' AND offline_age is Null
    GROUP BY age
UNION ALL
    SELECT
        'users' AS stats_type,
        'income' AS socdem_type,
        income AS socdem_group,
        COUNT(*) AS `count`,
    FROM $chevent_users
    WHERE uniqidsource == 'yandex-uid' AND offline_income is Null
    GROUP BY income
);

INSERT INTO `{realtime_stats_table}`
WITH TRUNCATE

SELECT
    stats.*,
    '{date}' AS `date`
FROM $realtime_stats AS stats
ORDER BY `date`, stats_type, socdem_type, socdem_group;
"""


# YQL template that rebuilds an aggregated stats table. It uses Jinja-like
# ``{{ var }}`` / ``{% if %}`` syntax and is rendered by
# ``templater.render_template``.
# Template variables:
#   stats_dir                - directory holding the per-day stats tables;
#   last_date_in_agg_table   - first daily table to read (lower bound of the
#                              range; the caller passes the day after the last
#                              aggregated date);
#   end_agg_date             - last daily table to read;
#   aggregate_table          - table that is read (when it exists) and then
#                              overwritten with the merged result;
#   is_aggregate_table_exist - whether the existing rows should be merged in;
#   start_agg_date           - existing rows older than this date are dropped.
# The `date` column is backtick-quoted, matching the daily stats query that
# produces it, so it is never parsed as the type/keyword of the same name.
get_aggregate_table_template = """
$updated_aggregate_table = (
    SELECT *
    FROM range(`{{stats_dir}}`, `{{last_date_in_agg_table}}`, `{{end_agg_date}}`)
{% if is_aggregate_table_exist %}
UNION ALL
    SELECT *
    FROM `{{aggregate_table}}`
    WHERE `date` >= '{{start_agg_date}}'
{% endif %}
);

INSERT INTO `{{aggregate_table}}`
WITH TRUNCATE

SELECT *
FROM $updated_aggregate_table
ORDER BY `date`
"""


class CheventLogMonitoring(Monitoring):
    """Build daily and aggregated socdem statistics from the chevent and Adfox logs.

    For ``self.date`` the task renders and runs
    ``process_chevent_log_query_template``, which writes two per-day stats
    tables, and then merges the new per-day tables into two long-lived
    aggregated tables via ``get_aggregate_table_template``.
    """

    name = 'chevent_log'

    def requires(self):
        """Return the external per-day log tables this task reads.

        Returns:
            dict with keys ``'CheventCookedLog'`` and ``'AdfoxEventLog'``, each
            an ``ExternalInput`` pointing at the table named ``self.date``
            inside the corresponding configured log directory.
        """
        return {
            'CheventCookedLog': ExternalInput(os.path.join(config.BS_CHEVENT_COOKED_LOG, self.date)),
            'AdfoxEventLog': ExternalInput(os.path.join(config.ADFOX_EVENT_LOG, self.date)),
        }

    def output(self):
        """Return the YT targets produced by this task.

        Returns:
            dict of ``YtTarget``: two per-day tables (``'socdem_stats'``,
            ``'realtime_socdem_stats'``) named after ``self.date`` and two
            date-independent aggregated tables (``'aggregated_socdem_stats'``,
            ``'aggregated_realtime_socdem_stats'``), all under
            ``config.CHEVENT_LOG_STATS_DIRECTORY``.
        """
        return {
            'socdem_stats': YtTarget(os.path.join(
                config.CHEVENT_LOG_STATS_DIRECTORY, 'datalens_socdem_stats', self.date,
            )),
            'realtime_socdem_stats': YtTarget(os.path.join(
                config.CHEVENT_LOG_STATS_DIRECTORY, 'datalens_realtime_socdem_stats', self.date,
            )),
            'aggregated_socdem_stats': YtTarget(os.path.join(
                config.CHEVENT_LOG_STATS_DIRECTORY, 'datalens_aggregated', 'socdem_stats',
            )),
            'aggregated_realtime_socdem_stats': YtTarget(os.path.join(
                config.CHEVENT_LOG_STATS_DIRECTORY, 'datalens_aggregated', 'realtime_socdem_stats',
            )),
        }

    def run(self):
        """Compute the daily stats tables and refresh the aggregated tables.

        Inside a single YT transaction this method:

        1. runs the daily stats query for ``self.date``, joining the chevent
           log with the profile tables from ``start_date`` to ``end_date``
           (``end_date`` is the day before ``self.date``);
        2. sets a 365-day TTL on both per-day output tables;
        3. for each aggregated table whose ``max_date`` attribute is older
           than ``self.date``, rebuilds it from the not-yet-aggregated per-day
           tables plus its existing rows, then stores ``self.date`` in
           ``max_date``.
        """
        end_date = date_helpers.get_yesterday(self.date)
        start_date = date_helpers.get_date_from_past(end_date, config.NUMBER_OF_INTERMEDIATE_PROFILES_TABLES_TO_KEEP)

        # The aggregation window depends only on self.date, so it is computed
        # once instead of once per aggregated table.
        end_agg_date = self.date
        start_agg_date = date_helpers.get_date_from_past(
            end_agg_date,
            months=config.CHEVENT_LOG_AGGREGATION_MONTH_COUNT,
        )

        # Each pair is (directory with per-day tables, key of the aggregated
        # target in output()). Keeping them together prevents the two names
        # from drifting apart.
        aggregation_plan = (
            ('datalens_socdem_stats', 'aggregated_socdem_stats'),
            ('datalens_realtime_socdem_stats', 'aggregated_realtime_socdem_stats'),
        )

        with self.yt.Transaction() as transaction:
            # Resolve the targets once; the loops below reuse them.
            inputs = self.input()
            outputs = self.output()

            self.yql.query(
                query_string=process_chevent_log_query_template.format(
                    chevent_cooked_log=inputs['CheventCookedLog'].table,
                    adfox_log=inputs['AdfoxEventLog'].table,
                    date=self.date,
                    start_date=start_date,
                    end_date=end_date,
                    profiles=config.YANDEXUID_DAILY_EXPORT_DIRECTORY,
                    stats_table=outputs['socdem_stats'].table,
                    realtime_stats_table=outputs['realtime_socdem_stats'].table,
                ),
                transaction=transaction,
            )

            for stats in ('socdem_stats', 'realtime_socdem_stats'):
                yt_helpers.set_ttl(
                    table=outputs[stats].table,
                    ttl_timedelta=timedelta(days=365),
                    yt_client=self.yt,
                )

            for stats_dir_name, aggregated_stats in aggregation_plan:
                stats_dir = os.path.join(config.CHEVENT_LOG_STATS_DIRECTORY, stats_dir_name)
                aggregate_table = outputs[aggregated_stats].table

                # NOTE(review): when the attribute is missing, the default is
                # start_agg_date, so the range read below starts at
                # start_agg_date + 1 day - confirm that skipping the first day
                # of the window on the initial build is intended.
                last_date_in_agg_table = self.yt.get_attribute(
                    path=aggregate_table,
                    attribute='max_date',
                    default=start_agg_date,
                )

                if last_date_in_agg_table < self.date:
                    self.yql.query(
                        query_string=templater.render_template(
                            get_aggregate_table_template,
                            vars={
                                'stats_dir': stats_dir,
                                'aggregate_table': aggregate_table,
                                'is_aggregate_table_exist': self.yt.exists(aggregate_table),
                                'start_agg_date': start_agg_date,
                                # Start from the first day not yet aggregated.
                                'last_date_in_agg_table': date_helpers.get_date_from_future(last_date_in_agg_table, days=1),
                                'end_agg_date': end_agg_date,
                            },
                        ),
                        transaction=transaction,
                        title='YQL get_aggregate_table_template for {}'.format(stats_dir),
                    )
                    self.yt.set_attribute(
                        path=aggregate_table,
                        attribute='max_date',
                        value=self.date,
                    )
                else:
                    self.logger.info('Stats table is already up to date')
