#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os

from crypta.lib.python import templater
from crypta.profile.lib import date_helpers
from crypta.profile.utils import (
    luigi_utils,
    utils,
)
from crypta.profile.utils.config import config
from crypta.profile.utils.segment_utils import (
    builders,
    processors,
)

# YQL template for one day of checkout-page visits.
#
# Template variables: `input_table`, `output_table`.
#
# Keeps only rows whose `url` matches the Ozon checkout pattern
# (`%ozon.ru/gocheckout%`) or the Wildberries basket pattern
# (`%wildberries.ru/lk/basket%`), and writes one row per (yandexuid, date)
# with 0/1 flags `ozon` and `wildberries` (MAX over the matched rows).
# `date` is taken from TableName() of the input table.
# The output is written with INSERT ... WITH TRUNCATE.
CHECKOUT_VISITS_QUERY = """
$ozon_checkout = "%ozon.ru/gocheckout%";
$wb_checkout = "%wildberries.ru/lk/basket%";

$raw_counts = (
    SELECT
        yandexuid,
        TableName() AS `date`,
        IF(url LIKE $ozon_checkout, 1, 0) AS ozon,
        IF(url LIKE $wb_checkout, 1, 0) AS wildberries
    FROM `{{input_table}}`
    WHERE url LIKE $ozon_checkout OR url LIKE $wb_checkout
);

INSERT INTO `{{output_table}}` WITH TRUNCATE
SELECT
    yandexuid,
    `date`,
    MAX(ozon) AS ozon,
    MAX(wildberries) AS wildberries
FROM $raw_counts
GROUP BY yandexuid, `date`;
"""

# YQL + Jinja template for one day of device visits to marketplace pick-up points.
#
# Template variables: `organization_categories`, `deep_visits`, `output_table`.
#
# Steps:
#   1. $permalinks: organizations whose lower-cased `name_ru` matches
#      `%wildberries%` or `%ozon%` and whose `categories` intersect the three
#      category ids in $pick_up_points_categories. Each permalink is labelled
#      'wildberries' when the name matches the Wildberries pattern, otherwise
#      'ozon' (so a name matching both patterns is labelled 'wildberries').
#   2. The Jinja loop defines $ozon_permalinks and $wildberries_permalinks.
#   3. $merged: per (devid, date) the `permalinks` lists from `deep_visits`
#      are concatenated, de-duplicated and flattened to one row per permalink;
#      `date` is taken from TableName().
#   4. $filtered / final SELECT: keeps permalinks present in either set and
#      writes one row per (devid, date) with 0/1 flags `ozon` and `wildberries`.
# The output is written with INSERT ... WITH TRUNCATE.
PICK_UP_POINTS_VISITS = u"""
$wb_name = '%wildberries%';
$ozon_name = '%ozon%';

$pick_up_points_categories = AsSet(
    31652, --Пункт выдачи
    31643, --Почтовый терминал
    30306, --Интернет-магазин
);
$permalinks = (
    SELECT
        permalink,
        CASE
            WHEN String::ToLower(name_ru) LIKE $wb_name THEN 'wildberries'
            ELSE 'ozon'
        END AS name
    FROM `{{organization_categories}}`
    WHERE
        (
            String::ToLower(name_ru) LIKE $wb_name
            OR String::ToLower(name_ru) LIKE $ozon_name
        )
        AND DictLength(SetIntersection(ToSet(Yson::ConvertToUint64List(categories)), $pick_up_points_categories)) != 0
    ORDER BY permalink
);

{% for marketplace in ("ozon", "wildberries") %}
${{marketplace}}_permalinks = (
    SELECT permalink
    FROM $permalinks
    WHERE name == '{{marketplace}}'
);
{% endfor %}

$merged = (
    SELECT
        devid,
        `date`,
        permalink,
    FROM (
        SELECT
            devid,
            `date`,
            ListUniq(ListFlatMap(AGGREGATE_LIST(permalinks), ($x)->{RETURN $x;})) AS permalinks,
        FROM (
            SELECT
                devid,
                permalinks,
                TableName() AS `date`
            FROM `{{deep_visits}}`
        )
        GROUP BY devid, `date`
    )
    FLATTEN BY permalinks AS permalink
);

$filtered = (
    SELECT
        devid,
        `date`,
        IF(permalink in $ozon_permalinks, 1, 0) AS ozon,
        IF(permalink in $wildberries_permalinks, 1, 0) AS wildberries,
    FROM $merged
    WHERE permalink in $ozon_permalinks OR permalink in $wildberries_permalinks
);

INSERT INTO `{{output_table}}` WITH TRUNCATE
SELECT
    devid,
    `date`,
    MAX(ozon) AS ozon,
    MAX(wildberries) AS wildberries,
FROM $filtered
GROUP BY devid, `date`
"""

# YQL + Jinja template that builds the final marketplace segments.
#
# Template variables: `idfa_crypta_id`, `gaid_crypta_id`, `yandexuid_crypta_id`,
# `visits`, `bar`, `metrics`, `market`, `start_timestamp`, `output_table`.
#
# Steps:
#   1. $visits_crypta_ids: `visits` rows joined (by devid) with the idfa and
#      gaid matching tables, combined with UNION ALL, keyed by crypta_id.
#   2. $bar_crypta_ids / $metrics_crypta_ids: `bar` / `metrics` rows joined
#      with the yandexuid matching table (yandexuid is cast to String).
#   3. $<source>_count: per crypta_id, the sum over dates of the per-date MAX
#      of each flag, cast to Double. With the 0/1 flags produced by the
#      day-level queries in this module this is the number of dates with a hit.
#   4. $market_count: per CryptaId, SUM(ItemUnitPriceRub * ItemQuantity) over
#      rows with non-NULL CryptaId and `Timestamp` >= start_timestamp.
#   5. 50th and 95th percentiles are computed for every (marketplace, source)
#      pair and for the market cost.
#   6. Output rows (id, id_type='crypta_id', segment_name):
#        '<name>_95' when value >= p95,
#        '<name>_50' when p50 <= value < p95,
#      where <name> is ozon, wildberries or market. Rows from all sources are
#      combined with UNION ALL and de-duplicated by GROUP BY id, segment_name.
#
# NOTE: this is a non-raw Python string, so every "\n" inside the
# {{"\nUNION ALL\n" ...}} expressions is already a real newline character by
# the time Jinja parses the template.
BUILD_SEGMENT_QUERY = u"""
--convert all ids to crypta_id
$visits_crypta_ids = (
{% for id_type in ("idfa", "gaid") %}
    SELECT
        matching.target_id AS crypta_id,
        segment.`date` AS `date`,
        segment.ozon AS ozon,
        segment.wildberries AS wildberries,
    {% if id_type == "idfa" %}
    FROM `{{idfa_crypta_id}}` AS matching
    {% else %}
    FROM `{{gaid_crypta_id}}` AS matching
    {% endif %}
    INNER JOIN `{{visits}}` AS segment
    ON matching.id == segment.devid
    {{"\nUNION ALL\n" if not loop.last}}
{% endfor %}
);

{% for source_type in ("bar", "metrics") %}
${{source_type}}_crypta_ids = (
    SELECT
        matching.target_id AS crypta_id,
        segment.ozon AS ozon,
        segment.wildberries AS wildberries,
        segment.`date` AS `date`,
    FROM `{{yandexuid_crypta_id}}` AS matching
    {% if source_type == "bar" %}
    INNER JOIN `{{bar}}` AS segment
    {% else %}
    INNER JOIN `{{metrics}}` AS segment
    {% endif %}
    ON matching.id == CAST(segment.yandexuid AS String)
);

{% endfor %}

--count actions
{% for source_type in ("bar", "metrics", "visits") %}
${{source_type}}_count = (
    SELECT
        crypta_id,
        CAST(SUM(ozon) AS Double) AS ozon_visits_count,
        CAST(SUM(wildberries) AS Double) AS wildberries_visits_count,
    FROM (
        SELECT
            crypta_id,
            MAX(ozon) AS ozon,
            MAX(wildberries) AS wildberries,
        FROM ${{source_type}}_crypta_ids
        GROUP BY crypta_id, `date`
    )
    GROUP BY crypta_id
);

{% endfor %}

$market_count = (
    SELECT
        CAST(CryptaId AS String) AS crypta_id,
        SUM(ItemUnitPriceRub * ItemQuantity) AS cost
    FROM `{{market}}`
    WHERE CryptaId IS NOT NULL
        AND `Timestamp` >= {{start_timestamp}}
    GROUP BY CryptaId
);

--count percentiles
{% for source_type in ("bar", "metrics", "visits") %}
    {% for marketplace in ("ozon", "wildberries") %}
        {% for percent in ("95", "50") -%}
        ${{marketplace}}_{{source_type}}_{{percent}}_percentile = (
            SELECT
                PERCENTILE({{marketplace}}_visits_count, 0.{{percent}}),
            FROM ${{source_type}}_count
);

        {% endfor %}
    {% endfor %}
{% endfor %}

{% for percent in ("95", "50") %}
$market_{{percent}}_percentile = (
            SELECT
                PERCENTILE(cost, 0.{{percent}})
            FROM $market_count
);

{% endfor %}

--filter according to percentiles
INSERT INTO `{{output_table}}` WITH TRUNCATE
SELECT
    id,
    'crypta_id' AS id_type,
    segment_name,
FROM (
{% for source_type in ("bar", "metrics", "visits") %}
    {% for marketplace in ("ozon", "wildberries") %}
        {% for percent in ("95", "50") %}
        SELECT
            crypta_id AS id,
            '{{marketplace}}_{{percent}}' AS segment_name,
        FROM ${{source_type}}_count
        WHERE {{marketplace}}_visits_count >= ${{marketplace}}_{{source_type}}_{{percent}}_percentile
            {% if percent == "50" %}
            AND {{marketplace}}_visits_count < ${{marketplace}}_{{source_type}}_95_percentile
            {% else %}
            {% endif %}
            {{"\nUNION ALL\n" if not loop.last}}
        {%- endfor -%}
        {{"\nUNION ALL\n" if not loop.last}}
    {%- endfor -%}
    {{"\nUNION ALL\n"}}
{%- endfor -%}

{% for percent in ("95", "50") %}
        SELECT
            crypta_id AS id,
            'market_{{percent}}' AS segment_name,
        FROM $market_count
        WHERE cost >= $market_{{percent}}_percentile
            {% if percent == "50" %}
            AND cost < $market_95_percentile
            {% else %}
            {% endif %}
{{"\nUNION ALL\n" if not loop.last}}
{% endfor %}
)
GROUP BY id, segment_name;
"""


class MarketplacesLtvUsersBaseDayProcessor(processors.DayProcessor):
    """Day processor that runs CHECKOUT_VISITS_QUERY over a single input table.

    This class does not define ``requires``; the subclasses in this module
    supply the concrete daily input.
    """

    def process_day(self, inputs, output_path):
        """Render the checkout-visits query for one day and execute it.

        :param inputs: requirement object; its ``table`` attribute is used as
            the query's ``input_table``.
        :param output_path: value substituted for ``output_table``.
        """
        template_vars = {
            'input_table': inputs.table,
            'output_table': output_path,
        }
        rendered_query = templater.render_template(
            CHECKOUT_VISITS_QUERY,
            vars=template_vars,
            strict=True,
        )
        self.yql.query(rendered_query, transaction=self.transaction)


class ProcessedMetricsForMarketplacesLtvUsers(MarketplacesLtvUsersBaseDayProcessor):
    """Checkout-visits day processor whose input lives under ``config.METRICS_PARSED_DIR``."""

    def requires(self):
        """Return the external input at ``<METRICS_PARSED_DIR>/<self.date>``."""
        daily_input_path = os.path.join(config.METRICS_PARSED_DIR, self.date)
        return luigi_utils.ExternalInput(daily_input_path)


class ProcessedBarForMarketplacesLtvUsers(MarketplacesLtvUsersBaseDayProcessor):
    """Checkout-visits day processor whose input lives under ``config.BAR_PARSED_DIR``."""

    def requires(self):
        """Return the external input at ``<BAR_PARSED_DIR>/<self.date>``."""
        daily_input_path = os.path.join(config.BAR_PARSED_DIR, self.date)
        return luigi_utils.ExternalInput(daily_input_path)


class ProcessedVisitsMarketplacesLtvUsers(processors.DayProcessor):
    """Day processor that runs PICK_UP_POINTS_VISITS for ``self.date``."""

    def requires(self):
        """Return the two external inputs the pick-up-points query reads.

        Keys are ``organization_categories`` and ``deep_visits``; the latter
        points at ``<DAY_PROCESSORS_OUTPUT_FOLDER>/DeepVisitsDayProcessor/<self.date>``.
        """
        categories_input = luigi_utils.ExternalInput(config.ORGANIZATION_CATEGORIES)
        deep_visits_path = os.path.join(
            config.DAY_PROCESSORS_OUTPUT_FOLDER,
            'DeepVisitsDayProcessor',
            self.date,
        )
        deep_visits_input = luigi_utils.ExternalInput(deep_visits_path)
        return {
            'organization_categories': categories_input,
            'deep_visits': deep_visits_input,
        }

    def process_day(self, inputs, output_path):
        """Render the pick-up-points query for one day and execute it.

        :param inputs: mapping with ``organization_categories`` and
            ``deep_visits`` entries; the ``table`` attribute of each is passed
            to the template under the same name.
        :param output_path: value substituted for ``output_table``.
        """
        template_vars = {}
        for input_name in ('organization_categories', 'deep_visits'):
            template_vars[input_name] = inputs[input_name].table
        template_vars['output_table'] = output_path

        rendered_query = templater.render_template(
            PICK_UP_POINTS_VISITS,
            vars=template_vars,
            strict=True,
        )
        self.yql.query(rendered_query, transaction=self.transaction)


class MarketplacesLtvUsers(builders.RegularSegmentBuilder):
    """Segment builder that renders and runs BUILD_SEGMENT_QUERY.

    The inputs are the three day-processor logs defined in this module over
    the last ``number_of_days`` days, plus an external market table.
    """

    # Keys are the segment_name values emitted by BUILD_SEGMENT_QUERY; values
    # are pairs of numeric ids that this class itself never reads —
    # presumably (keyword_id, segment_id) used by the base builder; confirm
    # against RegularSegmentBuilder.
    name_segment_dict = {
        'ozon_95': (549, 2233),
        'wildberries_95': (549, 2231),
        'ozon_50': (549, 2230),
        'wildberries_50': (549, 2228),
        'market_95': (549, 2232),
        'market_50': (549, 2229),
    }

    # Length of the look-back window: passed to every LogProcessor and used to
    # derive the ``start_timestamp`` template variable.
    number_of_days = 90

    def requires(self):
        """Return the log processors for each source plus the market input."""
        day_processors = (
            ('metrics', ProcessedMetricsForMarketplacesLtvUsers),
            ('bar', ProcessedBarForMarketplacesLtvUsers),
            ('visits', ProcessedVisitsMarketplacesLtvUsers),
        )
        requirements = {}
        for source_name, processor_cls in day_processors:
            requirements[source_name] = processors.LogProcessor(
                processor_cls,
                self.date,
                self.number_of_days,
            )
        requirements['market'] = luigi_utils.ExternalInput(config.MARKET_TRANCATIONS)
        return requirements

    def build_segment(self, inputs, output_path):
        """Render BUILD_SEGMENT_QUERY with all table paths and execute it.

        :param inputs: mapping with ``visits``, ``bar``, ``metrics`` and
            ``market`` entries; the ``table`` attribute of each is passed to
            the template under the same name.
        :param output_path: value substituted for ``output_table``.
        """
        template_vars = {'output_table': output_path}

        # Matching tables that map each id type to crypta_id.
        template_vars['idfa_crypta_id'] = utils.get_matching_table('idfa', 'crypta_id')
        template_vars['gaid_crypta_id'] = utils.get_matching_table('gaid', 'crypta_id')
        template_vars['yandexuid_crypta_id'] = utils.get_matching_table('yandexuid', 'crypta_id')

        for input_name in ('visits', 'bar', 'metrics', 'market'):
            template_vars[input_name] = inputs[input_name].table

        # Lower bound applied to the market table's `Timestamp` column.
        window_start_date = date_helpers.get_date_from_past(self.date, self.number_of_days)
        template_vars['start_timestamp'] = date_helpers.from_date_string_to_timestamp(window_start_date)

        rendered_query = templater.render_template(
            BUILD_SEGMENT_QUERY,
            vars=template_vars,
            strict=True,
        )
        self.yql.query(rendered_query, transaction=self.transaction)
