#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os

import luigi

from crypta.profile.utils.config import config
from crypta.profile.utils.luigi_utils import BaseYtTask, ExternalInput, YtDailyRewritableTarget, AttributeExternalInput
from crypta.profile.utils.segment_utils.builders import RegularSegmentBuilder
from crypta.profile.utils.segment_utils.processors import DayProcessor, LogProcessor


# YQL template that builds two "price segment -> phone models" dictionaries.
#
# Steps performed by the query:
#   1. $phone_prices joins the market offers table with the models dictionary,
#      keeping offers of category 91491 whose price string starts with 'RUR'
#      and whose model has a name. The part of the price string after the
#      first three characters is cast to Uint64 and divided by 10000000.
#   2. $fixed_model_name strips the 'Смартфон ' / 'Телефон ' prefixes.
#   3. Per (model_id, model_name, vendor_id) it computes the median price,
#      the number of distinct offers (by ware_md5), the lemmatized model name
#      and one of six price segment labels ("0_15" ... "100+").
#   4. Models with at least 5 distinct offers are aggregated per price segment
#      into `{output_table}` (model names) and `{output_lemmas_table}`
#      (space-joined lemma strings).
#
# The string is rendered with str.format(output_table=..., output_lemmas_table=...),
# which is why literal YQL braces are doubled ({{ and }}).
get_phone_models_by_price_segments_yql_query = u"""
PRAGMA yt.DefaultMemoryLimit = '1G';

$phone_category_id = 91491;
$phone_prices = (
SELECT
    CAST(offers.model_id AS Uint64) AS model_id,
    (CAST(SUBSTRING(price, 4) AS Uint64) / 10000000) AS price,
    CAST(name AS String) AS model_name,
    CAST(offers.vendor_id AS Uint64) AS vendor_id,
    ware_md5
FROM
    `//home/market/production/indexer/stratocaster/offers/recent` AS offers
INNER JOIN
    `//home/market/production/mstat/dictionaries/models/latest` AS models
ON CAST(offers.model_id AS Uint64) = CAST(models.id AS Uint64)
WHERE
    CAST(offers.category_id AS Int64) == $phone_category_id
    AND String::StartsWith(price, 'RUR')
    AND models.name IS NOT NULL
);

$fixed_model_name = ($model_name) -> {{
    RETURN String::ReplaceAll(String::ReplaceAll($model_name, 'Смартфон ', ''), 'Телефон ', '')
}};

$phone_price_medians_and_price_segments = (
SELECT
    model_id,
    $fixed_model_name(model_name) AS model_name,
    vendor_id,
    String::JoinFromList(Crypta::GetLemmas(SearchRequest::NormalizeSimple(CAST($fixed_model_name(model_name)  AS Utf8)), 'en'), ' ') AS lemmas,
    CAST(MEDIAN(price) AS Uint64) AS rounded_median_price,
    COUNT(DISTINCT ware_md5) AS number_of_offers,
    CASE
      WHEN CAST(MEDIAN(price) AS Uint64) <= 15000 THEN "0_15"
      WHEN CAST(MEDIAN(price) AS Uint64) <= 25000 THEN "15_25"
      WHEN CAST(MEDIAN(price) AS Uint64) <= 35000 THEN "25_35"
      WHEN CAST(MEDIAN(price) AS Uint64) <= 50000 THEN "35_50"
      WHEN CAST(MEDIAN(price) AS Uint64) <= 100000 THEN "50_100"
      ELSE "100+"
    END AS price_segment
FROM $phone_prices
GROUP BY model_id, model_name, vendor_id
ORDER BY model_name
);

INSERT INTO `{output_table}` WITH TRUNCATE
SELECT price_segment, AGGREGATE_LIST(DISTINCT model_name) AS model_names
FROM $phone_price_medians_and_price_segments
WHERE number_of_offers >= 5
GROUP BY price_segment
ORDER BY price_segment;

INSERT INTO `{output_lemmas_table}` WITH TRUNCATE
SELECT price_segment, AGGREGATE_LIST(DISTINCT lemmas) AS model_lemmas
FROM $phone_price_medians_and_price_segments
WHERE number_of_offers >= 5
GROUP BY price_segment
ORDER BY price_segment;
"""


class PhoneModelsByPriceSegmentGetter(BaseYtTask):
    """Task that rebuilds the phone-models-by-price-segment dictionaries.

    Renders the module-level YQL template, runs it inside a YT transaction
    and stamps both result tables with a ``generate_date`` attribute equal
    to the task's ``date`` parameter.
    """

    date = luigi.Parameter()
    task_group = 'coded_segments'

    def requires(self):
        """Return the two external market tables the YQL query reads."""
        offers_input = ExternalInput('//home/market/production/indexer/stratocaster/offers/recent')
        models_input = ExternalInput('//home/market/production/mstat/dictionaries/models/latest')
        return {
            'market_offers': offers_input,
            'market_models': models_input,
        }

    def output(self):
        """Return the daily-rewritable targets for model names and model lemmas."""
        names_target = YtDailyRewritableTarget(
            config.PHONE_MODELS_BY_PRICE_SEGMENT,
            date=self.date,
        )
        lemmas_target = YtDailyRewritableTarget(
            config.PHONE_MODELS_LEMMAS_BY_PRICE_SEGMENT,
            date=self.date,
        )
        return {'names': names_target, 'lemmas': lemmas_target}

    def run(self):
        """Execute the query and mark both output tables with the run date."""
        with self.yt.Transaction() as transaction:
            targets = self.output()
            names_table = targets['names'].table
            lemmas_table = targets['lemmas'].table

            rendered_query = get_phone_models_by_price_segments_yql_query.format(
                output_table=names_table,
                output_lemmas_table=lemmas_table,
            )
            self.yql.query(query_string=rendered_query, transaction=transaction)

            # Same attribute on both tables: names first, then lemmas.
            for table_path in (names_table, lemmas_table):
                self.yt.set_attribute(table_path, 'generate_date', self.date)


class PhoneBuyersMapper(object):
    """Mapper that tags rows whose lemmas contain every token of a phone model.

    Args:
        phone_models_by_price_segments: dict mapping a price segment name to
            a list of token sets, one set per phone model.
        all_phone_tokens: set with every token that occurs in any model; used
            as a cheap pre-filter before the per-model check.
    """

    def __init__(self, phone_models_by_price_segments, all_phone_tokens):
        self.phone_models_by_price_segments = phone_models_by_price_segments
        self.all_phone_tokens = all_phone_tokens

    def __call__(self, row):
        """Yield one record per phone model fully contained in ``row['lemmas']``.

        Args:
            row: mapping with ``'lemmas'`` (iterable of tokens, may be empty
                or None) and ``'yandexuid'``.

        Yields:
            dict with ``id`` (stringified yandexuid), ``id_type`` and
            ``segment_name``. A record is produced for every matching model,
            so the same segment may be yielded more than once for one row.
        """
        lemmas = row['lemmas']
        if not lemmas:
            return

        title_words = set(lemmas)
        # Fast rejection: skip rows that share no token with any phone model.
        if title_words.isdisjoint(self.all_phone_tokens):
            return

        # .items() (not the Python 2-only .iteritems()) keeps this callable on
        # both Python 2 and Python 3.
        for price_segment, model_names_tokens in self.phone_models_by_price_segments.items():
            for model_name_tokens in model_names_tokens:
                # A model matches only when all of its tokens are present.
                if title_words.issuperset(model_name_tokens):
                    yield {
                        'id': str(row['yandexuid']),
                        'id_type': 'yandexuid',
                        'segment_name': price_segment,
                    }


def reducer(key, rows):
    """Emit the reduce key once per group, dropping the grouped rows.

    Args:
        key: the reduce key for the current group.
        rows: the rows of the group; intentionally never consumed.

    Yields:
        ``key`` exactly once, which collapses each group to a single record.
    """
    # Only the key is emitted; `rows` is left untouched.
    yield key


class ProcessedMetricsForPhoneBuyers(DayProcessor):
    """Day processor that matches parsed-metrics lemmas against phone models."""

    def requires(self):
        """Return the parsed-metrics table for ``self.date``.

        The input is requested with attribute ``closed`` equal to True and
        restricted to the ``yandexuid`` and ``lemmas`` columns.
        """
        day_table = os.path.join(
            config.METRICS_PARSED_DIR,
            self.date,
        )
        return AttributeExternalInput(
            table=day_table,
            attribute_name='closed',
            attribute_value=True,
            columns=('yandexuid', 'lemmas'),
        )

    def get_phone_models(self):
        """Load the lemma dictionary and convert it to token sets.

        Returns:
            A pair ``(segments, vocabulary)``: ``segments`` maps each price
            segment to a list of non-empty token sets (one per model lemma
            string), ``vocabulary`` is the union of all those tokens.

        Raises:
            Exception: if the table does not yield exactly 6 price segments.
        """
        # NOTE(review): this table is read here but the task producing it is
        # not listed in requires() of this class — confirm the ordering is
        # guaranteed by the caller.
        segments = {}
        vocabulary = set()

        for row in self.yt.read_table(config.PHONE_MODELS_LEMMAS_BY_PRICE_SEGMENT):
            candidate_sets = (
                set(lemmas_string.split())
                for lemmas_string in row['model_lemmas']
                if lemmas_string
            )
            # Whitespace-only strings split into nothing; drop those too.
            token_sets = [tokens for tokens in candidate_sets if tokens]
            vocabulary.update(*token_sets)
            segments[row['price_segment']] = token_sets

        # An empty dictionary has length 0, so this single check also covers it.
        if len(segments) != 6:
            raise Exception('Broken phone model dict {}'.format(segments))

        return segments, vocabulary

    def process_day(self, inputs, output_path):
        """Run the map-reduce that writes matched (id, id_type, segment_name) rows."""
        segments, vocabulary = self.get_phone_models()
        mapper = PhoneBuyersMapper(segments, vocabulary)
        self.yt.run_map_reduce(
            mapper,
            reducer,
            inputs.table,
            output_path,
            reduce_by=('id', 'id_type', 'segment_name'),
        )


class PhoneBuyers(RegularSegmentBuilder):
    """Segment builder for phone buyers split by phone price segment.

    Combines ``number_of_days`` days of processed metrics and keeps unique
    (id, id_type, segment_name) rows.
    """

    # Length of the window passed to LogProcessor.
    number_of_days = 7

    # Price segment name -> pair of ids.
    # NOTE(review): presumably (keyword_id, segment_id); '50_100' uses
    # (549, 1997) unlike the other (547, 20xx) entries — confirm it is intended.
    name_segment_dict = {
        '0_15': (547, 2045),
        '15_25': (547, 2046),
        '25_35': (547, 2047),
        '35_50': (547, 2048),
        '50_100': (549, 1997),
        '100+': (547, 2049),
    }

    def requires(self):
        """Return the processed-metrics window and the phone model dictionary task."""
        processed_metrics = LogProcessor(
            ProcessedMetricsForPhoneBuyers,
            self.date,
            self.number_of_days,
        )
        models_getter = PhoneModelsByPriceSegmentGetter(self.date)
        return {
            'ProcessedMetrics': processed_metrics,
            'PhoneModelsByPriceSegmentGetter': models_getter,
        }

    def build_segment(self, inputs, output_path):
        """Write the deduplicated processed-metrics rows to ``output_path``."""
        source_table = inputs['ProcessedMetrics'].table
        dedup_columns = ['id', 'id_type', 'segment_name']
        self.yt.unique(
            source_table,
            output_path,
            unique_by=dedup_columns,
        )
