#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
from collections import defaultdict, Counter
from functools import partial

import luigi
import yt.yson as yson

from crypta.profile.utils.config import config
from crypta.profile.utils.utils import is_valid_uint64
from crypta.profile.utils.luigi_utils import ExternalInput, BaseYtTask, YtDailyRewritableTarget
from crypta.profile.utils.segment_utils.builders import RegularSegmentBuilder
from crypta.profile.utils.clients.clickhouse import ClickhouseClient
from crypta.profile.utils.segment_utils.processors import LogProcessor, DayProcessor


# Path of the Kinopoisk sessions table; it is wrapped in an ExternalInput and
# required by ProcessedKinopoiskSessionsForFilmLovers.
KINOPOISK_YESTERDAY_SESSIONS = '//home/ott-analytics/sessions/yesterday_sessions'


# YQL template rendered with str.format(); placeholders: {output_table},
# {film_genres}, {episode_air_date}. Doubled braces are literal braces of the
# YQL lambda bodies.
#
# The query overwrites {output_table} with one row per film_id:
#   is_serial        - a kp_serial_id taken from the LEFT JOIN with the distinct
#                      ids of {episode_air_date} (the consumer only checks its
#                      truthiness);
#   genres           - distinct genre_eng values of the film;
#   is_russian       - whether 'Russia' occurs among the comma-split
#                      countries_eng values aggregated over the film's rows;
#   is_multinational - whether that flattened country list has more than one
#                      element.
# NOTE(review): the country list is aggregated without DISTINCT, so its length
# counts every element of every grouped row; if a film can occupy several rows
# of {film_genres}, a single-country film would also be flagged - confirm that
# this is intended.
film_genres_yql_query = """
$flatten = ($nested_list) -> {{
    RETURN ListFlatMap($nested_list, ($elem) -> {{RETURN $elem}});
}};

INSERT INTO `{output_table}` WITH TRUNCATE
SELECT
    film_id,
    SOME(series.kp_serial_id) AS is_serial,
    AGGREGATE_LIST(DISTINCT film_genres.genre_eng) AS genres,
    ListHas($flatten(AGGREGATE_LIST(String::SplitToList(film_genres.countries_eng, ','))), 'Russia') AS is_russian,
    ListLength($flatten(AGGREGATE_LIST(String::SplitToList(film_genres.countries_eng, ',')))) > 1 AS is_multinational
FROM `{film_genres}` AS film_genres
LEFT JOIN
(
    SELECT DISTINCT kp_serial_id
    FROM `{episode_air_date}`
) AS series
ON film_genres.film_id == series.kp_serial_id
GROUP BY film_genres.film_id AS film_id
"""

# ClickHouse query template rendered with str.format(); placeholder: {date}.
# Selects distinct (UserID, film_id) pairs from hits_all for counter 22663942
# on the given date, restricted to URLs whose path starts with '/film'.
# film_id is the last '-'-separated piece of element [3] of the path split
# on '/'.
film_lovers_clickhouse_query = """
SELECT UserID, film_id
FROM hits_all
WHERE CounterID == 22663942 AND EventDate == toDate('{date}') AND startsWith(path(URL), '/film')
GROUP BY UserID, splitByString('-', splitByString('/', path(URL))[3])[-1] AS film_id
"""

# YQL template rendered with str.format(); placeholders: {output_table},
# {sessions}. Overwrites {output_table} with one row per row of {sessions}:
#   film_id - kinopoisk_id cast to Uint64;
#   id      - puid, or yandexuid when puid equals '-';
#   id_type - 'puid' or 'yandexuid', matching the column used for id.
kinopoisk_sessions_yql_query = '''
INSERT INTO `{output_table}` WITH TRUNCATE
SELECT
    CAST(kinopoisk_id as Uint64) as film_id,
    CASE
        WHEN puid != '-' THEN puid
        ELSE yandexuid
    END as id,
    CASE
        WHEN puid != '-' THEN 'puid'
        ELSE 'yandexuid'
    END as id_type
FROM `{sessions}`
'''


def film_genres_reducer(key, rows, film_id_sets, serials_info):
    """Emit one record per segment matched by the films seen for a single id.

    Args:
        key: mapping with the reduce key fields 'id' and 'id_type'.
        rows: iterable of mappings, each carrying a 'film_id' field.
        film_id_sets: mapping of genre segment name -> collection of film ids
            that belong to that genre.
        serials_info: mapping that may contain the keys 'is_serial',
            'russian_lovers' and 'foreign_lovers', each mapped to a collection
            of film ids. A missing key is treated as an empty collection.

    Yields:
        Dicts with 'id' (the key id converted to str), 'id_type' and
        'segment_name'. Every matched segment is emitted exactly once: genre
        and '<origin>_film' segments first, '<origin>_series' segments after
        them, each group in sorted order.
    """
    # The caller converts its defaultdicts to plain dicts, so a flag that was
    # never set for any film is simply absent; fall back to an empty collection
    # instead of failing with KeyError.
    no_ids = frozenset()
    serial_ids = serials_info.get('is_serial', no_ids)
    origin_ids = {
        origin: serials_info.get(origin, no_ids)
        for origin in ('russian_lovers', 'foreign_lovers')
    }

    film_segments = set()
    series_segments = set()

    for row in rows:
        film_id = row['film_id']
        is_serial = film_id in serial_ids

        # Genre segments are assigned for films only, never for serials.
        if not is_serial:
            for segment_name, genre_film_ids in film_id_sets.items():
                if film_id in genre_film_ids:
                    film_segments.add(segment_name)

        # Origin segments are split into a '_series' and a '_film' flavour.
        for origin, ids in origin_ids.items():
            if film_id not in ids:
                continue
            if is_serial:
                series_segments.add(origin + '_series')
            else:
                film_segments.add(origin + '_film')

    idx = str(key['id'])
    id_type = key['id_type']
    for segment_name in sorted(film_segments) + sorted(series_segments):
        yield {
            'id': idx,
            'id_type': id_type,
            'segment_name': segment_name,
        }


class ProcessedMetricsForFilmLovers(DayProcessor):
    """Day processor that stores (id, film_id) pairs fetched from ClickHouse."""

    def process_day(self, inputs, output_path):
        """Fetch the film hits of ``self.date`` and write them sorted by id.

        The ClickHouse response is parsed as tab-separated ``UserID, film_id``
        lines. Lines that cannot be parsed are logged and skipped; lines whose
        fields are not valid uint64 values are skipped silently. The records
        are written to a temporary table and sorted by 'id' into
        ``output_path``.

        Args:
            inputs: not used by this method.
            output_path: path of the table receiving the sorted records.
        """
        clickhouse = ClickhouseClient(logger=self.logger)
        result = clickhouse.make_query(
            film_lovers_clickhouse_query.format(date=self.date)
        )
        if not result:
            # Processing deliberately continues: an empty response produces an
            # empty output table.
            # NOTE(review): if make_query can return None, the loop below
            # raises AttributeError - confirm its contract.
            self.logger.error('Empty result')

        records = []
        for line in result.splitlines():
            try:
                yandexuid, film_id = line.rstrip().split('\t', 1)
                if is_valid_uint64(yandexuid) and is_valid_uint64(film_id):
                    records.append({'id': int(yandexuid), 'id_type': 'yandexuid', 'film_id': int(film_id)})
            except Exception:
                # Best effort per line: log the offending line and go on.
                # Unlike a bare ``except:``, this lets KeyboardInterrupt and
                # SystemExit propagate.
                self.logger.exception(line)

        with self.yt.TempTable() as temporary_table:
            self.yt.create_empty_table(
                temporary_table,
                schema={
                    'id': 'uint64',
                    'id_type': 'string',
                    'film_id': 'uint64',
                },
            )
            self.yt.write_table(temporary_table, records)
            self.yt.run_sort(temporary_table, output_path, sort_by='id')


class ProcessedKinopoiskSessionsForFilmLovers(DayProcessor):
    """Day processor that converts Kinopoisk sessions into (film_id, id, id_type) rows."""

    def requires(self):
        """Return the external input wrapping the Kinopoisk sessions table."""
        sessions_input = ExternalInput(KINOPOISK_YESTERDAY_SESSIONS)
        return sessions_input

    def process_day(self, inputs, output_path):
        """Render the sessions YQL template and execute it.

        Args:
            inputs: not used by this method.
            output_path: table the query writes its result to.
        """
        sessions_table = self.input().table
        rendered_query = kinopoisk_sessions_yql_query.format(
            sessions=sessions_table,
            output_table=output_path,
        )
        self.yql.query(rendered_query)


class FilmIdsByGenres(BaseYtTask):
    """Task that builds the per-film genre / serial / country flags table."""

    # Date of the daily output target; also written to the 'generate_date'
    # attribute of the output table.
    date = luigi.Parameter()

    def requires(self):
        """Return the external inputs: the film genres table and the serials table."""
        film_genres_input = ExternalInput(config.KINOPOISK_FILM_GENRES)
        serials_input = ExternalInput(config.KINOPOISK_SERIALS)
        return {
            'FilmGenres': film_genres_input,
            'Serials': serials_input,
        }

    def output(self):
        """Return the daily rewritable target under '<LONG_TERM_DATA_FOLDER>/kinopoisk/genres'."""
        genres_path = os.path.join(config.LONG_TERM_DATA_FOLDER, 'kinopoisk/genres')
        return YtDailyRewritableTarget(genres_path, date=self.date)

    def run(self):
        """Execute the film genres YQL query and stamp the output table with the date."""
        sources = self.input()
        target_table = self.output().table

        rendered_query = film_genres_yql_query.format(
            film_genres=sources['FilmGenres'].table,
            episode_air_date=sources['Serials'].table,
            output_table=target_table,
        )
        self.yql.query(rendered_query)

        self.yt.set_attribute(target_table, 'generate_date', self.date)


class FilmLoversByGenres(RegularSegmentBuilder):
    """Segment builder assigning genre and origin "film lovers" segments to ids."""

    # Segment name -> pair of numeric identifiers.
    # NOTE(review): presumably the pair is interpreted by RegularSegmentBuilder
    # when exporting segments - confirm against the base class.
    name_segment_dict = {
        'horror_film_lovers': (547, 1939),
        'family_film_lovers': (547, 1922),
        'military_film_lovers': (547, 1941),
        'action_film_lovers': (547, 1933),
        'adult_film_lovers': (547, 1925),
        'adventure_film_lovers': (547, 1945),
        'biography_film_lovers': (547, 1937),
        'comedy_film_lovers': (547, 1944),
        'crime_film_lovers': (547, 1923),
        'documentary_film_lovers': (547, 1949),
        'drama_film_lovers': (547, 1931),
        'fantasy_film_lovers': (547, 1951),
        'noir_film_lovers': (547, 1932),
        'game_show_film_lovers': (547, 1927),
        'history_film_lovers': (547, 1940),
        'music_film_lovers': (547, 1942),
        'musical_film_lovers': (547, 1934),
        'mystery_film_lovers': (547, 1950),
        'news_film_lovers': (547, 1935),
        'reality-tv_film_lovers': (547, 1930),
        'romance_film_lovers': (547, 1936),
        'sci-fi_film_lovers': (547, 1953),
        'short_film_lovers': (547, 1947),
        'sport_film_lovers': (547, 1924),
        'talk-show_film_lovers': (547, 1928),
        'thriller_film_lovers': (547, 1929),
        'western_film_lovers': (547, 1943),
        'anime_film_lovers': (547, 1948),
        'ceremony_film_lovers': (547, 1926),
        'concert_film_lovers': (547, 1938),
        'russian_lovers_series': (547, 1946),
        'foreign_lovers_series': (547, 1952),
        'russian_lovers_film': (547, 1728),
        'foreign_lovers_film': (547, 1727),
    }

    # Genre value found in the 'genres' column -> segment name it contributes
    # to. Several genres may map onto the same segment (e.g. 'Animation' and
    # 'Family').
    genre_name_dict = {
        'Horror': 'horror_film_lovers',
        'Animation': 'family_film_lovers',
        'Family': 'family_film_lovers',
        'War': 'military_film_lovers',
        'Action': 'action_film_lovers',
        'Adult': 'adult_film_lovers',
        'Adventure': 'adventure_film_lovers',
        'Biography': 'biography_film_lovers',
        'Comedy': 'comedy_film_lovers',
        'Crime': 'crime_film_lovers',
        'Documentary': 'documentary_film_lovers',
        'Drama': 'drama_film_lovers',
        'Fantasy': 'fantasy_film_lovers',
        'Film-Noir': 'noir_film_lovers',
        'Game-Show': 'game_show_film_lovers',
        'History': 'history_film_lovers',
        'Music': 'music_film_lovers',
        'Musical': 'musical_film_lovers',
        'Mystery': 'mystery_film_lovers',
        'News': 'news_film_lovers',
        'Reality-TV': 'reality-tv_film_lovers',
        'Romance': 'romance_film_lovers',
        'Sci-Fi': 'sci-fi_film_lovers',
        'Short': 'short_film_lovers',
        'Sport': 'sport_film_lovers',
        'Talk-Show': 'talk-show_film_lovers',
        'Thriller': 'thriller_film_lovers',
        'Western': 'western_film_lovers',
        'anime': 'anime_film_lovers',
        'ceremony': 'ceremony_film_lovers',
        'concert': 'concert_film_lovers',
    }

    # Day count handed to both LogProcessor requirements.
    number_of_days = 30

    def requires(self):
        """Return the processed hits, the processed sessions and the film genres task."""
        processed_hits = LogProcessor(
            ProcessedMetricsForFilmLovers,
            self.date,
            self.number_of_days,
        )
        processed_sessions = LogProcessor(
            ProcessedKinopoiskSessionsForFilmLovers,
            self.date,
            self.number_of_days,
        )
        film_ids_by_genres = FilmIdsByGenres(date=self.date)
        return {
            'ProcessedHits': processed_hits,
            'ProcessedSessions': processed_sessions,
            'FilmIdsByGenres': film_ids_by_genres,
        }

    def build_segment(self, inputs, output_path):
        """Load the film lookup sets and run the map-reduce that assigns segments.

        The film genres table is read in full: film ids are grouped per genre
        segment and per flag ('is_serial', 'russian_lovers', 'foreign_lovers').
        Those sets are bound to ``film_genres_reducer``, which is run over the
        processed hits and sessions tables reduced by ('id', 'id_type').

        Args:
            inputs: not used by this method.
            output_path: table receiving the reducer output.
        """
        task_inputs = self.input()

        # Column of the genres table -> key under which film ids are collected.
        flag_columns = (
            ('is_serial', 'is_serial'),
            ('is_russian', 'russian_lovers'),
            ('is_multinational', 'foreign_lovers'),
        )

        ids_by_segment = defaultdict(set)
        ids_by_flag = defaultdict(set)

        for row in self.yt.read_table(task_inputs['FilmIdsByGenres'].table):
            film_id = yson.YsonUint64(int(row['film_id']))

            for genre in row['genres']:
                if genre not in self.genre_name_dict:
                    continue
                ids_by_segment[self.genre_name_dict[genre]].add(film_id)

            for column, flag_key in flag_columns:
                if row[column]:
                    ids_by_flag[flag_key].add(film_id)

        # Plain dicts are bound to the reducer instead of the defaultdicts.
        reducer = partial(
            film_genres_reducer,
            film_id_sets=dict(ids_by_segment),
            serials_info=dict(ids_by_flag),
        )
        source_tables = [
            task_inputs['ProcessedHits'].table,
            task_inputs['ProcessedSessions'].table,
        ]
        two_gib = 2 * 1024 ** 3

        self.yt.run_map_reduce(
            None,
            reducer,
            source_tables,
            output_path,
            reduce_by=('id', 'id_type'),
            spec={
                'reducer': {'memory_limit': two_gib},
            },
        )
