#!/usr/bin/python
# -*- coding: utf-8 -*-

import os

from crypta.profile.utils.config import config
from crypta.profile.utils.luigi_utils import ExternalInput
from crypta.profile.utils.segment_utils.builders import RegularSegmentBuilder
from crypta.profile.utils.segment_utils.processors import (
    DayProcessor,
    LogProcessor,
)

# Segment name -> set of music artist ids whose plays qualify a user for
# that segment. Trailing comments name the artist behind each id.
segment_to_artist_id_dict = {
    'tima': {6125147},
    'tima_belor': {6125147},
    'mumiytroll': {41098},
    'disco80': {
        47272,    # Alphaville
        205640,   # C. C. Catch
        245717,   # Bad Boys Blue
        213819,   # Pupo
        84674,    # Sandra
        184265,   # LondonBeat
        1120272,  # Leshchenko
        167155,   # Dobrynin
        218287,   # Gazmanov
        1433606,  # Margulis
        1652736,  # Derzhavin
        4097209,  # Stalker
        1586750,  # Minaev
        1542180,  # Markin
        161247,   # Ruki Vverkh
        2650418,  # Alyans
        1158148,  # Dyuna
        2940609,  # Zdravstvuy, Pesnya
        110740,   # Ricchi E Poveri
        101354,   # Riccardo Fogli
        6743,     # Umberto Tozzi
        1283,     # Toto Cutugno
        57433,    # Al Bano
        92640,    # Romina Power
        7257,     # Adriano Celentano
        7898,     # Demis Roussos
        28112,    # Joe Dassin
        7887,     # Chris De Burgh
        10533,    # Chris Rea
        89578,    # Roxette
        38683,    # Smokie
        118882,   # Boney M.
        171428,   # Samantha Fox
        51962,    # Bananarama
        782398,   # Sabrina Salerno
        425898,   # Savage
        39231,    # Joy
        170490,   # F.R. David
        21202,    # Secret Service
        167093,   # Kar-Men
        233945,   # Laskovyi Mai
        430384,   # Lyube
        4632736,  # Inna Malikova & Novye Samotsvety
        9367,     # ABBA
        118885,   # Modern Talking
    },
    'ysl': {
        662,     # Lenny Kravitz
        14240,   # Robbie Williams
        1111,    # U2
        36833,   # Red Hot Chili Peppers
        51388,   # The Black Crowes
        41319,   # Garbage
        36814,   # Sugar Ray
        29232,   # Velvet Revolver
        14606,   # Aerosmith
        291,     # A-ha
        12048,   # Muse
        1859,    # Bon Jovi
        671,     # Coldplay
        5559,    # Bryan Adams
        90,      # Scorpions
        3034,    # INXS
        185930,  # Spin Doctors
        58027,   # The Goo Goo Dolls
        25049,   # Soundgarden
        11311,   # Guns N' Roses
        48108,   # R.E.M.
    },
}

# Union of every artist id used by any segment; lets the mapper reject
# irrelevant log rows with a single set intersection.
# `.values()` (rather than `.iteritems()`) works on both Python 2 and 3.
possible_artist_ids = set().union(*segment_to_artist_id_dict.values())


def music_log_mapper(row):
    """Map one music streams log row to per-identifier segment hits.

    A row is considered only when its ``play_time`` lies within
    [10.0, 1000.0] and ``artist_ids`` is non-empty. Entries of
    ``artist_ids`` that are not purely digits are ignored; the rest are
    converted to ``int`` and matched against ``segment_to_artist_id_dict``.

    Args:
        row: mapping with the keys ``play_time``, ``artist_ids``,
            ``play_id``, ``puid``, ``yandexuid`` and ``uuid``.
            NOTE(review): ``artist_ids`` is assumed to be an iterable of
            strings (``isdigit`` is called on each item) - confirm.

    Yields:
        One dict ``{'id', 'id_type', 'play_id', 'segment_name'}`` for each
        combination of a filled identifier column (non-empty and not the
        literal ``'(null)'``) and a segment that contains at least one of
        the row's artists.
    """
    if not (10.0 <= row['play_time'] <= 1000.0 and row['artist_ids']):
        return

    artist_ids = {
        int(artist_id)
        for artist_id in row['artist_ids']
        if artist_id.isdigit()
    }

    # Cheap rejection of rows that mention none of the tracked artists.
    if not artist_ids.intersection(possible_artist_ids):
        return

    # The matching segments do not depend on the identifier type, so they
    # are computed once instead of once per identifier column.
    # `.items()` keeps the function usable on both Python 2 and Python 3.
    matched_segments = [
        segment_name
        for segment_name, segment_artist_ids in segment_to_artist_id_dict.items()
        if segment_artist_ids.intersection(artist_ids)
    ]

    for id_type in ('puid', 'yandexuid', 'uuid'):
        id_value = row[id_type]
        if id_value and id_value != '(null)':
            for segment_name in matched_segments:
                yield {
                    'id': id_value,
                    'id_type': id_type,
                    'play_id': row['play_id'],
                    'segment_name': segment_name,
                }


def count_play_ids(key, rows):
    """Reduce a group of rows to a single record with its distinct play count.

    Args:
        key: the reduce key of the group; copied into the output via ``dict(key)``.
        rows: iterable of records, each carrying a ``play_id`` field.

    Yields:
        Exactly one dict: the key fields plus ``n_play_ids``, the number of
        distinct ``play_id`` values seen in ``rows``.
    """
    distinct_play_ids = set()
    for record in rows:
        distinct_play_ids.add(record['play_id'])

    result = dict(key)
    result['n_play_ids'] = len(distinct_play_ids)
    yield result


class ProcessedMusicStreamsLogForArtists(DayProcessor):
    """Per-day task turning the music streams log into segment play counts.

    The day's log table is run through ``music_log_mapper`` and
    ``count_play_ids``, producing one row per (id, id_type, segment_name)
    with the number of distinct plays.
    """

    def requires(self):
        """Declare the music streams log table for ``self.date`` as input."""
        day_table_path = os.path.join(config.MUSIC_STREAMS_LOG_FOLDER, self.date)
        needed_columns = (
            'puid',
            'yandexuid',
            'uuid',
            'play_time',
            'artist_ids',
            'play_id',
        )
        return ExternalInput(day_table_path, columns=needed_columns)

    def process_day(self, inputs, output_path):
        """Run the map-reduce over ``inputs.table`` and write to ``output_path``."""
        # Rows are grouped by identifier and segment so the reducer can count
        # distinct plays within each group.
        grouping_columns = ['id', 'id_type', 'segment_name']
        self.yt.run_map_reduce(
            music_log_mapper,
            count_play_ids,
            inputs.table,
            output_path,
            reduce_by=grouping_columns,
        )


# Query template used by Artists.build_segment. Placeholders:
#   {music_streams_log} - input table with id, id_type, segment_name and
#                         n_play_ids columns;
#   {output_table}      - destination table, overwritten (WITH TRUNCATE).
# Keeps only the (id, id_type, segment_name) groups whose n_play_ids sum
# over the input is greater than 2.
prepare_segment_query = """
PRAGMA yt.InferSchema;

INSERT INTO `{output_table}` WITH TRUNCATE
SELECT id, id_type, segment_name
FROM `{music_streams_log}`
GROUP BY id, id_type, segment_name
HAVING SUM(n_play_ids) > 2
"""


class Artists(RegularSegmentBuilder):
    """Segment builder for listeners of the configured artist groups.

    Uses the per-day output of ``ProcessedMusicStreamsLogForArtists`` over
    the last ``number_of_days`` days and filters it with
    ``prepare_segment_query``.
    """

    # Segment name -> pair of integers consumed by the base class.
    # NOTE(review): presumably (keyword id, segment id) - confirm against
    # RegularSegmentBuilder.
    name_segment_dict = {
        'disco80': (549, 1283),
        'tima': (549, 1654),
        'tima_belor': (549, 1183),
        'mumiytroll': (549, 1794),
        'ysl': (557, 17457577),
    }

    # Number of days passed to LogProcessor in requires().
    number_of_days = 35

    def requires(self):
        """Return the processed music streams log dependency, keyed by name."""
        processed_log = LogProcessor(
            ProcessedMusicStreamsLogForArtists,
            self.date,
            self.number_of_days,
        )
        return {'ProcessedMusicStreamsLog': processed_log}

    def build_segment(self, inputs, output_path):
        """Fill in the query template and run it within ``self.transaction``."""
        source_table = inputs['ProcessedMusicStreamsLog'].table
        query_text = prepare_segment_query.format(
            music_streams_log=source_table,
            output_table=output_path,
        )
        self.yql.query(query_text, transaction=self.transaction)
