# coding=utf-8
from __future__ import print_function

import datetime  # noqa
import logging
import os
import subprocess

from sandbox import sdk2
from sandbox.projects.common import file_utils as fu
from sandbox.projects.music.search.resources import MusicSuggestTablesPreparer
from sandbox.projects.suggest.dicts import SuggestDictTask
from sandbox.sandboxsdk.environments import PipEnvironment


class YtTableDownloader(object):
    """Stream rows of a YT table as tab-separated lines, collecting statistics.

    While ``download`` is being iterated, the instance accumulates:

    * ``popularity_sum`` - the sum of ``popularity`` over rows that have no
      ``base`` column (rows with ``base`` are not counted);
    * ``unique_regions`` - the set of integer region ids found in the
      ``regional_freqs`` column of those same rows.

    The statistics are only complete once the generator returned by
    ``download`` has been fully consumed.
    """

    def __init__(self, yt_client):
        """Remember the client used for ``read_table`` and reset statistics."""
        self.yt_client = yt_client
        self.popularity_sum = 0
        self.unique_regions = set()
        # Set to True when a ``download`` generator has been exhausted.
        self._downloaded = False

    def download(self, queries):
        """Yield one newline-terminated, tab-separated line per table row.

        Rows with a ``base`` column produce ``query, base, popularity``.
        Other rows produce ``query, '', popularity, regional_freqs`` and
        contribute to the accumulated statistics.

        :param queries: table path passed as-is to ``yt_client.read_table``.
        """
        for row in self.yt_client.read_table(queries):
            if 'base' in row:
                items = (row['query'], row['base'], str(row['popularity']))
            else:
                popularity = row['popularity']
                self.popularity_sum += popularity

                # A null or absent column is treated as "no regional data":
                # '\t'.join() below accepts strings only, so None must not
                # reach it.
                regional_freqs = row.get('regional_freqs') or ''
                if regional_freqs:
                    # Expected form: "<region>:<value>,<region>:<value>,..."
                    regions = {int(p.split(':')[0]) for p in regional_freqs.split(',')}
                    self.unique_regions.update(regions)

                items = (row['query'], '', str(popularity), regional_freqs)
            yield '\t'.join(items) + '\n'
        self._downloaded = True

    def get_popularity(self):
        """Return the popularity sum accumulated so far."""
        return self.popularity_sum

    def get_unique_regions(self):
        """Return the set of region ids seen so far."""
        return self.unique_regions


class BuildMusicDicts(sdk2.Task, SuggestDictTask):
    """ Build music dictionaries from yt data """

    class Parameters(sdk2.Task.Parameters):
        # Task inputs. How each one is used below is taken from the methods
        # of the enclosing task class.

        # Name of the vault record; its value is fetched with
        # sdk2.Vault.data() and used as the YT token.
        # NOTE(review): name='vault_token_name' differs from the attribute
        # name - presumably the externally visible parameter name; confirm.
        yt_token_vault_name = sdk2.parameters.String(
            'Name of the vault record with YT token',
            name='vault_token_name',
            default='yt_token'
        )
        # Passed as top_size to run_data_builder().
        serp_size = sdk2.parameters.Integer('Serp size for suggest-data-builder', default=10)
        # Forwarded as the autodeploy argument of publish_dict().
        autodeploy = sdk2.parameters.Bool('Autodeploy', default=True)
        # Passed to the preparer as --queries-dir; per-dictionary query tables
        # are then read from <queries_dir>/<dict name>.
        queries_dir = sdk2.parameters.String(
            'Target path for processed queries',
            default='//home/search-functionality/suggest/music',
        )
        # Root that is searched for the export folder stamped with today's date.
        music_export_dir = sdk2.parameters.String(
            'Music suggest export path',
            default='//home/music/suggest'
        )
        # Each entry is built as a dictionary and passed to the preparer
        # as a --types argument.
        suggest_types = sdk2.parameters.List(
            'Suggest types',
            description='Suggest dict types to build',
            default=['tracks', 'albums', 'artists', 'playlists', 'podcast-episodes', 'podcasts'],
        )
        # Each entry is passed to the preparer as a --subtypes argument and is
        # built as "<subtype>-<type>" when _TYPES_SUBTYPES allows the pair.
        suggest_subtypes = sdk2.parameters.List(
            'Suggest subtypes',
            description='Suggest subtypes to build',
            default=['kids', 'podcasts', 'spoken'],
        )
        # Executable that prepares YT tables; when left unset, the resource
        # released to stable is looked up instead.
        yt_preparer = sdk2.parameters.Resource(
            'YT tables preparer (latest released to stable if empty)',
            required=False,
            resource_type=MusicSuggestTablesPreparer,
        )

    class Requirements(sdk2.Requirements):
        # Resources requested for the task.
        # NOTE(review): the values look like MiB (i.e. 200 GiB of disk and
        # 256 GiB of RAM) - confirm the units against the Sandbox docs.
        disk_space = 200 * 1024
        ram = 256 * 1024

        # Pip packages installed into the task environment; presumably they
        # provide the YT client and its YSON bindings - verify.
        environments = [
            PipEnvironment('yandex-yt'),
            PipEnvironment('yandex-yt-yson-bindings-skynet'),
        ]

    # Maps a suggest type to the subtypes for which a "<subtype>-<type>"
    # dictionary may be built; build_dict_with_sub_dicts() skips any requested
    # subtype that is not listed for the type.
    # NOTE(review): 'artists' uses set() while the other values are lists;
    # only membership (`in`) is tested, so both behave the same here.
    _TYPES_SUBTYPES = {
        'tracks': ['kids', 'spoken'],
        'albums': ['kids', 'spoken'],
        'artists': set(),
        'playlists': ['kids', 'podcasts', 'spoken'],
        'podcasts': ['kids'],
        'podcast-episodes': ['kids'],
    }

    def on_execute(self):
        """Task entry point: prepare YT tables, build all dicts, publish them.

        Steps, in order:

        1. configure the YT client with the token from the vault;
        2. locate today's music export folder and run the tables preparer;
        3. build a dictionary (plus allowed sub-dictionaries) for every
           requested suggest type into ``<cwd>/dicts``;
        4. write ``source_info.txt`` and publish the ``dicts`` directory.

        :raises RuntimeError: if no export folder for today is found.
        """
        proxy = 'hahn.yt.yandex.net'
        token = sdk2.Vault.data(self.Parameters.yt_token_vault_name)
        # Not defined in this class; presumably provided by SuggestDictTask
        # and responsible for setting self.yt_client - verify.
        self.setup_yt_client(proxy=proxy, token=token)

        self.music_yt_folder = self.get_music_yt_folder()
        if not self.music_yt_folder:
            # get_music_yt_folder() returns None when nothing matches; fail
            # with an explicit message instead of a TypeError further down.
            raise RuntimeError(
                'No music export folder for today found under %s'
                % self.Parameters.music_export_dir
            )
        logging.info('Using folder %s', self.music_yt_folder)

        self.prepare_music_export(self.music_yt_folder)

        self.dicts_path = os.path.join(os.getcwd(), 'dicts')
        os.makedirs(self.dicts_path)

        for dict_name in self.Parameters.suggest_types:
            self.build_dict_with_sub_dicts(dict_name)

        self.write_source_info()
        self.publish_dict(
            'music',
            'Suggest dictionaries for Yandex.Music',
            self.dicts_path,
            ttl=4,
            autodeploy=self.Parameters.autodeploy)

    def get_music_yt_folder(self):
        root = self.Parameters.music_export_dir
        prefix = os.path.join(root, datetime.datetime.now().strftime('%Y-%m-%dT'))

        for folder in self.yt_client.search(root=root, node_type="map_node"):
            if not folder or folder == root:
                continue

            folder = str(folder)
            if folder.startswith(prefix):
                return folder

    def prepare_music_export(self, source_dir):
        """Run the tables preparer binary over the music export.

        The preparer is taken from ``Parameters.yt_preparer`` or, when that is
        unset, from the ``MusicSuggestTablesPreparer`` resource released to
        stable. It is invoked with ``--source-dir``, ``--queries-dir`` and one
        ``--types`` / ``--subtypes`` argument per requested type / subtype.
        YT credentials are passed through the ``YT_PROXY`` and ``YT_TOKEN``
        environment variables.

        :param source_dir: YT folder with the music export to process.
        :raises RuntimeError: if no preparer resource can be found.
        :raises subprocess.CalledProcessError: if the preparer exits non-zero.
        """
        logging.info('Preparing music export for suggest_data_builder')
        env = os.environ.copy()
        env['YT_PROXY'] = 'hahn.yt.yandex.net'
        env['YT_TOKEN'] = sdk2.Vault.data(self.Parameters.yt_token_vault_name)

        preparer_resource = self.Parameters.yt_preparer
        if preparer_resource is None:
            preparer_resource = sdk2.Resource.find(
                type=MusicSuggestTablesPreparer,
                attrs={'released': 'stable'},
            ).first()
            # NOTE(review): assumes .first() yields None when nothing is
            # found - confirm against the Sandbox SDK.
            if preparer_resource is None:
                raise RuntimeError(
                    'No yt_preparer resource given and no stable release of '
                    'MusicSuggestTablesPreparer found'
                )
        preparer = str(sdk2.ResourceData(preparer_resource).path)

        with sdk2.helpers.ProcessLog(self, logger='prepare-music-export-') as pl:
            command = [
                preparer,
                '--source-dir', source_dir,
                '--queries-dir', self.Parameters.queries_dir,
            ]
            for type_ in self.Parameters.suggest_types:
                command.extend(['--types', type_])
            for subtype in self.Parameters.suggest_subtypes:
                command.extend(['--subtypes', subtype])
            logging.info('Executing command: %s', command)
            subprocess.check_call(
                command,
                stdout=pl.stdout,
                stderr=pl.stderr,
                env=env,
            )

    def build_dict_with_sub_dicts(self, dict_name):
        logging.info('Building dict %s', dict_name)
        self.build_dict(dict_name)

        available_subtypes = self._TYPES_SUBTYPES.get(dict_name, [])
        for sub_dict_name in self.Parameters.suggest_subtypes:
            full_dict_name = '%s-%s' % (sub_dict_name, dict_name)
            if sub_dict_name in available_subtypes:
                logging.info('Building dict %s', full_dict_name)
                self.build_dict(full_dict_name)
            else:
                logging.info('Dict of type %s is not supported', full_dict_name)

    def build_dict(self, dict_name):
        queries_table = os.path.join(self.Parameters.queries_dir, dict_name)
        self.write_files(dict_name, queries_table)

    def prepare_tables(self, source_table, queries_table, dict_name):
        """Run the preparer binary for a single source / queries table pair.

        The binary is invoked with ``--source-table`` and ``--queries-table``;
        YT credentials go through ``YT_PROXY`` / ``YT_TOKEN`` in its
        environment, and its output is captured by a process log named
        ``prepare-<dict_name>``.

        NOTE(review): ``self.yt_preparer`` is not assigned anywhere in the
        visible part of this file, and this method is not called from it -
        confirm where the attribute comes from before relying on this method.
        """
        logging.info('Running YT operations')
        yt_token = sdk2.Vault.data(self.Parameters.yt_token_vault_name)

        # Child environment: a copy of ours plus the YT credentials.
        child_env = dict(os.environ)
        child_env.update(YT_PROXY='hahn.yt.yandex.net', YT_TOKEN=yt_token)

        with sdk2.helpers.ProcessLog(self, logger='prepare-' + dict_name) as process_log:
            argv = [str(self.yt_preparer)]
            argv += ['--source-table', source_table]
            argv += ['--queries-table', queries_table]
            logging.info('Executing command: %s', argv)
            subprocess.check_call(
                argv,
                stdout=process_log.stdout,
                stderr=process_log.stderr,
                env=child_env,
            )

    def write_files(self, dict_name, queries_table):
        """Dump builder inputs for one dictionary and run the data builder.

        Creates ``<cwd>/<dict_name>`` with three files:

        * ``queries`` - lines produced by ``YtTableDownloader.download``;
        * ``streams`` - one ``<region>\\t<total popularity>`` line per region
          seen, followed by an ``ALL\\t<total popularity>`` line;
        * ``groups`` - a single empty line.

        The builder output prefix is ``<dicts_path>/<dict_name>/<dict_name>``.

        :param dict_name: dictionary name, used for directory and file names.
        :param queries_table: YT table to read the queries from.
        """
        logging.info('Writing files')
        work_dir = os.path.join(os.getcwd(), dict_name)
        os.makedirs(work_dir)
        queries_path = os.path.join(work_dir, 'queries')
        streams_path = os.path.join(work_dir, 'streams')
        groups_path = os.path.join(work_dir, 'groups')

        table_reader = YtTableDownloader(self.yt_client)
        # download() is a generator that fills in the statistics as it is
        # iterated, so they are read only after the queries file is written
        # (write_lines presumably consumes the whole iterable - verify).
        fu.write_lines(queries_path, table_reader.download(queries_table))

        total_popularity = table_reader.get_popularity()
        stream_lines = [
            '%d\t%d' % (region, total_popularity) + '\n'
            for region in table_reader.get_unique_regions()
        ]
        stream_lines.append(('ALL\t%d' % total_popularity) + '\n')
        fu.write_lines(streams_path, stream_lines)

        fu.write_lines(groups_path, ['\n'])

        output_dir = os.path.join(self.dicts_path, dict_name)
        os.makedirs(output_dir)
        output_prefix = os.path.join(output_dir, dict_name)

        logging.info('Running dictionary builder')
        self.run_data_builder(
            output_prefix,
            queries_path,
            groups_path,
            streams_path,
            word_index=True,
            top_size=self.Parameters.serp_size)

    def write_source_info(self):
        with open(os.path.join(self.dicts_path, "source_info.txt"), mode='w') as f:
            f.write("Path to data\t" + self.music_yt_folder + "\n")
            f.write("Timestamp\t" + datetime.datetime.now().strftime("%Y-%m-%d %T") + "\n")
            f.write("Sandbox task ID\t" + str(self.id) + "\n")
