# -*- coding: utf-8 -*-
import os

from itertools import islice
from sandbox.sandboxsdk import environments
from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk import process
from sandbox.projects.VideoSearch import video_resource_types as video_resource_types
from sandbox.projects.common.mediasearch.trie import MediaReleaseTrie


class YTClusterParameter(parameters.SandboxStringParameter):
    """Required string parameter "yt_cluster"; defaults to "hahn"."""

    group = 'Fast Delivery'
    name = 'yt_cluster'
    description = 'YT Cluster'
    default_value = 'hahn'
    required = True


class YTDirectoryParameter(parameters.SandboxStringParameter):
    """Required string parameter "yt_directory"; defaults to "//home/videodev/fastdelivery"."""

    group = 'Fast Delivery'
    name = 'yt_directory'
    description = 'YT Directory'
    default_value = '//home/videodev/fastdelivery'
    required = True


class MaxUrlsParameter(parameters.SandboxIntegerParameter):
    """Required integer parameter "max_urls"; defaults to 50."""

    group = 'Fast Delivery'
    name = 'max_urls'
    description = 'Maximum number of urls per query'
    default_value = 50
    required = True


class RelevParameter(parameters.ListRepeater, parameters.SandboxStringParameter):
    """Optional repeated string parameter "relev" holding "key=value" entries."""

    group = 'Extra CGI params'
    name = 'relev'
    description = 'Additional "relev" key-value pairs to send (key1=value1)'
    required = False


class RearrParameter(parameters.ListRepeater, parameters.SandboxStringParameter):
    """Optional repeated string parameter "rearr" holding "key=value" entries."""

    group = 'Extra CGI params'
    name = 'rearr'
    description = 'Additional "rearr" key-value pairs to send (key1=value1)'
    required = False


class PronParameter(parameters.ListRepeater, parameters.SandboxStringParameter):
    """Optional repeated string parameter "pron" holding "key=value" entries."""

    group = 'Extra CGI params'
    name = 'pron'
    description = 'Additional "pron" key-value pairs to send (key1=value1)'
    required = False


class VideoReleaseVFastDeliveryTrie(MediaReleaseTrie):
    """
        Builds videofastdelivery.trie for Yandex.Video service
    """

    # Overview: _generate_trie_data() runs the yt_deduplicator.py helper over
    # the configured YT directory, reads the resulting temporary table and
    # yields (tld, query, data) tuples. Everything else (building and
    # releasing the trie) is presumably handled by MediaReleaseTrie.

    type = "VIDEO_RELEASE_VIDEOFASTDELIVERY_TRIE"
    metrics_url = "https://metrics-qgaas.metrics.yandex-team.ru/api/basket/{basketId}/query"

    # Python packages required for "import yt.wrapper" in _generate_trie_data.
    environment = [
        environments.PipEnvironment('yandex-yt'),
        environments.PipEnvironment('yandex-yt-yson-bindings-skynet')
    ]

    # Task-specific parameters come first, followed by the inherited ones.
    input_parameters = (
        YTClusterParameter,
        YTDirectoryParameter,
        MaxUrlsParameter,
        RelevParameter,
        RearrParameter,
        PronParameter
    ) + MediaReleaseTrie.input_parameters

    release_subject = "video/middle/videofastdelivery-data-{timestamp}"
    release_comment = "videofastdelivery.trie"
    release_resources = (
        video_resource_types.VIDEO_MIDDLESEARCH_VIDEOFASTDELIVERY_TRIE,
    )

    def parse_cgi_param(self, value):
        """
            Parses "key=value" strings into [key, value] pairs.

            :param value: iterable of "key=value" strings, or None
            :return: list of two-element lists [key, value] with surrounding
                     whitespace stripped; empty list when value is None
            :raises IndexError: if an entry contains no "=" separator
        """
        result = []
        if value is None:
            return result
        for item in value:
            # Split each entry (not the whole container) and only on the
            # first "=", so that values may themselves contain "=".
            parts = item.split('=', 1)
            result.append([parts[0].strip(), parts[1].strip()])
        return result

    def _generate_trie_data(self):
        """
            Yields (tld, query, data) tuples read from a deduplicated YT table.

            Runs yt_deduplicator.py (located next to this file) to write a
            temporary table from the configured YT directory, then reads that
            table row by row. For every row:

            * if row['data'] has a top-level 'urls' key, it is wrapped into
              {condition: data}, where condition is data['conditionFlag'] or
              'default' when that key is absent;
            * each per-condition dict gets the task-level 'pron', 'relev' and
              'rearr' pairs prepended to its own lists;
            * 'urls' is cut down to at most max_urls entries.

            :return: generator of (row['tld'], row['query'], data) tuples
        """
        import yt.wrapper as yt

        YT_DEDUPLICATOR = 'yt_deduplicator.py'

        relev = self.parse_cgi_param(self.ctx[RelevParameter.name])
        rearr = self.parse_cgi_param(self.ctx[RearrParameter.name])
        pron = self.parse_cgi_param(self.ctx[PronParameter.name])
        max_urls = self.ctx[MaxUrlsParameter.name]

        yt_cluster = self.ctx[YTClusterParameter.name]
        yt_directory = self.ctx[YTDirectoryParameter.name]
        yt_token = self.get_vault_data('VIDEODEV', 'yt_token')

        yt_client = yt.YtClient(proxy=yt_cluster, token=yt_token, config={
            'read_retries': {'enable': False},
            'pickling': {
                'python_binary': '/skynet/python/bin/python'
            }
        })

        # The temporary table has to outlive the whole read loop, so the
        # generator keeps yielding from inside the "with" block.
        with yt_client.TempTable() as tmp_table:
            # NOTE(review): the token is passed both on the command line and
            # via YT_TOKEN in the environment. Command-line arguments can be
            # visible to other processes; consider dropping '--yt-token' if
            # yt_deduplicator.py can take the token from the environment.
            process.run_process(
                cmd=[
                    '/skynet/python/bin/python',
                    os.path.join(os.path.dirname(__file__), YT_DEDUPLICATOR),
                    '--yt-proxy', yt_cluster,
                    '--yt-token', yt_token,
                    '--src-directory', yt_directory,
                    '--dst-table', tmp_table
                ],
                log_prefix=YT_DEDUPLICATOR,
                wait=True,
                check=True,
                environment=dict(os.environ, YT_TOKEN=yt_token)
            )

            for row in yt_client.read_table(tmp_table):
                if 'urls' in row['data']:
                    # Flat record: wrap it under its condition flag so both
                    # row layouts are processed by the same loop below.
                    condition_flag = row['data'].get('conditionFlag', 'default')
                    row['data'] = {condition_flag: row['data']}
                for data in row['data'].values():
                    # Task-level CGI pairs go first, row-level ones after.
                    data['pron'] = pron + data.get('pron', [])
                    data['relev'] = relev + data.get('relev', [])
                    data['rearr'] = rearr + data.get('rearr', [])
                    if len(data.get('urls', [])) > max_urls:
                        # Keeps the first max_urls items in the mapping's
                        # iteration order.
                        data['urls'] = dict(islice(data['urls'].items(), max_urls))
                yield (row['tld'], row['query'], row['data'])

    def get_nanny_oauth_token(self):
        """
            Returns the vault entry 'robot-video-crawl-nanny-oauth'
            requested with 'VIDEO-ROBOT' as the first argument
            (presumably the vault owner).
        """
        return self.get_vault_data('VIDEO-ROBOT', 'robot-video-crawl-nanny-oauth')


# Module-level alias of the task class defined above.
# NOTE(review): presumably the name the Sandbox task loader looks up to
# discover the task in this module - confirm against the SDK.
__Task__ = VideoReleaseVFastDeliveryTrie
