# -*- coding: utf-8 -*-

import json
import logging

from sandbox import sdk2
from sandbox.projects.VideoSearch import video_resource_types
from sandbox.sandboxsdk import environments


class VideoVideohubDumpTagData(sdk2.Task):
    """Sandbox task that dumps videohub tag data from YT into a JSON resource.

    Rows are read from two YT tables (carousel data and top UGC bloggers),
    converted into ``{'id', 'supertags', 'type'}`` records and written as one
    JSON array into a ``VIDEO_VH_TAG_DATA`` resource named ``tag_data.json``.
    """

    task = 'VIDEO_VIDEOHUB_DUMP_TAG_DATA'

    class Requirements(sdk2.Task.Requirements):
        # Pip packages used by the lazily imported ``yt.wrapper`` and
        # ``clickhouse_cityhash`` modules below.
        environments = [
            environments.PipEnvironment('clickhouse-cityhash'),
            environments.PipEnvironment('yandex-yt'),
            environments.PipEnvironment('yandex-yt-yson-bindings-skynet', version='0.3.32-0'),
        ]

    class Parameters(sdk2.Task.Parameters):
        # YT cluster proxy used to build the client.
        yt_proxy = sdk2.parameters.String('YT Proxy', required=True, default='arnold')
        # Table with 'tag', 'supertags' (JSON-encoded) and 'type' columns.
        carousel_data_path = sdk2.parameters.String('Carousel Data Path', required=True, default='//home/video-hosting/detailed_tags/carusel_data')
        # Table with an 'author_id_hash' column.
        top_ugc_bloggers_data_path = sdk2.parameters.String('Top UGC Bloggers Data Path', required=True, default='//home/videorecom/recommender/data/top_ugc_bloggers/latest')

    def on_execute(self):
        """Read the tag data from YT and publish it as ``tag_data.json``."""
        logging.info('Reading tag data')
        tag_data = self._read_tag_data()
        tag_data_str = json.dumps(tag_data)
        # json.dumps returns text on Python 3 while write_bytes needs bytes.
        # On Python 2 the result is already a byte string and is kept as is.
        if not isinstance(tag_data_str, bytes):
            tag_data_str = tag_data_str.encode('utf-8')

        resource = video_resource_types.VIDEO_VH_TAG_DATA(self, 'tag data', 'tag_data.json')
        data = sdk2.ResourceData(resource)

        logging.info('Dumping tag data')
        data.path.write_bytes(tag_data_str)

        data.ready()

    def _read_tag_data(self):
        """Collect tag records from the carousel and top-UGC-bloggers tables.

        Returns:
            A list of dicts with keys ``'id'``, ``'supertags'`` and ``'type'``;
            carousel records come first, blogger records follow.
        """
        # Imported lazily; presumably because the package is only installed
        # through the PipEnvironment requirement at run time.
        import yt.wrapper as yt
        yt_token = sdk2.Vault.data('VIDEODEV', 'yt_token')
        client = yt.YtClient(proxy=self.Parameters.yt_proxy, token=yt_token)

        tag_data = []
        for row in client.read_table(self.Parameters.carousel_data_path):
            tag_data.append({
                'id': self._get_tag_id(row['tag']),
                # The 'supertags' column holds a JSON-encoded list of tag names.
                'supertags': [self._get_tag_id(tag) for tag in json.loads(row['supertags'])],
                'type': row['type'],
            })

        UGC_VIDEO_SUPERTAGS = ['videohub', 'blogger', 'ugc', 'zen']
        # The supertag ids are the same for every blogger row, so hash them once.
        ugc_supertag_ids = [self._get_tag_id(tag) for tag in UGC_VIDEO_SUPERTAGS]
        for row in client.read_table(self.Parameters.top_ugc_bloggers_data_path):
            tag_data.append({
                # The blogger id is taken from the table as is, not re-hashed.
                'id': row['author_id_hash'],
                # Copy so that records never share one mutable list.
                'supertags': list(ugc_supertag_ids),
                'type': 'ugc',
            })

        return tag_data

    @staticmethod
    def _base26(num, decor):
        """Encode ``num`` with letters 'a'-'z', wrapped in ``decor`` on both sides.

        Digits are emitted least significant first; zero is encoded as a
        single ``'a'``.
        """
        letters = []
        while True:
            letters.append(chr(ord('a') + num % 26))
            num //= 26
            if num == 0:
                break
        return decor + ''.join(letters) + decor

    def _get_tag_id(self, tag):
        """Return the string id of ``tag``.

        The id is the CityHash64 of the tag, halved and base-26 encoded,
        with ``'hh'`` as both prefix and suffix.
        """
        # Imported lazily; presumably because the package is only installed
        # through the PipEnvironment requirement at run time.
        from clickhouse_cityhash.cityhash import CityHash64

        tag_hash = CityHash64(tag)
        # NOTE(review): the hash is halved before encoding; presumably to fit
        # a signed 64-bit range -- confirm against consumers of these ids.
        return self._base26(tag_hash // 2, 'hh')

