# -*- coding: utf-8 -*-

import json
import logging

from sandbox import sdk2
from sandbox.projects.VideoSearch import video_resource_types
from sandbox.sandboxsdk import environments


class VideoVideohubDumpTopContentAuthors(sdk2.Task):
    """Dump the top content authors YT table into a JSON resource.

    The task reads the table at ``top_content_authors_data_path`` from the
    configured YT proxy, collects author ids per ``is_premium_author_v*``
    column and stores the result as a ``VIDEO_VH_TOP_CONTENT_AUTHORS``
    resource named ``top_content_authors_data.json``.
    """

    task = 'VIDEO_VIDEOHUB_DUMP_TOP_CONTENT_AUTHORS'

    class Requirements(sdk2.Task.Requirements):
        # Pip packages installed into the task environment before execution.
        environments = [
            environments.PipEnvironment('clickhouse-cityhash'),
            environments.PipEnvironment('yandex-yt'),
            environments.PipEnvironment('yandex-yt-yson-bindings-skynet', version='0.3.32-0'),
        ]

    class Parameters(sdk2.Task.Parameters):
        yt_proxy = sdk2.parameters.String('YT Proxy', required=True, default='arnold')
        top_content_authors_data_path = sdk2.parameters.String(
            'top_content_authors_data_path',
            required=True,
            default='//home/videorecom/recommender/data/top_content_authors/latest',
        )

    def on_execute(self):
        """Read the authors table and publish it as a JSON resource."""
        logging.info('Reading top content authors data')
        top_authors_data = self._read_top_content_authors_data()

        # json.dumps returns text; write_bytes needs bytes on Python 3.
        # With the default ensure_ascii=True the output is pure ASCII, so the
        # explicit UTF-8 encoding is also safe on Python 2.
        payload = json.dumps(top_authors_data).encode('utf-8')

        resource = video_resource_types.VIDEO_VH_TOP_CONTENT_AUTHORS(
            self,
            'top content authors data',
            'top_content_authors_data.json',
        )
        data = sdk2.ResourceData(resource)

        logging.info('Dumping top content authors data')
        data.path.write_bytes(payload)

        data.ready()

    def _read_top_content_authors_data(self):
        """Collect author ids for every premium-author column.

        Returns:
            dict: maps each ``is_premium_author_v*`` column name to the list
            of ``author_id_hash`` values of rows where that column is
            positive. Rows without a truthy ``author_id_hash`` are skipped.
        """
        # Imported lazily: the package comes from the task's PipEnvironment
        # requirements rather than from a module-level import.
        import yt.wrapper as yt

        yt_token = sdk2.Vault.data('VIDEODEV', 'yt_token')
        client = yt.YtClient(proxy=self.Parameters.yt_proxy, token=yt_token)

        author_id_column = "author_id_hash"
        columns = [
            "is_premium_author_v0",
            "is_premium_author_v1",
            "is_premium_author_v2",
            "is_premium_author_v3",
            "is_premium_author_v4",
        ]

        top_authors_data = {column: [] for column in columns}

        table_path = self.Parameters.top_content_authors_data_path
        logging.info('Reading table %s', table_path)
        for row in client.read_table(table_path):
            author_id = row.get(author_id_column)
            if not author_id:
                continue

            for column in columns:
                value = row.get(column)
                # Missing, None and zero flags are all treated as "not set".
                if value and value > 0:
                    top_authors_data[column].append(author_id)

        return top_authors_data
