#import json
#import os

from sandbox import sandboxsdk as sdk

from sandbox.projects import resource_types
from sandbox.projects.common.mediasearch import ban as mediaban
from sandbox.projects.common.nanny import auto_deploy
from sandbox.projects.common.ReleasedResourceConsumer import ReleasedResourceConsumer
from sandbox.projects.VideoSearch import video_resource_types as video_resource_types

#from sandbox.sandboxsdk import errors
from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk import process


class YtServerParameter(parameters.SandboxStringParameter):
    # Task input: value passed as the ``proxy`` argument of the YT client
    # created in VideoReleaseVhDupsTrie._build_ban.
    name = "yt_server"
    description = "YT server"
    required = True
    # Used when the task is started without an explicit value.
    default_value = "arnold"


class YtVhDupsTableParameter(parameters.SandboxStringParameter):
    # Task input: path of the YT table whose rows are dumped into the raw
    # vh dups file (each row is rendered by format_vh_dups).
    name = "yt_vh_dups_table"
    description = "YT vh dups table"
    required = True
    default_value = "//home/videoindex/vhs/dups-trie/prevdata/vh_dups_attrs"


class YtVhAttrsTableParameter(parameters.SandboxStringParameter):
    # Task input: path of the YT table whose rows are dumped into the raw
    # vh attrs file (each row is rendered by format_vh_attrs).
    name = "yt_vh_attrs_table"
    description = "YT vh attrs table"
    required = True
    default_value = "//home/videoindex/vhs/dups-trie/prevdata/vh_attrs.filtered"


def format_vh_dups(data):
    """
        Render one vh dups row as a single "#query" line.

        ``data`` must provide string values under the keys "url",
        "original_url" and "exps". The result is tab-separated and
        newline-terminated.
    """
    pieces = (
        "#query\t", data["url"],
        "\turl=", data["original_url"],
        "\texps=", data["exps"],
        "\n",
    )
    return "".join(pieces)


def format_vh_attrs(data):
    """
        Render one vh attrs row as a single "#query" line.

        ``data`` must provide string values under the keys "url" and
        "value". Newlines inside the value are replaced with spaces so the
        record stays on one line.
    """
    url = data["url"]
    one_line_value = data["value"].replace("\n", " ")
    # NOTE(review): no separator is inserted between url and value;
    # presumably the value already starts with a tab - confirm.
    return "".join(("#query\t", url, one_line_value, "\n"))


class VideoReleaseVhDupsTrie(auto_deploy.AutoNannyDeployTask, mediaban.VideoBaseReleaseBanTask, ReleasedResourceConsumer):
    """
        Build vh_dups.trie and vh_attrs.trie
    """

    type = 'VIDEO_RELEASE_VH_DUPS_TRIE'

    # Pip packages providing the ``yt.wrapper`` module imported in _build_ban.
    environment = (sdk.environments.PipEnvironment("yandex-yt"), sdk.environments.PipEnvironment('yandex-yt-yson-bindings-skynet', version='0.3.32-0'))

    # Own YT parameters followed by the parameters of the base tasks.
    input_parameters = (
        YtServerParameter,
        YtVhDupsTableParameter,
        YtVhAttrsTableParameter
    ) + mediaban.VideoBaseReleaseBanTask.input_parameters + ReleasedResourceConsumer.input_parameters

    release_subject = "video/middle/content-data-{timestamp}"
    release_comment = "video content attrs release"
    release_resources = (
        video_resource_types.VIDEO_MIDDLESEARCH_VH_DUPS_TRIE,
        video_resource_types.VIDEO_MIDDLESEARCH_VH_ATTRS_TRIE,
    )

    def get_stable_services(self):
        """
            Return a one-element list holding this task's service id.
        """
        # NOTE(review): _SERVICE_ID is not defined in this class; presumably
        # it is provided by one of the base classes - confirm.
        return [self._SERVICE_ID]

    def get_nanny_oauth_token(self):
        """
            Return the Nanny OAuth token stored in the vault.
        """
        return self.get_vault_data('VIDEO-ROBOT', 'robot-video-crawl-nanny-oauth')

    @staticmethod
    def _dump_vh_rows(rows, raw_path, formatter):
        """
            Write ``formatter(row)`` for every row of ``rows`` to ``raw_path``.
        """
        with open(raw_path, 'w') as raw_file:
            for row in rows:
                raw_file.write(formatter(row))

    @staticmethod
    def _index_vh_trie(indexer_tool, section, raw_path, trie_path):
        """
            Run the querydata indexer on ``raw_path`` producing ``trie_path``.

            ``section`` is passed to the tool as the value of its "-S" option.
        """
        process.run_process([
            indexer_tool,
            "-S", section,
            "-N", "exacturl",
            "-i", raw_path,
            "-o", trie_path], outputs_to_one_file=False, log_prefix="indexer")

    def _build_ban(self):
        """
            Dump both YT tables to raw files and rebuild the tries if needed.

            Returns 0 when neither raw dump is reported as changed by
            ``_update_resource``; otherwise builds both tries, registers them
            as resources and returns 10485760.
        """
        # Imported lazily: the module comes from the pip ``environment``
        # declared on the class.
        import yt.wrapper as yt
        yt_client = yt.YtClient(proxy=self.ctx[YtServerParameter.name], token=self.get_vault_data('VIDEODEV', 'yt_token'))

        # read vh dups
        vh_dups_trie_path = self.abs_path(video_resource_types.VIDEO_MIDDLESEARCH_VH_DUPS_TRIE.basename)
        vh_dups_path = vh_dups_trie_path + '.raw'
        self._dump_vh_rows(
            yt_client.read_table(yt.TablePath(self.ctx[YtVhDupsTableParameter.name])),
            vh_dups_path,
            format_vh_dups)
        dups_changed = self._update_resource(video_resource_types.VIDEO_MIDDLESEARCH_VH_DUPS_RAW, path=vh_dups_path)

        # read vh attrs
        vh_attrs_trie_path = self.abs_path(video_resource_types.VIDEO_MIDDLESEARCH_VH_ATTRS_TRIE.basename)
        vh_attrs_path = vh_attrs_trie_path + '.raw'
        self._dump_vh_rows(
            yt_client.read_table(yt.TablePath(self.ctx[YtVhAttrsTableParameter.name])),
            vh_attrs_path,
            format_vh_attrs)
        # Evaluated unconditionally on purpose: folding this call into
        # ``dups_changed or ...`` would short-circuit and skip the attrs
        # check whenever the dups dump had changed.
        # NOTE(review): _update_resource is defined in a base class;
        # presumably it also registers the new raw dump - confirm.
        attrs_changed = self._update_resource(video_resource_types.VIDEO_MIDDLESEARCH_VH_ATTRS_RAW, path=vh_attrs_path)

        if not (dups_changed or attrs_changed):
            return 0

        # make tries
        indexer_tool = self._tool(resource_types.VIDEO_QUERYDATAINDEXER_EXECUTABLE)
        self._index_vh_trie(indexer_tool, "vhdups", vh_dups_path, vh_dups_trie_path)
        self._index_vh_trie(indexer_tool, "vhattrs", vh_attrs_path, vh_attrs_trie_path)

        self.create_resource(self.descr, vh_dups_trie_path, video_resource_types.VIDEO_MIDDLESEARCH_VH_DUPS_TRIE)
        self.create_resource(self.descr, vh_attrs_trie_path, video_resource_types.VIDEO_MIDDLESEARCH_VH_ATTRS_TRIE)

        # NOTE(review): 10485760 == 10 * 1024 * 1024; the meaning of the
        # return value is defined by the base task - confirm.
        return 10485760


# Module-level alias for the task class.
# NOTE(review): presumably the Sandbox loader looks up ``__Task__`` to find
# the task implemented by this module - confirm.
__Task__ = VideoReleaseVhDupsTrie
