# -*- coding: utf-8 -*-

import codecs
import json
import os

from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk import process
from sandbox.sandboxsdk.svn import Arcadia

from sandbox.projects import resource_types
from sandbox.projects.common.mediasearch import ban as mediaban
from sandbox.projects.common.nanny import auto_deploy
from sandbox.projects.common.ReleasedResourceConsumer import ReleasedResourceConsumer


class TrieNamespaceParameter(parameters.SandboxStringParameter):
    """
        Namespace string passed to the indexer tool via its "-S" option
        when MediaReleaseTrie builds the trie
    """
    group = "Technical"
    required = True
    name = 'trie_namespace'
    description = 'Resulting trie namespace parameter'


class KeySizeParameter(parameters.SandboxIntegerParameter):
    """
        Key size in bytes; forms the first half of the "key,value" pair
        given to the indexer tool's "-f" option
    """
    group = "Technical"
    required = True
    default_value = 4096
    name = 'key_size'
    description = 'Key Size, bytes'


class ValueSizeParameter(parameters.SandboxIntegerParameter):
    """
        Value size in bytes; forms the second half of the "key,value" pair
        given to the indexer tool's "-f" option
    """
    group = "Technical"
    required = True
    default_value = 16384
    name = 'value_size'
    description = 'Value Size, bytes'


class CommitFastCrawlFileParameter(parameters.SandboxBoolParameter):
    """
        Switch: when set and the urls resource has changed, MediaReleaseTrie
        commits the collected urls to the fast crawl file
    """
    group = "Videotop"
    default_value = False
    name = 'commit_fastcrawl_file'
    description = 'Commit fast crawl file to arcadia'


class FastCrawlUrlsPathParameter(parameters.SandboxStringParameter):
    """
        Svn location of the crawl urls file; its parent directory is checked
        out and the file itself is rewritten and committed by MediaReleaseTrie
    """
    group = "Videotop"
    required = True
    name = 'fast_crawl_urls_path'
    description = 'Svn path to crawl.urls'
    default_value = 'arcadia.yandex.ru:/arc/trunk/arcadia/yweb/webscripts/video/fastrecrawl/fixedurlcrawl/crawl.urls'


class MediaReleaseTrie(auto_deploy.AutoNannyDeployTask, mediaban.VideoBaseReleaseBanTask, ReleasedResourceConsumer):
    """
        Builds trie for Yandex.Video service

        Subclasses supply the data by overriding _generate_trie_data.
        _build_ban dumps that data to a plain text file, registers it via
        _update_resource and, when it has changed, runs the indexer and
        viewer tools to produce and check the trie resource.
    """

    input_parameters = (
        TrieNamespaceParameter,
        KeySizeParameter,
        ValueSizeParameter,
        FastCrawlUrlsPathParameter,
        CommitFastCrawlFileParameter,
    ) + mediaban.VideoBaseReleaseBanTask.input_parameters + ReleasedResourceConsumer.input_parameters

    release_subject = "video/middle/videotop-data-{timestamp}"
    release_comment = "video fast trie"

    def get_stable_services(self):
        """
            Returns a single-element list with self._SERVICE_ID

            _SERVICE_ID is not defined in this class; presumably a subclass
            or one of the bases provides it (TODO confirm).
        """
        return [self._SERVICE_ID]

    def _generate_trie_data(self):
        """
            Should generate trie data: tuples of the form
            (tld, query, data)
            where
            data is a python dict

            The base implementation always raises NotImplementedError.
        """
        raise NotImplementedError

    def _build_ban(self):
        """
            Dumps generated trie data to a text file and builds the trie

            Returns 0 when _update_resource reports no changes for the urls
            file, otherwise the size in bytes of the built trie file.
            Raises AssertionError when release_resources does not hold
            exactly one entry or when a generated data item is not a dict.
        """
        assert len(self.release_resources) == 1, "release_resources for MediaReleaseTrie task must contain single resource: trie resource type"

        trie_resource = self.release_resources[0]
        urls_resource = trie_resource.plain_resource_type

        urls_path = self.abs_path(urls_resource.basename)

        # Every dict value is collected as a url for the optional
        # fast crawl commit below.
        all_urls = []
        # NOTE(review): the file is opened in append mode, so a leftover file
        # from an earlier run in the same directory would be extended rather
        # than replaced - confirm whether 'w' is the intended mode.
        with codecs.open(urls_path, 'a', encoding='utf-8') as out_file:
            for tld, query, trie_data in self._generate_trie_data():
                assert isinstance(trie_data, dict), "Trie data must be a dictionary"
                all_urls.extend(trie_data.values())
                out_file.write(u'#query\t{}\t{}\t{}\n'.format(tld, query, json.dumps(trie_data)))

        has_changes = self._update_resource(urls_resource, path=urls_path)

        if not has_changes:
            return 0

        if self.ctx[CommitFastCrawlFileParameter.name]:
            self._commit_fixed_crawl_urls(all_urls)

        indexer_tool = self._tool(resource_types.VIDEO_QUERYDATAINDEXER_EXECUTABLE)
        trie_path = self.abs_path(trie_resource.basename)

        # "-f" takes the key and value sizes as one comma-separated argument.
        sizes = "{},{}".format(self.ctx[KeySizeParameter.name], self.ctx[ValueSizeParameter.name])
        process.run_process([
            indexer_tool,
            "-S", self.ctx[TrieNamespaceParameter.name],
            "-N", "tld,exacturl",
            "-J", "-j",
            "-f", sizes,
            "-i", urls_path,
            "-o", trie_path], outputs_to_one_file=False, log_prefix="indexer")

        # Run the viewer over the freshly built trie; its output goes to the
        # "viewer" log.
        viewer_tool = self._tool(resource_types.VIDEO_QUERYDATAVIEWER_EXECUTABLE)
        process.run_process([
            viewer_tool,
            "-H",
            "-i", trie_path], outputs_to_one_file=False, log_prefix="viewer")

        self.create_resource(self.descr, trie_path, trie_resource)

        return os.path.getsize(trie_path)

    def _update_resource(self, resource_type, skynet_id=None, path=None):
        """
            Delegates to VideoBaseReleaseBanTask._update_resource and, when
            it returns a true value, appends resource_type to
            release_resources

            Returns the value produced by the base implementation.
        """
        result = mediaban.VideoBaseReleaseBanTask._update_resource(self, resource_type, skynet_id=skynet_id, path=path)
        if result:
            # Rebind instead of mutating: release_resources is a tuple.
            self.release_resources = self.release_resources + (resource_type,)
        return result

    def _commit_fixed_crawl_urls(self, all_urls):
        """
            Overwrites the fast crawl file with all_urls and commits it

            Checks out the svn directory that contains the file named by
            FastCrawlUrlsPathParameter, writes one url per line (UTF-8) and
            commits the file as 'zomb-sandbox-rw'.
        """
        fast_crawl_urls_path = self.ctx[FastCrawlUrlsPathParameter.name]
        fast_crawl_urls_svn_path = os.path.dirname(fast_crawl_urls_path)
        local_fast_crawl_urls_dir = self.abs_path(os.path.basename(fast_crawl_urls_svn_path))
        local_fast_crawl_urls_path = os.path.join(local_fast_crawl_urls_dir, os.path.basename(fast_crawl_urls_path))
        Arcadia.checkout(fast_crawl_urls_svn_path, local_fast_crawl_urls_dir)

        # Write explicit UTF-8, as _build_ban does: a plain open() would fail
        # on non-ASCII unicode urls under Python 2 and would depend on the
        # locale under Python 3.
        with codecs.open(local_fast_crawl_urls_path, 'w', encoding='utf-8') as out_file:
            for url in all_urls:
                if isinstance(url, bytes):
                    # The codecs writer expects text; byte strings that made
                    # it through json.dumps in _build_ban are UTF-8.
                    url = url.decode('utf-8')
                out_file.write(u'%s\n' % (url,))

        # NOTE(review): 'form' looks like a typo for 'from'; left as is
        # because the message is runtime text.
        comment = 'commit form sb task:{}:{}'.format(self.type, self.id)
        Arcadia.commit(local_fast_crawl_urls_path, comment, 'zomb-sandbox-rw')
