import hashlib
import gzip
import logging
import os
import time

from sandbox.sandboxsdk import process
from sandbox.sandboxsdk import parameters

from sandbox.projects.common import apihelpers
from sandbox.projects.common.mediasearch import ban as media_ban
from sandbox.projects.images.bans import resources as images_bans_resources


# Size of each chunk requested from file.read() while hashing the grimhold
# data file, so the file is never loaded into memory in one piece.
_BUFFER_SIZE = 4096


class ImagesReleaseAntipirateThumbBan(media_ban.ImagesBaseReleaseBanTask):
    """
        Builds antipirate ban resources for Yandex.Images service

        A build fetches the latest grimhold data file, skips the rest of the
        work when that file has the same MD5 as the last released data
        resource (unless a forced build is requested), and otherwise produces
        and registers three resources: the raw data file, a querysearch trie
        and a version file.
    """

    type = "IMAGES_RELEASE_ANTIPIRATE_THUMB_BAN"

    release_subject = "images/middle/antipiratethumb-data-{timestamp}"
    release_comment = "daily images ban file (antipirate)"
    push_signal_name = "antipirate_thumb_ban"

    ban_resource = images_bans_resources.THUMBS_BAN_QUERYSEARCH_TRIE
    version_resource = images_bans_resources.THUMBS_BAN_QUERYSEARCH_VERSION
    data_resource = images_bans_resources.THUMBS_BAN_GRIMHOLD_FILE
    release_resources = (ban_resource, version_resource, data_resource)

    input_parameters = media_ban.ImagesBaseReleaseBanTask.create_input_parameters(enable_semaphore=True)

    def _build_ban(self):
        """
            Build and register the ban resources.

            Returns 0 when the downloaded data matches the last released data
            resource and the build is not forced; otherwise returns the size
            in bytes of the generated trie file.
        """
        # download grimhold data
        grimhold_data = self.abs_path(self.data_resource.basename)

        grimhold_resource = apihelpers.get_last_resource("IMAGE_THUMB_BAN_SAFESEARCH_DATA")
        # NOTE(review): the code below reads the download from grimhold_data,
        # so `sky get` is presumably expected to place the file at that path --
        # confirm against the resource's file name and the task working dir.
        process.run_process(["sky", "get", grimhold_resource.skynet_id], log_prefix="grimhold_sky_get")

        # Hash the raw bytes: the file must be opened in binary mode, because
        # text mode may decode or newline-translate the content (and hashlib
        # rejects str objects on Python 3).
        grimhold_hasher = hashlib.md5()
        with open(grimhold_data, "rb") as grimhold_data_file:
            for chunk in iter(lambda: grimhold_data_file.read(_BUFFER_SIZE), b""):
                grimhold_hasher.update(chunk)

        # Skip the build when nothing changed since the last release, unless
        # the force-build parameter is set in the task context.
        released_data = apihelpers.get_last_released_resource(self.data_resource)
        force_build = self.ctx.get(media_ban.ForceBuildParameter.name)
        if not force_build and released_data and released_data.file_md5 == grimhold_hasher.hexdigest():
            logging.info("No new data found, exiting...")
            return 0

        self._register_ban(self.descr, grimhold_data, self.data_resource)

        # url2fastban: convert the grimhold data into thumbs.txt
        thumbs_path = self.abs_path("thumbs.txt")
        hasher_tool = self._tool(images_bans_resources.IMAGES_URL2FASTBAN_EXECUTABLE)

        process.run_process([
            hasher_tool,
            "--mode", "thumb-lines",
            "--input", grimhold_data,
            "--output", thumbs_path], outputs_to_one_file=False, log_prefix="hasher")

        # querydata_indexer: build the trie from thumbs.txt
        trie_path = self.abs_path(self.ban_resource.basename)
        version_path = self.abs_path(self.version_resource.basename)
        indexer_tool = self._tool(images_bans_resources.IMAGES_QUERYDATAINDEXER_EXECUTABLE)

        process.run_process([
            indexer_tool,
            "--no-keywords",
            "--data-namespace", "thumbs",
            "--key-semantics", "none",
            "--local-input", thumbs_path,
            "--output", trie_path], outputs_to_one_file=False, log_prefix="indexer")
        self._register_ban(self.descr, trie_path, self.ban_resource)

        # The version file holds the build time as integer seconds since epoch.
        with open(version_path, "w") as version_file:
            version_file.write(str(int(time.time())))
        self._register_ban(self.descr, version_path, self.version_resource)
        self._set_release_signal()

        return os.stat(trie_path).st_size

    def _test_ban(self, build_task_id):
        """
            Return an empty list; no testing is implemented for this ban yet.
        """
        # TODO: create a special task for balancer
        return []


# Module-level alias for the task class defined above.
# NOTE(review): presumably the name the Sandbox task loader looks up to find
# this module's task -- confirm against the framework.
__Task__ = ImagesReleaseAntipirateThumbBan
