import os
import logging

from sandbox.sandboxsdk import process
from sandbox.sandboxsdk import parameters

from sandbox.projects import resource_types
from sandbox.projects.common.mediasearch import ban as media_ban
from sandbox.projects.images.bans import resources as images_bans_resources


class MrServerParameter(parameters.SandboxStringParameter):
    # Mandatory string input; its value is forwarded to the ban404 tool
    # as the "--server" argument by ImagesReleaseRottenUrlBan.
    name = "mr_server"
    description = "Mapreduce server"
    default_value = "banach"
    required = True


class MrPrefixParameter(parameters.SandboxStringParameter):
    # Mandatory string input; its value is forwarded to the ban404 tool
    # as the "--ban404-prefix" argument by ImagesReleaseRottenUrlBan.
    name = "mr_prefix"
    description = "Mapreduce table prefix"
    default_value = "images"
    required = True


class MrPoolParameter(parameters.SandboxStringParameter):
    # Optional string input; when non-empty it is exported to the ban404
    # tool through the YT_POOL environment variable.
    name = "mr_pool"
    description = "Mapreduce pool"
    default_value = "images_bases"
    required = False


class ImagesReleaseRottenUrlBan(media_ban.ImagesBaseReleaseBanTask):
    """
        Builds rotten ban resources for Yandex.Images service
    """

    type = "IMAGES_RELEASE_ROTTEN_URL_BAN"
    execution_space = 60 * 1024

    # Task-specific Mapreduce parameters followed by the common parameters
    # generated by the base ban task.
    input_parameters = (
        MrServerParameter,
        MrPrefixParameter,
        MrPoolParameter,
    ) + media_ban.ImagesBaseReleaseBanTask.create_input_parameters(max_total_size=3000, enable_semaphore=True)

    release_subject = "images/middle/rottenurl-data-{timestamp}"
    release_comment = "daily images ban file (rotten urls)"

    ban_resource = images_bans_resources.IMAGES_MIDDLESEARCH_ROTTEN_URLS_BLOOM_FILTER
    version_resource = images_bans_resources.IMAGES_MIDDLESEARCH_ROTTEN_URLS_BAN_VERSION
    release_resources = (ban_resource, version_resource)
    push_signal_name = "rotten_url_ban"

    def _build_ban(self):
        """
            Build the rotten-url bloom filter and register the resulting resources.

            Steps:
              1. run ban404 "check" and store its stdout as the version file;
              2. stop early (returning 0) when ``_skip_build`` says so for that
                 version file -- presumably because this version has already
                 been released (TODO confirm against the base class);
              3. run ban404 "download" to fetch the url list;
              4. convert the url list into a bloom filter with the hasher tool;
              5. register the url list and the bloom filter and raise the
                 release signal.

            Returns 0 when the build is skipped, otherwise the size in bytes
            of the bloom filter file.
        """
        # check
        version_path = self.abs_path("rotten-urls.version")
        with open(version_path, "w") as version_file:
            self.__ban404("check", version_file)

        if self._skip_build(version_path, self.version_resource):
            logging.info("Nothing to release")
            return 0

        self._register_ban(self.descr, version_path, self.version_resource)

        # build
        urls_path = self.abs_path("rotten.urls")
        with open(urls_path, "w") as urls_file:
            self.__ban404("download", urls_file)

        hasher_tool = self._tool(images_bans_resources.IMAGES_URL2FASTBAN_EXECUTABLE)

        bloom_path = self.abs_path(self.ban_resource.basename)

        # The hasher is told how many elements to expect, one url per line.
        urls_count = self.__count_lines(urls_path)
        process.run_process([
            hasher_tool,
            "--mode", "bloom",
            "--url-type", "url",
            "--ban-reason", "common",
            "--ban-type", "url",
            "--input", urls_path,
            "--element-count", str(urls_count),
            "--output", bloom_path,
            ], log_prefix="hasher")

        self._register_ban(self.descr, urls_path, resource_types.OTHER_RESOURCE)
        self._register_ban(self.descr, bloom_path, self.ban_resource)
        self._set_release_signal()

        return os.stat(bloom_path).st_size

    def _test_ban(self, build_task_id):
        """
            Return a one-element list holding the result of ``_test_task``
            for the bloom filter resource of ``build_task_id``
            (run with ``max_removal_delta=0.6``).
        """
        return [self._test_task(build_task_id, self.ban_resource, max_removal_delta=0.6)]

    @staticmethod
    def __count_lines(path, chunk_size=64 * 1024):
        """
            Count the lines of the file at ``path`` without loading it whole.

            The file is read in binary mode in ``chunk_size`` byte pieces.
            A final line that is not terminated by a newline is counted too;
            an empty file yields 0.
        """
        total = 0
        # Pretend the previous byte was a newline so that an empty file
        # does not get a phantom unterminated line.
        last_byte = b"\n"
        with open(path, "rb") as stream:
            # read() returns an empty string at EOF, which ends the iteration.
            for chunk in iter(lambda: stream.read(chunk_size), b""):
                # bytes needle: the stream is binary, a text needle would
                # raise TypeError on Python 3.
                total += chunk.count(b"\n")
                last_byte = chunk[-1:]
        if last_byte != b"\n":
            total += 1
        return total

    def __ban404(self, cmd, output_file, *args):
        """
            Run the ban404 tool and redirect its stdout into ``output_file``.

            cmd         -- ban404 sub-command (this task uses "check" and "download")
            output_file -- open writable file object receiving the tool's stdout
            args        -- extra command line arguments appended after the
                           standard ones

            The tool is configured for YT through environment variables; the
            YT token is read from the vault, the pool is set only when the
            corresponding task parameter is non-empty.
        """
        ban404_tool = self._tool(images_bans_resources.IMAGES_BAN404_EXECUTABLE)
        environment = {
            "MR_RUNTIME": "YT",
            "YT_PREFIX": "//home/",
            "MR_TMP": "//tmp",
            "YT_USE_YAMR_STYLE_PREFIX": "1",
            "YT_USE_YAMR_DEFAULTS": "1",
            "YT_TOKEN": self.get_vault_data("IMAGES-BAN", "yt_token"),
        }
        mr_pool = self.ctx[MrPoolParameter.name]
        if mr_pool:
            environment["YT_POOL"] = mr_pool

        command = [
            ban404_tool,
            cmd,
            "--server", self.ctx[MrServerParameter.name],
            "--ban404-prefix", self.ctx[MrPrefixParameter.name],
        ]
        # Previously *args was accepted but silently dropped; forward it.
        command.extend(args)

        process.run_process(
            command,
            environment=environment,
            stdout=output_file, outputs_to_one_file=False, log_prefix="ban404.{}".format(cmd)
        )


# Module-level alias for the task class defined in this file.
# NOTE(review): presumably the name the Sandbox task loader looks up -- confirm.
__Task__ = ImagesReleaseRottenUrlBan
