import logging
import os

from sandbox.sandboxsdk import errors
from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk import process
from sandbox.sandboxsdk.svn import Arcadia

from sandbox.projects import resource_types
from sandbox.projects.common.mediasearch import ban as mediaban
from sandbox.projects.common.nanny import auto_deploy
from sandbox.projects.VideoReleaseVideoban import GrimholdTable
from sandbox.projects.VideoReleaseVideoban import GrimholdYtLine
from sandbox.projects.VideoReleaseVideoban import is_image_url
from sandbox.projects.VideoReleaseVideoban import get_host


class YTServerParameter(parameters.SandboxStringParameter):
    # Task input naming the YT server; the task passes its value to the yt tool as "--proxy".
    name = 'yt_server'
    description = 'YT server'
    default_value = 'arnold'
    required = True


class YtSoft404Table(parameters.SandboxStringParameter):
    # Task input: path of the YT table holding soft-404 bans (first source merged into the ban list).
    name = 'yt_soft_404_ban_table'
    description = 'YT soft_404 ban table'
    default_value = '//home/videoindex/deletes/videoban.filtered'
    required = True


class YtVhLicense404Table(parameters.SandboxStringParameter):
    # Task input: path of the YT table with VH license bans; it is read only when the
    # "use_yt_vh_license_404_ban_table" switch is enabled.
    name = 'yt_vh_license_404_ban_table'
    description = 'YT VH license ban table'
    default_value = '//home/videoindex/vhs/docbase/dynamic/old_licenses/current'
    required = True


class YtManualBanTable(parameters.SandboxStringParameter):
    # Task input: path of the YT table with manual bans; the task reads it only when the value is non-empty.
    name = 'yt_manual_ban_table'
    description = 'YT manual ban table'
    default_value = '//home/videoindex/deletes/videoban.manual'
    required = True


class UseYtVhLicense404Table(parameters.SandboxBoolParameter):
    # Task input: switch that enables reading the VH license ban table (disabled by default).
    name = 'use_yt_vh_license_404_ban_table'
    description = 'Use YT VH license ban table'
    default_value = False
    required = True


def cut_https_scheme(url):
    """Return url without a leading 'https://'; any other url is returned unchanged."""
    scheme = 'https://'
    if not url.startswith(scheme):
        return url
    return url[len(scheme):]


def format_soft_404_ban(line):
    """Convert one tab-separated "url<TAB>status" row into a ban-list line.

    Only the first tab-separated field (the url) of the whitespace-stripped
    row is used; whatever follows the first tab is ignored.

    Returns "<url>\\n" with a leading "https://" removed, or '' for a row
    that is blank after stripping.
    """
    # split('\t', 2) may produce one or three fields, which cannot be unpacked
    # into exactly two names: a row whose value is empty (strip() eats the
    # trailing tab) or contains a tab used to raise ValueError. Taking the
    # first field gives the same url for every row that used to be accepted.
    url = line.strip().split('\t', 1)[0]
    if not url:
        return ''
    # The status field is not consulted: earlier filters on status == "removed"
    # and on urls whose only '/' is the last character are disabled.
    return "%s\n" % (cut_https_scheme(url))


def format_manual_ban(line):
    """Convert one tab-separated manual-ban row into a ban-list line.

    Only the first tab-separated field (the url) of the whitespace-stripped
    row is used; the rest of the row is ignored.

    Returns "<url>\\n" with a leading "https://" removed, or '' for a row
    that is blank after stripping.
    """
    # split('\t', 2) may produce one or three fields, which cannot be unpacked
    # into exactly two names; take the first field instead so rows with an
    # empty value or with extra tabs no longer raise ValueError.
    url = line.strip().split('\t', 1)[0]
    if not url:
        return ''
    return "%s\n" % (cut_https_scheme(url))


def format_grimhold_yt_ban(line):
    """Convert one Grimhold row into a ban-list line.

    Returns None for image urls and for hosts ending with '.yandex.ru',
    "<url>\\n" for a row with tld '*', region '0', bantype 'url',
    sourceban 'manban' and no ban stub url, and '' for every other row.
    """
    entry = GrimholdYtLine(line)

    if is_image_url(entry.url):
        return None
    if get_host(entry.url).endswith('.yandex.ru'):
        return None

    is_global_manual_url_ban = (
        entry.tld == '*'
        and entry.region == '0'
        and entry.bantype == 'url'
        and entry.sourceban == 'manban'
    )
    if is_global_manual_url_ban and not entry.ban_stub_url:
        return entry.url + '\n'
    return ''


def format_url_ban(line):
    """Pass a url ban list line through, guaranteeing a trailing newline.

    The other formatters in this module always emit newline-terminated
    records. An unterminated last line of the source list would otherwise be
    glued to whichever line follows it once the merged file is sorted.

    Returns the line unchanged when it is empty or already ends with "\\n",
    otherwise the line with "\\n" appended.
    """
    if line and not line.endswith('\n'):
        return line + '\n'
    return line


class VideoReleaseVideobanHashes(auto_deploy.AutoNannyDeployTask, mediaban.VideoBaseReleaseBanTask):
    """
        Builds videoban.soft404.vec for Yandex.Video service
    """

    type = "VIDEO_RELEASE_VIDEOBAN_HASHES"

    # Parameters declared in this module, followed by those of the base ban task.
    input_parameters = (
        YTServerParameter,
        YtSoft404Table,
        YtManualBanTable,
        YtVhLicense404Table,
        UseYtVhLicense404Table,
        GrimholdTable,
    ) + mediaban.VideoBaseReleaseBanTask.input_parameters

    release_subject = "video/middle/videoban-hashes-{timestamp}"
    release_comment = "video fast videoban.soft404.vec"
    release_resources = (
        resource_types.VIDEO_MIDDLESEARCH_VIDEOBAN_SOFT404_HASH_VEC,
    )

    def get_stable_services(self):
        """Return a one-element list holding self._SERVICE_ID.

        NOTE(review): _SERVICE_ID is not defined in this class; presumably a
        base class provides it - confirm.
        """
        return [self._SERVICE_ID]

    def get_nanny_oauth_token(self):
        """Fetch the Nanny OAuth token through get_vault_data."""
        return self.get_vault_data('VIDEO-ROBOT', 'robot-video-crawl-nanny-oauth')

    def _build_ban(self):
        """Merge all ban sources into one sorted url list and build the hash vector.

        Sources, appended in this order to a single ".unsorted" file: the
        soft-404 YT table, the manual ban YT table (when its path is set),
        the VH license YT table (when enabled), the Grimhold YT table and
        the urlban.lst file exported from Arcadia.

        Returns 0 when _update_resource reports no changes to the sorted url
        list; otherwise the size in bytes of the generated hash vector file.
        """
        # robot_soft_404.urls
        soft_404_urls_path = self.abs_path(resource_types.VIDEO_MIDDLESEARCH_SOFT_404_BAN.basename)
        soft_404_urls_path_unsorted = soft_404_urls_path + ".unsorted"

        self._read_yt_table(self.ctx[YtSoft404Table.name], soft_404_urls_path_unsorted, append=True, format_func=format_soft_404_ban)

        if self.ctx[YtManualBanTable.name]:
            self._read_yt_table(self.ctx[YtManualBanTable.name], soft_404_urls_path_unsorted, append=True, format_func=format_manual_ban)

        # yt vh license 404 ban urls
        if self.ctx[UseYtVhLicense404Table.name]:
            self._read_yt_table(self.ctx[YtVhLicense404Table.name], soft_404_urls_path_unsorted, append=True, format_func=format_soft_404_ban)

        # grimhold
        self._read_yt_table(self.ctx[GrimholdTable.name], soft_404_urls_path_unsorted, append=True, format_func=format_grimhold_yt_ban,
                            format='<columns=[Target;Domain;Region;DocType;BanType;Oblivion;Link]>schemaful_dsv')

        # url_ban.lst
        url_ban_path = self.abs_path(resource_types.VIDEO_MIDDLESEARCH_URL_BAN.basename)
        svn_url_ban_path = url_ban_path + ".svn"
        svn_url_ban_arcadia_url = 'arcadia:/arc/trunk/arcadia/yweb/webscripts/video/index/config/urlban.lst'
        Arcadia.export(svn_url_ban_arcadia_url, svn_url_ban_path)
        self._format_file(svn_url_ban_path, soft_404_urls_path_unsorted, append=True, format_func=format_url_ban)

        self._sort_file(soft_404_urls_path_unsorted, soft_404_urls_path)
        has_changes = self._update_resource(resource_types.VIDEO_MIDDLESEARCH_SOFT_404_BAN, path=soft_404_urls_path)

        # Nothing new to hash: skip the (re)build of the vector.
        if not has_changes:
            return 0

        url2fastban_tool = self._tool(resource_types.VIDEO_URL2FASTBAN_EXECUTABLE)
        soft_404_vec_path = self.abs_path(resource_types.VIDEO_MIDDLESEARCH_VIDEOBAN_SOFT404_HASH_VEC.basename)
        process.run_process([
            url2fastban_tool,
            "makeHashes",
            "-i", soft_404_urls_path,
            "-o", soft_404_vec_path], outputs_to_one_file=False, log_prefix="url2fastban_hashes")
        self.create_resource(self.descr, soft_404_vec_path, resource_types.VIDEO_MIDDLESEARCH_VIDEOBAN_SOFT404_HASH_VEC)

        return os.stat(soft_404_vec_path).st_size

    def _read_yt_table(self, table_path, dst_path, append, format_func, format="yamr"):
        """Dump a YT table through format_func into dst_path, if the table exists.

        table_path  -- YT path of the table
        dst_path    -- local file handed to _format_file as the destination
        append      -- forwarded to _format_file
        format_func -- per-line formatter forwarded to _format_file
        format      -- value of the yt "--format" option ("yamr" by default)

        A table that does not exist is skipped (a warning is logged).
        Raises SandboxTaskFailureError when the "exists" or the "read" yt
        command finishes with a non-zero exit code.
        """
        yt_exists = self._yt("exists", table_path)
        if yt_exists.wait():
            raise errors.SandboxTaskFailureError("Failed to check existence %s" % (table_path))

        # "yt exists" reports its answer on the first line of stdout.
        with open(yt_exists.stdout_path, 'r') as f:
            exists = f.readline().strip() == "true"

        if not exists:
            # Keep the original best-effort behaviour, but leave a trace in the log.
            logging.warning("YT table %s does not exist, skipping", table_path)
            return

        yt_read = self._yt("read", "--format", format, table_path)
        if yt_read.wait():
            raise errors.SandboxTaskFailureError("Failed to read %s" % (table_path))
        self._format_file(yt_read.stdout_path, dst_path, format_func=format_func, append=append)

    def _yt(self, cmd, *args):
        """Start the yt tool with the given command and arguments.

        The process is started with wait=False and the process object is
        returned right away; callers in this class call .wait() on it and
        read its .stdout_path.
        """
        yt_tool = self._tool(resource_types.VIDEO_YT_PYTHON_EXECUTABLE)
        yt_args = (
            yt_tool,
            "--proxy", self.ctx[YTServerParameter.name],
        )

        return process.run_process(
            yt_args + (cmd,) + args,
            environment={"YT_TOKEN": self.get_vault_data('VIDEODEV', 'yt_token')},
            outputs_to_one_file=False,
            log_prefix="yt.{}".format(cmd),
            timeout=2000,
            wait=False
        )

    def _update_service(self, service_id):
        """Log the service id, then delegate to AutoNannyDeployTask._update_service."""
        logging.info("_update_service: %s", service_id)
        auto_deploy.AutoNannyDeployTask._update_service(self, service_id)

    def _sort_file(self, in_filename, out_filename):
        """Write the lines of in_filename to out_filename in sorted order.

        Duplicates are kept. Every written line ends with a newline.
        """
        with open(in_filename, 'r') as in_file:
            lines = in_file.readlines()

        # Only the final line of a file can lack "\n". Terminate it before
        # sorting, otherwise it gets glued to whichever line follows it.
        if lines and not lines[-1].endswith('\n'):
            lines[-1] += '\n'

        lines.sort()
        with open(out_filename, 'w') as out_file:
            out_file.writelines(lines)


# Module-level alias for the task class (presumably the name the Sandbox task loader looks up - confirm).
__Task__ = VideoReleaseVideobanHashes
