# -*- coding: utf-8 -*-

import os

from sandbox import sdk2
from sandbox.projects.common import binary_task
from sandbox.projects.irt.common import create_irt_data

# Language codes used by BmgenWord2SnormDict.on_execute to build the
# per-language synonym file names ("syn_cells_<code>").
LANG_RU = "ru"
LANG_TR = "tr"


class BmgenWord2SnormDict(binary_task.LastBinaryTaskRelease, sdk2.Task):
    """
    Generate snormalization dict for Bannerland + result wordforms-norm-snorm dict for BMApi.
    """

    # Workflow (see on_execute):
    #   1. download the "synonyms", "norm_dict" and "freq_wordforms" resources;
    #   2. build the snorm dict and dump it into a new "snorm_dict" resource;
    #   3. hand wordforms + norm dict + snorm dict to update_bs_export_token_info
    #      together with the configured YT export path.

    class Parameters(sdk2.Parameters):
        # Vault entry whose value is exported as the YT_TOKEN environment variable.
        yt_token_vault_name = sdk2.parameters.String("YT token vault name", default="yql_robot_bm_admin")
        # Path passed as the last argument of update_bs_export_token_info.
        yt_export_path = sdk2.parameters.String(
            "YT export path for result word_norm_snorm dict on Hahn and Arnold clusters",
            default="//tmp/bl_word2snorm_dict",
        )
        ext_params = binary_task.binary_release_parameters(stable=True)

    def on_execute(self):
        """
        Build the "snorm_dict" resource and run the export step.

        Returns early (doing nothing) when ``new_bmgen_resource_needed("snorm_dict")``
        is falsy or when ``not_released_bmgen_resources(...)`` yields ``None``.
        Otherwise the input resources are downloaded into a temporary directory,
        the snorm dict is written as ``<word>\\t<value>`` lines sorted by word, and
        ``update_bs_export_token_info`` is invoked with ``YT_TOKEN`` set in the
        process environment.
        """
        # Imported lazily, exactly as in the original task body.
        import irt.iron.options as iron_opts
        from irt.bmgen.word2snorm_dict import (
            get_full_snorm_dict,
            get_output_snorm_dict,
            update_bs_export_token_info,
        )
        from irt.sandbox import Sandbox
        from irt.utils import TemporaryDirectory

        client = Sandbox(sdk2.Vault.data("sandbox_oauth_token"))
        if not client.new_bmgen_resource_needed("snorm_dict"):
            return

        resources = client.not_released_bmgen_resources(("freq_wordforms", "norm_dict", "synonyms"))
        if resources is None:
            return

        norm_subdir = iron_opts.get("work_norm_path")
        with TemporaryDirectory() as workdir:

            def fetch(resource_name):
                # Download one input resource into the temp dir and return the
                # path of its `work_norm_path` subdirectory.
                downloaded = client.download(resources[resource_name]["id"], workdir)
                return os.path.join(workdir, downloaded[0], norm_subdir)

            # Download order is preserved: synonyms, norm_dict, freq_wordforms.
            synonyms_root = fetch("synonyms")
            synonyms_paths = {}
            for lang in (LANG_RU, LANG_TR):
                synonyms_paths[lang] = os.path.join(synonyms_root, "syn_cells_{}".format(lang))

            norm_dict_path = os.path.join(fetch("norm_dict"), "norm_dict")
            wordforms_path = os.path.join(fetch("freq_wordforms"), "freq_wordforms")

            snorm_dict = get_full_snorm_dict(norm_dict_path, synonyms_paths)
            output_snorm_dict = get_output_snorm_dict(snorm_dict)

            output_relpath = os.path.join(norm_subdir, "word2snorm_dict")
            snorm_dict_data = create_irt_data(
                self,
                "snorm_dict",
                "Словарь cнормализации для Bannerland-а",
                [output_relpath],
                ttl=30,
                check_task_mode=True,
            )

            # One "<word>\t<value>" line per entry, ordered by word.
            target_file = snorm_dict_data.filenames[0]
            with open(target_file, "wb") as out:
                out.writelines(
                    "{}\t{}\n".format(word, output_snorm_dict[word])
                    for word in sorted(output_snorm_dict)
                )

            # The token is exported via the environment right before the export call.
            yt_token = sdk2.Vault.data(self.Parameters.yt_token_vault_name)
            os.environ["YT_TOKEN"] = yt_token
            update_bs_export_token_info(
                wordforms_path,
                norm_dict_path,
                snorm_dict,
                self.Parameters.yt_export_path,
            )
