# -*- coding: utf-8 -*-
import datetime as dt
import logging
import os
import shutil
import subprocess
import json
import time


import sandbox.common.errors as cerr
import sandbox.common.types.client as ctc
import sandbox.common.types.resource as ctr
import sandbox.common.format as common_format

from sandbox.projects.common.nanny import nanny
from sandbox.projects.common import binary_task
import sandbox.projects.common.dynamic_models.compare as models_compare
import sandbox.projects.common.dynamic_models.const as models_const
import sandbox.projects.common.dynamic_models.archiver as models_archiver
import sandbox.projects.common.dynamic_models.download as models_download
import sandbox.projects.common.dynamic_models.merge as models_merge
import sandbox.projects.common.dynamic_models.bindings.prepare as models_prepare
import sandbox.projects.common.dynamic_models.resources as models_res
import sandbox.projects.common.dynamic_models.utils as models_utils
import sandbox.projects.common.file_utils as fu
import sandbox.projects.common.search.bugbanner2 as bb2
import sandbox.projects.release_machine.mixins.build as rm_build_mixin
import sandbox.projects.resource_types as resource_types
from sandbox.projects.neural_net.resources import resources as nn_resources
from sandbox.projects.common.arcadia import sdk as arcadiasdk
from sandbox.projects.common.constants import constants as sdk_constants
import sandbox.sdk2 as sdk2
from sandbox.sdk2.vcs.svn import Arcadia
from sandbox.projects.websearch.basesearch.resources import DYNAMIC_MODELS_ARCHIVE_L1
from sandbox.projects.websearch.basesearch.resources import DYNAMIC_MODELS_ARCHIVE_SHMICK_BASE


class BuildWebDynamicModels(binary_task.LastBinaryTaskRelease, rm_build_mixin.ComponentReleaseTemplate, bb2.BugBannerTask, nanny.ReleaseToNannyTask2):
    """
        This task is automatically built and deployed to Sandbox after each commit.
        See the CI release flow for more info:
        https://a.yandex-team.ru/projects/relev_tools/ci/releases/timeline?dir=sandbox%2Fprojects%2Fwebsearch%2Fdynamic_models%2FBuildWebDynamicModels&id=deploy_binary_task

        For testing purposes deploy binary to sandbox as a testing version.
        - cd arcadia/sandbox/projects/websearch/dynamic_models/BuildWebDynamicModels/bin
        - ya make -t
        - ./bin upload --attr author=$(whoami) --attr released=testing --attr task_type=BUILD_WEB_DYNAMIC_MODELS
    """
    class Requirements(sdk2.Task.Requirements):
        # Execution requirements declared for this task.
        # Restrict execution to hosts carrying the LINUX tag group.
        client_tags = ctc.Tag.Group.LINUX
        disk_space = 50 * 1024  # 50 Gb see RMINCIDENTS-119
        # NOTE(review): presumably expressed in MiB like disk_space above (i.e. 16 GiB) - confirm.
        ram = 16 * 1024

    class Parameters(sdk2.Task.Parameters):
        # Input parameters of the task.

        # Vault owner used to look up the 'ARC_TOKEN' secret when mounting Arcadia (see do_unified_build).
        arc_token_owner = sdk2.parameters.String("Group owner of arc token", default="WEB_RELEVANCE", required=True, multiline=False)
        # Optional rbtorrent with extra models; downloaded and passed to the builder via --merge-with-directory.
        models_torrent = sdk2.parameters.String("Download beta models (rbtorrent)", required=False)
        # Optional rbtorrent whose first file lists "file_name : url" pairs of models to download.
        models_fml_names_torrent = sdk2.parameters.String(
            "Download beta models (rbtorrent with file_name : url)",
            required=False
        )
        # Each line is forwarded to the builder as a --path-to-exclude argument.
        paths_to_exclude = sdk2.parameters.String("Paths to exclude from archive, one per line", required=False, multiline=True)

        # Inline "file_name : url" pairs; merged with the ones coming from models_fml_names_torrent.
        models_fml_urls = sdk2.parameters.String("Download beta models (file_name : url)", required=False, multiline=True)
        # Selects the builder's build type and the resource type of the produced archive.
        # NOTE(review): do_unified_build and get_resource_type also handle "shmick_base",
        # which is not offered as a choice here - confirm whether that is intentional.
        with sdk2.parameters.String("Models for") as models_for:
            models_for.values["mmeta"] = models_for.Value(value="mmeta", default=True)
            models_for.values["base"] = models_for.Value(value="base")
            models_for.values["l1_new_base"] = models_for.Value(value="l1_new_base")
        with sdk2.parameters.Group("Checks") as checks:
            # When enabled, check_size fails the task if the archive exceeds the size limit.
            check_archive_size = sdk2.parameters.Bool("Check archive size", default=True)
            # The three flags below are forwarded to models_prepare.prepare (see create_models_archive).
            strict_check_formula_id = sdk2.parameters.Bool("Checks that all formulas contain ID", default=True)
            check_slices = sdk2.parameters.Bool("Check slices", default=True)
            with check_slices.value[True]:
                fail_without_slices = sdk2.parameters.Bool("Fail on any formula without slices", default=True)
        with sdk2.parameters.Group("Modifications") as modifications:
            # Passed to models_archiver.create when the unpacked archive resource is built.
            deduplicate = sdk2.parameters.Bool("Turn on file-wise deduplication", default=True)
            # When enabled, an additional "unpacked" resource is created (see create_unpacked_archive_resource).
            save_unpacked_archive = sdk2.parameters.Bool("Save unpacked archive", default=False)
            # Files not larger than this are merged into one archive inside the unpacked resource.
            not_to_merge_file_threshold = sdk2.parameters.Integer(
                "For unpacked archive set the threshold file size not to merge (in bytes)", default=40 * 1024 * 1024  # 40 MB
            )
        with sdk2.parameters.Group("Helper resources") as helper_resources:
            # Both tools are optional: when unset, a READY resource of the same type is looked up
            # (see get_mx_ops / get_archiver_tool).
            mx_ops = sdk2.parameters.Resource(
                'Mx ops', resource_type=resource_types.MX_OPS_EXECUTABLE, required=False
            )
            archiver_tool = sdk2.parameters.Resource(
                'Archiver tool', resource_type=resource_types.ARCHIVER_TOOL_EXECUTABLE, required=False
            )
        tasks_archive_resource = binary_task.binary_release_parameters(stable=True)
        # Arcadia revision/branch that is mounted to build the archive builder tool.
        arcadia_url = sdk2.parameters.ArcadiaUrl(
            "Arcadia url",
            default_value='arcadia-arc:/#trunk',
        )
        # Optional patch applied to the mounted Arcadia tree before building.
        arcadia_patch = sdk2.parameters.String(
            "Apply patch (diff file rbtorrent, paste.y-t.ru link or plain text or arc:id for review). Doc: https://nda.ya.ru/3QTTV4",
            multiline=True,
            default="",
        )

    class Context(sdk2.Task.Context):
        # Default values of the task context fields.

        # Id of the packed archive resource; assigned in on_enqueue.
        packed_resource_id = 0
        # Id of the unpacked archive resource; assigned in create_unpacked_archive_resource.
        unpacked_resource_id = 0
        # Result of comparing the new archive with the last stable one; assigned in
        # compare_archives and rendered by the footer property.
        diff = {}
        # Listing of the built archive; assigned in on_execute.
        models = []
        # Read by archive_resource_descr and written into the archive meta in do_unified_build.
        # NOTE(review): never assigned in this file, so it keeps the "-" default unless set elsewhere.
        used_exp_models_url = "-"

    def on_enqueue(self):
        """Create the packed archive resource up front and store its id in the task context."""
        super(BuildWebDynamicModels, self).on_enqueue()
        resource_cls = self.get_resource_type()
        description = self.archive_resource_descr
        packed_resource = resource_cls(self, description, "models.archive")
        self.Context.packed_resource_id = packed_resource.id

    def on_execute(self):
        """
        Build the models archive, compare it with the last stable one and check its size.

        :raises cerr.TaskFailure: when archive creation raises ``models_utils.ModelsError``
        """
        binary_task.LastBinaryTaskRelease.on_execute(self)
        self.add_bugbanner(bb2.Banners.Formulas)

        # The packed resource itself was created in on_enqueue; here only its path is needed.
        packed_resource = sdk2.Resource[self.Context.packed_resource_id]
        archive_path = str(sdk2.ResourceData(packed_resource).path)
        archiver_tool_path = self.get_archiver_tool()

        try:
            built_models = self.create_models_archive(archiver_tool_path, archive_path)
        except models_utils.ModelsError as error:
            raise cerr.TaskFailure(str(error))
        self.Context.models = built_models

        self.compare_archives(archiver_tool_path, archive_path)
        self.check_size(archive_path)

    def do_unified_build(self, archiver_tool_path, archive_path):
        """
        Build the ``search/runtime_archives/builder`` tool and run it to produce the models archive.

        The Arcadia tree given by ``Parameters.arcadia_url`` is mounted (and optionally patched
        with ``Parameters.arcadia_patch``), the builder binary is compiled and then invoked with
        the build type that corresponds to ``Parameters.models_for``.

        :param archiver_tool_path: path to the archiver executable handed over to the builder
        :param archive_path: path where the builder must write the resulting archive
        :raises cerr.TaskFailure: if ``Parameters.models_for`` has no known build type
        :raises subprocess.CalledProcessError: if the builder exits with a non-zero code
        """
        target = 'search/runtime_archives/builder'
        binary_name = 'builder'
        output_directory = 'build_output'

        # Resolve the build type before mounting/building anything: an unknown value used to
        # surface only as a TypeError inside subprocess after the whole build had finished.
        build_types = {
            'mmeta': 'MiddleSearch',
            'base': 'BaseSearch',
            'l1_new_base': 'IntL1',
            'shmick_base': 'ShmickBase',
        }
        models_for = self.Parameters.models_for
        build_type_str = build_types.get(models_for)
        if build_type_str is None:
            raise cerr.TaskFailure("Unsupported 'models_for' value: {!r}".format(models_for))

        token = sdk2.Vault.data(self.Parameters.arc_token_owner, name='ARC_TOKEN')
        with arcadiasdk.mount_arc_path(self.Parameters.arcadia_url, use_arc_instead_of_aapi=True, arc_oauth_token=token) as arc_root:
            patch = None
            if self.Parameters.arcadia_patch:
                patch = self.Parameters.arcadia_patch
                path_to_patch = sdk2.svn.Arcadia.apply_patch(arc_root, patch, self.path())
                if patch.startswith("arc:"):
                    # When applying (arc:review-id) use zipatch  downloaded from Arcanum
                    patch = path_to_patch
            logging.info('RuntimeArchivesBuilder: start building tool')
            arcadiasdk.do_build(
                build_system=sdk_constants.SEMI_DISTBUILD_BUILD_SYSTEM,
                source_root=arc_root,
                targets=[target],
                results_dir=output_directory,
                clear_build=False,
                patch=patch,
            )
            logging.info('RuntimeArchivesBuilder: done building tool')
            builder_path = os.path.join(os.curdir, output_directory, target, binary_name)

            # Meta information passed to the builder via --meta-content.
            meta = {
                'experiment': self.Context.used_exp_models_url,
                'sandbox_task': str(self.id),
                'sandbox_resource_id': str(self.Context.packed_resource_id),
                'time': time.strftime('%Y-%m-%d %H:%M:%S')
            }
            meta_str = json.dumps(meta)

            call_args = [
                builder_path,
                "--arc-folder-path", arc_root,
                "--svn-bin-path", "svn",
                "--archiver-bin-path", archiver_tool_path,
                "--output", archive_path,
                "--build-type", build_type_str,
                "--meta-content", meta_str
            ]

            if self.Parameters.models_torrent:
                merge_with_directory = os.path.abspath("local_torrent_models")
                models_download.sky(merge_with_directory, self.Parameters.models_torrent)
                call_args.extend(["--merge-with-directory", merge_with_directory])

            # Model URLs may come both from a torrent-delivered list and from the inline parameter.
            fml_urls = []
            if self.Parameters.models_fml_names_torrent:
                fml_names_file = models_download.sky("fml_names", self.Parameters.models_fml_names_torrent)[0]
                fml_urls.extend(models_download.parse_fml_urls(fu.read_file(fml_names_file)))
            fml_urls.extend(models_download.parse_fml_urls(self.Parameters.models_fml_urls))
            for name, url in fml_urls:
                call_args.extend(["--fml-files-to-download", "{}:{}".format(name, url)])

            # Skip blank lines: "".split("\n") yields [""], which previously resulted in
            # an empty --path-to-exclude argument whenever the parameter was left empty.
            for line in (self.Parameters.paths_to_exclude or "").splitlines():
                excluded_path = line.strip()
                if excluded_path:
                    call_args.extend(["--path-to-exclude", excluded_path])

            logging.info('RuntimeArchivesBuilder: start building archive')
            subprocess.check_call(call_args)
            logging.info('RuntimeArchivesBuilder: done building archive')

    def create_unpacked_archive_resource(self, models_dir, archiver_tool_path, archive_path):
        """
        Turn ``models_dir`` into the "unpacked archive" resource.

        Top-level files not larger than ``Parameters.not_to_merge_file_threshold`` are packed
        into a single archive (named like ``archive_path``) placed inside ``models_dir``;
        bigger files stay as they are. The id of the created resource is stored in the context.

        :param models_dir: directory with the unpacked models
        :param archiver_tool_path: path to the archiver executable
        :param archive_path: path of the packed archive; only its base name is used here
        """
        # Get all necessary paths for splitting unpacked archive.
        # If file is big enough, it is another file in archive,
        # if it is not, get them into one models.archive. As most models are small and dssm models are big.
        # TODO(danlark@) Try to think of better partition when bermud@'s team will delete 5k files from the archive.
        # It is mostly done for faster release cycle as the same files are not loaded with skynet twice.
        # And big files according to statistics do not often change.
        merged_dir = os.path.join(models_dir, 'small_models')
        os.mkdir(merged_dir)

        size_threshold = self.Parameters.not_to_merge_file_threshold
        for candidate in models_utils.walk_files(os.path.normpath(models_dir), recursive=False):
            if os.path.getsize(candidate) > size_threshold:
                continue
            os.rename(candidate, os.path.join(merged_dir, os.path.basename(candidate)))

        merged_archive_path = os.path.join(models_dir, os.path.basename(archive_path))
        models_archiver.create(
            archiver_tool_path,
            merged_archive_path,
            self.Parameters.deduplicate,
            merged_dir
        )

        # The small files now live inside the merged archive, so the staging dir can go.
        shutil.rmtree(merged_dir)
        self._remove_empty_dirs(models_dir)

        resource_cls = self.get_resource_type(packed=False)
        unpacked_resource = resource_cls(self, "Unpacked " + self.archive_resource_descr, models_dir)
        self.Context.unpacked_resource_id = unpacked_resource.id

    def create_models_archive(self, archiver_tool_path, archive_path):
        """
        Build the archive, unpack it into ``all_models`` and run the model checks on it.

        :param archiver_tool_path: path to the archiver executable
        :param archive_path: path where the packed archive is written
        :return: whatever ``models_archiver.get_list`` reports for the built archive
        """
        mx_ops_path = self.get_mx_ops()
        unpack_dir = os.path.abspath("all_models")

        self.do_unified_build(archiver_tool_path, archive_path)

        unpack_command = [archiver_tool_path, '-u', '-d', unpack_dir, archive_path]
        subprocess.check_call(unpack_command)

        params = self.Parameters
        models_prepare.prepare(
            mx_ops_path,
            unpack_dir,
            production=False,
            check_l3=(params.models_for == "mmeta"),
            check_id=params.strict_check_formula_id,
            check_slices=params.check_slices,
            meta=None,
            fail_without_slices=params.fail_without_slices
        )

        if params.save_unpacked_archive:
            self.create_unpacked_archive_resource(unpack_dir, archiver_tool_path, archive_path)

        return models_archiver.get_list(archiver_tool_path, archive_path)

    @staticmethod
    def _remove_empty_dirs(models_dir):
        rm_counts = 1
        while rm_counts:
            rm_counts = 0
            for root, dir_names, file_names in os.walk(models_dir, topdown=False):
                if not dir_names and not file_names:
                    rm_counts += 1
                    logging.debug("Remove empty dir %s", root)
                    os.rmdir(root)

    def compare_archives(self, archiver, new_archive_path):
        """
        Compare the new archive with the most recent stable-released one and store the diff.

        If no released archive of the same resource type is found, only an info message
        is added to the task and no diff is stored.

        :param archiver: path to the archiver executable
        :param new_archive_path: path to the archive built by this task
        """
        released_archives = sdk2.Resource.find(
            type=self.get_resource_type(),
            attrs={"released": "stable"}
        )
        previous_archive = released_archives.order(-sdk2.Resource.id).first()

        if not previous_archive:
            self.set_info(
                u'Diff was not calculated because last_release_archive was not found.\n',
                do_escape=False,
            )
            return

        previous_archive_path = str(sdk2.ResourceData(previous_archive).path)
        info_message = (
            u'Diff calculated: "current" vs "last released".\n'
            u'Last released resource: {}'
        ).format(previous_archive.id)
        self.set_info(info_message, do_escape=False)

        self.Context.diff = models_compare.compare_archives(archiver, previous_archive_path, new_archive_path)

    def check_size(self, archive_path):
        """
        Report the archive size and fail the task if it exceeds the limit.

        The limit depends on ``Parameters.models_for``; it is enforced only when
        ``Parameters.check_archive_size`` is set.

        :param archive_path: path to the packed archive
        :raises cerr.TaskFailure: if the size check is enabled and the archive is too big
        """
        actual_size = os.path.getsize(archive_path)
        actual_size_hr = common_format.size2str(actual_size)

        limit = (
            models_const.BASE_ARCHIVE_SIZE_LIMIT
            if self.Parameters.models_for == "base"
            else models_const.ARCHIVE_SIZE_LIMIT
        )
        limit_hr = common_format.size2str(limit)

        self.set_info("Archive size: {}".format(actual_size_hr))
        logging.info("Archive size limit: %s", limit_hr)

        if not self.Parameters.check_archive_size:
            return
        if actual_size > limit:
            raise cerr.TaskFailure("Archive is too big: {} > {}".format(actual_size_hr, limit_hr))

    def get_mx_ops(self):
        """
        Return the local path to the mx_ops executable.

        The resource from ``Parameters.mx_ops`` is used when given; otherwise the first
        READY ``MX_OPS_EXECUTABLE`` resource returned by the search is taken.

        :return: path to the executable as a string
        :raises cerr.TaskFailure: if no parameter is given and no READY resource is found
        """
        mx_ops = self.Parameters.mx_ops
        if not mx_ops:
            mx_ops = sdk2.Resource.find(
                resource_type=resource_types.MX_OPS_EXECUTABLE,
                state=ctr.State.READY,
            ).first()
            # first() yields nothing when the search is empty; fail with a clear message
            # instead of an obscure error from sdk2.ResourceData(None).
            if mx_ops is None:
                raise cerr.TaskFailure(
                    "No READY MX_OPS_EXECUTABLE resource found; set the 'Mx ops' parameter explicitly"
                )
        return str(sdk2.ResourceData(mx_ops).path)

    def get_archiver_tool(self):
        """
        Return the local path to the archiver tool executable.

        The resource from ``Parameters.archiver_tool`` is used when given; otherwise the
        first READY ``ARCHIVER_TOOL_EXECUTABLE`` resource returned by the search is taken.

        :return: path to the executable as a string
        :raises cerr.TaskFailure: if no parameter is given and no READY resource is found
        """
        archiver_tool = self.Parameters.archiver_tool
        if not archiver_tool:
            archiver_tool = sdk2.Resource.find(
                resource_type=resource_types.ARCHIVER_TOOL_EXECUTABLE,
                state=ctr.State.READY,
            ).first()
            # first() yields nothing when the search is empty; fail with a clear message
            # instead of an obscure error from sdk2.ResourceData(None).
            if archiver_tool is None:
                raise cerr.TaskFailure(
                    "No READY ARCHIVER_TOOL_EXECUTABLE resource found; set the 'Archiver tool' parameter explicitly"
                )
        return str(sdk2.ResourceData(archiver_tool).path)

    def get_resource_type(self, packed=True):
        """
        Pick the resource type for the archive according to ``Parameters.models_for``.

        :param packed: choose the packed archive type if true, the unpacked one otherwise
        :return: resource class to create or search for
        """
        models_for = self.Parameters.models_for

        if not packed:
            # Only "base" has a dedicated unpacked type; all other kinds share one.
            if models_for == "base":
                return models_res.DynamicModelsArchiveBaseUnpacked
            return models_res.DynamicModelsArchiveUnpacked

        if models_for == "base":
            return resource_types.DYNAMIC_MODELS_ARCHIVE_BASE
        if models_for == "l1_new_base":
            return DYNAMIC_MODELS_ARCHIVE_L1
        if models_for == "shmick_base":
            return DYNAMIC_MODELS_ARCHIVE_SHMICK_BASE
        # mmeta or all
        return resource_types.DYNAMIC_MODELS_ARCHIVE

    @property
    def archive_resource_descr(self):
        description = "Dynamic models archive ({})".format(dt.datetime.now().strftime("%Y-%m-%d"))

        experiment = self._cut_svn_url(self.Context.used_exp_models_url)
        if experiment:
            description += " (exp {0})".format(experiment)

        return description

    @staticmethod
    def _cut_svn_url(svn_url):
        svn_url = svn_url or ""
        return "/".join(svn_url.rsplit("/", 2)[1:])

    @property
    def footer(self):
        """Task footer generated from the archive diff stored in the context."""
        archives_diff = self.Context.diff
        return models_compare.generate_diff_footer(archives_diff)

    def on_release(self, additional_parameters):
        """Run the release hooks of ``sdk2.Task`` and then of ``nanny.ReleaseToNannyTask2``."""
        release_hooks = (sdk2.Task.on_release, nanny.ReleaseToNannyTask2.on_release)
        for release_hook in release_hooks:
            release_hook(self, additional_parameters)
