# -*- coding: utf-8 -*-

import cgi
import itertools
import logging
import os
import re

import sandbox.common.types.client as ctc

from sandbox.common.utils import size2str
from sandbox.sandboxsdk import parameters as sp
from sandbox.sandboxsdk import sandboxapi
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.environments import SvnEnvironment
from sandbox.sandboxsdk.svn import Arcadia
from sandbox.sandboxsdk.task import SandboxTask

import sandbox.projects.common.dynamic_models.archiver as models_archiver
import sandbox.projects.common.dynamic_models.compare as models_compare
import sandbox.projects.common.dynamic_models.const as models_const
import sandbox.projects.common.dynamic_models.download as models_download
import sandbox.projects.common.dynamic_models.merge as models_merge
import sandbox.projects.common.dynamic_models.prepare as models_prepare
import sandbox.projects.common.dynamic_models.resources as models_res
import sandbox.projects.common.dynamic_models.utils as models_utils
from sandbox.projects import resource_types
from sandbox.projects.common import apihelpers
from sandbox.projects.common import error_handlers as eh
from sandbox.projects.common import link_builder as lb
from sandbox.projects.common import time_utils as tu
from sandbox.projects.common import utils
from sandbox.projects.common.nanny import nanny
from sandbox.projects.release_machine import rm_notify


def is_middle_true(relpath):
    """
        Pass-through model filter: accepts every path.

        Used by ``BuildDynamicModels.filter_models`` when neither base-only nor
        middle-only filtering is requested, so that no file is treated as waste.

        :param relpath: path of a model file (ignored)
        :return: always True
    """
    return True


def dynamic_models_archive_arcadia_info(task_ctx):
    """
        Build the release subject for a dynamic models archive.

        Parses the subjects of the latest stable releases of the archive resource,
        takes the highest ``<N>[-<M>]`` mark found there and derives the next
        sub-number for the ``stable-<N>`` branch referenced by the task's models SVN URL.

        :param task_ctx: task context (dict-like)
        :return: "<item>: stable-<N>-<M>", or just "<item>" when no previous mark
                 or no ``stable-<N>`` part of the URL can be found
    """
    logging.info("Release from trunk detected! Try to set next release sub-number ...")
    base = utils.get_or_default(task_ctx, ModelsForBase)
    res_type = resource_types.DYNAMIC_MODELS_ARCHIVE if not base else resource_types.DYNAMIC_MODELS_ARCHIVE_BASE
    last_releases = channel.sandbox.list_releases(
        resource_type=res_type, limit=5, release_status=sandboxapi.RELEASE_STABLE,
    ) or []
    r_item = 'basesearch_models' if task_ctx.get(ModelsForBase.name) else 'middlesearch_models'
    logging.debug("Last 5 releases:\n%s", "\n".join([release.subject for release in last_releases]))
    # mark could be like ('115', '', '') or like ('164', '-', '7')
    release_id_re = re.compile(r'[^-]*-([0-9]+)(-|/r)?([0-9]*)')
    # Parse every release subject on its own: the previous code relied on a loop
    # variable leaked from a list comprehension and inspected only one subject.
    release_marks = []
    for release in last_releases:
        found = release_id_re.findall(release.subject)
        if found:
            release_marks.append(found[0])
    if not release_marks:
        return "{}".format(r_item)
    max_existing_mark = max((int(m[0]), int(m[2] or 0)) for m in release_marks)
    url = task_ctx.get('models_svn_url_production') or task_ctx.get('models_svn_url_experiment')
    # `url or ""` keeps re.search from raising TypeError when neither URL is set
    release_num = re.search(r"stable-(\d+)", url or "")
    if not release_num:
        return "{}".format(r_item)
    # Non-zero difference means a new branch number: start its sub-numbering from 1,
    # otherwise continue the sub-numbering of the latest released mark.
    new_release_num = int(release_num.group(1)) - max_existing_mark[0]
    tag_num = 1 if new_release_num else max_existing_mark[1] + 1
    return "{}: stable-{}-{}".format(r_item, release_num.group(1), tag_num)


def try_int(s):
    """
        Convert ``s`` to int when possible.

        :param s: value to convert
        :return: ``int(s)``, or ``s`` itself if ``int`` raises ValueError
    """
    try:
        converted = int(s)
    except ValueError:
        converted = s
    return converted


def parse_tag(name):
    """
        Split a tag name into a list of parts usable as a sort key.

        Surrounding slashes are stripped, the name is split on '-' and every
        part is passed through ``try_int``.

        :param name: tag name, e.g. "stable-100-2/"
        :return: list of parts, numeric ones converted to int
    """
    pieces = name.strip('/').split('-')
    return list(map(try_int, pieces))


def latest_production_tag():
    """
        Find the newest production models tag.

        Lists ``models_const.MODELS_TAG_DIR_URL`` and picks the entry with the
        greatest ``parse_tag`` key.

        :return: the tags directory URL concatenated with the newest tag name
    """
    logging.info('Get latest production tag svn url')
    tags_root = models_const.MODELS_TAG_DIR_URL
    tag_names = Arcadia.list(tags_root, as_list=True)
    eh.ensure(tag_names, "Can't find production tags")
    newest = max(tag_names, key=parse_tag)
    logging.info('Latest tag: {0}'.format(newest))
    return tags_root + newest


def models_svn_url_revision(url):
    """
        Pin an SVN URL to a concrete revision.

        :param url: SVN URL, possibly with an @revision suffix
        :return: tuple ("url@revision", revision)
    """
    rev = Arcadia.parse_url(url).revision
    needs_lookup = (not rev) or rev == "HEAD"
    if needs_lookup:
        # no explicit revision in the URL: ask SVN for the commit revision
        rev = int(Arcadia.info(url)["commit_revision"])
    rev = int(rev) if rev else rev
    pinned_url = Arcadia.replace(url, revision=rev)
    return pinned_url, rev


def is_production_path(p):
    """
        Tell whether ``p`` is a top-level production ``.info`` file.

        :param p: path inside the archive
        :return: False for non-".info" files and for files inside a directory,
                 otherwise the result of ``models_utils.is_production(p)``
    """
    if not p.endswith(".info"):
        return False
    if os.path.dirname(p):
        return False
    return models_utils.is_production(p)


# Group label shared by the production/experiment source parameters below
# (assigned to their `group` attribute).
MODELS_SOURCES_GROUP = "Models sources"


class ModelsForProduction(sp.SandboxBoolParameter):
    """
        Whether to build production formulas.
    """
    name = 'production_models'
    default_value = True
    description = 'Add production models from svn'
    group = MODELS_SOURCES_GROUP


class ModelsUrlForProduction(sp.SandboxSvnUrlParameter):
    """
        SVN URL for production formulas.
    """
    name = 'models_svn_url_production'
    required = False
    default_value = ''
    description = 'Production models svn url (get latest released if empty)'
    group = MODELS_SOURCES_GROUP


class ModelsForExperiment(sp.SandboxBoolParameter):
    """
        Whether to build experimental formulas.
    """
    name = 'experiment_models'
    default_value = True
    description = 'Add experiment models from svn'
    group = MODELS_SOURCES_GROUP


class ModelsUrlForExperiment(sp.SandboxSvnUrlParameter):
    """
        SVN URL for experimental formulas.
    """
    name = 'models_svn_url_experiment'
    required = False
    default_value = models_const.MODELS_EXPERIMENT_URL
    description = 'Experiment models svn url (get latest if empty)'
    group = MODELS_SOURCES_GROUP


class ModelsForBase(sp.SandboxBoolParameter):
    """
        Build formulas only for the base search (L1+L2).
    """
    name = 'only_base_models'
    default_value = False
    description = 'Use only L1+L2 models'


class FilterBaseModelsForMiddle(sp.SandboxBoolParameter):
    """
        Do not include base-search models in the middle-search build.
    """
    name = 'filter_base_models_for_middle'
    default_value = False
    description = 'Filter base models for middle'


class ModelsUrls(sp.SandboxStringParameter):
    """
        List of formula URLs for betas.
    """
    name = 'beta_models_links'
    multiline = True
    description = 'Download beta models (file_name : url)'


class ModelsTorrent(sp.SandboxStringParameter):
    """
        rbtorrent with a folder of formulas.
    """
    name = 'models_torrent'
    multiline = False
    description = 'Download beta models (rbtorrent)'


class StrictMode(sp.SandboxBoolParameter):
    """
        Whether to check that production models have changed
        (the build fails if they did).
    """
    name = 'strict_mode'
    default_value = False
    description = 'Fail if production models changed'


class StrictFormulaIdChecks(sp.SandboxBoolParameter):
    """
        Video model builds may fail if this option is set to True.
        The default value is overridden in Video tasks.
    """
    name = 'strict_check_formula_id'
    default_value = True
    description = 'Checks that all formulas contain ID and throws exception if true.'


class CheckProductionModelsList(sp.SandboxBoolParameter):
    """
        Video model builds will fail if this option is set to True.
        The default value is overridden in Video tasks.
    """
    name = 'check_production_models_list'
    default_value = True
    description = 'Check that production models contain Hast, Wizard and Fast models.'


class FailWithoutSlices(sp.SandboxBoolParameter):
    """
        Whether to fail when formulas without slices are found.
    """
    name = 'fail_without_slices'
    default_value = True
    description = 'Fail if formula without slices'


class AnnotateSlices(sp.SandboxBoolParameter):
    """
        Whether to annotate slices.
    """
    name = 'annotate_slices'
    default_value = True
    description = 'Annotate slices'


class Deduplicate(sp.SandboxBoolParameter):
    """
        Pass the 'deduplicate' flag to the archiver (file-wise deduplication).
    """
    name = 'deduplicate_formulas'
    default_value = True
    description = 'Turn on file-wise deduplication'


class SaveUnpackedArchive(sp.SandboxBoolParameter):
    """
        Also save the unpacked models directory as a resource. See SEARCH-5858.
    """
    name = 'save_unpacked_archive'
    default_value = False
    description = 'Save unpacked archive'


class CheckSize(sp.SandboxBoolParameter):
    """
        Makes it possible to skip the archive size check for beta builds. SEARCH-4863
    """
    name = 'check_archive_size'
    default_value = True
    # the size limit field is attached to the "true" value of this flag
    sub_fields = {"true": ['max_archive_size']}
    description = 'Check archive size (Always check for prod builds!)'


class MaxArchiveSize(sp.SandboxIntegerParameter):
    """
        Upper bound for the archive size.
    """
    name = 'max_archive_size'
    default_value = models_const.ARCHIVE_SIZE_LIMIT
    description = 'Max archive size'


class IncludeMappingFiles(sp.SandboxBoolParameter):
    """
        Whether to export models mapping files from the production tag
        into the models directory before archiving.
    """
    name = 'include_mapping_files'
    default_value = False
    description = 'Include mapping files'


class SkipFormulas(sp.SandboxStringParameter):
    """
        Names of production formulas that must not be built.
        Used to test search output with "unused" formulas removed.
    """
    name = 'skip_prod_formula_names'
    multiline = True
    default_value = ''
    description = 'Skip production formula names (separated by comma)'


@rm_notify.notify2()
class BuildDynamicModels(nanny.ReleaseToNannyTask, SandboxTask):
    """
        The task downloads models from SVN by the given URL, or via HTTP links,
        and packs the downloaded models into an archive using tools/archiver.
    """

    type = 'BUILD_DYNAMIC_MODELS'

    execution_space = 50000  # 50 Gb, see RMINCIDENTS-119

    # Parameter classes of the task; their values are read from self.ctx by parameter name.
    input_parameters = (
        ModelsForProduction,
        ModelsUrlForProduction,
        ModelsForExperiment,
        ModelsUrlForExperiment,
        ModelsForBase,
        FilterBaseModelsForMiddle,
        ModelsTorrent,
        ModelsUrls,
        StrictMode,
        CheckProductionModelsList,
        StrictFormulaIdChecks,
        FailWithoutSlices,
        AnnotateSlices,
        SkipFormulas,
        Deduplicate,
        SaveUnpackedArchive,
        CheckSize,
        MaxArchiveSize,
        IncludeMappingFiles,
    )

    # The task works with SVN (see the Arcadia calls below), hence the SVN environment.
    environment = (SvnEnvironment(),)

    client_tags = ctc.Tag.Group.LINUX

    @property
    def footer(self):
        """
            Build the task footer from the stored archive diff.

            :return: result of ``models_compare.generate_diff_footer`` for the
                     'diff' context value and the subject of the release it was
                     compared against
        """
        current_diff = self.ctx.get('diff')
        release_info = self.ctx.get('latest_archive_release_info', {})
        return models_compare.generate_diff_footer(current_diff, release_info.get('release_subject'))

    @staticmethod
    def _check():
        """
            Sanity-check ``models_svn_url_revision`` against URLs with known revisions.

            ``eh.verify`` is called for every known pair with the comparison result
            and an explanatory message.
        """
        expected_revisions = {
            2054556: "arcadia:/robots/tags/ranking_models/stable-100",
            2891666: "arcadia:/arc/tags/begemot/stable-170-1",
            2252814: "arcadia:/arc/branches/base/stable-100",
        }
        for expected, url in expected_revisions.items():
            _, actual = models_svn_url_revision(url)
            message = (
                "Found wrong revision {} for url '{}', {} expected. "
                "Can't execute task with broken 'models_svn_url_revision'".format(actual, url, expected)
            )
            eh.verify(expected == actual, message)

    def on_enqueue(self):
        """
            Enqueue hook: verify revision detection, pin the production and
            experiment SVN URLs to revisions and create the output resource.
        """
        self._check()
        SandboxTask.on_enqueue(self)
        # the latest production tag is the fallback when no production URL is given
        production_default = latest_production_tag()
        self._set_model_urls("production", production_default)
        experiment_default = models_const.MODELS_EXPERIMENT_URL
        self._set_model_urls("experiment", experiment_default)
        self.create_models_resource()

    @property
    def release_template(self):
        """
            Release template whose subject is computed by
            ``dynamic_models_archive_arcadia_info`` from the task context.
        """
        release_subject = dynamic_models_archive_arcadia_info(self.ctx)
        return self.ReleaseTemplate(subject=release_subject)

    def get_dynamic_models_archive_resource(self):
        """
            Return the output archive resource.

            The resource stored under 'out_resource_id' in the context is fetched;
            if there is no such id, a new resource is created.
        """
        existing_id = self.ctx.get('out_resource_id')
        if not existing_id:
            return self.create_models_resource()
        return channel.sandbox.get_resource(existing_id)

    def create_models_resource(self):
        """
            Create the 'models.archive' output resource and remember its id in the
            context (both as-is and as a string).

            :return: the created resource
        """
        logging.debug("Create models resources")
        res_type = self.get_resource_type()
        res_description = self.get_dynamic_models_archive_resource_descr()
        resource = self.create_resource(
            res_description,
            'models.archive',
            res_type,
            arch='any',
        )
        resource_id = resource.id
        self.ctx['out_resource_id'] = resource_id
        self.ctx['out_resource_id_string'] = str(resource_id)
        return resource

    def get_dynamic_models_archive_resource_descr(self):
        """
            Compose the archive resource description: the current date followed by
            shortened production and experiment SVN URLs (when they are set).
        """
        chunks = ['Dynamic models archive ({})'.format(tu.date_ymd())]
        suffix_templates = (
            (" (prod {0})", ModelsUrlForProduction),
            (" (exp {0})", ModelsUrlForExperiment),
        )
        for template, url_param in suffix_templates:
            short_url = self._cut_svn_url(self.ctx.get(url_param.name))
            if short_url:
                chunks.append(template.format(short_url))
        return "".join(chunks)

    @staticmethod
    def _cut_svn_url(svn_url):
        svn_url = svn_url or ""
        return "".join(svn_url.rsplit("/", 2)[1:])

    @staticmethod
    def remove_empty_dirs(models_dir):
        rm_counts = 1
        while rm_counts:
            rm_counts = 0
            for root, dir_names, file_names in os.walk(models_dir, topdown=False):
                if not dir_names and not file_names:
                    rm_counts += 1
                    logging.debug("Remove empty dir %s", root)
                    os.rmdir(root)

    @staticmethod
    def remove_filtered_files(filtered_files):
        for filtered_file in filtered_files:
            logging.debug("Try to remove %s", filtered_file)
            os.remove(filtered_file)

    def filter_models(self, models_dir):
        """
            Drop models that do not belong to the requested search type and
            clean up directories left empty afterwards.

            :param models_dir: directory with merged models
        """
        # by default every model is kept
        keep_model = is_middle_true
        if utils.get_or_default(self.ctx, ModelsForBase):
            keep_model = models_utils.is_base_model
        elif utils.get_or_default(self.ctx, FilterBaseModelsForMiddle):
            keep_model = models_utils.is_middle_model
        waste = self._get_waste_files(models_dir, keep_model)
        self.remove_filtered_files(waste)
        self.remove_empty_dirs(models_dir)

    @staticmethod
    def _get_waste_files(models_dir, model_type_filter):
        """
            Collect files rejected by ``model_type_filter``.

            :param models_dir: root directory to scan
            :param model_type_filter: callable taking a path relative to
                                      ``models_dir``; falsy result marks the file as waste
            :return: set of paths (as produced by ``models_utils.walk_files``)
        """
        root = os.path.normpath(models_dir)
        return {
            path
            for path in models_utils.walk_files(root)
            if not model_type_filter(os.path.relpath(path, root))
        }

    def _skip_formulas(self, models_dir):
        """
            Remove the formulas listed in the ``SkipFormulas`` parameter from ``models_dir``.

            The parameter is a comma-separated list of file names. It is a multiline
            field, so all whitespace (spaces, tabs, newlines) is dropped before
            splitting; empty entries are ignored.

            :param models_dir: directory with downloaded models
        """
        raw_names = utils.get_or_default(self.ctx, SkipFormulas) or ""
        # "".join(s.split()) removes every whitespace character, not only spaces
        compact_names = "".join(raw_names.split())
        files_to_skip = {name for name in compact_names.split(",") if name}
        if not files_to_skip:
            # nothing to skip: do not walk the directory at all
            return
        logging.debug("Try to skip files: %s", files_to_skip)
        filtered_files = self._get_waste_files(models_dir, lambda p: os.path.basename(p) not in files_to_skip)
        self.remove_filtered_files(filtered_files)

    def _get_models(self, svn_dir, model_type):
        """
            Download models of the given type from SVN.

            The URL and revision are taken from the context keys built by
            ``_svn_url_key`` / ``_svn_rev_key``.

            :param svn_dir: parent directory for downloads
            :param model_type: sub-directory name and key suffix, e.g. "production"
            :return: path of the directory the models were downloaded to
        """
        logging.info('Get %s models', model_type)
        target_dir = os.path.join(svn_dir, model_type)
        svn_url = self.ctx[self._svn_url_key(model_type)]
        svn_revision = self.ctx[self._svn_rev_key(model_type)]
        models_download.svn(target_dir, svn_url, svn_revision)
        return target_dir

    def _set_model_urls(self, model_type, default):
        """
            Store the revision-pinned SVN URL and the revision for ``model_type``
            in the context.

            :param model_type: key suffix, e.g. "production"
            :param default: URL used when the context holds no URL for this type
        """
        url_key = self._svn_url_key(model_type)
        requested_url = self.ctx.get(url_key) or default
        logging.debug("Url is {}".format(requested_url))
        pinned_url, revision = models_svn_url_revision(requested_url)
        self.ctx[url_key] = pinned_url
        rev_key = self._svn_rev_key(model_type)
        self.ctx[rev_key] = revision

    def _svn_url_key(self, model_type):
        return "models_svn_url_{}".format(model_type)

    def _svn_rev_key(self, model_type):
        return "models_svn_revision_{}".format(model_type)

    def models_from_svn(self):
        """
            Download models from svn.

            Production models are downloaded when ``ModelsForProduction`` is set
            (formulas listed to be skipped are removed right after the download),
            experiment models when ``ModelsForExperiment`` is set.

            :return: set of paths to the root directories with models
        """
        svn_root = self.path('models_svn')
        downloaded = set()

        if utils.get_or_default(self.ctx, ModelsForProduction):
            production_dir = self._get_models(svn_root, "production")
            self._skip_formulas(production_dir)
            downloaded.add(production_dir)

        if utils.get_or_default(self.ctx, ModelsForExperiment):
            downloaded.add(self._get_models(svn_root, "experiment"))

        return downloaded

    def create_models_archive(self):
        """
            Download the required models and create the archive.

            Steps: sync the mx_ops and archiver tools, download models from SVN and
            from the optional torrent / URL sources, merge everything into one
            directory, filter it, prepare the models, pack the archive resource,
            optionally save the unpacked directory as a resource, and compare the
            new archive with the latest released one.

            :return: result of ``models_archiver.get_list`` for the created archive
        """
        models_dir = self.path("all_models")

        # get latest mx_ops executable
        mxops_id = apihelpers.get_last_resource(resource_types.MX_OPS_EXECUTABLE)
        logging.info("Mx ops resource id: %s", mxops_id)
        mxops_path = channel.task.sync_resource(mxops_id)
        # get latest archiver executable
        archiver_id = apihelpers.get_last_resource(resource_types.ARCHIVER_TOOL_EXECUTABLE)
        logging.info("Archiver resource id: %s", archiver_id)
        archiver_path = channel.task.sync_resource(archiver_id)

        # get all svn models
        models_dirs = self.models_from_svn()
        # custom sources are merged with ignore_conflicts=True
        models_dirs_ignore_conflicts = set()

        # get custom models from other sources
        torrent = self.ctx.get(ModelsTorrent.name)
        if torrent:
            upload_dir = self.path('models_skynet')
            models_dirs_ignore_conflicts.add(upload_dir)
            models_download.sky(upload_dir, torrent)

        fml_urls = models_download.parse_fml_urls(self.ctx.get(ModelsUrls.name))
        if fml_urls:
            upload_dir = self.path('models_download')
            models_dirs_ignore_conflicts.add(upload_dir)
            models_download.download_urls(upload_dir, fml_urls)

        # svn models first, custom models are merged afterwards
        for dir_with_models in models_dirs:
            models_merge.merge(models_dir, dir_with_models)

        for dir_with_models in models_dirs_ignore_conflicts:
            models_merge.merge(models_dir, dir_with_models, ignore_conflicts=True)

        self.filter_models(models_dir)

        meta_info = {
            'production': self.ctx.get(ModelsUrlForProduction.name),
            'experiment': self.ctx.get(ModelsUrlForExperiment.name),
            'sandbox_task': str(self.id),
            'sandbox_resource_id': str(self.get_dynamic_models_archive_resource().id),
        }
        if torrent:
            meta_info['models_torrent'] = torrent
        if fml_urls:
            meta_info['models_urls'] = fml_urls

        self.add_models_mapping(models_dir)
        models_prepare.prepare(
            mxops_path,
            models_dir,
            production=utils.get_or_default(self.ctx, CheckProductionModelsList),
            check_l3=not utils.get_or_default(self.ctx, ModelsForBase),
            check_id=utils.get_or_default(self.ctx, StrictFormulaIdChecks),
            check_slices=utils.get_or_default(self.ctx, AnnotateSlices),
            streams=models_const.PRODUCTION_STREAMS,
            meta=meta_info,
            fail_without_slices=self.ctx.get(FailWithoutSlices.name, FailWithoutSlices.default_value),
        )

        logging.debug("Create archive resource")
        # create archive resource
        archive_res = self.get_dynamic_models_archive_resource()
        logging.debug("Create archive")
        models_archiver.create(archiver_path, archive_res.path, utils.get_or_default(self.ctx, Deduplicate), models_dir)
        if utils.get_or_default(self.ctx, SaveUnpackedArchive):
            try:
                logging.debug("Create resource with unpacked models")
                self.create_resource(
                    "Unpacked " + archive_res.description,
                    models_dir, self.get_unpacked_resource_type()
                )
            except Exception as e:
                # Saving the unpacked copy is best-effort: the build must not fail
                # because of it, but the failure should be visible in the logs.
                eh.log_exception("Unable to create resource with unpacked models", e)

        self.compare_archives(archiver_path, archive_res.path)

        return models_archiver.get_list(archiver_path, archive_res.path)

    def add_models_mapping(self, models_dir):
        """
            Export models mapping files into ``models_dir`` when
            ``IncludeMappingFiles`` is set.

            The production models URL is split into the tags directory and the
            "<prefix>-<number>" tag folder; mappings are saved for that tag and for
            the preceding ones. Any failure is only logged.

            :param models_dir: directory the mapping files are exported to
        """
        if not utils.get_or_default(self.ctx, IncludeMappingFiles):
            return
        try:
            is_base = utils.get_or_default(self.ctx, ModelsForBase)
            target_file = "web-mmetamodels" if not is_base else "webmodels"
            mapping_url = self.ctx.get(ModelsUrlForProduction.name)
            if not mapping_url:
                return
            # drop the "@revision" suffix before splitting the path
            url_without_revision = mapping_url.rsplit("@")[0]
            tag_path, tag_folder = url_without_revision.strip("/").rsplit("/", 1)
            tag_prefix, tag_num_text = tag_folder.rsplit("-", 1)
            tag_num = int(tag_num_text)
            self._save_models_mapping(models_dir, target_file, tag_path, tag_folder, tag_num)
            self._save_prev_mappings(models_dir, tag_num, tag_path, tag_prefix, target_file)
        except Exception as e:
            eh.log_exception("Unable to save mapping files", e)

    def _save_prev_mappings(self, models_dir, tag_num, tag_path, tag_prefix, target_file):
        """
            Save mappings of the tags preceding ``tag_num`` and the default mapping.

            The four tag numbers before ``tag_num`` are checked; for each folder
            that exists under ``tag_path`` a numbered mapping is saved. The
            un-numbered (default) mapping is taken from the last existing previous
            folder, or from the current tag folder when none exists.
        """
        existing_folders = set(entry.strip("/") for entry in Arcadia.list(tag_path))
        default_folder = "{}-{}".format(tag_prefix, tag_num)  # in case of no prev folders
        first_prev_num = tag_num - 4
        for prev_num in range(first_prev_num, tag_num):
            candidate = "{}-{}".format(tag_prefix, prev_num)
            if candidate not in existing_folders:
                continue
            default_folder = candidate
            self._save_models_mapping(models_dir, target_file, tag_path, candidate, prev_num)
        self.set_info("Default tag folder: {}".format(default_folder))
        self._save_models_mapping(models_dir, target_file, tag_path, default_folder)

    def _save_models_mapping(self, models_dir, target_file_prefix, tag_path, source_folder, tag=None):
        """
            Export one mapping file from a tag folder into ``models_dir``.

            :param models_dir: destination directory
            :param target_file_prefix: destination file name (or its prefix)
            :param tag_path: URL of the tags directory
            :param source_folder: tag folder to export from
            :param tag: when not None, appended to the destination name as "_<tag>"
        """
        is_base = utils.get_or_default(self.ctx, ModelsForBase)
        source_filename = "test_middle" if not is_base else "test_base"
        if tag is None:
            target_filename = target_file_prefix
        else:
            target_filename = "{}_{}".format(target_file_prefix, tag)
        logging.debug("Save mappings from %s to %s", source_folder, target_filename)
        source_url = os.path.join(tag_path, source_folder, source_filename)
        destination = os.path.join(models_dir, target_filename)
        Arcadia.export(source_url, destination)

    def compare_archives(self, archiver, new_archive):
        """
            Compare the new archive with the latest released one.

            Does nothing when the context holds no previous resource id. Otherwise
            the diff is stored under 'diff' in the context, and in strict mode the
            check fails if any changed/added/deleted path is a production path.

            :param archiver: path to the archiver executable
            :param new_archive: path to the freshly built archive
        """
        release_info = self.ctx['latest_archive_release_info']
        prev_resource_id = release_info['resource_id']
        if not prev_resource_id:
            return
        prev_release_subject = self.ctx['latest_archive_release_info']['release_subject']
        prev_archive = self.sync_resource(prev_resource_id)
        # the message contains an html link, so the subject is escaped by hand
        info_message = u'Diff calculated: "current" vs "{0}". Resource: {1}'.format(
            cgi.escape(prev_release_subject),
            lb.resource_link(prev_resource_id),
        )
        self.set_info(info_message, do_escape=False)
        diff = models_compare.compare_archives(archiver, prev_archive, new_archive)
        self.ctx['diff'] = diff

        if not utils.get_or_default(self.ctx, StrictMode):
            return
        touched_paths = itertools.chain(diff.get('changed', []), diff.get('added', []), diff.get('deleted', []))
        production = sorted(path for path in touched_paths if is_production_path(path))
        if production:
            eh.check_failed('Production models changed: {}'.format(production))

    def get_resource_type(self):
        """
            Resource type of the archive: the base-search variant when
            ``ModelsForBase`` is set, the regular one otherwise.
        """
        if utils.get_or_default(self.ctx, ModelsForBase):
            return resource_types.DYNAMIC_MODELS_ARCHIVE_BASE
        return resource_types.DYNAMIC_MODELS_ARCHIVE

    def get_unpacked_resource_type(self):
        """
            Resource type of the unpacked archive: the base-search variant when
            ``ModelsForBase`` is set, the regular one otherwise.
        """
        if utils.get_or_default(self.ctx, ModelsForBase):
            return models_res.DynamicModelsArchiveBaseUnpacked
        return models_res.DynamicModelsArchiveUnpacked

    def get_latest_released_archive(self):
        """
            Find the most recent release of the archive resource type.

            :return: the first of the releases ordered by '-release__creation_time',
                     or None when there are no releases
        """
        query = {
            'resource_type': self.get_resource_type(),
            'arch': 'any',
            'order_by': '-release__creation_time',
            'limit': 3,
        }
        releases = channel.sandbox.list_releases(**query)
        logging.info('Recent releases: %s', [r.id for r in releases])
        if len(releases) > 0:
            return releases[0]
        return None

    def on_execute(self):
        """
            Build the models archive and validate its size.

            Information about the latest released archive is stored in the context
            first (it is read later when archives are compared). ``ModelsError``
            raised during the build is reported through ``eh.check_failed``.
            When ``CheckSize`` is set, the archive must not exceed ``MaxArchiveSize``.
        """
        latest_release = self.get_latest_released_archive()
        if latest_release:
            release_info = {
                "release_id": latest_release.id,
                "release_subject": latest_release.subject,
                "resource_id": latest_release.resources[0].id,
            }
        else:
            release_info = {
                "release_id": None,
                "release_subject": None,
                "resource_id": None,
            }
        self.ctx['latest_archive_release_info'] = release_info

        try:
            self.ctx['models'] = self.create_models_archive()
        except models_utils.ModelsError as e:
            eh.check_failed(str(e))
        logging.debug("Models archive creation is finished")

        out_resource = channel.sandbox.get_resource(self.ctx['out_resource_id'])
        archive_size = os.path.getsize(out_resource.path)
        logging.info("Archive size: %s", size2str(archive_size))
        archive_max_size = utils.get_or_default(self.ctx, MaxArchiveSize)
        logging.info("Archive size limit: %s", size2str(archive_max_size))

        if not utils.get_or_default(self.ctx, CheckSize):
            return
        too_big_message = "Archive is too big: {size} > {limit}".format(
            size=size2str(archive_size),
            limit=size2str(archive_max_size),
        )
        eh.ensure(archive_size <= archive_max_size, too_big_message)

    def mark_released_resources(self, status, ttl=720):
        """
            Delegate to ``SandboxTask.mark_released_resources`` with this task's own
            default ``ttl`` of 720 (its units are defined by the base implementation).

            :param status: release status, passed through unchanged
            :param ttl: time to live, passed through unchanged
            :return: whatever the base implementation returns
        """
        return SandboxTask.mark_released_resources(self, status, ttl)

    def on_release(self, additional_parameters):
        """
            Release hook: run the Nanny release handler first, then the generic
            Sandbox one.
        """
        for parent in (nanny.ReleaseToNannyTask, SandboxTask):
            parent.on_release(self, additional_parameters)


# Module-level alias of the task class; presumably the name the Sandbox loader
# looks up to discover the task implemented by this module — TODO confirm.
__Task__ = BuildDynamicModels
