# coding: utf-8

import itertools
import collections
import datetime as dt
import json
import logging
import os
from multiprocessing.dummy import Pool as ThreadPool

from sandbox import common
from sandbox.common import itertools as sb_it
import sandbox.common.types.resource as ctr
import sandbox.common.types.notification as ctn
from sandbox import sdk2

import sandbox.projects.common.dynamic_models.bundles as models_bundles
import sandbox.projects.common.dynamic_models.const as models_const
import sandbox.projects.common.dynamic_models.experiments as models_exp
import sandbox.projects.common.dynamic_models.matrixnet as models_mxnet
import sandbox.projects.common.dynamic_models.mxops as models_mxops
import sandbox.projects.common.utils as common_utils
from sandbox.projects.release_machine.helpers.svn_helper import SvnHelper
from sandbox.projects.common import link_builder as lb
from sandbox.projects import resource_types


# Pair of a model (or bundle) path and a dict-like object with its properties
# (the code in this file reads the "date", "author", "user" and "formula-id" keys from it).
ModelWithProps = collections.namedtuple("ModelWithProps", ["name", "props"])
# Owners listed here do not receive e-mails themselves: notify_user sends the letter
# to RESPONSIBLE_USER instead.
ROBOTS = ["robot-web-conveyor", "loadbase"]
RESPONSIBLE_USER = "gaiduk"
# E-mail templates keyed by notification kind ("warn" / "remove").
# Each "body" is a str.format template with the {user}, {models} and {task} placeholders.
NOTIFY = {
    "warn": {
        "subj": "There are some old formulas",
        "body": (
            "Dear {user}!\n"
            "You received this e-mail because you are the owner for one of the following models:\n"
            "{models}.\n"
            "These formulas are probably old and unused, so they will be automatically deleted soon.\n"
            "Check done by task {task}"
        ),
    },
    "remove": {
        "subj": "Your old formulas have been removed",
        "body": (
            "Dear {user}!\n"
            "You received this e-mail because you are the owner for one of the following models:\n"
            "{models}.\n"
            "These formulas are very old and unused, so they were deleted automatically by task: {task}\n"
            "If you need these formulas, please, rollback!"
        )
    },
}


class RemoveUnusedExperimentalFormulas(sdk2.Task):
    """
        Find old unused experimental models and bundles, notify their owners and remove the oldest ones.

        1. Checkout all models
        2. Filter used experimental models
        3. Find old experimental models (date < N days)
        4. Write letter to owners of old experimental models (date < M days)
        5. Delete models older than `days_to_remove` from svn (only if `commit` is set)
    """
    class Requirements(sdk2.Requirements):
        disk_space = 25 * 1024  # 25 Gb
        cores = 1

        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.Parameters):
        days_to_remove = sdk2.parameters.Integer("Remove unused models older than (days)", default=70)
        days_to_notify = sdk2.parameters.Integer("Notify owners about unused models older than (days)", default=60)
        send_emails = sdk2.parameters.Bool("Send e-mails", default=True)
        commit = sdk2.parameters.Bool("Commit removed models", default=True)

    def on_execute(self):
        """
            Task entry point: export the models, select old unused ones, notify owners and
            (when the `commit` parameter is set) delete the oldest ones from svn.
        """
        self.check_input()
        self.today = dt.datetime.today()
        # notify_about_models is called twice below (for models, then for bundles) and appends
        # to these dicts, so they are reset exactly once per execution.
        self.Context.models_to_warn = {}
        self.Context.models_to_remove = {}

        prod_path = sdk2.svn.Arcadia.export(models_const.MODELS_PROD_URL, "production")
        exp_path = sdk2.svn.Arcadia.export(models_const.MODELS_EXPERIMENT_URL, "experiment")

        prod_bundles_info = list(models_bundles.get_bundles_info([prod_path], short_path=False))
        exp_bundles_info = list(models_bundles.get_bundles_info([exp_path], short_path=False))
        all_bundles_info = prod_bundles_info + exp_bundles_info
        logging.debug("Prod bundles info:\n%s", json.dumps(prod_bundles_info, indent=2))
        logging.debug("Exp bundles info:\n%s", json.dumps(exp_bundles_info, indent=2))
        exp_models = models_mxnet.models_from_dir(exp_path, recursive=False)
        logging.debug("Experimental models from dir '%s':\n%s", exp_path, log_json(exp_models))

        curr_exp = models_exp.get_currently_used_models()

        prod_files = self.get_all_files_used_in_prod()
        log_models("All files used in prod", prod_files)
        bundles_rm, bundles_warn = self.bundles_to_remove_and_notify(exp_bundles_info, curr_exp, prod_files)
        models_rm, models_warn = self.models_to_remove_and_notify(all_bundles_info, exp_models, curr_exp)
        self.notify_about_models(models_rm, models_warn, "models")
        self.notify_about_models(bundles_rm, bundles_warn, "bundles")
        if self.Parameters.commit:
            self.remove_models_from_svn(models_rm)
            self.remove_models_from_svn(bundles_rm)

    @staticmethod
    def get_all_files_used_in_prod():
        """
            Collect file names mentioned in the "test_base" and "test_middle" mappings of recent stable tags.

            :return: set of file names, the union of get_all_files_used_in_mapping over all inspected mappings
        """
        prod_tags_url = models_const.MODELS_FLAGS_TAG_DIR_URL
        last_tag_num = int(SvnHelper.get_highest_folder(prod_tags_url, only_num=True))
        prod_files = ["test_base", "test_middle"]
        # NOTE(review): the range covers 30 tags and excludes last_tag_num itself --
        # confirm whether the newest tag is skipped on purpose.
        mappings = [
            "{}/stable-{}/{}".format(prod_tags_url, tag_num, file_name)
            for tag_num in range(last_tag_num - 30, last_tag_num)
            for file_name in prod_files
        ]
        pool = ThreadPool(16)
        try:
            all_files_in_prod = pool.map(get_all_files_used_in_mapping, mappings)
        finally:
            # release worker threads even if one of the svn requests raised
            pool.close()
            pool.join()
        # set().union(...) also works for an empty list of results, unlike set.union(*[])
        return set().union(*all_files_in_prod)

    def notify_about_models(self, models_rm, models_warn, file_type):
        """
            Log, store in the task context and e-mail the per-owner lists of models.

            Results are appended to `Context.models_to_warn` / `Context.models_to_remove`, which are
            reset in on_execute, so consecutive calls (models, bundles) do not overwrite each other.

            :param models_rm: ModelWithProps items that are going to be removed, sorted by get_user
            :param models_warn: ModelWithProps items whose owners should be warned, sorted by get_user
            :param file_type: human readable kind of the items, used in log messages only
        """
        logging.info("Old %s to warn:", file_type)
        self._notify_owners(models_warn, NOTIFY["warn"], self.Context.models_to_warn)
        logging.info("Old %s to remove:", file_type)
        self._notify_owners(models_rm, NOTIFY["remove"], self.Context.models_to_remove)

    def _notify_owners(self, models, tmpl, report):
        """Group `models` by owner, append (path, date) pairs to `report[owner]` and notify every owner."""
        # itertools.groupby only merges adjacent items, callers pass lists sorted by get_user
        for user, user_models in itertools.groupby(models, get_user):
            models_list = list(user_models)
            model_names = get_model_names(models_list)
            log_models(user, model_names)
            # extend instead of assign: the same owner may already have entries from a previous call
            report.setdefault(user, []).extend(
                (os.path.relpath(m.name), m.props.get("date")) for m in models_list
            )
            self.notify_user(tmpl, user, model_names)

    def models_to_remove_and_notify(self, bundles_info, exp_models, curr_used_model_names):
        """
            Select experimental models that are old enough to be removed or to warn their owners about.

            :param bundles_info: info about prod and experimental bundles; formula ids used in them are kept
            :param exp_models: experimental models to check
            :param curr_used_model_names: currently used models, passed to models_exp.model_is_experiment
            :return: tuple(remove_now, remove_soon) of ModelWithProps lists, both sorted by owner:
                remove_now is older than `days_to_remove`, remove_soon is only older than `days_to_notify`
        """
        # check usage in experiments:
        in_exp, not_in_exp = split_by_condition(exp_models, models_exp.model_is_experiment, curr_used_model_names)
        log_models("Models IN experiment", in_exp)
        log_models("Models NOT IN experiment", not_in_exp)
        # check ability to remove:
        non_removable, removable = split_by_condition(not_in_exp, models_exp.model_in_do_not_remove)
        log_models("Models NOT removable", non_removable)
        log_models("Models removable", removable)
        mxops_path = get_mx_ops_path()
        removable_models_with_props = [ModelWithProps(i, models_mxops.get_props(mxops_path, i)) for i in removable]
        # fill missing date for props
        for i in removable_models_with_props:
            if i.props.get("date") is None:
                i.props["date"] = sdk2.svn.Arcadia.info(models_const.MODELS_ROOT_URL + os.path.relpath(i.name))["date"]
        # check formula id
        without_formula_id, with_formula_id = split_by_condition(removable_models_with_props, model_without_formula_id)
        log_models("Models without formula id", without_formula_id)
        resources_models = [i for i in without_formula_id if get_model_name(i).startswith("experiment/Resource")]
        log_models("Resources models", resources_models)
        # check usage in bundles:
        used_in_bundles = set(common_utils.flatten(models_bundles.bundle_formula_ids(b[1][0]) for b in bundles_info))
        log_models("Formulas used in bundles", list(used_in_bundles))
        unused = [i for i in with_formula_id if i.props["formula-id"] not in used_in_bundles]
        log_models("Models unused", unused)
        # check oldness:
        old_models = sorted(
            [m for m in unused + resources_models if self.models_older(m, self.Parameters.days_to_notify)],
            key=get_user
        )
        log_models("Models older than {} days:".format(self.Parameters.days_to_notify), old_models)
        remove_now, remove_soon = split_by_condition(old_models, self.models_older, self.Parameters.days_to_remove)
        return remove_now, remove_soon

    def bundles_to_remove_and_notify(self, exp_bundles_info, curr_exp, prod_files):
        """
            Select experimental bundles that are old enough to be removed or to warn their owners about.

            :param exp_bundles_info: info about experimental bundles, the first item of every entry is a path
            :param curr_exp: currently used models, passed to models_exp.model_is_experiment
            :param prod_files: file names used in prod, bundles whose basename is among them are kept
            :return: tuple(remove_now, remove_soon) of ModelWithProps lists, both sorted by owner:
                remove_now is older than `days_to_remove`, remove_soon is only older than `days_to_notify`
        """
        # check usage in experiments:
        exp_bundle_names = (os.path.relpath(i[0]) for i in exp_bundles_info)
        in_exp, not_in_exp = split_by_condition(exp_bundle_names, models_exp.model_is_experiment, curr_exp)
        log_models("Bundles IN experiment", in_exp)
        log_models("Bundles NOT IN experiment", not_in_exp)
        # check ability to remove:
        non_removable, removable = split_by_condition(not_in_exp, models_exp.model_in_do_not_remove)
        log_models("Bundles NOT removable", non_removable)
        log_models("Bundles removable", removable)
        # check usage in prod:
        in_prod, not_in_prod = split_by_condition(removable, model_was_in_prod, prod_files)
        log_models("Bundles was IN prod", in_prod)
        log_models("Bundles was NOT IN prod", not_in_prod)
        # check oldness:
        pool = ThreadPool(16)
        try:
            # map (not imap_unordered) keeps the input order, so the sorted result below is deterministic
            bundles_with_props = pool.map(get_models_info, not_in_prod)
        finally:
            # close the pool and wait for the work to finish, even if an svn request raised
            pool.close()
            pool.join()
        old_bundles = sorted(
            [m for m in bundles_with_props if self.models_older(m, self.Parameters.days_to_notify)],
            key=get_user
        )
        log_models("Bundles older than {} days:".format(self.Parameters.days_to_notify), old_bundles)
        remove_now, remove_soon = split_by_condition(old_bundles, self.models_older, self.Parameters.days_to_remove)

        return remove_now, remove_soon

    def check_input(self):
        """
            Validate task parameters.

            :raise common.errors.TaskFailure: if `days_to_notify` is greater than `days_to_remove`
        """
        # NOTE(review): equal values pass this check although the message asks for
        # days_to_remove > days_to_notify -- confirm whether equality should be rejected too.
        if self.Parameters.days_to_notify > self.Parameters.days_to_remove:
            msg = (
                "It's impossible to remove models before notification owners. "
                "Please, set days_to_remove > days_to_notify"
            )
            raise common.errors.TaskFailure(msg)

    def models_older(self, model_with_props, days):
        """Delegate to models_exp.model_is_older_than, using the moment of task start as the reference date."""
        return models_exp.model_is_older_than(model_with_props, days, self.today)

    def notify_user(self, tmpl, user, model_names):
        """
            Send an e-mail built from `tmpl` to `user`; does nothing when the `send_emails` parameter is off.

            :param tmpl: one of the NOTIFY templates (dict with "subj" and "body")
            :param user: owner login; robots and empty logins are replaced with RESPONSIBLE_USER
            :param model_names: list of model names to mention in the letter
        """
        if not self.Parameters.send_emails:
            return
        # get_user returns "" when the owner is unknown; an empty recipient is useless,
        # so such letters go to the responsible person as well as letters for robots
        if not user or user in ROBOTS:
            user = RESPONSIBLE_USER
        self.server.notification(
            subject=tmpl["subj"],
            body=tmpl["body"].format(
                # join the names: formatting the list itself would put its Python repr into the letter
                user=user, models=", ".join(model_names), task=lb.task_link(self.id, plain=True)
            ),
            recipients=[user],
            transport=ctn.Transport.EMAIL
        )

    def remove_models_from_svn(self, models_and_props):
        """
            Delete the given models from svn, one commit per chunk of at most 300 models.

            :param models_and_props: list of models (ModelWithProps or plain names) to delete
        """
        if not models_and_props:
            logging.info("Nothing to remove, skip it")
            return
        for models_and_props_chunk in sb_it.chunker(models_and_props, 300):
            model_names = get_model_names(models_and_props_chunk)
            urls = [models_const.MODELS_ROOT_URL + m for m in model_names]
            msg = "Delete old models: {}. Checked in task: {}".format(", ".join(model_names), self.id)
            sdk2.svn.Arcadia.delete(urls, message=msg, user='zomb-sandbox-rw')
        self.set_info("{} models were removed from svn".format(len(models_and_props)))


def get_all_files_used_in_mapping(mapping_url):
    """
        Build the set of file names referenced by one mapping file stored in svn.

        Every non-blank line is split on whitespace; the tokens are glued together with the
        last one wrapped in underscores, and the result is added twice: with the ".json"
        and with the ".info" suffix.

        :param mapping_url: svn url of the mapping file
        :return: set of file names, empty if the url does not pass sdk2.svn.Arcadia.check
    """
    logging.info("Process mappings: %s", mapping_url)
    used_files = set()
    if sdk2.svn.Arcadia.check(mapping_url):
        content = sdk2.svn.Arcadia.cat(mapping_url)
        for raw_line in content.split("\n"):
            tokens = raw_line.strip().split()
            if tokens:
                stem = "".join(tokens[:-1]) + "_{}_".format(tokens[-1])
                used_files.update((stem + ".json", stem + ".info"))
    return used_files


def get_models_info(model_name):
    """
        Pair a model name with its svn info.

        :param model_name: model path relative to models_const.MODELS_ROOT_URL
        :return: ModelWithProps(model_name, result of sdk2.svn.Arcadia.info for the model url)
    """
    model_url = models_const.MODELS_ROOT_URL + model_name
    svn_info = sdk2.svn.Arcadia.info(model_url)
    return ModelWithProps(name=model_name, props=svn_info)


def get_user(model_with_props):
    """
        Pick the owner of a model from its props.

        :param model_with_props: object with a dict-like `props` attribute
        :return: the first non-empty value among props["author"] and props["user"], "" if both are empty
    """
    props = model_with_props.props
    for key in ("author", "user"):
        owner = props.get(key)
        if owner:
            return owner
    return ""


def model_was_in_prod(model, prod_files):
    """
        Check whether the file name of a model is among the files used in prod.

        :param model: path of the model, only its last component is compared
        :param prod_files: container of file names
        :return: True if the base name of `model` is in `prod_files`
    """
    _, file_name = os.path.split(model)
    return file_name in prod_files


def model_without_formula_id(model):
    """
        Tell whether a model has no formula id.

        :param model: object with a dict-like `props` attribute
        :return: True if props["formula-id"] is missing or None
    """
    formula_id = model.props.get("formula-id")
    return formula_id is None


def get_mx_ops_path():
    """
        Find a READY MX_OPS_EXECUTABLE resource and return the local path to its data.

        :return: path to the resource data as str
        :raise common.errors.TaskFailure: if no such resource is found
    """
    found = list(sdk2.Resource.find(
        resource_type=resource_types.MX_OPS_EXECUTABLE,
        state=ctr.State.READY,
    ).limit(1))
    if not found:
        # fail with a clear message instead of an IndexError on an empty search result
        raise common.errors.TaskFailure("No READY MX_OPS_EXECUTABLE resource found")
    mx_ops = found[0]
    logging.info("Mx ops resource found: %s", mx_ops)
    return str(sdk2.ResourceData(mx_ops).path)


def log_json(obj):
    """
        Render an object as pretty-printed json for log messages.

        :param obj: any json-serializable object
        :return: json string with the indent of 2 spaces
    """
    pretty = json.dumps(obj, indent=2)
    return pretty


def log_models(descr, models_and_props):
    """
        Write the amount and the names of the given models to the log at info level.

        :param descr: title of the log record
        :param models_and_props: sized collection of models (ModelWithProps or plain names)
    """
    names_dump = json.dumps(get_model_names(models_and_props), indent=2)
    logging.info("%s (%s):\n%s", descr, len(models_and_props), names_dump)


def get_model_names(models_and_props):
    """
        Convert models to their names.

        :param models_and_props: iterable of models (ModelWithProps or plain names)
        :return: list with the result of get_model_name for every item, in the same order
    """
    names = []
    for entry in models_and_props:
        names.append(get_model_name(entry))
    return names


def get_model_name(model_with_props):
    """
        Get the relative path of a model.

        :param model_with_props: either a path or a tuple/list whose first item is a path
        :return: os.path.relpath of the path
    """
    if isinstance(model_with_props, (tuple, list)):
        path = model_with_props[0]
    else:
        path = model_with_props
    return os.path.relpath(path)


def split_by_condition(iterable, condition, *args):
    """
        Partition items of an iterable by a predicate, keeping the original order in both parts.

        :param iterable: Any iterable[T]
        :param condition: Function(T, *args) -> bool
        :param args: additional arguments for condition func
        :return: tuple(good_list, bad_list): good_list - items with truthy condition, bad_list - all the others
    """
    matched, rejected = [], []
    for item in iterable:
        target = matched if condition(item, *args) else rejected
        target.append(item)
    return matched, rejected
