#!/usr/bin/env python

import os
import sys
import csv
import logging
import textwrap
import operator
import argparse
import datetime
import collections

from common import console
from common import projects_handler
from common.types.task import Status

from yasandbox.database import mapping

from sandboxsdk import svn

import projects

# Extend the module search path with the Skynet root, the directory three levels above
# this script, and the sibling "sandbox-tasks" checkout next to that directory.
# NOTE(review): this runs after the project imports above, so it can only affect imports
# performed later (e.g. lazily loaded modules) -- confirm this ordering is intended.
_script_path = os.path.abspath(__file__)
_base_dir = os.path.dirname(os.path.dirname(os.path.dirname(_script_path)))
sys.path[0:0] = [
    "/skynet",
    _base_dir,
    os.path.join(os.path.dirname(_base_dir), "sandbox-tasks"),
]

# Number of document ids sent to the database in a single update query.
CHUNK_SIZE = 1000
# Repository location that task package names are appended to when building removal URLs.
SVN_URL_PREFIX = "svn+ssh://arcadia.yandex.ru/arc/trunk/arcadia/search/garden/sandbox-tasks/projects/"


class JunkCleaner(object):
    """Collect and remove data of outdated ("junk") task types.

    Input rows are sequences of ``(task_type, remove_from_repo, remove_from_db)`` as
    produced by :meth:`input_parser`. The ``get_*`` methods only collect what would be
    removed; the ``remove_*`` methods perform the actual changes.
    """

    def __init__(self, log_file_path):
        """Create a cleaner that logs into a file inside directory `log_file_path`."""
        self.log_path = log_file_path
        self.logger = self.init_logger()

    def chunker(self, data, size):
        """Yield consecutive slices of `data`, each at most `size` items long."""
        # Step through by index instead of repeatedly re-slicing the remainder,
        # which copied the whole tail on every iteration.
        for start in range(0, len(data), size):
            yield data[start:start + size]

    def init_logger(self):
        """Set up and return the "CleanerLog" logger writing to ``cleaner.log``.

        If the log file already exists, a timestamp suffix is appended to the file name.
        As a side effect `self.log_path` is replaced with the full path of the log file.
        """
        logger = logging.getLogger("CleanerLog")
        logger.setLevel(logging.DEBUG)
        path = os.path.join(self.log_path, "cleaner.log")
        if os.path.exists(path):
            # Do not append to a previous run's log: pick a new, timestamped file name.
            path += ".{}".format(str(datetime.datetime.now()).replace(" ", ":"))
        self.log_path = path
        handler = logging.FileHandler(path)
        handler.setLevel(logging.DEBUG)
        handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
        logger.addHandler(handler)
        return logger

    def get_db_tasks_to_remove(self, data):
        """Collect ids of non-RELEASED tasks whose type is flagged for DB removal.

        :param data: parsed input rows
        :return: tuple of (list of task ids, number of task types flagged for DB removal)
        """
        junk_types = [row for row in data if row[2]]
        junk_ids = list(mapping.Task.objects(
            type__in=[row[0] for row in junk_types],
            execution__status__ne=Status.RELEASED).scalar("id"))
        self.logger.info("Junk tasks to remove from DB successfully collected. Total amount: %s", len(junk_ids))
        self.logger.debug("Junk tasks ids: \n %s", junk_ids)
        return junk_ids, len(junk_types)

    def get_repo_urls_to_remove(self, data):
        """Build repository URLs of task packages flagged for removal from the repo.

        Task types missing from `projects.TYPES` and types with an empty package are skipped.

        :param data: parsed input rows
        :return: tuple of (list of URLs, number of task types flagged for repo removal)
        """
        if not hasattr(projects, "TYPES"):
            projects_handler.load_project_types(raise_exceptions=False)
        filtered_data = [row for row in data if row[1]]
        packages = [projects.TYPES[row[0]].package for row in filtered_data if row[0] in projects.TYPES]
        # Only the last component of the dotted package name is appended to the URL prefix.
        remove_me = [SVN_URL_PREFIX + package.split(".")[-1] for package in packages if package]
        self.logger.info("Junk tasks to remove from repo successfully collected. Total amount: %s", len(remove_me))
        self.logger.debug("Junk tasks repo URLs: \n %s", remove_me)
        return remove_me, len(filtered_data)

    def get_schedulers_to_remove(self, data):
        """Collect ids of schedulers whose task type is flagged for DB removal.

        :param data: parsed input rows
        :return: tuple of (list of scheduler ids, number of task types flagged for DB removal)
        """
        junk_types = [row for row in data if row[2]]
        junk_ids = list(mapping.Scheduler.objects(task__type__in=[row[0] for row in junk_types]).scalar("id"))
        self.logger.info("Junk schedulers to remove successfully collected. Total amount: %s", len(junk_ids))
        self.logger.debug("Junk schedulers ids: \n %s", junk_ids)
        return junk_ids, len(junk_types)

    def remove_from_db(self, tasks):
        """Set the execution status of the given task ids to DELETED, chunk by chunk.

        :param tasks: list of task ids
        """
        pbar = console.ProgressBar("Removing fetched tasks documents", len(tasks))
        for chunk in self.chunker(tasks, CHUNK_SIZE):
            mapping.Task.objects(id__in=chunk).update(set__execution__status=Status.DELETED)
            # Audits and resources should be deleted by `cleaner` service thread together with DELETED tasks
            pbar.add(len(chunk))
        pbar.finish()
        # Log the number of processed ids; the method must not rely on a script-level global.
        self.logger.info("Junk tasks successfully removed from DB. Total amount: %s", len(tasks))

    def remove_from_repo(self, urls, user):
        """Delete the given URLs from the repository on behalf of `user`.

        :return: number of items reported as removed by the SVN client
        """
        client = svn.Arcadia()
        removed = client.delete(urls, "Outdated task. Removed by sandbox JunkCleaner", user=user)
        self.logger.info("Junk tasks successfully removed from repo. Total amount: %s", len(removed))
        return len(removed)

    def remove_schedulers(self, schedulers):
        """Set the status of the given scheduler ids to DELETED, chunk by chunk.

        :param schedulers: list of scheduler ids
        """
        pbar = console.ProgressBar("Removing fetched schedulers documents", len(schedulers))
        for chunk in self.chunker(schedulers, CHUNK_SIZE):
            # Should be deleted by `cleaner` service thread
            mapping.Scheduler.objects(id__in=chunk).update(set__status=mapping.Scheduler.Status.DELETED)
            pbar.add(len(chunk))
        pbar.finish()
        # Log the number of processed ids; the method must not rely on a script-level global.
        self.logger.info("Junk schedulers successfully removed from DB. Total amount: %s", len(schedulers))

    def input_parser(self, file_path):
        """Parse the input CSV file into a list of ``[task_type, repo_flag, db_flag]`` rows.

        The CSV dialect is sniffed from the first 1024 bytes of the file. Only values that
        are exactly ``"TRUE"`` or ``"FALSE"`` are converted to booleans; any other value is
        kept as the original string.

        :param file_path: path to the CSV file
        :return: list of three-item lists
        :raises IndexError: if a data row has fewer than three columns
        """
        flags = {"TRUE": True, "FALSE": False}
        first_three = operator.itemgetter(0, 1, 2)

        def convert_to_bool(data):
            return [flags.get(item, item) for item in data]

        with open(file_path, "rb") as fh:
            dialect = csv.Sniffer().sniff(fh.read(1024))
            fh.seek(0)
            rows = list(csv.reader(fh, dialect))
        # The first row is skipped (header, per the documented file format).
        # NOTE(review): the last row is dropped as well -- presumably the input is expected
        # to end with a non-data row; confirm against real input files.
        return [convert_to_bool(first_three(row)) for row in rows[1:-1]]


# Script entry point: parse arguments, collect junk data and, when `-d` is given, remove it.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        formatter_class=lambda *args, **kwargs: argparse.RawTextHelpFormatter(*args, width=120, **kwargs),
        description=textwrap.dedent("""
            Junk Sandbox tasks cleaner.
            Cleaner removes all information about runs, resources and audit records for task types given in input file.
            It does not remove RELEASED tasks and their metadata (resources, audit).
            By default it does not remove anything and only collects data for removal.
            To invoke removing collected data use -d option.
            """)
    )
    group = parser.add_argument_group("Input CSV file options")
    group.add_argument(
        "-f",
        "--filepath",
        help=textwrap.dedent("""
            CSV file with info about outdated Sandbox tasks in the following format: column with task type,
            column with flag indicating removing of task's code from repository, and column with flag
            indicating removing of task's history from database (information about all runs and resources).

            task_type; remove_from_repo; remove_from_db
            OUTDATED_TASK; TRUE; FALSE
            ...
            """)
    )
    group.add_argument("-db", "--remove_from_db", default=False, action="store_true", help="Remove tasks from db")
    group.add_argument("-r", "--remove_from_repo", default=False, action="store_true", help="Remove tasks from repo")
    group.add_argument("-d", "--delete", default=False, action="store_true", help="Delete fetched data")
    group.add_argument("-l", "--logpath", default=os.path.curdir, help="Path to log file")
    group.add_argument("-u", "--arcadia_user", default="", help="User name to perform removing outdated tasks in repo")
    input_args = parser.parse_args()
    cz = console.AnsiColorizer()

    # Validate the arguments before touching the database or the repository.
    if not input_args.filepath:
        print(cz.colorize("Specify path to input file with -f argument, please.", "red"))
        sys.exit(1)
    if not input_args.remove_from_db and not input_args.remove_from_repo:
        print(cz.colorize("Specify place to delete from. See help for more.", "red"))
        sys.exit(1)
    if input_args.delete and input_args.remove_from_repo and not input_args.arcadia_user:
        print(cz.colorize("Specify arcadia user using '-u' option to remove outdated tasks from repo.", "red"))
        # Without a user the repo removal must not be attempted, so stop here.
        sys.exit(1)

    # Connect only once the arguments are known to be valid, so that `--help`
    # and usage errors do not require a database connection.
    mapping.ensure_connection()

    # Missing counters default to 0, so the summary below is valid for any option combination.
    cnt = collections.Counter()
    cleaner = JunkCleaner(input_args.logpath)
    types = cleaner.input_parser(input_args.filepath)

    if input_args.remove_from_db:
        with console.LongOperation("Fetching outdated tasks from DB"):
            task_ids, cnt["types"] = cleaner.get_db_tasks_to_remove(types)
        cnt["tasks"] = len(task_ids)
        with console.LongOperation("Fetching outdated schedulers from DB"):
            scheduler_ids, cnt["stypes"] = cleaner.get_schedulers_to_remove(types)
        cnt["schedulers"] = len(scheduler_ids)
        if input_args.delete:
            cleaner.remove_from_db(task_ids)
            cleaner.remove_schedulers(scheduler_ids)

    if input_args.remove_from_repo:
        urls, cnt["rtypes"] = cleaner.get_repo_urls_to_remove(types)
        cnt["repo"] = len(urls)
        if input_args.delete:
            with console.LongOperation("Removing junk task folders from repo"):
                cleaner.remove_from_repo(urls, input_args.arcadia_user)

    # Final summary: what was removed (with -d) or what would be removed (dry run).
    if input_args.delete:
        print(cz.colorize("{} urls remove from repo for {} task types".format(cnt["repo"], cnt["rtypes"]), "yellow"))
        print(cz.colorize(
            "Information about {} tasks removed from tasks, resources and audit collections for {} task types".format(
                cnt["tasks"], cnt["types"]
            ),
            "yellow"
        ))
        print(cz.colorize(
            "{} documents removed from scheduler collection for {} task types".format(cnt["schedulers"], cnt["stypes"]),
            "yellow"
        ))
    else:
        print(cz.colorize(
            "{} urls fetched to remove from repo for {} task types".format(cnt["repo"], cnt["rtypes"]),
            "white"
        ))
        print(cz.colorize(
            "{} documents fetched to remove from tasks collection for {} task types".format(cnt["tasks"], cnt["types"]),
            "white"
        ))
        print(cz.colorize(
            "{} documents fetched to remove from schedulers collection for {} task types".format(
                cnt["schedulers"],
                cnt["stypes"]
            ),
            "white"
        ))
        print(cz.colorize("See logs at {} with fetched data info".format(cleaner.log_path), "cyan"))
    sys.exit(0)
