import six

import os
import re
import sys
import csv
import json
import logging
import textwrap
import collections
import datetime as dt
from six.moves import queue
import threading as th

from sandbox import common
import sandbox.common.types.task as ctt
import sandbox.common.types.client as ctc
import sandbox.common.types.database as ctd

from sandbox.yasandbox.database import mapping

from sandbox.sandboxsdk import task
from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk import process
from sandbox.sandboxsdk import svn
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.errors import SandboxSubprocessError

from sandbox.projects.common import time_utils as tu
from sandbox.projects import resource_types


class SandboxStatistics(task.SandboxTask):

    # Task type name of this class.
    type = "SANDBOX_STATISTICS"

    # Client tag requirement (presumably limits execution to server hosts — confirm).
    client_tags = ctc.Tag.SERVER

    # Requested execution space; the original annotates 2048 as "2 Gb".
    execution_space = 2 * 1024  # 2 Gb

    # Name of the CSV file created as a resource by on_execute().
    OUTPUT_FILENAME = "tasks.csv"

    class NumberOfDays(parameters.SandboxIntegerParameter):
        # Size of the look-back window, in days; read from the task context
        # in on_execute() and used to filter audit records and CSV rows.
        name = "number_of_days"
        description = "Number of days"
        required = True
        default_value = 1

    class MailTo(parameters.SandboxStringParameter):
        # Recipients of the HTML report; read from the task context in send_mail().
        name = "mail_to"
        description = "Mail to"
        required = True
        default_value = ""

    # Parameter classes exposed by this task, in declaration order.
    input_parameters = (NumberOfDays, MailTo)

    def __init__(self, *args, **kwargs):
        """Initialise the base task and start with the default look-back window."""
        task.SandboxTask.__init__(self, *args, **kwargs)
        # Placeholder until on_execute() replaces it with the value from the task context.
        default_days = self.NumberOfDays.default_value
        self.days = default_days

    # Per-task record filled in by on_execute(): the last seen audit status and timestamp,
    # task metadata fetched from the Task collection, and the accumulated durations.
    # NOTE(review): relies on common.patterns.Abstract, which presumably pairs each name in
    # __slots__ with the default at the same position in __defs__ — confirm against that helper.
    class TaskInfo(common.patterns.Abstract):
        # Duration buckets; on_execute() appends elapsed seconds to these lists and later
        # replaces each list with its sum.
        class Accum(common.patterns.Abstract):
            __slots__ = ('queue', 'prep', 'exc', 'fin')
            # NOTE(review): the defaults are mutable lists; presumably Abstract copies them per
            # instance — verify, otherwise instances would share the same lists.
            __defs__ = ([], [], [], [])

            def __copy__(self):
                # Copying yields a brand-new, default-initialised instance rather than a
                # duplicate of the current values.
                return self.__class__()

        # 'started' holds the timestamp of the most recent audit record seen for the task
        # (see on_execute), 'created' the task creation time, 'descr' the task description.
        __slots__ = ('status', 'created', 'started', 'type', 'author', 'owner', 'descr', 'accum')
        __defs__ = (None, None, None, None, None, None, None, Accum())

    @staticmethod
    def chunker(data, size):
        while data:
            chunk, data = data[:size], data[size:]
            yield chunk

    @staticmethod
    def audit_fetcher(queue, days):
        """Stream recent audit records into ``queue`` (runs in a worker thread).

        Items are put on the queue in this order:
          1. the total number of matching records (``cursor.count()``);
          2. lists of up to 1000 ``(task_id, status, date)`` rows, ordered by date ascending;
          3. ``None`` as the end-of-stream sentinel.

        The sentinel is sent even when the query fails, so the consumer never blocks
        forever on ``queue.get()``; the exception itself still propagates in this thread.

        :param queue: queue-like object with a ``put`` method (note: shadows the ``queue`` module here).
        :param days: look-back window in days, counted from the current UTC time.
        """
        chunk_size = 1000
        chunk = []
        try:
            with mapping.switch_db(mapping.Audit, ctd.ReadPreference.SECONDARY) as Audit:
                cursor = Audit.objects(
                    date__gte=dt.datetime.utcnow() - dt.timedelta(days=days),
                    status__exists=True
                ).order_by('+date').scalar('task_id', 'status', 'date')
                queue.put(cursor.count())
                for row in cursor:
                    chunk.append(row)
                    if len(chunk) >= chunk_size:
                        queue.put(chunk)
                        chunk = []
            if chunk:
                # Flush the final partial chunk; previously these trailing rows were dropped.
                queue.put(chunk)
        finally:
            queue.put(None)

    @staticmethod
    def task_fetcher(chunker, outq):
        """Fetch task metadata for batches of task ids and push it to ``outq`` (worker thread).

        For every batch of ids produced by ``chunker`` the Task collection is queried and
        each resulting ``(id, type, time.created, author, owner, description)`` row is put
        on ``outq``. A tuple of six ``None`` values is put last as the end-of-stream
        sentinel, also when a query fails, so the consumer cannot block forever.

        :param chunker: iterable yielding collections of task ids.
        :param outq: queue-like object with a ``put`` method.
        """
        try:
            with mapping.switch_db(mapping.Task, ctd.ReadPreference.SECONDARY) as Task:
                for chunk in chunker:
                    rows = Task.objects(id__in=chunk).scalar(
                        'id', 'type', 'time__created', 'author', 'owner', 'description'
                    )
                    # Explicit loop: ``map`` is lazy on Python 3 and would enqueue nothing.
                    for row in rows:
                        outq.put(row)
        finally:
            outq.put((None, ) * 6)

    def get_data(self, path):
        import pandas
        data = pandas.read_csv(path, parse_dates=[3])
        result = data[data['time_created'] > dt.datetime.today() - dt.timedelta(days=self.days)]
        return result

    @staticmethod
    def get_fats_statistics(data):
        """Build three top-20 tables of YA_MAKE execution hours.

        Only rows whose ``type`` is ``'YA_MAKE'`` are considered. The tables group the
        rows by ``test_name``, ``source`` and ``base`` (in that order), all derived from
        the ``description`` column.

        :param data: DataFrame with at least ``type``, ``description`` and
            ``execution_time`` columns (``execution_time`` is divided by 3600 to get hours).
        :return: list of three DataFrames, one per grouping column.

        NOTE(review): ``DataFrame.sort`` and the way ``str.extract`` / ``str.replace`` are
        called look like legacy pandas API — confirm the pinned pandas version before
        touching this code.
        """
        def foo(s):
            # Text before the first ':'; the whole string when there is no ':' or it is
            # the very first character (find() returns -1 or 0).
            s = str(s)
            i = s.find(':')
            return s[:i] if i > 0 else s

        # NOTE(review): ``t`` is a filtered selection of ``data``; assigning columns to it may
        # trigger pandas' SettingWithCopyWarning — consider an explicit copy.
        t = data[data['type'] == 'YA_MAKE']
        # Token starting with "FAT_" (case-insensitive) up to the following whitespace character.
        t['test_name'] = t['description'].str.extract(r'.*(FAT_.*?)\s', flags=re.IGNORECASE)
        t['base'] = t['description'].apply(foo)
        # A leading 32-character [a-z0-9] token is replaced with the label 'Review Board'.
        t['source'] = t['base'].str.replace(r'^[a-z0-9]{32}', 'Review Board')
        t['hours'] = t['execution_time'] / 3600

        results = []
        for coloumn in ('test_name', 'source', 'base'):
            # Aggregate per group and order by total hours, largest first.
            r = t.groupby(coloumn).agg(['sum', 'mean', 'count']).sort(('hours', 'sum'), ascending=False)
            # Share of each group in the total YA_MAKE hours.
            r['%'] = r['hours']['sum'] / t['hours'].sum()
            top = r[['hours', '%']][:20]
            # Visual separator row followed by totals over all groups (not only the top 20).
            top.loc['...'] = '...'
            top.loc['Total'] = r.sum()
            # NOTE(review): chained indexing — this assignment may modify a temporary copy
            # and leave ``top`` unchanged; verify.
            top.loc['Total']['hours']['mean'] = 'N/A'
            results.append(top)

        return results

    @staticmethod
    def get_task_type_statistics(t):
        """Return the top-20 (type, owner) pairs by execution hours, plus a totals row.

        Adds ``hours`` and ``%`` columns to the DataFrame ``t`` that is passed in (the
        caller's frame is modified). Within this file the method is referenced only
        from commented-out code in on_execute().

        :param t: DataFrame with ``type``, ``owner`` and ``execution_time`` columns.
        :return: DataFrame with ``hours`` and ``%`` for the top 20 groups, a ``'...'``
            separator row and a ``'Total'`` row.

        NOTE(review): ``DataFrame.sort`` and grouping by a tuple of column names look
        like legacy pandas API — confirm the pinned pandas version.
        """
        t['hours'] = t['execution_time'] / 3600
        t['%'] = t['hours'] / t['hours'].sum()
        # Rows containing missing values are dropped before grouping.
        r = t.dropna().groupby(('type', 'owner')).sum().sort('hours', ascending=False)
        top = r[['hours', '%']][:20]
        top.loc['...'] = '...'
        # Totals are taken over the whole input frame, including rows dropped above.
        top.loc['Total'] = t.sum()

        return top

    def build_make_report(self, results):
        mail = textwrap.dedent("""
        <h2>Sandbox cluster consumption by FAT_TESTS during last 24h</h2>
        <h3><TOP-20 FAT_TESTS Per test name/h3>
        {}
        <h3>Autocheck vs Review Board and others</h3>
        {}
        <h3><TOP-20 FAT_TESTS Per TestEnv database/h3>
        {}
        """.format(*(_.to_html() for _ in results)))
        return mail

    def send_mail(self, mailbody):
        """Send the HTML report to the recipients configured in the ``mail_to`` parameter.

        :param mailbody: complete HTML document used as the message body.
        """
        recipients = self.ctx.get(self.MailTo.name, '')
        channel.sandbox.send_email(
            recipients,
            '',  # second positional argument is left empty (presumably CC — confirm)
            "Sandbox FAT_TESTS Statistic Report: {}".format(tu.date_ymdhm()),
            mailbody,
            content_type='text/html',
            extra_headers=['reply-to: ymake-dev@yandex-team.ru']
        )
        # logging interpolates with %-style arguments; a '{}' placeholder made the
        # record fail to format instead of printing the recipients.
        logging.info('Email sent to %s', recipients)

    def build_semidist_report(self, devtools_root):
        """Render the semi-distbuild statistics as an HTML table.

        Statistics come from get_semidist_stat(). Rows are ordered by the sum of the
        average build and test times, largest first, and every duration is shown as a
        ``datetime.timedelta``.

        :param devtools_root: path of the devtools checkout, passed to get_semidist_stat().
        :return: HTML fragment (heading plus table).
        """
        def total_average(item):
            # item is a (name, timings) pair
            timings = item[1]
            return timings['build']['average'] + timings['test']['average']

        stat = self.get_semidist_stat(devtools_root)
        parts = [
            "<h3>ArcadiaSDK semi-distbuild statistics for FAT tests (avg for 1 day)</h3><table class='fat_semi_dist'>",
            "<tr><th> Fat name </th><th> build + test </th><th> build </th><th> test </th></tr>",
        ]
        row_template = "<tr><td>{name}</td><td>{sum}</td><td>{build}</td><td>{test}</td></tr>"
        for fat_name, timings in sorted(six.iteritems(stat), key=total_average, reverse=True):
            build_avg = timings['build']['average']
            test_avg = timings['test']['average']
            parts.append(row_template.format(
                name=fat_name,
                sum=dt.timedelta(seconds=build_avg + test_avg),
                build=dt.timedelta(seconds=build_avg),
                test=dt.timedelta(seconds=test_avg),
            ))
        parts.append("</table>")
        return "".join(parts)

    def get_semidist_stat(self, devtools_root):
        """Run ``ya stat arcadiasdk`` for FAT tests and return its parsed JSON output.

        The command's stdout and stderr are written to ``ya_stat_arcadiasdk.out.log`` and
        ``ya_stat_arcadiasdk.err.log`` under the task log path; stdout is then read back
        and decoded as JSON.

        :param devtools_root: directory that contains ``ya/ya``.
        :return: decoded JSON document (build_semidist_report() consumes it as a dict
            of name -> {'build': {'average': ...}, 'test': {'average': ...}}).
        :raises SandboxSubprocessError: presumably raised by ``run_process`` when the
            command fails (``check=True``); on_execute() catches it.
        """
        cmd = [
            sys.executable,
            os.path.join(devtools_root, "ya", "ya"),
            "stat",
            "arcadiasdk",
            "--semi-distbuild-time",
            "--fat-only",
            "--json",
        ]
        logfile = self.log_path("ya_stat_arcadiasdk")
        output = logfile + ".out.log"
        # ``buffering=0`` was dropped: Python 3 rejects unbuffered text-mode files with
        # ValueError, and the setting has no useful effect for a file that is only
        # handed to a child process (assuming run_process passes it on like subprocess does).
        with open(output, "w") as stdout, open(logfile + ".err.log", "w") as stderr:
            process.run_process(cmd, stdout=stdout, stderr=stderr, wait=True, check=True)
        with open(output) as afile:
            return json.load(afile)

    def on_execute(self):
        """Collect per-task timing statistics, dump them to CSV and mail an HTML report.

        Steps:
          1. Stream audit records for the configured number of days from a worker thread
             and accumulate, per task, the seconds elapsed between consecutive records.
          2. Fetch task metadata in a second worker thread and write one CSV row per task
             into a newly created ``OTHER_RESOURCE`` resource.
          3. Build the FAT tests report from that CSV, append the semi-distbuild
             statistics when they can be obtained, store the HTML in the task log
             directory and send it by e-mail.
        """
        devtools_root = svn.Arcadia.get_arcadia_src_dir("arcadia:/arc/trunk/arcadia/devtools")

        self.days = self.ctx.get(self.NumberOfDays.name)
        mapping.ensure_connection("file://{}/.mongodb_ro".format(os.path.expanduser("~zomb-sandbox")))

        dq = queue.Queue()
        worker = th.Thread(target=self.audit_fetcher, args=(dq, self.days))
        worker.start()

        tasks = collections.defaultdict(lambda: self.TaskInfo(accum=self.TaskInfo.Accum()))
        _TS = ctt.Status
        _FIN = tuple(_TS.Group.BREAK) + (_TS.FAILURE, _TS.SUCCESS)

        while True:
            rows = dq.get()
            if rows is None:
                # End-of-stream sentinel. Testing ``not rows`` here would also stop on a
                # record count of 0 and leave the sentinel in the queue reused below.
                break
            if isinstance(rows, int):
                # The fetcher announces the total number of records before the first chunk.
                logging.debug("Rows: %d", rows)
                continue
            try:
                for tid, status, date in rows:
                    t = tasks[tid]
                    if t.started:
                        # Time since the previous audit record of this task is booked to
                        # the stage the task has just left.
                        delta = (date - t.started).total_seconds()
                        if status == _TS.PREPARING:
                            t.accum.queue.append(delta)
                        elif status == _TS.EXECUTING:
                            t.accum.prep.append(delta)
                        elif status in (_TS.FINISHING, _TS.STOPPING):
                            t.accum.exc.append(delta)
                        elif status in _FIN:
                            t.accum.fin.append(delta)
                    t.started = date
                    t.status = status
            except TypeError:
                # Best effort: log the malformed item and keep consuming.
                logging.debug("Rows: %r", rows)
                logging.debug("Q: %s", str(dq))

        worker.join()

        # Snapshot the ids as a list: dict views cannot be sliced by chunker() on Python 3.
        worker = th.Thread(target=self.task_fetcher, args=(self.chunker(list(tasks), 1000), dq))
        worker.start()

        resource = self.create_resource(self.descr, self.OUTPUT_FILENAME, resource_types.OTHER_RESOURCE)
        with open(resource.path, "w") as output_file:
            writer = csv.writer(output_file)
            writer.writerow([
                'id', 'status', 'type', 'time_created', 'author', 'owner', 'description', 'execution_time'
            ])
            finished = False
            while not finished:
                # Block for one row, then drain whatever else is already queued.
                rows = [dq.get()]
                try:
                    while True:
                        rows.append(dq.get(block=False))
                except queue.Empty:
                    pass

                for tid, tt, ct, author, owner, descr in rows:
                    if tid is None:
                        # (None,) * 6 is the fetcher's end-of-stream sentinel.
                        finished = True
                        continue
                    t = tasks[tid]
                    t.type, t.created, t.author, t.owner, t.descr = tt, ct, author, owner, descr or ''
                    # Collapse every list of deltas into its total, then freeze the result
                    # as a plain dict (iterating an Accum presumably yields (slot, value) pairs).
                    for a, v in t.accum:
                        setattr(t.accum, a, sum(v) or 0)
                    t.accum = dict(t.accum)

                    description = t.descr
                    if not isinstance(description, (str, bytes)):
                        # Python 2 ``unicode``: the csv module there needs encoded byte strings.
                        description = description.encode('utf-8')

                    writer.writerow([tid, t.status, tt, ct, author, owner, description, t.accum['exc']])
        worker.join()

        data = self.get_data(resource.path)
        results = []
        # top_tasks = self.get_task_type_statistics(data)
        # logging.debug("top_tasks: %s\n%s", type(top_tasks), top_tasks)
        # results.extend(top_tasks)

        results.extend(self.get_fats_statistics(data))
        report = '''<html>
            <head>
            <style TYPE="text/css">
            table tr td {
                border: 1px solid;
                border-collapse: collapse;
                padding: 15px;
            }
            </style>
            </head>
            <body>
        '''
        report += self.build_make_report(results)
        try:
            report += self.build_semidist_report(devtools_root)
        except SandboxSubprocessError:
            # The semi-distbuild section is optional; send the report without it.
            logging.error("Cant't get semidist report")
        report += '''
            </body>
            </html>
        '''
        with open(self.log_path("mail_content.html"), "w") as afile:
            afile.write(report)
        self.send_mail(report)
