import datetime
import logging
import math
import data_frame as df
import json

from copy import deepcopy
from collections import defaultdict
from plot_maker import make_plot, draw_critical_path_boxplots, draw_cache_hit_scatter, draw_precommit_checks


# Notification-delay threshold: delays (seconds wrapped into a timedelta) are
# compared against this value to decide whether a revision is "out of KPI".
KPI = datetime.timedelta(minutes=20)


def with_debug_log(func):
    """Decorate ``func`` so that every call is traced at DEBUG level.

    A "started" message (with truncated representations of the positional
    and keyword arguments) is logged before the call and a "finished"
    message after it.  The wrapped function's result is returned unchanged;
    exceptions propagate untouched (no "finished" message is logged then).

    :param func: callable to wrap.
    :return: wrapper with the same name/docstring as ``func``.
    """
    import functools

    max_len = 200
    # ``__name__`` is available on Python 2 and 3 functions as well as on
    # builtins, unlike the Python 2 only ``func_name`` attribute.
    func_name = func.__name__

    def shortify(value):
        # Keep log lines bounded: huge data frames may be passed as arguments.
        text = str(value)
        return text[:max_len] + '...' if len(text) > max_len else text

    @functools.wraps(func)
    def logging_func(*args, **kwargs):
        logging.debug('%s started with args: %s, kwargs: %s', func_name, shortify(args), shortify(kwargs))
        result = func(*args, **kwargs)
        logging.debug('%s finished', func_name)
        return result
    return logging_func


@with_debug_log
def read_statistic_json(data):
    """Convert the nested statistics JSON into a single data frame.

    Two passes are made over ``data`` with ``df.read_json``: one collecting
    per-revision/per-type values (recast on ``revision`` and ``type``), and
    one collecting per-revision timestamps.  The two frames are combined
    with ``df.natural_join``.

    :param data: parsed statistics JSON, passed to ``df.read_json`` as is.
    :return: the joined data frame.
    """
    def value_row(keys, value):
        # A three-level path is (revision, type, variable) -> value.
        depth = len(keys)
        if depth == 3:
            revision, build_type, variable = keys
            return {
                'revision': revision,
                'type': build_type,
                'variable': variable,
                'value': value
            }
        # A two-level path other than the timestamp only declares that the
        # (revision, type) pair exists.
        if depth == 2 and keys[1] != 'timestamp':
            revision, build_type = keys
            return {
                'revision': revision,
                'type': build_type
            }
        return None

    def timestamp_row(keys, value):
        # Only two-level paths produce a row here.
        if len(keys) != 2:
            return None
        revision, _ = keys
        return {
            'revision': revision,
            'timestamp': value
        }

    raw_values = df.read_json(data, value_row, empty_dict_as_regular_item=True, max_depth=3)
    values_frame = df.recast(raw_values, ['revision', 'type'])
    timestamps_frame = df.read_json(data, timestamp_row)

    return df.natural_join(values_frame, timestamps_frame)


def aggregate(tbl, avg_cols, func_cols, name, date_proj, post_process=None):
    """Group ``tbl`` by projected date and build type and reduce each column.

    Rows are keyed by ``(date_proj(row['date']), row['type'])``; rows whose
    projection is ``None`` get a ``None`` key.  For every group each column
    from ``avg_cols`` is reduced with the function at the same position in
    ``func_cols`` (an empty list is used when the column is absent).

    :param tbl: source data frame.
    :param avg_cols: column names to reduce.
    :param func_cols: reducers, parallel to ``avg_cols``.
    :param name: name of the output column holding the projected date.
    :param date_proj: callable mapping a row date to a group key or ``None``.
    :param post_process: optional row mapper applied after sorting by ``name``.
    :return: dict ``'average-<column>' -> JSON`` of the recast frame.
    """
    prefix = 'average-'

    def group_key(row):
        projected = date_proj(row['date'])
        return None if projected is None else (projected, row['type'])

    def reduce_group(key, group):
        short_date, build_type = key
        summary = {
            'type': build_type,
            name: short_date,
        }
        for column, func in zip(avg_cols, func_cols):
            values = group[column] if column in group.columns() else []
            summary[prefix + column] = func(values)
        return summary

    def column_to_json(aggregated, column):
        # One output frame per column: rows keyed by ``name``, one column
        # per build type.
        pivot = df.recast(aggregated, [name], 'type', prefix + column).sorted(name)
        if post_process is not None:
            pivot = pivot.map(post_process)
        return pivot.sort_columns([name]).to_json()

    aggregated = df.map_reduce(tbl, group_key, reduce_group)

    output = {}
    for column in avg_cols:
        output[prefix + column] = column_to_json(aggregated, column)
    return output


@with_debug_log
def calc_graphs(data):
    """Build the dashboard payload: headline ratios plus bucketed series.

    The result has two sections:

    * ``"info"`` - generation time (seconds since the epoch, from
      ``utcnow``), fallback ratios per OS and period, and the daily KPI
      counter for linux;
    * ``"df"`` - for each of the windows Day/3Day/Week/Month/Year, the
      output of ``aggregate`` over 12 time buckets.

    :param data: data frame with at least ``date`` and ``type`` columns.
    :return: dict with the ``"info"`` and ``"df"`` sections.
    """
    def days(count):
        return datetime.timedelta(days=count)

    def within(delta):
        def is_recent(row):
            return datetime.datetime.now() - row['date'] < delta
        return is_recent

    def extract_time(name):
        def keep_label(row):
            # Bucket keys are (index, label) pairs; only the label is kept.
            row[name] = row[name][1]
            return row
        return keep_label

    def calc_filtered_data_ratio(data, timedelta, filter_over, filter_under):
        recent = data.filter(within(timedelta))
        over = len(recent.filter(filter_over))
        under = len(recent.filter(filter_under))
        if under == 0:
            return "0/0"
        return "{}/{} ({:.1%})".format(over, under, float(over)/float(under))

    def calc_kpi(data, timedelta, data_filter, kpi_filter):
        selected = data.filter(within(timedelta)).filter(data_filter)
        violations = selected.filter(kpi_filter)
        return "{}/{}".format(len(violations), len(selected))

    def bucket(duration, granularity=12):
        now = datetime.datetime.now()

        def get_bucket(moment):
            # Anything older than the window start is dropped.
            if now - duration > moment:
                return None
            offset = (moment - now + duration).total_seconds()
            index = int(offset * 1.0 / duration.total_seconds() * granularity)
            middle_offset = datetime.timedelta(seconds=duration.total_seconds() * (index + 0.5) / granularity)
            middle = now - duration + middle_offset
            return index, middle.strftime('%b %d %H:%M')

        return get_bucket

    def without_none(values):
        return [value for value in values if value is not None]

    def quantile(q):
        def quantile_func(values):
            ordered = sorted(without_none(values))
            if len(ordered) > 0:
                # NOTE(review): division by 3600 presumably converts seconds to hours - confirm.
                return ordered[int(q * len(ordered))] / 3600.0
            return None
        return quantile_func

    def count_unique(values):
        return len(set(without_none(values)))

    def summarize(values):
        return sum(without_none(values))

    avg_columns = ['build-time', 'testing-time', 'start-delay', 'notification-delay', 'expected-notification-delay',
                   'revision', 'emails_count']
    func_columns = [quantile(0.9)] * 5 + [count_unique] + [summarize]

    # XXX
    logging.debug("data = %s\ndata.columns() = %s\ndata.rows() = %s", data, data.columns(), data.rows())

    epoch = datetime.datetime(1970, 1, 1)
    info = {
        "generated-time": int((datetime.datetime.utcnow() - epoch).total_seconds()),
    }

    # (output key, window in days, build type)
    fallback_reports = (
        ("fallbacks-ratio-day-linux", 1, 'distbuild_linux'),
        ("fallbacks-ratio-week-linux", 7, 'distbuild_linux'),
        ("fallbacks-ratio-month-linux", 30, 'distbuild_linux'),
        ("fallbacks-ratio-day-freebsd", 1, 'distbuild_freebsd'),
        ("fallbacks-ratio-week-freebsd", 7, 'distbuild_freebsd'),
        ("fallbacks-ratio-month-freebsd", 30, 'distbuild_freebsd'),
    )
    for report_key, window_days, build_type in fallback_reports:
        info[report_key] = calc_filtered_data_ratio(
            data, days(window_days),
            {'type': build_type, 'build_system_used': 'ymake'},
            {'type': build_type},
        )

    info["daily-kpi-linux"] = calc_kpi(
        data, days(1), {'type': 'distbuild_linux'},
        lambda row: datetime.timedelta(seconds=row['expected-notification-delay']) > KPI,
    )

    # (output key, window in days)
    series_windows = (('Day', 1), ('3Day', 3), ('Week', 7), ('Month', 30), ('Year', 180))
    frames = {}
    for label, window_days in series_windows:
        frames[label] = aggregate(
            data,
            avg_columns, func_columns,
            'time', bucket(days(window_days)), extract_time('time')
        )

    return {
        "info": info,
        "df": frames,
    }


@with_debug_log
def raw_data_to_data_frame(raw_data):
    """Parse the raw statistics JSON and enrich every row with derived columns."""
    frame = read_statistic_json(raw_data)
    return frame.map(add_extra_columns)


def within(from_date, to_date):
    """Return a row predicate for the open interval ``(from_date, to_date)``.

    The predicate is false for rows without a ``'date'`` key and for dates
    equal to either bound.
    """
    def in_range(row):
        if 'date' not in row:
            return False
        return from_date < row['date'] < to_date
    return in_range


@with_debug_log
def calc_stat_for_buildtype_and_period(data, buildtype, from_date, to_date, calc_previous=False):
    """Compute summary statistics for one build type over one time window.

    Rows of ``data`` with ``type == buildtype`` and a date strictly between
    ``from_date`` and ``to_date`` form the "current" sample.  When
    ``calc_previous`` is true, the window of the same length immediately
    preceding it forms the "before" sample; otherwise "before" is empty.

    The result maps a metric name to a dict with optional ``'current'`` and
    ``'before'`` entries (an entry is omitted when its sample is empty):

    * every stage column: quantiles ``p50``/``p90``/``p95``/``p99`` plus an
      ``'outliers'`` count when the stage was the largest one in at least
      one out-of-KPI revision;
    * revision counters, out-of-KPI and fallback counters, e-mail counters;
    * up to three ``'all-<n>: <user>'`` entries for the most e-mailed users.

    :param data: data frame produced by ``raw_data_to_data_frame``.
    :param buildtype: value of the ``type`` column to select.
    :param from_date: exclusive lower bound of the current window.
    :param to_date: exclusive upper bound of the current window.
    :param calc_previous: also compute values for the preceding window.
    :return: dict of metrics as described above.
    """
    stage_columns = [
        'build-time', 'testing-time', 'notification-delay', 'user-notification-delay', 'gen-graph-time',
        'configuring-time', 'sandbox-queue-lag', 'sandbox-testenv-lag', 'distbs_worktime', 'previous-await-lag',
        'svn-testdata-delay', 'svn-source-delay', 'critical-path', 'critical-path-copying',
        'critical-path-testing', 'critical-path-building', 'critical-path-svn-source',
        'build-task-worktime', 'wait-child-lag', 'testenv-lag', 'sandbox-testenv-kpi',
    ]

    # Stages competing for the "slowest stage" title of an out-of-KPI revision.
    steps = ['testenv-lag', 'sandbox-testenv-lag', 'sandbox-queue-lag', 'svn-source-delay', 'svn-testdata-delay',
             'build-time', 'testing-time', 'previous-await-lag', 'wait-child-lag', ]

    def calc(func, cur, prev, *args):
        # Empty samples are skipped, so ``func`` never receives an empty list.
        result = {}
        if len(cur) > 0:
            result['current'] = func(cur, *args)
        if len(prev) > 0:
            result['before'] = func(prev, *args)
        return result

    def calc_quantiles(cur, prev):
        def quantile(values, q):
            # ``values`` arrives sorted from gen_for_column.
            return values[int(len(values) * q)]

        return dict(('p%2.f' % (x * 100,), calc(quantile, cur, prev, x)) for x in [0.5, 0.9, 0.95, 0.99])

    def calc_func(cur, prev, func):
        return calc(func, cur, prev)

    def without_none(items):
        return [el for el in items if el is not None]

    def gen_for_column(func, cur_data, prev_data, column_name, *args):
        cur = sorted(without_none(cur_data.subset([column_name])[column_name]))
        prev = sorted(without_none(prev_data.subset([column_name])[column_name]))
        return func(cur, prev, *args)

    def countf(func):
        return lambda lst: sum(map(func, lst))

    greater_than_0 = countf(lambda x: x > 0)
    out_of_kpi = countf(lambda x: datetime.timedelta(seconds=x) > KPI)
    is_fallback = countf(lambda x: x == 'ymake')

    def count_users_emails(email_logs, email_reason):
        # Returns (user, count) pairs sorted by ascending count.
        counter = defaultdict(int)
        for email_log in email_logs:
            for name, reason in email_log.items():
                if (email_reason == 'all' or reason == email_reason) and name != 'arc-builds':
                    counter[name] += 1
        return sorted(counter.items(), key=lambda x: x[1])

    def gen_for_nth_most_spammed(spam_counter_current, spam_counter_before, pos, reason):
        if pos > len(spam_counter_current):
            return {}
        # The list is sorted ascending, so position ``pos`` from the end is
        # the pos-th most e-mailed user.
        user, count = spam_counter_current[-pos]
        old_count = dict(spam_counter_before).get(user, 0)
        return {'{0}-{1}: {2}'.format(reason, str(pos), user): {'current': count, 'before': old_count}}

    def get_stages_outliers(cur_data):
        def out_of_kpi_filter(row):
            if row.get('notification-delay'):
                return datetime.timedelta(seconds=row['notification-delay']) >= KPI
            return False

        out_of_kpi_stages = cur_data.filter(out_of_kpi_filter).subset(steps)
        stages_counter = defaultdict(int)
        for row in out_of_kpi_stages.rows():
            # Missing measurements are dropped before taking the maximum:
            # None is not orderable against numbers on Python 3, and a row
            # with no measurements at all must simply be skipped.
            measured = [(stage, value) for stage, value in row.items() if value is not None]
            if measured:
                bad_stage, _ = max(measured, key=lambda x: x[1])
                stages_counter[bad_stage] += 1
        return [stages_counter[stage] for stage in stage_columns]

    typed_data = data.filter({'type': buildtype})
    cur_data = typed_data.filter(within(from_date, to_date))
    if calc_previous:
        delta = to_date - from_date
        prev_data = typed_data.filter(within(from_date - delta, to_date - delta))
    else:
        prev_data = df.DataFrameBuilder().dump()

    stage_outliers = get_stages_outliers(cur_data)
    result = {}
    for stage, outliers_count in zip(stage_columns, stage_outliers):
        result[stage] = {'outliers': outliers_count} if outliers_count else {}
        result[stage].update(gen_for_column(calc_quantiles, cur_data, prev_data, stage))

    result['revisions'] = gen_for_column(calc_func, cur_data, prev_data, 'date', len)
    result['min_revision'] = gen_for_column(calc_func, cur_data, prev_data, 'revision', lambda lst: int(min(lst)))
    result['max_revision'] = gen_for_column(calc_func, cur_data, prev_data, 'revision', lambda lst: int(max(lst)))

    result['out-of-kpi'] = gen_for_column(calc_func, cur_data, prev_data, 'notification-delay', out_of_kpi)
    result['fallbacks'] = gen_for_column(calc_func, cur_data, prev_data, 'build_system_used', is_fallback)
    result['emails-count'] = gen_for_column(calc_func, cur_data, prev_data, 'emails_count', sum)
    result['emails-count-owners'] = gen_for_column(calc_func, cur_data, prev_data,
                                                   'test_owner_emails_count', sum)
    result['emails-count-commit-authors'] = gen_for_column(calc_func, cur_data, prev_data,
                                                           'commit_author_emails_count', sum)
    result['emailed-revisions'] = gen_for_column(calc_func, cur_data, prev_data, 'emails_count', greater_than_0)
    result['emailed-revisions-owners'] = gen_for_column(calc_func, cur_data, prev_data, 'test_owner_emails_count',
                                                        greater_than_0)
    result['emailed-revisions-commit-authors'] = gen_for_column(calc_func, cur_data, prev_data,
                                                                'commit_author_emails_count', greater_than_0)
    result['emailed-revisions-out-of-kpi'] = gen_for_column(calc_func, cur_data, prev_data,
                                                            'user-notification-delay', out_of_kpi)

    # The per-user counters do not depend on the requested position, so
    # they are computed once and reused for the top three entries.
    spam_current = count_users_emails(without_none(cur_data['email_log']), 'all')
    spam_before = count_users_emails(without_none(prev_data['email_log']), 'all')
    for pos in (1, 2, 3):
        result.update(gen_for_nth_most_spammed(spam_current, spam_before, pos=pos, reason='all'))

    return result


@with_debug_log
def structurize_whole_data(data, subdata_structurizer, buildtypes=None, periods=None):
    """Apply ``subdata_structurizer`` to every (build type, period) pair.

    :param data: data frame; its ``type`` column supplies the build types
        when ``buildtypes`` is empty or ``None``.
    :param subdata_structurizer: callable
        ``(data, buildtype, from_time, to_time) -> value``.
    :param buildtypes: iterable of build types to process.
    :param periods: mapping ``name -> (from_time, to_time)``; ``None`` is
        treated as "no periods".
    :return: ``{buildtype: {period_name: value}}``.
    """
    buildtypes = buildtypes or set(data['type'])
    # ``None`` is the declared default; without this guard the call would
    # fail on ``None.items()``.
    if periods is None:
        periods = {}

    return {
        buildtype: {
            name: subdata_structurizer(data, buildtype, from_time, to_time)
            for name, (from_time, to_time) in periods.items()
        }
        for buildtype in buildtypes
    }


def add_extra_columns(row):
    """Add derived columns to a statistics row, in place, and return it.

    Every derived value is written only when all of its source fields are
    present and not ``None``.  Highlights:

    * durations such as ``build-time`` or ``sandbox-queue-lag`` are
      differences of raw event fields;
    * ``date`` is the row ``timestamp`` converted to a local ``datetime``;
    * ``emails_count`` is filled from the per-kind counters when absent;
    * ``expected-notification-delay`` is the real notification delay, or the
      time elapsed since ``date`` when no notification was sent yet;
    * ``statistics_graph_*`` fields are divided by 1000 into
      ``critical-path*`` columns, and ``distbs_worktime`` is divided by 1000
      in place;
    * ``critical_path`` is decoded from its JSON string.

    NOTE(review): the function is not idempotent - ``distbs_worktime`` is
    rescaled and ``critical_path`` is decoded on every call.

    :param row: dict-like row; must support ``get``, item access and ``in``.
    :return: the same ``row`` object.
    """
    def has(*keys):
        return all(row.get(key) is not None for key in keys)

    if has('build_started', 'build_finished'):
        row['build-time'] = row['build_finished'] - row['build_started']

    if has('testing_started', 'testing_finished'):
        row['testing-time'] = row['testing_finished'] - row['testing_started']

    if has('timestamp'):
        row['date'] = datetime.datetime.fromtimestamp(math.floor(float(row['timestamp'])))

    if has('task_started'):
        row['start-delay'] = row['task_started']

    if has('notifications_sent'):
        row['notification-delay'] = row['notifications_sent']

    if has('email_log'):
        row['test_owner_emails_count'] = sum(
            1 for name, reason in row['email_log'].items()
            if reason == 'owner' and name != 'arc-builds'
        )

    if 'emails_count' not in row and (has('commit_author_emails_count') or has('test_owner_emails_count')):
        row['emails_count'] = (row.get('commit_author_emails_count', 0) or 0) + \
                              (row.get('test_owner_emails_count', 0) or 0)

    # E-mails may be counted for a row that has no notification timestamp;
    # in that case there is no delay to copy (previously a KeyError).
    if (row.get('emails_count') or 0) > 0 and has('notification-delay'):
        row['user-notification-delay'] = row['notification-delay']

    if has('configure_started', 'configure_finished'):
        row['configuring-time'] = row['configure_finished'] - row['configure_started']

    if has('gen_graph_finished', 'gen_graph_started'):
        row['gen-graph-time'] = row['gen_graph_finished'] - row['gen_graph_started']

    if has('task_started', 'task_created'):
        row['sandbox-queue-lag'] = row['task_started'] - row['task_created']

    if has('task_created', 'task_finished', 'task_processed', 'before_create_task'):
        row['sandbox-testenv-lag'] = (row['task_created'] - row['before_create_task'] +
                                      row['task_processed'] - row['task_finished'])

    if has('before_create_task'):
        row['testenv-lag'] = row['before_create_task']

    if has('notifications_sent', 'task_processed'):
        row['previous-await-lag'] = row['notifications_sent'] - row['task_processed']

    if has('distbs_worktime'):
        row['distbs_worktime'] /= 1000.0

    if has('notification-delay'):
        row['expected-notification-delay'] = row['notification-delay']
    elif has('date'):
        # Not notified yet: the delay is at least the age of the revision.
        # Rows without a timestamp have no date and get no estimate
        # (previously a KeyError).
        row['expected-notification-delay'] = (datetime.datetime.now() - row['date']).total_seconds()

    if has('svn_testdata_finished', 'svn_testdata_started'):
        row['svn-testdata-delay'] = row['svn_testdata_finished'] - row['svn_testdata_started']

    if has('svn_source_finished', 'svn_source_started'):
        row['svn-source-delay'] = row['svn_source_finished'] - row['svn_source_started']

    # (derived column, raw field divided by 1000)
    scaled_columns = (
        ('critical-path', 'statistics_graph_time'),
        ('critical-path-copying', 'statistics_graph_copying_time'),
        ('critical-path-testing', 'statistics_graph_testing_time'),
        ('critical-path-building', 'statistics_graph_compiling_time'),
        ('critical-path-svn-source', 'statistics_graph_source_time'),
        ('critical-path-tests-data', 'statistics_graph_tests_data_time'),
    )
    for target, source in scaled_columns:
        if has(source):
            row[target] = row[source] / 1000.0

    if has('task_started', 'build_task_finished'):
        row['build-task-worktime'] = row['build_task_finished'] - row['task_started']

    if has('build_task_finished', 'task_finished'):
        row['wait-child-lag'] = row['task_finished'] - row['build_task_finished']

    if has('task_finished'):
        if has('testing_finished'):
            row['task-finalization'] = row['task_finished'] - row['testing_finished']
        elif has('build_finished'):
            row['task-finalization'] = row['task_finished'] - row['build_finished']

    if has('distbs_running', 'distbs_ok') and (row.get('critical-path') or 0) > 0:
        # Cumulative offsets of the critical-path phases, starting from
        # ``distbs_running``.
        row['distbs_critical_svn_prepare_copy'] = row['distbs_running'] + row.get('critical-path-svn-source', 0)
        row['distbs_critical_tests_data_prepare_copy'] = row['distbs_running'] + row.get('critical-path-tests-data', 0)
        row['distbs_critical_prepare_copy'] = max(row['distbs_critical_svn_prepare_copy'],
                                                  row['distbs_critical_tests_data_prepare_copy'])
        row['distbs_critical_copy_build'] = row['distbs_critical_prepare_copy'] + row.get('critical-path-copying', 0)
        row['distbs_critical_build_test'] = row['distbs_critical_copy_build'] + row.get('critical-path-building', 0)
        row['distbs_critical_test_ok'] = row['distbs_critical_build_test'] + row.get('critical-path-testing', 0)

    if has('critical_path'):
        row['critical_path'] = json.loads(row['critical_path'])

    if has('build_task_finished', 'task_processed'):
        # Prefer the first build task start when known, else the task start.
        if has('first_build_task_started'):
            row['sandbox-testenv-kpi'] = row['task_processed'] - (row['build_task_finished'] - row['first_build_task_started'])
        elif has('task_started'):
            row['sandbox-testenv-kpi'] = row['task_processed'] - (row['build_task_finished'] - row['task_started'])

    return row


@with_debug_log
def calc_stat(data, buildtypes, periods):
    """Compute per-build-type, per-period statistics with previous-window values."""
    def stat_structurizer(frame, buildtype, window_start, window_end):
        # The trailing True asks for the preceding window as well.
        return calc_stat_for_buildtype_and_period(frame, buildtype, window_start, window_end, True)

    stats = structurize_whole_data(data, stat_structurizer, buildtypes, periods)
    return stats


@with_debug_log
def draw_pictures(data, buildtypes, periods, now, task, dump_to_file):
    """Render the plots for every build type and period.

    In addition to ``periods`` a ``'3hours'`` window ending at ``now`` is
    always rendered.  For each pair a regular plot is made; in emulation
    mode (the task context has a ``torrent`` entry) a box plot without a
    legend is added, otherwise ``distbuild_linux`` additionally gets a
    critical-path box plot.

    :return: ``{buildtype: {period_name: [plots]}}``.
    """
    if task:
        emulation = task.ctx.get('torrent') is not None
    else:
        emulation = False

    if task is not None:
        dash = task.ctx.get('dash', False)
    else:
        dash = False

    def plot_structurizer(data, buildtype, date_from, date_to):
        shared = dict(path=dump_to_file, buildtype=buildtype, date_from=date_from, date_to=date_to, dash=dash)
        plots = [make_plot(task.client_info, boxes=False, **shared)]

        if emulation:
            plots.append(make_plot(task.client_info, boxes=True, disable_legend=True, **shared))
        elif buildtype == 'distbuild_linux':  # draw critical_path
            scores = defaultdict(list)
            for critical_path in data.filter(within(date_from, date_to))['critical_path']:
                if not critical_path:
                    continue
                for cp_item in critical_path:
                    # Each item starts with (node name, time); time is divided by 1000.
                    scores[cp_item[0]].append(int(cp_item[1]) / 1000.0)
            span_days = str((date_to - date_from).days)
            filename = '_'.join(('critpath', buildtype, span_days)) + '.png'
            plots.append(draw_critical_path_boxplots(task.client_info, scores, filename))

        return plots

    plot_periods = deepcopy(periods)
    plot_periods['3hours'] = (now - datetime.timedelta(hours=3), now)  # for dashboard
    return structurize_whole_data(data, plot_structurizer, buildtypes, plot_periods)


@with_debug_log
def draw_boxes(data, now, task, dump_to_file):
    """Render box plots for ``distbuild_linux`` over 3 hours, a day and a week.

    :return: ``{'distbuild_linux': {period_name: [plot]}}``.
    """
    dash = False if task is None else task.ctx.get('dash', False)

    def box_structurizer(data, buildtype, date_from, date_to):
        plot = make_plot(task.client_info, path=dump_to_file, buildtype=buildtype, date_from=date_from,
                         date_to=date_to, boxes=True, dash=dash)
        return [plot]

    # All windows end at ``now``; only their length (in hours) differs.
    periods = {}
    for name, length_hours in (('3hours', 3), ('day', 24), ('week', 24 * 7)):
        periods[name] = (now - datetime.timedelta(hours=length_hours), now)

    return structurize_whole_data(data, box_structurizer, buildtypes=['distbuild_linux'], periods=periods)


@with_debug_log
def draw_cache_hit(data, now, periods, task, dump_to_file):
    """Render cache-hit scatter plots for ``distbuild_linux``.

    A ``'3hours'`` window ending at ``now`` is added to ``periods``.  Each
    window is passed to the plotter as its length in whole seconds followed
    by ``'s'``.

    :return: ``{'distbuild_linux': {period_name: [plot]}}``.
    """
    if task is not None:
        dash = task.ctx.get('dash', False)
    else:
        dash = False

    def window_limit(date_from, date_to):
        seconds = int((date_to - date_from).total_seconds())
        return str(seconds) + 's'

    def cache_hit_structurizer(data, buildtype, date_from, date_to):
        plot = draw_cache_hit_scatter(task.client_info, path=dump_to_file,
                                      dash=dash, limit=window_limit(date_from, date_to))
        return [plot]

    cache_hit_periods = deepcopy(periods)
    cache_hit_periods['3hours'] = (now - datetime.timedelta(hours=3), now)

    return structurize_whole_data(data, cache_hit_structurizer, buildtypes=['distbuild_linux'],
                                  periods=cache_hit_periods)


def draw_rb_checks(task, now, rb_data):
    """Render precommit-check plots for 3 hours, a day, a week and a month.

    Every period gets a regular plot; all periods except ``'month'`` also
    get a box plot.  ``now`` is accepted but not used.

    :return: ``{'distbuild_linux': {period_name: [plots]}}``.
    """
    dash = False if task is None else task.ctx.get('dash', False)

    def hours_limit(hours):
        return str(hours) + 'h'

    periods = {'3hours': hours_limit(3), 'day': hours_limit(24), 'week': hours_limit(7 * 24),
               'month': hours_limit(30 * 24)}

    images = {}
    for name, limit in periods.items():
        plots = [
            draw_precommit_checks(task.client_info, limit=limit, review=True, dash=dash, wood=False, rb_data=rb_data),
        ]
        if name != 'month':
            plots.append(
                draw_precommit_checks(task.client_info, limit=limit, boxes=True, review=True, dash=dash, wood=False,
                                      rb_data=rb_data)
            )
        images[name] = plots

    return {'distbuild_linux': images}


def inject_rb_data(result, head, data, now):
    """Add precommit-check statistics to the ``distbuild_linux`` stats.

    For each of the periods ``day``/``week``/``month`` already present in
    ``result['stat']['distbuild_linux']`` two entries are added:

    * ``'checks'`` - number of rows in the current and the preceding window;
    * ``'precommit-check-delay'`` - p50/p90/p95/p99 of the non-``None``
      ``end_time`` values in both windows (``None`` for an empty sample).

    :param result: result dict; modified in place and returned.
    :param head: list of column names of ``data``; must contain
        ``'timestamp'``, ``'revision'`` and ``'end_time'``.
    :param data: rows (sequences) laid out according to ``head``.
    :param now: ``datetime`` marking the end of the current window.
    :return: ``result``.
    """
    def percentile(percent):
        def pick(values):
            if not values:
                return None
            # Floor division keeps the index integral on Python 3 too.
            return sorted(values)[len(values) * percent // 100]
        return pick

    def split_by_window(column, delta):
        # Returns (before, current): values of ``column`` for rows whose
        # timestamp falls into the preceding / the current window.
        column_index, timestamp_index = head.index(column), head.index('timestamp')
        before, current = [], []
        for line in data:
            moment = datetime.datetime.fromtimestamp(float(line[timestamp_index]))
            if now - delta < moment < now:
                current.append(line[column_index])
            elif now - 2 * delta < moment < now - delta:
                before.append(line[column_index])
        return before, current

    def stat(func, before, current):
        return {'before': func(before), 'current': func(current)}

    def pstat(before, current):
        return {'p{}'.format(x): stat(percentile(x), before, current) for x in (50, 90, 95, 99)}

    def without_none(values):
        return [value for value in values if value is not None]

    windows = {
        'day': datetime.timedelta(days=1),
        'week': datetime.timedelta(days=7),
        'month': datetime.timedelta(days=30),
    }

    linux_stat = result['stat']['distbuild_linux']
    for period_name in linux_stat:
        if period_name not in windows:
            continue
        delta = windows[period_name]

        before, current = split_by_window('revision', delta)
        linux_stat[period_name]['checks'] = stat(len, before, current)

        before, current = split_by_window('end_time', delta)
        linux_stat[period_name]['precommit-check-delay'] = pstat(without_none(before), without_none(current))

    return result


@with_debug_log
def run(time_limit=None, dump_to_file=None, task=None, raw_data=None, buildtypes=None, periods=None):
    """Run the whole pipeline: parse, dump to CSV, compute stats, draw images.

    :param time_limit: accepted but not used by this function.
    :param dump_to_file: path of the CSV dump; defaults to ``'dump.csv'``.
    :param task: object exposing ``ctx`` (options ``draw_pictures``,
        ``draw_boxes``, ``torrent``, ``dash``) and ``client_info``.
    :param raw_data: raw statistics JSON for ``raw_data_to_data_frame``.
    :param buildtypes: build types to process (all found in data if empty).
    :param periods: mapping ``name -> (from, to)``; defaults to the last
        day, week and month counted back from now.
    :return: dict with ``'stat'``, ``'images'`` and, outside emulation mode,
        the ``calc_graphs`` sections.
    """
    now = datetime.datetime.now()
    days = lambda count: datetime.timedelta(days=count)
    DEFAULT_PERIODS = {'day': (now - days(1), now), 'week': (now - days(7), now), 'month': (now - days(30), now)}
    periods = DEFAULT_PERIODS if periods is None else periods

    pictures = task.ctx.get('draw_pictures', False) if task is not None else False
    emulation = task.ctx.get('torrent') is not None if task else False
    boxes = task.ctx.get('draw_boxes', False) if task is not None else False
    dump_to_file = dump_to_file or 'dump.csv'

    data = raw_data_to_data_frame(raw_data)

    with open(dump_to_file, 'w') as stream:
        df.write_csv(data, stream)

    result = calc_graphs(data) if not emulation else {}

    result['stat'] = calc_stat(data, buildtypes, periods)

    result['images'] = {'cache_hit': draw_cache_hit(data, now, periods, task, dump_to_file)}

    if pictures:
        result['images']['plot'] = draw_pictures(data, buildtypes, periods, now, task, dump_to_file)

    if boxes:
        result['images']['box'] = draw_boxes(data, now, task, dump_to_file)

    if not emulation:
        rb_data = 'rb_data.json'
        result['images']['rb_checks'] = draw_rb_checks(task, now, rb_data)
        if pictures:
            # Show the first precommit-check plot right after the main plot
            # of the same build type and period.
            for btype in result['images']['rb_checks']:
                for period in result['images']['rb_checks'][btype]:
                    if btype in result['images']['plot'] and period in result['images']['plot'][btype]:
                        img = result['images']['rb_checks'][btype][period][0]
                        try:
                            result['images']['plot'][btype][period].insert(1, img)
                        except Exception:
                            result['images']['plot'][btype][period].append(img)
        # Close the file deterministically instead of leaking the handle,
        # and keep the data frame in ``data`` untouched.
        with open(rb_data) as rb_stream:
            head, rb_rows = json.load(rb_stream)
        try:
            result = inject_rb_data(result, head, rb_rows, now)
        except Exception as ex:
            # Best effort: review statistics are optional, but keep the
            # traceback in the log for diagnostics.
            logging.exception(ex)

    return result


if __name__ == '__main__':
    # Script mode: enable DEBUG logging, run the pipeline with default
    # arguments and dump the result as sorted, indented JSON.
    logging.basicConfig(level=logging.DEBUG)
    result = run()
    with open('result.json', 'w') as stream:
        json.dump(result, stream, indent=4, sort_keys=True)

#    import testenv
#    raw_data = testenv.fetch(datetime.timedelta(days=365))
#    dump = raw_data_to_data_frame(raw_data)
#    with open('dump.csv', 'w') as stream:
#        df.write_csv(dump, stream)
