#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import sys
import os
import codecs
import argparse

import nile
from nile.api.v1 import (
    clusters,
    filters as nf,
    extractors as ne,
    aggregators as na,
    statface as ns,
    Record
)
from qb2.api.v1 import (
    filters as sf,
    extractors as se
)
import getpass
import datetime
import itertools
from pytils import yt_get_date_from_table, date_range


def as_rename(records):
    """Replace each record's ``autonomous_system`` with a readable name.

    The mapping is loaded once from the local file ``as-name.yaml`` and every
    incoming record is re-emitted with its ``autonomous_system`` field passed
    through ``get_asname``.

    :param records: iterable of records that carry an ``autonomous_system``
        field.
    :return: generator of ``Record`` objects with the field rewritten.
    """
    import yaml
    # NOTE(review): yaml.CLoader is the C counterpart of yaml.Loader, which
    # can construct arbitrary Python objects. That is acceptable only while
    # as-name.yaml is a trusted dictionary file; consider yaml.CSafeLoader.
    # The context manager closes the file handle as soon as parsing is done
    # instead of leaking it for the lifetime of the mapper.
    with open('as-name.yaml') as mapping_file:
        asname_dict = yaml.load(mapping_file, Loader=yaml.CLoader)
    for rec in records:
        vrs = vars(rec)
        vrs['autonomous_system'] = get_asname(
            vrs['autonomous_system'], asname_dict
        )
        yield Record(**vrs)


def get_asname(name, as_dict):
    """Translate an autonomous-system key into a shortened display name.

    When ``as_dict`` has no truthy entry for ``name`` the key itself is
    returned unchanged. Otherwise the mapped value is shortened: if it
    contains ``'- '`` only the piece right after the first ``'- '`` is kept,
    and then everything from the first ``'. '`` onwards is dropped.

    :param name: key to look up.
    :param as_dict: mapping from keys to descriptive names.
    :return: the shortened mapped name, or ``name`` when nothing is mapped.
    """
    mapped = as_dict.get(name)
    if not mapped:
        return name
    dash_parts = mapped.split('- ')
    if len(dash_parts) > 1:
        mapped = dash_parts[1]
    return mapped.split('. ')[0]


def video_as_stats_filter(request):
    """Tell whether a request should be counted in the statistics.

    :param request: request string, or ``None``.
    :return: ``True`` only when ``request`` is not ``None`` and contains
        neither ``'for-regional-cache=1'`` nor ``'monitoring='``.
    """
    if request is None:
        return False
    return (
        'for-regional-cache=1' not in request
        and 'monitoring=' not in request
    )


def get_error_id(parsed_parameters):
    """Extract the error identifier from parsed request parameters.

    The first ``error_id`` value wins when one is present. Otherwise an
    ``event_id`` whose first value is ``'Buffer.Empty'`` is reported as the
    error. Anything else, including missing or malformed parameters, yields
    ``None``.

    :param parsed_parameters: mapping from parameter name to a sequence of
        values, or ``None``.
    :return: the error identifier, or ``None`` when there is none.
    """
    try:
        error_ids = parsed_parameters.get('error_id')
        if error_ids:
            return error_ids[0]

        event_ids = parsed_parameters.get('event_id')
        if event_ids[0] == 'Buffer.Empty':
            return event_ids[0]
    except (AttributeError, TypeError, LookupError):
        # LookupError covers an empty ``event_id`` list (IndexError) and
        # mapping-shaped values (KeyError), so a malformed row is treated as
        # "no error" like every other bad shape instead of raising.
        return None

    return None


def totalize(records):
    """Fan every record out into its own row plus the roll-up rows.

    A record with a truthy ``error_id`` is emitted under the labels
    ``error_id``, ``'_all_errors_'`` and ``'_total_'``; a record without one
    is emitted under ``'no errors'`` and ``'_total_'``. Each error label is
    combined with both the record's ``autonomous_system`` and ``'_total_'``
    as the provider, so summing downstream produces the totals.

    :param records: iterable of records with ``error_id`` and
        ``autonomous_system`` attributes.
    :return: generator of ``Record`` objects carrying the extra ``error``
        and ``provider`` fields.
    """
    for rec in records:
        if rec.error_id:
            error_labels = (rec.error_id, '_all_errors_', '_total_')
        else:
            error_labels = ('no errors', '_total_')
        provider_labels = (rec.autonomous_system, '_total_')
        # The same field dict is updated in place for each combination.
        fields = vars(rec)
        for error, provider in itertools.product(
            error_labels, provider_labels
        ):
            fields['error'] = error
            fields['provider'] = provider
            yield Record(**fields)


def add_shares(groups):
    """Attach a ``share`` field to every record of every group.

    Within a group the record whose ``error`` is ``'_total_'`` supplies the
    denominator and gets ``share`` equal to 1. Every other record gets
    ``count / total`` rounded to 6 digits. Records that arrive before the
    total is known are held back and emitted after the rest of the group.

    :param groups: iterable of ``(key, records)`` pairs; the key is unused.
    :return: generator of ``Record`` objects with ``share`` added.
    """
    for _key, rows in groups:
        denominator = None
        deferred = []
        for row in rows:
            is_total = row.error == '_total_'
            if not is_total and denominator is None:
                # Total not seen yet: the share cannot be computed now.
                deferred.append(row)
                continue
            if is_total:
                denominator = float(row.count)
                share = 1
            else:
                share = round(row.count / denominator, 6)
            fields = vars(row)
            fields['share'] = share
            yield Record(**fields)
        for row in deferred:
            fields = vars(row)
            fields['share'] = round(row.count / denominator, 6)
            yield Record(**fields)


def process_date(date, hahn, args):
    """Build the error statistics tables for one date and publish them.

    Two tables are written under ``//home/videolog/strm_stats/<date>``:
    ``errors`` (counts and shares per provider/error) and ``errors_by_user``
    (the same columns plus the 0.1 ... 0.9 quantile estimates of the counts
    computed per yandexuid). Both tables are then read back and published as
    daily Statface reports.

    :param date: value formatted into the input and output table paths and
        written to the ``fielddate`` column.
    :param hahn: cluster object used to create the job, check table
        existence and read the resulting tables.
    :param args: parsed command-line arguments; ``stat_login`` and
        ``stat_password`` are passed to the Statface client.
    """
    date_f = format(date)
    job_root = '//home/videolog/strm_stats/{}'.format(date)
    as_stats_table = '{}/as_stats'.format(job_root)

    job = hahn.job().env(parallel_operations_limit=10)

    logs = job.table('//statbox/strm-access-log/{}'.format(date))

    # The parsed intermediate table is built only when it does not exist
    # yet; otherwise the stored copy is reused.
    if not hahn.driver.exists(as_stats_table):
        stats = logs.qb2(
            log='strm-access-log',
            fields=[
                # TODO: remove autonomous_systems
                ['date', 'timestamp', 'page', 'parsed_parameters'],
                se.custom('error_id', get_error_id, 'parsed_parameters'),
                se.unfold('autonomous_system', sequence='autonomous_systems'),
                'request_time',
                'bytes_sent',
                'yandexuid',
                'referer',
                'status'
            ],
            filters=[
                # Drop regional-cache and monitoring requests, and rows with
                # no autonomous systems at all.
                nf.custom(video_as_stats_filter, 'request'),
                nf.custom(lambda x: x is not None and len(
                    x), 'autonomous_systems')
            ]
        ).map(
            as_rename,
            files=[nile.files.StatboxDict('as-name.yaml')]
        ).put(
            as_stats_table
        )
    else:
        stats = job.table(as_stats_table)

    # Report 1: counts per (provider, error) with the share of each error
    # inside its provider.
    stats.groupby(
        'error_id', 'autonomous_system'
    ).aggregate(
        count=na.count()
    ).sort(
        'count'
    ).map(
        totalize
    ).groupby(
        'provider', 'error', 'fielddate'
    ).aggregate(
        count=na.sum('count')
    ).groupby(
        'provider'
    ).sort(
        'error'
    ).reduce(
        add_shares
    ).project(
        ne.all(), fielddate=ne.const(date_f)
    ).sort(
        'provider', 'error'
    ).put(
        '{}/errors'.format(job_root)
    )

    # Report 2: the same breakdown, but counts are first taken per
    # yandexuid so that quantiles of the per-user counts can be estimated.
    # Rows with an empty yandexuid are excluded.
    stats.filter(
        nf.custom(lambda x: bool(x), 'yandexuid')
    ).groupby(
        'yandexuid', 'error_id', 'autonomous_system'
    ).aggregate(
        count=na.count()
    ).map(
        totalize
    ).groupby(
        'error', 'provider'
    ).aggregate(
        count=na.sum('count'),
        quantiles=na.quantile_estimate(
            field='count',
            quantiles=[0.1 * x for x in range(1, 10)]
        )
    ).groupby(
        'provider'
    ).sort(
        'error'
    ).reduce(
        add_shares
    ).project(
        ne.all(), fielddate=ne.const(date_f)
    ).sort(
        'provider', 'error'
    ).project(
        'provider', 'error', 'count', 'fielddate', 'share',
        # NOTE(review): presumably ``quantiles`` is a sequence of
        # (quantile, value) pairs in the order requested above, so item
        # [i][1] is the value for the (i + 1) * 10 percent quantile --
        # confirm against the quantile_estimate documentation.
        q10=ne.custom(lambda x: x[0][1], 'quantiles'),
        q20=ne.custom(lambda x: x[1][1], 'quantiles'),
        q30=ne.custom(lambda x: x[2][1], 'quantiles'),
        q40=ne.custom(lambda x: x[3][1], 'quantiles'),
        q50=ne.custom(lambda x: x[4][1], 'quantiles'),
        q60=ne.custom(lambda x: x[5][1], 'quantiles'),
        q70=ne.custom(lambda x: x[6][1], 'quantiles'),
        q80=ne.custom(lambda x: x[7][1], 'quantiles'),
        q90=ne.custom(lambda x: x[8][1], 'quantiles'),
    ).put(
        '{}/errors_by_user'.format(job_root)
    )

    # Run the job assembled above.
    job.run()

    client = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        username=args.stat_login,
        password=args.stat_password
    )

    # Publish each result table to the daily report of the same name.
    for e in ['errors', 'errors_by_user']:
        recs = hahn.read(
            '{}/{}'.format(job_root, e)
        )
        ns.StatfaceReport().path(
            'Video/Others/Strm/{}'.format(e)
        ).scale(
            'daily'
        ).client(
            client
        ).data(
            recs
        ).publish()


def main():
    """Parse the command line and process every date of the given range.

    ``--token`` and ``--pool`` are forwarded to the cluster constructor only
    when they are set. The dates between ``--from`` and ``--to`` are
    processed in reverse of the order returned by ``date_range``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--token')
    cli.add_argument('--pool')
    cli.add_argument('--from')
    cli.add_argument('--to')
    cli.add_argument('--stat_login', '-sl', default='robot_pecheny')
    cli.add_argument('--stat_password', '-sp')
    args = cli.parse_args()

    cluster_options = {}
    for option in ('token', 'pool'):
        value = getattr(args, option)
        if value:
            cluster_options[option] = value
    hahn = clusters.yt.Hahn(**cluster_options)

    # ``from`` is a Python keyword, so it has to be read through getattr.
    first = yt_get_date_from_table(getattr(args, 'from'))
    last = yt_get_date_from_table(getattr(args, 'to'))
    for date in date_range(first, last)[::-1]:
        process_date(date, hahn, args)


# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
