#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import os
import argparse
from nile.api.v1 import (
    clusters,
    filters as nf,
    extractors as ne,
    aggregators as na,
    statface as ns,
    with_hints,
    modified_schema,
    Record
)
import nile.files as nfi
import copy
from yql.api.v1.client import YqlClient
from qb2.api.v1 import typing as qt
from pytils import (
    yql_run,
    date_range,
    yt_get_date_from_table as get_date,
    get_dates_from_stat,
    get_stat_headers
)
import itertools


# YQL query template that lists the distinct error ids found in one day's
# "preprocessed" table. The literal token DATE is a placeholder: process_date
# substitutes the date string for it (via str.replace) before running the query.
errors_query = """
SELECT DISTINCT error_id
FROM [home/videoquality/vh_analytics/strm_cube_2/DATE/preprocessed]
"""


def get_duration_category(duration):
    """Map a content duration to its report bucket label.

    The thresholds below are multiples of 60, so ``duration`` is presumably
    measured in seconds (confirm against the input table).

    :param duration: content duration; may be ``None``.
    :return: ``'other'`` for a missing, zero or negative duration, otherwise
        the label of the first bucket whose inclusive upper bound is not
        exceeded, or ``"240m+"`` when every bound is exceeded.
    """
    if not duration or duration < 0:
        return 'other'

    minute = 60
    # (inclusive upper bound, label), ordered from the shortest bucket up.
    buckets = (
        (5 * minute, "005m-"),
        (20 * minute, "005-20m"),
        (30 * minute, "020-30m"),
        (40 * minute, "030-40m"),
        (60 * minute, "040-60m"),
        (90 * minute, "060-90m"),
        (120 * minute, "090-120m"),
        (240 * minute, "120-240m"),
    )
    for upper_bound, label in buckets:
        if duration <= upper_bound:
            return label
    return "240m+"


class SessionMapper(object):
    """Mapper that expands a session into "time before first error" records.

    For every tracked error id the mapper emits how long the session ran
    before that error first appeared; sessions in which the error never
    appeared contribute their whole ``view_time``. An extra ``'_total_'``
    error id carries the minimum over all tracked errors.

    :param all_errors: set of error ids to track; ids outside it are ignored.
    """

    def __init__(self, all_errors):
        self.all_errors = all_errors

    def __call__(self, recs):
        """Yield one Record per combination of slice values and error id.

        Each of ``duration_category``, ``provider``, ``os_family`` and
        ``view_type`` is emitted both with its real value and as
        ``'_total_'``, so a session produces ``16 * (len(all_errors) + 1)``
        records.

        :param recs: iterable of session records providing ``view_time``,
            ``provider``, ``os_family``, ``view_type`` and optionally
            ``content_duration`` and ``errors`` (a list of mappings with
            ``id`` and ``rel_time``).
        """
        for rec in recs:
            view_time = rec['view_time']
            if not view_time:
                continue
            duration_category = get_duration_category(
                rec.get('content_duration', None)
            )

            # A present-but-null `errors` value bypasses the `get` default,
            # so normalise it to an empty list instead of crashing.
            session_errors = rec.get('errors', None) or []

            # Keep the first occurrence of each tracked error.
            # NOTE(review): assumes `errors` is in chronological order, so
            # the first occurrence is the earliest one - confirm.
            time_before_first_error = {}
            for error in session_errors:
                eid = error['id']
                if (
                    eid in self.all_errors and
                    eid not in time_before_first_error
                ):
                    time_before_first_error[eid] = error['rel_time']

            # Tracked errors that never happened: the session ran
            # error-free for its whole view time.
            for eid in self.all_errors:
                time_before_first_error.setdefault(eid, view_time)

            # With no tracked errors at all the dict is empty and min()
            # would raise ValueError; the session is then error-free, so
            # the total is the full view time.
            if time_before_first_error:
                minv = min(time_before_first_error.values())
            else:
                minv = view_time
            time_before_first_error['_total_'] = minv

            for comb in itertools.product(
                (duration_category, '_total_'),
                (rec['provider'], '_total_'),
                (rec['os_family'], '_total_'),
                (rec['view_type'], '_total_'),
                sorted(time_before_first_error)
            ):
                dc, provider, os_family, view_type, eid = comb
                yield Record(
                    duration_category=dc,
                    error_id=eid,
                    view_type=view_type,
                    os_family=os_family,
                    provider=provider,
                    rel_time=float(time_before_first_error[eid])
                )


def is_good_vcid(vcid):
    """Tell whether ``vcid`` is a usable video content id.

    :param vcid: video content id value; may be ``None``.
    :return: ``False`` for an empty/missing value or one of the placeholder
        values ``'-'`` and ``"novcid"``, ``True`` otherwise.
    """
    placeholders = {'-', "novcid"}
    return bool(vcid) and vcid not in placeholders


@with_hints(output_schema=modified_schema(
    exclude=['histogram'],
    extend={
        'quantile_type': qt.String,
        'quantile_value': qt.Float
    }
))
def add_quantiles(recs):
    """Unfold the ``histogram`` field into one record per quantile.

    Every input record is expected to carry ``histogram`` as a sequence of
    pairs whose first item is the quantile level and whose second item is the
    quantile value. For each pair a copy of the record is emitted without
    ``histogram`` and with ``quantile_type`` (``'q<level>'``) and
    ``quantile_value`` (float) added.

    :param recs: iterable of aggregated records.
    """
    for rec in recs:
        base_fields = rec.to_dict()
        quantile_pairs = base_fields.pop('histogram')
        for pair in quantile_pairs:
            # Deep copy so emitted records never share mutable values.
            fields = copy.deepcopy(base_fields)
            fields.update(
                quantile_type='q{}'.format(pair[0]),
                quantile_value=float(pair[1])
            )
            yield Record(**fields)


def process_date(
    date, report, cluster, replace_mask=None, async_mode=False
):
    """Compute the report table for one day and publish it to Statface.

    Steps:
      1. Ask YQL for the distinct error ids of the day and keep the ids
         ending in ``_fatal`` plus ``Stalled_Other``.
      2. Run a Nile job over the day's ``sessions`` table that computes, per
         slice and error id, the session count and the 0.1..0.9 quantiles of
         the time before the first error, and writes them to the output
         table.
      3. Publish the output table to the Statface report remotely.

    Reads ``YT_PROXY``, ``YQL_TOKEN``, ``STAT_LOGIN`` and ``STAT_TOKEN`` from
    the environment.

    :param date: day to process; its string form is used in table paths and
        as the ``fielddate`` value.
    :param report: Statface report path.
    :param cluster: Nile cluster object used to create the job.
    :param replace_mask: when truthy, the publication uses a replace mask on
        ``'fielddate'`` (the value itself is not used).
    :param async_mode: forwarded to ``remote_publish``.
    """
    date_s = str(date)
    proxy = os.environ['YT_PROXY'].split('.')[0].lower()
    out_table_path = (
        "//home/videolog/time_before_first_error/yt/{date}".format(
            date=date
        )
    )
    input_table = (
        '//home/videoquality/vh_analytics/strm_cube_2/'
        '{}/sessions'.format(date)
    )

    # --- 1. Collect the error ids to track ------------------------------
    yql_client = YqlClient(db=proxy, token=os.environ["YQL_TOKEN"])
    errors_request = yql_client.query(
        query=errors_query.replace('DATE', date_s),
        title='Time Before First Error | YQL'
    )
    errors_request.run()
    known_error_ids = set(
        errors_request.results.full_dataframe['error_id']
    )
    all_errors = set()
    for error_id in known_error_ids:
        is_fatal = error_id and error_id.endswith('_fatal')
        if is_fatal or error_id == 'Stalled_Other':
            all_errors.add(error_id)

    # --- 2. Build and run the Nile job ----------------------------------
    job = cluster.job()

    valid_sessions = job.table(input_table).filter(
        nf.custom(is_good_vcid, 'video_content_id'),
        nf.custom(lambda x: x > 0, 'view_time')
    )

    mapper_schema = {
        'duration_category': qt.String,
        'error_id': qt.String,
        'view_type': qt.String,
        'os_family': qt.String,
        'provider': qt.String,
        'rel_time': qt.Float,
    }
    session_mapper = with_hints(output_schema=mapper_schema)(
        SessionMapper(all_errors)
    )
    per_error_times = valid_sessions.map(
        session_mapper, intensity='ultra_cpu'
    )

    # Deciles 0.1 .. 0.9; round() removes float noise such as 0.30000000004.
    quantile_levels = [round(0.1 * x, 1) for x in range(1, 10)]
    aggregated = per_error_times.groupby(
        'duration_category', 'error_id', 'view_type', 'os_family',
        'provider'
    ).aggregate(
        sessions=na.count(),
        histogram=na.quantile('rel_time', quantile_levels)
    )

    aggregated.map(
        add_quantiles, intensity='ultra_cpu'
    ).project(
        ne.all(),
        fielddate=ne.const(date_s)
    ).put(
        out_table_path
    )

    job.run()

    # --- 3. Publish the result to Statface ------------------------------
    stat_client = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        username=os.environ['STAT_LOGIN'],
        password=os.environ['STAT_TOKEN']
    )

    print('Performing remote push of {}...'.format(out_table_path))

    publisher = ns.StatfaceReport().path(report).scale('daily')
    if replace_mask:
        publisher = publisher.replace_mask('fielddate')

    publisher.client(stat_client).remote_publish(
        proxy=proxy,
        table_path=out_table_path,
        async_mode=async_mode,
        upload_config=False
    )


def main():
    """Parse the command line and process every requested date.

    When both ``--from`` and ``--to`` are given, the dates come from
    ``date_range(from, to)``. Otherwise the dates are those found under
    ``//logs/strm-access-log/1d`` that compare greater than the value
    returned by ``get_dates_from_stat`` for the report (presumably the last
    date already published - confirm against pytils).

    Reads ``YT_PROXY``, ``YT_TOKEN`` and ``YQL_TOKEN`` from the environment.
    """
    parser = argparse.ArgumentParser()
    option_defaults = (
        ('--report', 'Video/Others/Strm/Stability/time_before_first_error'),
        ('--from', None),
        ('--pool', None),
        ('--to', None),
    )
    for flag, default in option_defaults:
        parser.add_argument(flag, default=default)
    args = parser.parse_args()
    # `from` is a keyword, so the values are read through the namespace dict.
    options = vars(args)

    cluster_name = os.environ['YT_PROXY'].split('.')[0].title()
    cluster_factory = getattr(clusters.yql, cluster_name)
    cluster = cluster_factory(
        token=os.environ['YT_TOKEN'],
        yql_token=os.environ['YQL_TOKEN']
    ).env(
        yt_spec_defaults={
            'pool_trees': ["physical"],
            'tentative_pool_trees': ["cloud"],
        },
        templates={
            'tmp_root': '//home/videoquality/vh_analytics/tmp',
            'title': 'Time Before First Error',
        }
    )
    if args.pool:
        cluster = cluster.update(pool=args.pool)

    from_ = options['from']
    to_ = options['to']

    if from_ and to_:
        dates = date_range(from_, to_)
    else:
        last_date_from_stat = get_dates_from_stat(
            headers=get_stat_headers(),
            report=args.report,
            dimensions=[]
        )
        log_tables = cluster.driver.client.search(
            root='//logs/strm-access-log/1d',
            path_filter=lambda x: get_date(x)
        )
        available_dates = sorted(get_date(x) for x in log_tables)

        dates = []
        for candidate in available_dates:
            if candidate > last_date_from_stat:
                dates.append(candidate)

    for date in dates:
        print('processing {}'.format(date))
        process_date(date, args.report, cluster)


# Run the report update only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
