#!/usr/bin/env python
# -*- coding: utf-8 -*-
# from __future__ import unicode_literals
from __future__ import division
import sys
import os
import codecs
import random
import argparse
from collections import defaultdict, Counter
import math
import json

from nile.api.v1 import (
    statface as ns,
    # Record
)
import getpass
import datetime
from decimal import Decimal, getcontext


def choose_interval(num, intervals):
    """Return the first interval whose inclusive bounds contain *num*.

    *intervals* is any iterable of ``(low, high)`` pairs; iterating a dict
    keyed by such pairs works as well.  Raises ``IndexError`` when no
    interval contains *num*.
    """
    matching = []
    for bounds in intervals:
        if bounds[0] <= num <= bounds[1]:
            matching.append(bounds)
    return matching[0]


def counter_quantile(counter, quantile):
    """Return the *quantile* of the observations stored in *counter*.

    *counter* maps an observed value to the number of times it was seen
    (a ``Counter`` or a plain dict).  The observations are treated as one
    sorted sequence of ``sum(counts)`` items and the quantile is taken at
    position ``(n - 1) * quantile``: when that position is a whole number
    the value at that index is returned, otherwise the mean of the two
    neighbouring values is returned (a midpoint, not a weighted
    interpolation).

    Returns 0 when the counter holds no observations.  A counter with a
    single observation yields that observation for every quantile.
    Raises ``IndexError`` when the position falls past the last
    observation (for example ``quantile > 1``).
    """
    # Function-scope import keeps this block self-contained.
    from bisect import bisect_right

    values = sorted(value for value in counter if counter[value] != 0)
    total = sum(counter.values())
    if total <= 0 or not values:
        return 0

    # running[i] is the number of observations that are <= values[i], so
    # the observation at sorted position ``index`` is the first value whose
    # running count exceeds ``index``.
    running = []
    seen = 0
    for value in values:
        seen += counter[value]
        running.append(seen)

    def value_at(index):
        return values[bisect_right(running, index)]

    target = (total - 1) * quantile
    if int(target) == target:
        return value_at(target)
    return (value_at(math.floor(target)) + value_at(math.ceil(target))) / 2.0


def count_crawl_kpi(cU, cUT, s_t, dT):
    """Return the lag between the latest start and the latest crawl.

    ``main`` passes the ``crawledUkrop`` and ``crawledUkropThumb``
    timestamps as *cU* / *cUT* and ``spike_ts`` / ``discoveredTime`` as
    *s_t* / *dT*.  The later of the two crawl timestamps minus the later
    of the two start timestamps is divided by 60 (seconds to minutes,
    assuming the inputs are in seconds), rounded to two decimals and
    floored at 0.

    Returns ``None`` when both crawl timestamps are missing/zero or when
    both start timestamps are missing/zero.
    """
    has_crawl = bool(cU or cUT)
    has_start = bool(s_t or dT)
    if not has_crawl or not has_start:
        return None

    # A missing timestamp counts as 0, so max() picks the one that is set.
    crawled_at = max(cU or 0, cUT or 0)
    started_at = max(s_t or 0, dT or 0)
    minutes = round((crawled_at - started_at) / 60, 2)
    return max(0, minutes)


def count_crawl_kpi_2(cU, cUT, s_t, dT):
    """Return the lag between the earliest start and the latest crawl.

    ``main`` passes the ``crawledUkrop`` and ``crawledUkropThumb``
    timestamps as *cU* / *cUT* and ``spike_ts`` / ``discoveredTime`` as
    *s_t* / *dT*.  The later of the two crawl timestamps minus the earlier
    of the start timestamps is divided by 60 (seconds to minutes,
    assuming the inputs are in seconds), rounded to two decimals and
    floored at 0.

    Returns ``None`` when both crawl timestamps are missing/zero or when
    both start timestamps are missing/zero.
    """
    if not (cU or cUT):
        return None

    # Only timestamps that are actually set may compete for "earliest".
    # Substituting 0 for a missing one would make min() pick that
    # placeholder and turn the result into the raw crawl timestamp / 60.
    starts = [stamp for stamp in (s_t, dT) if stamp]
    if not starts:
        return None

    crawled_at = max(cU or 0, cUT or 0)
    minutes = round((crawled_at - min(starts)) / 60.0, 2)
    return max(0, minutes)


def count_content_kpi(fSTS, cU, cUT):
    """Return the lag between the latest crawl and the first send.

    ``main`` passes the ``firstSentToSaas`` timestamp as *fSTS* and the
    ``crawledUkrop`` / ``crawledUkropThumb`` timestamps as *cU* / *cUT*.
    *fSTS* minus the later of the two crawl timestamps is divided by 60
    (seconds to minutes, assuming the inputs are in seconds), rounded to
    two decimals and floored at 0.

    Returns ``None`` when both crawl timestamps are missing/zero.  A
    missing *fSTS* is treated as 0.
    """
    if not cU and not cUT:
        return None

    sent_at = fSTS if fSTS else 0
    crawled_at = max(cU if cU else 0, cUT if cUT else 0)
    minutes = round((sent_at - crawled_at) / 60, 2)
    return max(0, minutes)


# Status names that always get a row in the per-status share records built
# in main(): any status listed here that has no count for a host category
# is emitted with a zero share.
statuses = {
    'CRAWLED',
    'DELETED',
    'DELETED_MIDDLESEARCH',
    'DOWNLOAD_ERROR',
    'FILTERED',
    'INDEXED_CANOURL',
    'NOT_CRAWLABLE',
    'NOT_CRAWLED',
    'NOT_CRAWLED_CANOURL',
    'NOT_DISCOVERED',
    'NOT_INDEXED',
    'NO_THUMB',
    'NO_THUMB_FOUND',
    'SEARCHABLE_BY_URL',
    'SEARCHABLE_WITH_ATTRS',
    'SEARCHABLE_WO_PLAYER',
}


# Quantiles reported for every delta / KPI distribution.
_QUANTILES = (0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99)

# Statuses subtracted from the denominator of the searchable share.
_SHARE_EXCLUDED_STATUSES = (
    'SEARCHABLE_WO_PLAYER',
    'DELETED',
    'RKN_BANNED',
    'NOT_CRAWLABLE',
    'DEAD_URL',
    'BAD_URL',
)


def _load_json(path):
    """Parse the JSON file at *path*, closing the handle afterwards."""
    with open(path) as handle:
        return json.load(handle)


def _dump_json(payload, path):
    """Write *payload* to *path* as indented, key-sorted UTF-8 JSON."""
    with codecs.open(path, 'w', 'utf8') as handle:
        json.dump(
            payload, handle,
            indent=2, ensure_ascii=False, sort_keys=True
        )


def _strip_scheme(url):
    """Return *url* without its leading ``scheme://`` part, if it has one."""
    _scheme, separator, rest = url.partition('//')
    return rest if separator else url


def _record_kpi(kpi, cats, item, field, name, value):
    """Store *value* on *item* and count it per category; ignore ``None``."""
    if value is None:
        return
    item[field] = value
    for cat in cats:
        kpi[cat][name][value] += 1


def _quantile_records(counters_by_cat, fielddate, platform):
    """Build one record per (category, distribution, quantile)."""
    records = []
    for host_cat in counters_by_cat:
        for name in counters_by_cat[host_cat]:
            for quantile in _QUANTILES:
                records.append(dict(
                    fielddate=fielddate,
                    delta=name,
                    host_cat=host_cat,
                    platform=platform,
                    value=counter_quantile(
                        counters_by_cat[host_cat][name], quantile
                    ),
                    quantile=quantile
                ))
    return records


def _publish_daily(client, report_path, records):
    """Publish *records* to the daily-scale report at *report_path*."""
    ns.StatfaceReport().path(
        report_path
    ).scale(
        'daily'
    ).client(
        client
    ).data(
        records
    ).publish()


def main():
    """Join crawl metrics with musca output and publish the daily reports.

    Command-line arguments:
      --results / -r        JSON file with per-URL metrics (required)
      --platform / -p       platform label copied into the records (required)
      --musca / -m          JSON file with the musca output (required)
      --date / -d           report date, ``YYYY-MM-DD``; anything that does
                            not parse falls back to today (required)
      --output / -o         where the enriched musca list is written
      --output_cc / -occ    where the category shares are written
      --stat_login / -sl    report service login
      --stat_password / -sp report service password (required)
      --sp_report / -spr    optional report path for the searchable share

    Besides the two JSON outputs, the function prints diagnostic
    breakdowns and publishes the delta, KPI, spike-coverage and status
    reports.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--results', '-r', required=True)
    parser.add_argument('--platform', '-p', required=True)
    parser.add_argument('--musca', '-m', required=True)
    parser.add_argument('--date', '-d', required=True)
    parser.add_argument('--output', '-o', default='output.json')
    parser.add_argument('--output_cc', '-occ', default='output_cc.json')
    parser.add_argument('--stat_login', '-sl', default='robot_pecheny')
    parser.add_argument('--stat_password', '-sp', required=True)
    parser.add_argument('--sp_report', '-spr', default=None)
    args = parser.parse_args()

    # A valid date is rendered as 'YYYY-MM-DD 00:00:00', the fallback as
    # 'YYYY-MM-DD'; both forms are passed on as fielddate unchanged.
    try:
        args.date = str(datetime.datetime.strptime(args.date, '%Y-%m-%d'))
    except (ValueError, TypeError):
        args.date = str(datetime.date.today())
    print(args.date)

    obj = _load_json(args.results)
    musca = _load_json(args.musca)

    merged_obj = {entry['data']['grouping_url']: entry for entry in obj}

    # Attach metrics to musca items by scheme-less URL.  Only the scheme
    # separator is cut off, so a later '//' inside the path is preserved.
    for item in musca:
        url = _strip_scheme(item['canon_url'])
        if url in merged_obj:
            item['metrics'] = merged_obj[url]
    print('len of objects with metrics: {}'.format(len(obj)))
    print('len of musca output: {}'.format(len(musca)))
    print('number of successfully joined objects: {}'.format(
        sum(1 for item in musca if 'metrics' in item)
    ))

    # category -> distribution name -> value -> number of observations
    deltas_by_cat = defaultdict(lambda: defaultdict(Counter))
    kpi = defaultdict(lambda: defaultdict(Counter))

    reaction = defaultdict(Counter)
    cat_counter = Counter()
    cat_counter_metrics = Counter()
    cat_yavideospike = Counter()
    cat_yawebspike = Counter()
    hours_yavideo = defaultdict(Counter)
    hours_yaweb = defaultdict(Counter)
    musca_counter = defaultdict(Counter)

    # (item field, KPI name, per-category counter, per-hour counter)
    spike_sources = (
        ('spike_ts_yandex_video', 'spikeTs_yaVideoSpikeTs',
         cat_yavideospike, hours_yavideo),
        ('spike_ts_yandex_web', 'spikeTs_yaWebSpikeTs',
         cat_yawebspike, hours_yaweb),
    )

    skipped = 0
    for item in musca:
        own_cat = item['host_cat']
        cat_counter[own_cat] += 1

        # Every item is counted under '_total_', under its own category
        # and, unless it is PORN/UNKNOWN, under 'NOT_PORN'.
        cats = ['_total_']
        if own_cat not in ('PORN', 'UNKNOWN'):
            cats.append('NOT_PORN')
        cats.append(own_cat)

        if item.get('disable_sbr') != 1:
            for cat in cats:
                musca_counter[cat][item['musca']] += item['weight']

        if 'metrics' in item:
            cat_counter_metrics[own_cat] += 1
            metrics = item['metrics']['metrics']
            deltas = dict(metrics['deltas'])
            data = item['metrics'].get('data', {})
            if data.get('skipUrlForUkropFresh') != 1:
                for name, raw in deltas.items():
                    if isinstance(raw, (int, float)):
                        minutes = round(max(raw, 0) / 60.0, 2)
                        for cat in cats:
                            deltas_by_cat[cat][name][minutes] += 1
            else:
                skipped += 1

            # Missing timestamps read as 0.
            ts = defaultdict(int, metrics['timestamps'])
            crawl_args = (
                ts['crawledUkrop'],
                ts['crawledUkropThumb'],
                item['spike_ts'],
                ts['discoveredTime'],
            )
            _record_kpi(
                kpi, cats, item, 'crawl_kpi', 'crawlKPI',
                count_crawl_kpi(*crawl_args)
            )
            _record_kpi(
                kpi, cats, item, 'crawl_kpi_2', 'crawlKPI2',
                count_crawl_kpi_2(*crawl_args)
            )
            if ts['firstSentToSaas']:
                _record_kpi(
                    kpi, cats, item, 'content_kpi', 'contentKPI',
                    count_content_kpi(
                        ts['firstSentToSaas'],
                        ts['crawledUkrop'],
                        ts['crawledUkropThumb'],
                    )
                )

        if not item['spike_ts']:
            continue
        for cat in cats:
            reaction[cat]['spike_ts'] += 1
        for field, kpi_name, per_cat, hours in spike_sources:
            if not item[field]:
                continue
            per_cat[own_cat] += 1
            lag = round(max(0, item[field] - item['spike_ts']) / 60.0, 2)
            for cat in cats:
                reaction[cat][field] += 1
                kpi[cat][kpi_name][lag] += 1
                hours[cat][round(lag / 60.0)] += 1

    print('number of skipped urls: {}'.format(skipped))
    for title, counter in (
        ('categories breakdown:', cat_counter),
        ('yavideo spikes breakdown:', cat_yavideospike),
        ('yaweb spikes breakdown:', cat_yawebspike),
    ):
        print(title)
        for name, count in counter.most_common():
            print('{}: {}'.format(name, count))
    for title, hours in (
        ('==YAWEB HOURS BREAKDOWN==\n', hours_yaweb),
        ('==YAVIDEO HOURS BREAKDOWN==\n', hours_yavideo),
    ):
        print(title)
        for cat in hours:
            print('===={}===='.format(cat))
            for hour in range(0, 24):
                print('{:02} hours: {} urls'.format(hour, hours[cat][hour]))

    # The raw html is dropped before the enriched list is written out.
    for item in musca:
        item.pop('html', None)
    _dump_json(musca, args.output)

    # Turn the category counts into shares; the inner loop only runs for a
    # non-empty counter, so the total is never zero here.
    for counter in (cat_counter, cat_counter_metrics):
        counter_total = sum(counter.values())
        for name in counter:
            counter[name] = counter[name] / counter_total
    _dump_json(
        {'all': cat_counter, 'metrics_only': cat_counter_metrics},
        args.output_cc
    )

    recs_spikes = []
    for host_cat in reaction:
        for system in ['web', 'video']:
            recs_spikes.append(dict(
                fielddate=args.date,
                host_cat=host_cat,
                platform=args.platform,
                lag='yandex_{}'.format(system),
                value=reaction[host_cat][
                    'spike_ts_yandex_{}'.format(system)
                ] / reaction[host_cat]['spike_ts']
            ))

    recs_kpi = _quantile_records(kpi, args.date, args.platform)
    recs = _quantile_records(deltas_by_cat, args.date, args.platform)

    client = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        username=args.stat_login,
        password=args.stat_password
    )

    print('pushing {} recs to fresh-deltas'.format(len(recs)))
    _publish_daily(client, 'Video/Others/fresh-deltas', recs)

    print('pushing {} recs to fresh-sbr-kpi'.format(len(recs_kpi)))
    _publish_daily(client, 'Video/Others/fresh-sbr-kpi', recs_kpi)

    print(
        'pushing {} recs to fresh-sbr-spike-coverage'.format(len(recs_spikes))
    )
    _publish_daily(
        client, 'Video/Others/fresh-sbr-spike-coverage', recs_spikes
    )

    recs_s = []
    recs_sp = []

    for host_cat in musca_counter:
        counts = musca_counter[host_cat]
        zero_statuses = statuses - set(counts.keys())
        total = sum(counts.values())
        status = None
        for status in counts:
            recs_s.append(dict(
                fielddate=args.date,
                status=status,
                host_cat=host_cat,
                share=str(counts[status]),
                # All-zero weights give a zero total; report a zero share
                # instead of failing, as the searchable share below does.
                share_true=round(counts[status] / total, 4) if total else 0
            ))
        for status in zero_statuses:
            recs_s.append(dict(
                fielddate=args.date,
                status=status,
                host_cat=host_cat,
                share=0,
                share_true=0
            ))

        # Subtract one status at a time so the arithmetic order is fixed.
        denominator = total
        for excluded in _SHARE_EXCLUDED_STATUSES:
            denominator -= counts[excluded]
        try:
            ps_value = (
                counts['SEARCHABLE_WITH_ATTRS'] + counts['INDEXED_CANOURL']
            ) / denominator
        except ZeroDivisionError:
            ps_value = 0
        recs_sp.append(
            dict(
                fielddate=args.date,
                # NOTE(review): this is whatever status the loops above
                # visited last, which looks unintentional.  Kept as is
                # because the schema of the --sp_report report is not
                # visible here; confirm before changing.
                status=status,
                host_cat=host_cat,
                value=ps_value
            )
        )

    print('pushing {} recs to fresh-sbr-1'.format(len(recs_s)))
    _publish_daily(client, 'Video/Others/fresh-sbr-1', recs_s)

    if args.sp_report:
        _publish_daily(client, args.sp_report, recs_sp)


# Run the pipeline only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
