#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
import os
import argparse
import math
import pdb
from pytils import (parseparams, parsevars, from_bytes,
                    yt_config_set_defaults, make_logger,
                    safediv, yt_get_date_from_table,
                    push_to_razladki)
from collections import defaultdict, Counter
from monytoring import Monitoring
from v4_daily_yt import is_training, normalize_host
import yt.wrapper as yt


# from http://code.activestate.com/recipes/511478/

def percentile(N, percent, key=lambda x: x):
    """
    Return the requested percentile of an already sorted list.

    The fractional rank (len(N) - 1) * percent is located in N; when it
    falls between two positions the result is the linear interpolation of
    the two neighbouring values.

    @parameter N - a list of values. N MUST BE sorted before the call.
    @parameter percent - a float from 0.0 to 1.0.
    @parameter key - optional function applied to an element of N to obtain
                     the number used in the computation.

    @return - the percentile value, or None when N is empty.
    """
    if not N:
        return None
    position = (len(N) - 1) * percent
    lower, upper = math.floor(position), math.ceil(position)
    if lower != upper:
        # Rank lies strictly between two elements: weight each neighbour by
        # its distance to the opposite bound.
        below = key(N[int(lower)]) * (upper - position)
        above = key(N[int(upper)]) * (position - lower)
        return below + above
    # Rank is a whole number: that element is the answer.
    return key(N[int(position)])

# end of snippet


# Path prefix that redir_map requires at the start of a record's 'path';
# the remainder of the path after this prefix becomes the row's 'distr_obj'.
prefix = 'tech.portal-ads.'


def redir_map(rec):
    """
    Mapper: pick portal-ads show/click events out of a redir-log record.

    The record's 'value' field is parsed with parseparams. A row is emitted
    only when the parsed 'path' starts with the module-level prefix, the
    raw 'vars' string contains 'ATOMS', and the event is either a click or
    a show for which is_training() is true.

    @parameter rec - one input record; it is passed through from_bytes first.

    @return - generator yielding at most one dict with the keys 'showid',
              'distr_obj' (path without the prefix), 'referer' (HTTP_REFERER
              passed through normalize_host) and 'vars' (the unparsed vars).
    """
    params = parseparams(from_bytes(rec)['value'])
    if not params['path'].startswith(prefix):
        return
    if 'ATOMS' not in params['vars']:
        return

    vrs = parsevars(params['vars'])
    event = vrs['eventtype']
    # Clicks always pass; shows pass only when they belong to training.
    wanted = event == 'click' or (event == 'show' and is_training(vrs))
    if not wanted:
        return

    yield {
        'showid': vrs['showid'],
        'distr_obj': params['path'][len(prefix):],
        'referer': normalize_host(params['HTTP_REFERER']),
        'vars': params['vars'],
    }


def creative_quality_reduce(key, recs):
    """
    First-stage reducer: emit one row per show and per attributable click.

    Every 'show' record produces a 'show' row. A 'click' record produces a
    'click' row only if a show with the same showid, for which
    is_training() was true, has already been seen in this group.

    @parameter key - the reduce key of the group (not used here).
    @parameter recs - iterable of records of the group; each is passed
                      through from_bytes and its 'vars' through parsevars.

    @return - generator of dicts with 'distr_obj', 'bannerid', 'eventtype'.
    """
    # NOTE(review): a click is matched only against shows that came earlier
    # in recs, so the result depends on record order within the group --
    # confirm that shows are guaranteed to precede their clicks.
    training_showids = set()
    for raw in recs:
        row = from_bytes(raw)
        event_vars = parsevars(row['vars'])
        distr_obj = row['distr_obj']
        event = event_vars['eventtype']

        emitted = None
        if event == 'show':
            if is_training(event_vars):
                training_showids.add(event_vars['showid'])
            emitted = 'show'
        elif event == 'click' and event_vars['showid'] in training_showids:
            emitted = 'click'

        if emitted is None:
            continue
        yield {
            'distr_obj': distr_obj,
            'bannerid': event_vars['bannerid'],
            'eventtype': emitted
        }


def get_middle(x):
    """
    Return the middle element of a sequence.

    @parameter x - an indexable sequence (list, tuple, string, ...).

    @return - the central element when len(x) is odd, the lower of the two
              central elements when len(x) is even, or None when x is empty.
    """
    if not x:
        return None
    # Index by the sequence length, not by the sequence itself.
    # (len - 1) // 2 is the exact centre for odd lengths and the
    # lower-centre position for even lengths.
    return x[(len(x) - 1) // 2]


def creative_quality_reduce2(key, recs):
    """
    Second-stage reducer: summarise per-banner click ratios of a distr_obj.

    For every bannerid in the group the ratio
    (number of 'click' rows) / (number of all rows of that banner)
    is computed. The ratios are then summarised into up to four rows:
    'top1' (highest ratio, rounded to 5 digits), 'avg' (mean ratio, rounded
    to 5 digits), and 'p50' / 'p90' (percentiles, emitted only when
    greater than zero).

    @parameter key - reduce key; key['distr_obj'] is expected to be a byte
                     string and is decoded as UTF-8 with replacement.
    @parameter recs - iterable of rows carrying 'bannerid' and 'eventtype';
                      each is passed through from_bytes.

    @return - generator of dicts with 'distr_obj', 'type' and 'value'
              (the value is formatted as a string).
    """
    distr_obj = key['distr_obj'].decode('utf8', errors='replace')
    counter = defaultdict(Counter)
    for rec in recs:
        rec = from_bytes(rec)
        counter[rec['bannerid']][rec['eventtype']] += 1
    if not counter:
        # Nothing to summarise; avoids indexing into an empty list below.
        return

    # True division is intended here (the file enables it via __future__).
    # Every banner has at least one row, so the denominator is never zero.
    srtd = sorted((events['click'] / sum(events.values())
                   for events in counter.values()), reverse=True)
    # percentile() requires its input sorted in ascending order; feeding it
    # the descending list would make 'p90' actually report the 10th
    # percentile.
    ascending = srtd[::-1]

    yield {
        'distr_obj': distr_obj,
        'type': 'top1',
        'value': format(round(srtd[0], 5))
    }
    yield {
        'distr_obj': distr_obj,
        'type': 'avg',
        'value': format(round(safediv(sum(srtd), len(srtd)), 5))
    }
    for name, fraction in (('p50', 0.5), ('p90', 0.9)):
        value = percentile(ascending, fraction)
        if value > 0:
            yield {
                'distr_obj': distr_obj,
                'type': name,
                'value': format(value)
            }


def main():
    """
    Command-line entry point.

    Parses the options --from/-f, --to/-t, --debug, --nolock and
    --persistent, sets up the logger and the YT defaults, and starts a
    Monitoring run over '//statbox/redir-log' with process_table as the
    per-table handler.
    """
    parser = argparse.ArgumentParser()
    for flags in (('--from', '-f'), ('--to', '-t')):
        parser.add_argument(*flags, default=None)
    for switch in ('--debug', '--nolock', '--persistent'):
        parser.add_argument(switch, action='store_true')
    args = parser.parse_args()

    script_path = os.path.abspath(__file__)
    logger = make_logger(script_path, debug=args.debug)
    yt_config_set_defaults(yt, logger=logger)

    # Presumably forwarded by Monitoring to process_table as keyword
    # arguments -- confirm against the Monitoring implementation.
    handler_options = {
        'persistent': args.persistent,
    }
    Monitoring(
        prefix='//statbox/redir-log',
        filepath=script_path,
        default_id='//statbox/redir-log/2016-07-18',
        str_to_table_id=yt_get_date_from_table,
        process_table=process_table,
        logger=logger
    ).start(args, handler_options)


def process_table(table, logger=None, persistent=False):
    """
    Compute creative-quality metrics for one redir-log table and push them.

    Steps:
      1. map-reduce the source table with redir_map /
         creative_quality_reduce into a temporary "<date>_reduce1" table;
      2. sort that table by 'distr_obj' and reduce it with
         creative_quality_reduce2 into "<date>_reduce2";
      3. read the result and push every row to razladki (project
         'CreativesEfficiency') as "<distr_obj>_<type>" with the table
         date as timestamp;
      4. remove both temporary tables unless persistent is set.

    @parameter table - path of the source YT table; its date is obtained
                       with yt_get_date_from_table.
    @parameter logger - optional logger; when None nothing is logged.
    @parameter persistent - keep the temporary tables when true.
    """
    # The date depends only on the table name, so resolve it once.
    table_date = yt_get_date_from_table(table)
    tdate = table_date.strftime('%Y%m%d')
    # NOTE(review): '%s' (epoch seconds) is a platform-specific strftime
    # extension and presumably uses the local timezone -- confirm this is
    # the timestamp razladki expects.
    ts = int(table_date.strftime('%s'))

    table1 = '//tmp/personalization/creative_quality/{}_reduce1'.format(
        tdate
    )
    table2 = '//tmp/personalization/creative_quality/{}_reduce2'.format(
        tdate
    )
    if logger is not None:
        logger.info('{} -> {}'.format(table, table1))
    yt.run_map_reduce(
        mapper=redir_map,
        reducer=creative_quality_reduce,
        source_table=table,
        destination_table=table1,
        reduce_by="distr_obj"
    )
    # Sort in place so the following reduce gets its input ordered by the
    # reduce key.
    yt.run_sort(
        source_table=table1,
        destination_table=table1,
        sort_by='distr_obj'
    )
    yt.run_reduce(
        creative_quality_reduce2,
        source_table=table1,
        destination_table=table2,
        reduce_by="distr_obj"
    )

    project = 'CreativesEfficiency'
    for rec in yt.read_table(table2, raw=False):
        rec = from_bytes(rec)
        data = {
            'param': rec['distr_obj'] + '_' + rec['type'],
            'value': float(rec['value']),
            'ts': ts
        }
        push_to_razladki(data, project=project, override=True)

    if not persistent:
        yt.remove(table1)
        yt.remove(table2)


# Script entry point: run the job only when the file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
