#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
import sys
import os
import re
import pdb
import codecs
import argparse
from pytils import (parseparams, parsevars,
                    yt_config_set_defaults, make_logger, push_to_razladki,
                    yt_date_to_ts, get_yt_exists, from_bytes)
from collections import defaultdict, Counter
from monytoring import Monitoring
import yt.wrapper as yt
import json


# Helper built by pytils.get_yt_exists from the yt.wrapper module; it is
# called below with a table path to decide whether a stage must be re-run.
# NOTE(review): presumably returns True when the YT node exists -- confirm.
yt_exists = get_yt_exists(yt)


# A record's path must start with this prefix to be kept by first_map;
# whatever follows the prefix is stored as the record's 'distr_obj'.
prefix = 'tech.portal-ads.'
# Matches a YYYY-MM-DD shaped run of digits (no calendar validation);
# yt_date_from_table uses it to pull the date out of a table path.
re_date = re.compile(r'[0-9]{4}-[0-9]{2}-[0-9]{2}')


# Operation spec handed to yt.run_map in process_table.
# NOTE(review): presumably these are job-count settings for the map /
# reduce stages of a YT operation -- confirm against the YT spec docs.
SPEC = {
    "job_count": 10000,
    "map_job_count": 10000,
    "reduce_job_count": 10000,
}


def first_map(rec):
    """Mapper: keep the portal-ads ATOMS events of one input row.

    The row is decoded with ``from_bytes`` and its ``value`` field is parsed
    with ``parseparams``.  A single output row is emitted when the parsed
    record has a truthy ``yandexuid`` and ``unixtime``, its ``path`` starts
    with the module-level ``prefix`` and ``'ATOMS'`` occurs in its ``vars``.
    Any other record produces no output.

    The emitted row carries ``yandexuid``, ``unixtime``, ``vars`` and
    ``distr_obj`` (the path with ``prefix`` cut off the front).
    """
    params = parseparams(from_bytes(rec)['value'])

    # Guard clauses, checked in this order; the first failure drops the row.
    if not params['yandexuid']:
        return
    if not params['unixtime']:
        return
    if not params['path'].startswith(prefix):
        return
    if 'ATOMS' not in params['vars']:
        return

    yield {
        'yandexuid': params['yandexuid'],
        'unixtime': params['unixtime'],
        'distr_obj': params['path'][len(prefix):],
        'vars': params['vars'],
    }


def first_reduce(key, recs):
    """Reducer over one ``yandexuid``: flag objects shown again after a close.

    Every record is decoded with ``from_bytes`` and its ``vars`` parsed with
    ``parsevars``; records without a truthy ``reqid`` are ignored.  For each
    ``distr_obj`` the reducer remembers whether a ``show`` and a ``close``
    event were seen, and whether some ``show`` arrived after a ``close`` had
    already been recorded.  The result therefore depends on the order of
    ``recs`` (process_table sorts the input by ``yandexuid``, ``unixtime``).

    Yields one row per object that was shown at least once:
    ``{'distr_obj': ..., 'show_after_close': 1 or 0}``.
    """
    # The user id is pulled out of the reduce key but is not needed below.
    _user_id = from_bytes(dict(key))['yandexuid']

    def new_state():
        return {'show': False, 'close': False, 'show_after_close': False}

    states = defaultdict(new_state)
    for raw in recs:
        row = from_bytes(raw)
        pvars = parsevars(row['vars'])
        if not pvars['reqid']:
            continue

        distr_obj = row['distr_obj']
        event = pvars['eventtype']
        if event == 'show':
            state = states[distr_obj]
            state['show'] = True
            # 'close' can only have been set by an earlier record.
            if state['close']:
                state['show_after_close'] = True
        elif event == 'close':
            states[distr_obj]['close'] = True

    for distr_obj, state in states.items():
        if state['show']:
            yield {
                'distr_obj': distr_obj,
                'show_after_close': int(state['show_after_close']),
            }


def safediv(x, y):
    """Return ``x / y``, or ``0`` when the division raises ZeroDivisionError."""
    try:
        quotient = x / y
    except ZeroDivisionError:
        return 0
    else:
        return quotient


def second_reduce(key, recs):
    """Reducer over one ``distr_obj``: share of rows flagged show-after-close.

    Counts the decoded records by their integer ``show_after_close`` value
    and yields a single row with the object name and
    ``show_after_close_rate``: the count of ``1`` values divided by the
    total count (via ``safediv``), rounded to 5 decimal places and rendered
    with ``format``.
    """
    distr_obj = from_bytes(dict(key))['distr_obj']

    counts = Counter(
        int(from_bytes(raw)['show_after_close']) for raw in recs
    )
    flagged = counts[1]
    total = sum(counts.values())
    rate = round(safediv(flagged, total), 5)

    yield {
        'distr_obj': distr_obj,
        'show_after_close_rate': format(rate),
    }


def yt_date_from_table(table):
    """Extract the first ``YYYY-MM-DD`` shaped date from a table path.

    Args:
        table: table path (or any string) to search with ``re_date``.

    Returns:
        The matched date string, or ``None`` when ``table`` contains no such
        date or is not something the regex can search (e.g. ``None``).
    """
    try:
        match = re_date.search(table)
    except TypeError:
        # Non-string input: keep the "no date" contract, but do not hide
        # unrelated failures (KeyboardInterrupt, SystemExit, real bugs) the
        # way a bare ``except:`` would.
        return None
    return match.group(0) if match else None


def main():
    """Parse the command line and hand control to ``Monitoring``.

    Registers ``--from/-f``, ``--to/-t`` and the boolean switches ``--debug``
    and ``--nolock``, creates a logger for this script, builds a
    ``Monitoring`` object wired to ``yt_date_from_table`` and
    ``process_table``, and calls its ``start`` with the parsed arguments.
    """
    parser = argparse.ArgumentParser()
    for long_opt, short_opt in (('--from', '-f'), ('--to', '-t')):
        parser.add_argument(long_opt, short_opt)
    for switch in ('--debug', '--nolock'):
        parser.add_argument(switch, action='store_true')
    args = parser.parse_args()

    script_path = os.path.abspath(__file__)
    logger = make_logger(script_path, debug=args.debug)

    Monitoring(
        prefix='//statbox/redir-log',
        filepath=script_path,
        default_id='//statbox/redir-log/2016-06-19',
        str_to_table_id=yt_date_from_table,
        process_table=process_table,
        logger=logger
    ).start(args)


def process_table(table, logger=None, redo=False):
    """Run the show-after-close pipeline for one day and publish the rates.

    Steps, in order:
      1. map ``first_map`` over the day's redir-log table into ``tmp1`` and
         sort it by ``yandexuid``, ``unixtime`` (skipped when ``tmp1``
         already exists, unless ``redo``);
      2. reduce ``tmp1`` with ``first_reduce`` into ``tmp2`` and sort it by
         ``distr_obj`` (skipped when ``tmp2`` already exists, unless
         ``redo``);
      3. reduce ``tmp2`` with ``second_reduce`` into the ``final`` table;
      4. push every row of the final table with ``push_to_razladki``;
      5. remove ``tmp1`` and ``tmp2``.

    Args:
        table: table path; only the date found by ``yt_date_from_table`` is
            used from it.
        logger: passed through to ``push_to_razladki``.
        redo: when true, both intermediate stages are recomputed even if
            their tables exist.
    """
    date = yt_date_from_table(table)
    src = yt.TablePath(
        '//statbox/redir-log/{}'.format(date),
        columns=['value']
    )
    tmp1, tmp2, out = [
        template.format(date)
        for template in (
            '//home/personalization/monitorings/afterclose/{}/tmp1',
            '//home/personalization/monitorings/afterclose/{}/tmp2',
            '//home/personalization/monitorings/afterclose/{}/final',
        )
    ]

    # Stage 1: filter raw events, then order them per user by time.
    stage1_needed = not yt_exists(tmp1) or redo
    if stage1_needed:
        yt.run_map(
            first_map,
            source_table=src,
            destination_table=tmp1,
            spec=SPEC
        )
        yt.run_sort(
            source_table=tmp1,
            destination_table=tmp1,
            sort_by=['yandexuid', 'unixtime']
        )

    # Stage 2: per-user flags, then group them by object.
    stage2_needed = not yt_exists(tmp2) or redo
    if stage2_needed:
        yt.run_reduce(
            first_reduce,
            source_table=tmp1,
            destination_table=tmp2,
            reduce_by='yandexuid'
        )
        yt.run_sort(
            source_table=tmp2,
            destination_table=tmp2,
            sort_by='distr_obj'
        )

    # Stage 3 always runs: per-object rate into the final table.
    yt.run_reduce(
        second_reduce,
        source_table=tmp2,
        destination_table=out,
        reduce_by='distr_obj'
    )

    for raw in yt.read_table(out, raw=False):
        row = from_bytes(raw)
        push_to_razladki(
            data={
                'param': row['distr_obj'],
                'value': row['show_after_close_rate'],
                'ts': yt_date_to_ts(date)
            },
            project='AtomAntiNag',
            override=True,
            logger=logger
        )

    # Intermediate tables are dropped only after everything above succeeded.
    for leftover in (tmp1, tmp2):
        yt.remove(leftover)


# Run the monitoring only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
