#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
import sys
import os
import codecs
import re
import argparse
import datetime
import psutil
import pdb
import json
from pytils import (make_logger, safediv, yt_config_set_defaults,
                    parseparams, parsevars, push_to_razladki,
                    yt_get_date_from_table, get_yt_exists, SPEC10k,
                    from_bytes)
from collections import defaultdict, Counter
import yt.wrapper as yt
from monytoring import Monitoring
from v4_daily_yt import is_training

# Table-existence check obtained from pytils for the yt client module;
# process_table calls it to decide whether an intermediate table has to be
# (re)built.
yt_exists = get_yt_exists(yt)


def main():
    """Parse the command line and hand control over to the Monitoring driver.

    The boolean switches ``--debug``, ``--nolock`` and ``--redo`` as well as
    the ``--from``/``--to`` options are parsed here. Only ``--debug`` (logger
    setup) and ``--redo`` (forwarded to ``process_table``) are read in this
    function; the complete namespace is passed on to ``Monitoring.start``.
    """
    arg_parser = argparse.ArgumentParser()
    for switch in ('--debug', '--nolock', '--redo'):
        arg_parser.add_argument(switch, action='store_true')
    arg_parser.add_argument('--from', '-f', default=None)
    arg_parser.add_argument('--to', '-t', default=None)
    args = arg_parser.parse_args()

    script_path = os.path.abspath(__file__)
    logger = make_logger(script_path, debug=args.debug)

    yt_config_set_defaults(yt, logger)

    # Keyword arguments that accompany the parsed namespace into start().
    process_kwargs = {'redo': args.redo}

    Monitoring(
        prefix='//statbox/redir-log',
        filepath=script_path,
        default_id='//statbox/redir-log/2016-07-06',
        str_to_table_id=yt_get_date_from_table,
        process_table=process_table,
        get_srctables=get_srctables,
        logger=logger
    ).start(args, process_kwargs)


# redir_map keeps only records whose 'path' parameter starts with this prefix.
prefix = 'tech.portal-ads.'


def get_srctables(self):
    """Return the source table paths under ``self.prefix``, minus the last one.

    Every node listed under ``self.prefix`` is turned into a full path, the
    paths are sorted, and the greatest one in sort order is dropped.
    NOTE(review): presumably the dropped entry is the newest table, which may
    still be incomplete -- confirm with the Monitoring contract.
    """
    root = '{}'.format(self.prefix)
    paths = []
    for node in yt.list(root):
        paths.append('{}/{}'.format(self.prefix, node))
    paths.sort()
    return paths[:-1]


def redir_map(rec):
    """Mapper: keep browser-related 'ATOMS' records under the ads path prefix.

    A record is emitted only when all of the following hold for the
    parameters parsed from ``rec['value']``:

    * ``yandexuid`` and ``unixtime`` are truthy;
    * ``path`` starts with the module-level ``prefix``;
    * ``vars`` contains ``'ATOMS'``;
    * ``HTTP_REFERER`` contains ``'browser.yandex.ru'`` or ``vars`` contains
      ``'product=browser'``.

    The emitted row carries ``yandexuid``, ``unixtime``, the normalized
    referer and the untouched ``vars`` value.
    """
    params = parseparams(from_bytes(rec)['value'])

    # Guard clauses are checked in the same order as the fields are needed.
    if not params['yandexuid'] or not params['unixtime']:
        return
    if not params['path'].startswith(prefix):
        return

    event_vars = params['vars']
    if 'ATOMS' not in event_vars:
        return

    referer = params['HTTP_REFERER']
    from_browser_site = 'browser.yandex.ru' in referer
    if not from_browser_site and 'product=browser' not in event_vars:
        return

    yield {
        'yandexuid': params['yandexuid'],
        'unixtime': params['unixtime'],
        'referer': normalize_url(referer),
        'vars': event_vars,
    }


def landing_reduce_1(key, recs):
    """Reducer: emit landing events that can be tied to an earlier 'show'.

    Records are consumed in the order given. For each one:

    * a ``show`` event with a truthy ``reqid`` stores ``is_training(vars)``
      under that reqid; if ``is_training`` raises ``ValueError`` the record
      is skipped entirely;
    * a ``showlanding`` event, or an ``install`` event whose reqid already
      produced an emitted landing row, is emitted when its referer contains
      ``'browser.yandex.ru'`` and its reqid is truthy and was seen in a
      ``show``.

    Each emitted row holds the referer, the event type and the stored
    ``is_training`` result as ``collectpoolmode``.
    """
    training_by_reqid = {}
    landed_reqids = set()
    for raw in recs:
        rec = from_bytes(raw)
        event = parsevars(rec['vars'])
        referer = rec['referer']
        event_type = event['eventtype']

        if event_type == 'show' and event['reqid']:
            try:
                training_by_reqid[event['reqid']] = is_training(event)
            except ValueError:
                continue

        if 'browser.yandex.ru' not in referer:
            continue

        # An install only counts after a landing row was emitted for its reqid.
        landing_event = (
            event_type == 'showlanding' or
            (event_type == 'install' and event['reqid'] in landed_reqids)
        )
        if not landing_event:
            continue

        reqid = event['reqid']
        if not reqid or reqid not in training_by_reqid:
            continue

        landed_reqids.add(reqid)
        yield {
            'referer': referer,
            'eventtype': event_type,
            'collectpoolmode': training_by_reqid[reqid]
        }


def landing_reduce_2(key, recs):
    """Reducer: aggregate landing events of one referer into a single row.

    Event types are counted twice: over all records, and over the records
    whose ``collectpoolmode`` is truthy. The output row contains

    * ``key``         -- the referer taken from the reduce key;
    * ``conversion``  -- ``safediv(install, showlanding)`` computed on the
                         ``collectpoolmode`` subset;
    * ``showlanding`` -- number of ``showlanding`` events over all records;
    * ``product``     -- ``conversion * showlanding``.
    """
    referer = key['referer']
    all_events = Counter()
    pool_events = Counter()
    for raw in recs:
        rec = from_bytes(raw)
        event_type = rec['eventtype']
        all_events[event_type] += 1
        if rec['collectpoolmode']:
            pool_events[event_type] += 1

    conversion = safediv(pool_events['install'], pool_events['showlanding'])
    landings_shown = all_events['showlanding']
    yield {
        'key': referer,
        'conversion': conversion,
        'showlanding': landings_shown,
        'product': conversion * landings_shown
    }


def normalize_url(url):
    """Return ``url`` without scheme, query string and trailing slashes.

    A leading ``http://`` is removed first, then a leading ``https://`` (the
    checks run one after another on the intermediate result). Everything
    from the first ``?`` onwards is discarded and any trailing ``/``
    characters are stripped.
    """
    for scheme in ('http://', 'https://'):
        if url.startswith(scheme):
            url = url[len(scheme):]
    without_query = url.partition('?')[0]
    return without_query.rstrip('/')


def process_table(table, logger=None, redo=False):
    """Build landing-conversion KPIs for one source table and push them.

    Pipeline (a stage is skipped when its output table already exists,
    unless ``redo`` is set):

    1. ``redir_map`` over ``table`` -> ``.../map``, then sorted by
       ``yandexuid, unixtime``;
    2. ``landing_reduce_1`` by ``yandexuid`` -> ``.../reduce1``, then sorted
       by ``referer``;
    3. ``landing_reduce_2`` by ``referer`` -> ``.../reduce2``.

    Rows of ``reduce2`` whose ``key`` contains ``'desktop'`` are pushed to
    razladki (project ``BrowserLandingEfficiency``): one ``<key>_kpi`` point
    per key holding its conversion, plus one ``total_kpi`` point equal to the
    sum of ``product`` divided by the total ``showlanding`` count. Finally
    the ``map`` and ``reduce1`` tables are removed.

    Args:
        table: path of the source table; its date (via
            ``yt_get_date_from_table``) names the working directory and
            provides the timestamp of the pushed points.
        logger: object with an ``info`` method; when omitted a standard
            ``logging`` logger is used.
        redo: rebuild every intermediate table even if it already exists.
    """
    if logger is None:
        # The signature advertises the logger as optional, so fall back to a
        # standard logger instead of failing on the first ``logger.info``.
        import logging
        logger = logging.getLogger(__name__)

    table_date = yt_get_date_from_table(table)
    workdir = '//home/personalization/monitorings/browser_landings/{}'.format(
        table_date.strftime('%Y-%m-%d')
    )
    table0 = '{}/map'.format(workdir)
    table1 = '{}/reduce1'.format(workdir)
    table2 = '{}/reduce2'.format(workdir)
    # NOTE(review): '%s' is not a documented Python strftime directive; it is
    # passed through to the platform C library -- confirm the target hosts
    # support it before moving this script elsewhere.
    timestamp = int(table_date.strftime('%s'))

    if not yt_exists(table0) or redo:
        logger.info("{} -> {}".format(
            table, table0
        ))
        yt.run_map(
            redir_map,
            source_table=table,
            destination_table=table0,
            spec=SPEC10k
        )
        yt.run_sort(
            source_table=table0,
            destination_table=table0,
            sort_by=['yandexuid', 'unixtime']
        )
    if not yt_exists(table1) or redo:
        logger.info("{} -> {}".format(
            table0, table1
        ))
        yt.run_reduce(
            landing_reduce_1,
            source_table=table0,
            destination_table=table1,
            reduce_by='yandexuid'
        )
        yt.run_sort(
            source_table=table1,
            destination_table=table1,
            sort_by='referer'
        )
    if not yt_exists(table2) or redo:
        logger.info("{} -> {}".format(
            table1, table2
        ))
        yt.run_reduce(
            landing_reduce_2,
            source_table=table1,
            destination_table=table2,
            reduce_by='referer'
        )

    showlandings_total = 0
    products = {}
    conversions = {}
    logger.info('Getting records from {}...'.format(table2))
    for rec in yt.read_table(table2, raw=False):
        if 'desktop' in rec['key']:
            products[rec['key']] = float(rec['product'])
            conversions[rec['key']] = float(rec['conversion'])
            showlandings_total += int(rec['showlanding'])

    project = 'BrowserLandingEfficiency'
    for referer in products:
        conversion = conversions[referer]
        logger.info('Pushing {}={} to razladki'.format(referer, conversion))
        data = {
            'param': '{}_kpi'.format(referer),
            'value': conversion,
            'ts': timestamp
        }
        push_to_razladki(data, project=project, override=True)

    # A plain division raised ZeroDivisionError when no 'desktop' row was
    # found or every row had zero showlandings. safediv is the helper
    # landing_reduce_2 already uses for the per-referer ratio.
    # NOTE(review): safediv is assumed to return a numeric fallback for a zero
    # denominator, as its use in landing_reduce_2 implies -- confirm in pytils.
    data = {
        'param': 'total_kpi',
        'value': safediv(sum(products.values()), showlandings_total),
        'ts': timestamp
    }
    push_to_razladki(data, project=project, override=True)

    # Only the final reduce2 table is kept; the intermediates are dropped.
    yt.remove(table0)
    yt.remove(table1)


# Start the monitoring run only when the file is executed as a script.
if __name__ == "__main__":
    main()
