#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
import os
import codecs
import argparse
from nile.api.v1 import (
    clusters,
    aggregators as na,
    Record
)
import re
import getpass
from collections import defaultdict
import datetime
from pytils import (
    yt_get_date_from_table, yt_config_set_defaults, make_logger,
    push_to_razladki, from_bytes, yt_date_to_ts
)
from monytoring import Monitoring
import yt.wrapper as yt


def process_distr_obj(distr_obj, referer):
    """Refine the ``portal_popup`` distribution object by referer.

    Args:
        distr_obj: name of the distribution object.
        referer: referer string of the event.

    Returns:
        ``portal_popup_morda`` or ``portal_popup_serp`` when ``distr_obj`` is
        ``portal_popup`` and the referer is ``yandex.ru`` or
        ``yandex.ru/search`` respectively; otherwise ``distr_obj`` unchanged.
    """
    if distr_obj == 'portal_popup':
        for page, specific_name in (
            ('yandex.ru', 'portal_popup_morda'),
            ('yandex.ru/search', 'portal_popup_serp'),
        ):
            if referer == page:
                return specific_name
    return distr_obj


def antinag_map(groups):
    """Normalize raw banner events into show / trueinstall records.

    Every record whose ``eventtype`` is ``show`` or ``trueinstall`` is turned
    into a ``Record`` with the fields ``user_id``, ``distr_obj``, ``reqid``,
    ``eventtype``, ``product`` and ``unixtime``.

    A record is skipped when:
      * its ``distr_obj`` (with ``.training`` removed) is ``promofooter`` or
        ``promofooter_mobile``;
      * it is a ``show`` with an empty ``product`` or an empty ``distr_obj``;
      * its ``unixtime`` cannot be converted to ``int``;
      * the resulting ``distr_obj`` is empty.

    Args:
        groups: iterable of ``(key, records)`` pairs.

    Yields:
        Record: one normalized event per accepted input record.
    """
    for key, records in groups:
        # NOTE(review): user_id is reset per group, not per record, so a
        # record with neither yandexuid nor device_id reuses the id of the
        # previous record in the same group - confirm this is intended.
        user_id = ""
        for record in records:
            if record.eventtype not in {"show", "trueinstall"}:
                continue

            base_distr_obj = record.distr_obj.decode(
                'utf8', errors='replace'
            ).replace(".training", "")
            if (
                base_distr_obj in {"promofooter", "promofooter_mobile"} or
                (record.eventtype == "show" and not record.product) or
                (record.eventtype == "show" and not record.distr_obj)
            ):
                continue

            # OS suffix: prefer the explicit ``os`` field, otherwise look up
            # the code stored in characters 6-7 of ``showid`` in os_dict.
            os_ = ""
            if hasattr(record, 'os') and record.os in {'iOS', 'Android'}:
                os_ = '_{}'.format(record.os)
            elif (
                hasattr(record, 'showid') and
                os_dict.get(record.showid[6:8], '') in {'iOS', 'Android'}
            ):
                os_ = '_{}'.format(os_dict[record.showid[6:8]])

            if getattr(record, 'yandexuid', ''):
                user_id = record.yandexuid
            elif getattr(record, 'device_id', ''):
                user_id = record.device_id

            reqid = record.reqid
            distr_obj = process_distr_obj(
                record.distr_obj, record.referer
            ) + os_

            try:
                unixtime = int(record.unixtime)
            except (AttributeError, TypeError, ValueError, OverflowError):
                # Best effort: drop events without a usable timestamp, but
                # do not swallow KeyboardInterrupt / SystemExit.
                continue

            if record.eventtype == 'trueinstall' and os_:
                # 10800 s = 3 h. Presumably a timezone correction for
                # mobile trueinstall timestamps - TODO confirm.
                unixtime += 10800

            if distr_obj:
                yield Record(
                    user_id=user_id,
                    distr_obj=distr_obj,
                    reqid=reqid,
                    eventtype=record.eventtype,
                    product=record.product,
                    unixtime=unixtime
                )


def antinag_reduce(groups, t1, t2):
    """Count shows that happened after the first trueinstall in each group.

    Only the first 99 records of a group are considered. Groups without a
    ``trueinstall`` event produce no output. For the others, the earliest
    ``trueinstall`` (by ``unixtime``) defines a reference time and product;
    a ``show`` counts when it is strictly later and has the same product.

    Args:
        groups: iterable of ``(key, records)`` pairs; ``key.distr_obj`` is
            copied into the output.
        t1: callable receiving one summary ``Record`` per group
            (``distr_obj``, ``shows_after_ti_binary``, ``shows_after_ti``).
        t2: callable receiving one ``Record`` per counted show
            (``requestId``, ``subkey``).
    """
    for key, records in groups:
        events_by_type = defaultdict(list)
        for seen, record in enumerate(records, 1):
            # Stop on the 100th record, i.e. keep at most 99 per group.
            if seen >= 100:
                break
            events_by_type[record.eventtype].append(
                (record.unixtime, record.reqid, record.product)
            )

        installs = events_by_type['trueinstall']
        if not installs:
            continue

        # min() returns the first minimal element, which is the same event
        # a stable sort by timestamp would place first.
        earliest_install = min(installs, key=lambda event: event[0])
        install_time = earliest_install[0]
        installed_product = earliest_install[2]

        later_shows = []
        for show in events_by_type['show']:
            if show[0] > install_time and show[2] == installed_product:
                later_shows.append(show)

        t1(Record(
            distr_obj=key.distr_obj,
            shows_after_ti_binary=int(bool(later_shows)),
            shows_after_ti=len(later_shows)
        ))
        for show in later_shows:
            t2(Record(
                requestId=show[1],  # reqid
                subkey=key.distr_obj
            ))


def get_srctables(self):
    """Return the sorted daily ``atom_banners`` paths found under ``self.prefix``.

    A path is kept when it ends with ``v4_daily/<YYYY-MM-DD>/atom_banners``.

    Args:
        self: object exposing a ``prefix`` attribute used as the search root
            (this function is handed to ``Monitoring`` as a callback;
            presumably it is called with the monitoring instance).

    Returns:
        Sorted list of the matching paths yielded by ``yt.search``.
    """
    def is_atom_banners_table(path):
        return re.search(
            r'v4_daily/[0-9]{4}-[0-9]{2}-[0-9]{2}/atom_banners$', path
        )

    found = yt.search(root=self.prefix, path_filter=is_atom_banners_table)
    return sorted(found)


# Lookup table used by antinag_map: maps the code found in showid[6:8] to an
# OS name. Filled in by main() from the rows of context_dict.txt whose first
# field is '3' (second field -> key, third field -> value).
os_dict = {}


def main():
    """Script entry point: parse arguments, load os_dict, run the monitoring.

    Steps:
      1. Parse ``--nolock``, ``--debug``, ``--from/-f`` and ``--to/-t``.
      2. Apply the default YT configuration and create a logger.
      3. Fill ``os_dict`` from ``context_dict.txt`` (read from the current
         working directory): rows whose first field is ``3`` contribute
         ``second field -> third field``.
      4. Build a ``Monitoring`` over the daily ``atom_banners`` tables and
         start it with ``process_table`` as the per-table handler.
    """
    arg_parser = argparse.ArgumentParser()
    for flag in ('--nolock', '--debug'):
        arg_parser.add_argument(flag, action='store_true')
    arg_parser.add_argument('--from', '-f')
    arg_parser.add_argument('--to', '-t')
    args = arg_parser.parse_args()

    yt_config_set_defaults(yt)

    script_path = os.path.abspath(__file__)
    logger = make_logger(script_path, debug=args.debug)

    with codecs.open('context_dict.txt', 'r', 'utf8') as dict_file:
        for line in dict_file:
            fields = line.split()
            if not fields or fields[0] != '3':
                continue
            os_dict[fields[1]] = fields[2]

    monitoring = Monitoring(
        prefix='//home/personalization/v4_daily',
        filepath=script_path,
        default_id='//home/personalization/v4_daily/2016-10-30/atom_banners',
        str_to_table_id=yt_get_date_from_table,
        get_srctables=get_srctables,
        process_table=process_table,
        logger=logger,
        confirmation=True,
        today=False
    )

    monitoring.start(args, {'debug': args.debug})


def process_table(table, logger=None, debug=False):
    """Compute the "shows after trueinstall" metrics for one daily table.

    Builds and runs a job on the Hahn cluster that maps the source table with
    ``antinag_map``, reduces it per ``(user_id, distr_obj)`` with
    ``antinag_reduce`` and aggregates the result per ``distr_obj`` into
    ``//home/personalization/monitorings/antinag_ti/
    shows_after_close_aggregated/<date>``. Each aggregated row is then pushed
    to the ``AtomAntiNagTI`` project twice: the binary mean under the
    ``distr_obj`` name and the total under ``<distr_obj>_total``.

    Args:
        table: path of the source table; its date is extracted with
            ``yt_get_date_from_table``.
        logger: logger forwarded to ``push_to_razladki``.
        debug: when true, the mapped stream is also stored under
            ``$job_root/map_test/<date>``.

    Returns:
        ``True`` when the table date differs from today's local date,
        otherwise ``None``.
    """
    date = yt_get_date_from_table(table)
    date_s = format(date)

    job_root = 'home/personalization/monitorings/antinag_ti'
    out_table = '//{}/shows_after_close_aggregated/{}'.format(
        job_root, date_s
    )

    cluster = clusters.Hahn(
        pool='search-research_{}'.format(getpass.getuser())
    )
    hahn = cluster.env(templates=dict(job_root=job_root, date=date_s))
    job = hahn.job()

    source = job.table(table)
    an_mapped = source.groupby("key").reduce(antinag_map)

    if debug:
        an_mapped.put('$job_root/map_test/{}'.format(date))

    # The second output (request ids of the counted shows) is not stored.
    per_user = an_mapped.groupby('user_id', 'distr_obj')
    an_reduced, reqids = per_user.reduce(antinag_reduce)

    per_distr_obj = an_reduced.groupby('distr_obj').aggregate(
        shows_after_ti_mean=na.mean('shows_after_ti'),
        shows_after_ti_total=na.sum('shows_after_ti'),
        shows_after_ti_binary_mean=na.mean('shows_after_ti_binary'),
        total_users=na.count()
    )
    per_distr_obj.put(out_table)

    job.run()

    project = 'AtomAntiNagTI'

    def push_point(param, value):
        # One data point, stamped with the table date.
        push_to_razladki(
            data={
                'param': param,
                'value': value,
                'ts': yt_date_to_ts(date_s)
            },
            project=project,
            override=True,
            logger=logger
        )

    for rec in yt.read_table(out_table, raw=False):
        rec = from_bytes(rec)
        push_point(rec['distr_obj'], rec['shows_after_ti_binary_mean'])
        push_point(rec['distr_obj'] + '_total', rec['shows_after_ti_total'])

    is_other_day = datetime.datetime.now().date() != date
    return True if is_other_day else None


if __name__ == "__main__":
    main()
