#!/usr/bin/env python
# -*- coding: utf-8 -*-
# from __future__ import unicode_literals
from __future__ import division
import os
import codecs
import argparse
from nile.api.v1 import (
    clusters,
    aggregators as na,
    Record
)
import re
import getpass
from collections import defaultdict
import datetime
from pytils import (
    yt_config_set_defaults, make_logger,
    push_to_razladki, from_bytes, yt_date_to_ts
)
from monytoring import Monitoring
import yt.wrapper as yt


def antinag_reduce(groups):
    """Count, per group, the 'show' events that follow the earliest 'close'.

    Args:
        groups: iterable of ``(key, records)`` pairs. Every record must
            expose ``eventtype`` and ``unixtime`` attributes.

    Yields:
        One ``Record`` per group that contains at least one 'close' event,
        with ``shows_after_close`` (number of 'show' events whose time is
        greater than the earliest 'close' time) and
        ``shows_after_close_binary`` (1 if that number is non-zero, else 0).
        Groups without a 'close' event produce nothing.
    """
    for _key, records in groups:
        event_times = defaultdict(list)
        for index, record in enumerate(records, 1):
            # Bound the work per group: the loop stops on the 100th record,
            # so only the first 99 records of a group are examined.
            if index >= 100:
                break
            event_times[record.eventtype].append(record.unixtime)

        closes = event_times.get('close')
        if not closes:
            continue

        # Compare whole timestamps. The previous ``key=lambda x: x[0]`` sort
        # ordered string timestamps by their first character only, so the
        # "first" close was simply the first one in input order.
        # NOTE(review): the mappers in this file emit timestamps as strings,
        # so the comparison is lexicographic; that matches chronological
        # order only for equal-width digit strings -- confirm upstream.
        first_close = min(closes)
        shows_after = sum(
            1 for moment in event_times.get('show', ())
            if moment > first_close
        )
        yield Record(
            shows_after_close_binary=(1 if shows_after else 0),
            shows_after_close=shows_after
        )


def get_srctables(self):
    """Return the sorted search results for daily ``atom_banners`` tables.

    Searches YT under ``self.prefix`` and keeps the paths that end with
    ``v4_daily/<YYYY-MM-DD>/atom_banners``.
    """
    daily_table = re.compile(
        r'v4_daily/[0-9]{4}-[0-9]{2}-[0-9]{2}/atom_banners$'
    )

    def is_daily_table(path):
        # Truthy (a match object) only for paths ending in the daily table.
        return daily_table.search(path)

    found = yt.search(root=self.prefix, path_filter=is_daily_table)
    return sorted(found)


# Module-level lookup table filled in by main() from context_dict.txt:
# for every line whose first whitespace-separated field is '3', the second
# field is mapped to the third.
os_dict = {}


def main():
    """Parse the command line, load the dictionary and start the monitoring.

    Command-line options ``--nolock``, ``--from``/``-f`` and ``--to``/``-t``
    are parsed and handed over unchanged to ``Monitoring.start``.

    Side effects: applies the YT config defaults, reads ``context_dict.txt``
    from the current working directory into the module-level ``os_dict``,
    and runs the monitoring with ``process_table`` as its per-date callback.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--nolock', action='store_true')
    parser.add_argument('--from', '-f')
    parser.add_argument('--to', '-t')
    args = parser.parse_args()

    yt_config_set_defaults(yt)

    script_path = os.path.abspath(__file__)
    logger = make_logger(script_path)

    with codecs.open('context_dict.txt', 'r', 'utf8') as f:
        for line in f:
            fields = line.split()
            # Only lines of type '3' are of interest. Require all three
            # fields so that a truncated line is skipped instead of raising
            # IndexError and aborting the whole run.
            if len(fields) >= 3 and fields[0] == '3':
                os_dict[fields[1]] = fields[2]

    monitoring = Monitoring(
        prefix='//home/personalization/v4_daily',
        filepath=script_path,
        default_id='2017-04-08',
        process_table=process_table,
        logger=logger,
        mode='dates',
        confirmation=True,
        today=False
    )

    monitoring.start(args)


def parseparams(value, sep='\t'):
    """Parse a ``sep``-separated string of ``key=value`` items into a mapping.

    Args:
        value: text, or UTF-8 encoded bytes (undecodable bytes are replaced).
        sep: separator between items; a tab by default.

    Returns:
        A ``defaultdict`` whose missing keys read as ``''``. An item with an
        ``=`` maps the part before the first ``=`` to everything after it;
        an item without ``=`` maps the whole item to ``'SINGLE'``. When a key
        occurs more than once the last occurrence wins.
    """
    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3, so the
    # check works on both without naming the Python-2-only builtin.
    text_type = type(u'')
    if not isinstance(value, text_type):
        value = value.decode('utf8', errors='replace')

    result = defaultdict(lambda: '')
    for item in value.split(sep):
        # Split on the first '=' only, so values may contain '=' themselves.
        name, equals, payload = item.partition('=')
        result[name] = payload if equals else 'SINGLE'
    return result


def parse_path(lid):
    """Extract ``source`` and ``action`` from a ``v14.dist.popup`` path.

    Args:
        lid: a dot-separated path string, or an already split list of
            path components.

    Returns:
        ``{'source': <second-to-last component>, 'action': <last component>}``
        when the path starts with the components ``v14``, ``dist``, ``popup``
        and has at least two further components; otherwise ``{}``.
    """
    prefix = ['v14', 'dist', 'popup']
    # (type(u''), type('')) is (unicode, str) on Python 2 and (str, str) on
    # Python 3 -- a portable stand-in for the Python-2-only ``basestring``.
    if isinstance(lid, (type(u''), type(''))):
        lid = lid.split('.')
    if lid[:3] != prefix:
        return {}
    # With fewer than two components after the prefix, the last two items
    # would be prefix components themselves ('dist'/'popup'), so there is
    # nothing meaningful to report.
    if len(lid) < len(prefix) + 2:
        return {}
    return {'source': lid[-2], 'action': lid[-1]}


def bs_map(records):
    """Emit a 'show' event for records that carry a yabs popup block.

    For every record with a non-empty ``blocks`` field, the first
    tab-separated block starting with ``v14.dist.popup`` is parsed with
    ``parse_path``. If its source is ``'yabs'`` and the record's ``cookies``
    field (items separated by ``'; '``) contains a non-empty ``yandexuid``,
    a ``Record`` with ``yandexuid``, ``eventtype='show'``, ``unixtime``
    (stringified ``rec.unixtime``) and ``source='yabs'`` is yielded.

    Args:
        records: iterable of input records.

    Yields:
        ``Record`` objects as described above.
    """
    text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
    for rec in records:
        blocks = getattr(rec, 'blocks', '')
        if not blocks:
            continue
        # Decode only raw bytes; text would not survive a second decode.
        if not isinstance(blocks, text_type):
            blocks = blocks.decode('utf8', errors='replace')

        parsed = {}
        # Only the first popup block is considered, whatever its source is.
        # NOTE(review): a later yabs block in the same record is ignored --
        # confirm that this is intended.
        for block in blocks.split('\t'):
            if block.startswith('v14.dist.popup'):
                parsed = parse_path(block)
                break
        if parsed.get('source', '') != 'yabs':
            # Skip before touching cookies: most records never get here.
            continue

        # A missing or null cookies field is treated as an empty string
        # rather than crashing the mapper inside parseparams.
        cookies = parseparams(getattr(rec, 'cookies', '') or '', sep='; ')
        yandexuid = cookies.get('yandexuid')
        if not yandexuid:
            continue

        yield Record(
            yandexuid=yandexuid,
            eventtype='show',
            unixtime=str(rec.unixtime),
            source='yabs'
        )


def redir_map(records):
    """Emit a 'close' event for records whose lid is a yabs popup 'no'.

    Each record's ``value`` is parsed with ``parseparams``. Records lacking
    a non-empty ``yandexuid``, ``unixtime`` or ``lid`` are skipped, as are
    those whose ``lid`` does not parse (via ``parse_path``) to source
    ``'yabs'`` with action ``'no'``.

    Args:
        records: iterable of input records exposing a ``value`` attribute.

    Yields:
        ``Record`` with ``source='yabs'``, ``yandexuid``, ``unixtime``
        (stringified) and ``eventtype='close'``.
    """
    required = ('yandexuid', 'unixtime', 'lid')
    for rec in records:
        params = parseparams(rec.value)
        if not all(params.get(name, '') for name in required):
            continue

        path = parse_path(params['lid'])
        if path.get('action', '') != 'no':
            continue
        if path.get('source', '') != 'yabs':
            continue

        yield Record(
            source='yabs',
            yandexuid=params['yandexuid'],
            unixtime=str(params['unixtime']),
            eventtype='close'
        )


def process_table(date, logger=None):
    """Build the shows-after-close aggregate for one day and publish it.

    Runs a Nile job on Hahn that maps the blockstat logs with ``bs_map`` and
    the redir log with ``redir_map``, groups the combined events by
    ``yandexuid``, reduces them with ``antinag_reduce`` and stores the
    aggregated means, total and user count in
    ``//home/personalization/monitorings/antinag_bs/shows_after_close_aggregated/<date>``.
    Every row of that table is then pushed to razladki twice (project
    ``BKAntiNag``): the binary mean as ``portal_popup`` and the total as
    ``portal_popup_total``.

    Args:
        date: the day to process; it is formatted into the table paths.
            NOTE(review): presumably a date object or a YYYY-MM-DD string
            supplied by Monitoring -- confirm against the caller.
        logger: passed through to ``push_to_razladki``.

    Returns:
        ``True`` when ``date`` differs from today's date; otherwise the
        function falls off the end and returns ``None``.
    """
    bs_path = '//logs/{{search-blockstat-log,morda-blockstat-log,mobreport-blockstat-log}}/1d/{}'.format(date)
    redir_path = '//logs/redir-log/1d/{}'.format(date)
    day = format(date)

    job_root = 'home/personalization/monitorings/antinag_bs'
    out_table = '//{}/shows_after_close_aggregated/{}'.format(job_root, day)

    pool = 'search-research_{}'.format(getpass.getuser())
    cluster = clusters.Hahn(pool=pool).env(
        templates={'job_root': job_root, 'date': day}
    )
    job = cluster.job()

    # Two event streams: popup shows (blockstat) and popup closes (redir).
    show_events = job.table(bs_path).map(bs_map)
    close_events = job.table(redir_path).map(redir_map)
    all_events = job.concat(show_events, close_events).sort(
        'yandexuid', 'unixtime'
    )

    per_user = all_events.groupby('yandexuid').reduce(antinag_reduce)
    per_user.aggregate(
        shows_after_close_mean=na.mean('shows_after_close'),
        shows_after_close_total=na.sum('shows_after_close'),
        shows_after_close_binary_mean=na.mean('shows_after_close_binary'),
        total_users=na.count()
    ).put(out_table)

    job.run()

    project = 'BKAntiNag'
    # (razladki parameter name, column of the aggregated table)
    metrics = (
        ('portal_popup', 'shows_after_close_binary_mean'),
        ('portal_popup' + '_total', 'shows_after_close_total'),
    )
    for row in yt.read_table(out_table, raw=False):
        row = from_bytes(row)
        for param, column in metrics:
            point = {
                'param': param,
                'value': row[column],
                'ts': yt_date_to_ts(day)
            }
            push_to_razladki(data=point,
                             project=project,
                             override=True,
                             logger=logger)

    if datetime.datetime.now().date() != date:
        return True


# Start the monitoring only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
