# -*- coding: utf-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime
import uatraits, json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os

def map_block(block):
    """Collapse a raw ``from_block`` value into a coarse block label.

    Falsy values map to ``'-'``.  Otherwise the first marker from the table
    below that occurs as a substring of ``block`` decides the label (a ``%``
    anywhere in the value also yields ``'-'``).  A value that matches no
    marker is returned unchanged.
    """
    if not block:
        return '-'

    # (substring marker, resulting label); order matters - the first hit wins.
    markers = (
        ('%', '-'),
        ('adw', 'adw'),
        ('digital', 'digital'),
        ('efir', 'efir'),
        ('fb_', 'fb'),
        ('gdn', 'gdn'),
        ('media-episode', 'media-episode'),
        ('media-footer', 'media-footer'),
        ('morda_teaser', 'morda_teaser'),
        ('mt_', 'mt'),
        ('rsya', 'rsya'),
        ('serp_podteaser', 'serp_podteaser'),
        ('tv_online', 'tv_online'),
        ('vk_', 'vk'),
        ('youtube', 'youtube'),
        ('celeb', 'celeb'),
        ('searchapp_mordacard', 'searchapp_mordacard'),
    )
    for marker, label in markers:
        if marker in block:
            return label
    return block

def change_from_blocks(recs):
    """Mapper: fan each input record out into block/source rollup variants.

    The raw ``from_block`` is mapped with ``map_block`` (``'-'`` when it is
    falsy).  Four records are then emitted per input record, in this order:

    1. original source, mapped block
    2. original source, rollup block (``'all_paid'`` or ``'all_organic'``)
    3. rollup source, mapped block
    4. rollup source, rollup block

    ``changed_block`` / ``changed_source`` are ``'yes'`` on a record whose
    block / source was replaced by the rollup value and ``'no'`` otherwise.
    """
    paid_blocks = ['tv_online', 'rsya', 'gdn', 'morda_teaser', 'adw', 'digital', 'mt',
                   'efir', 'fb', 'serp_podteaser', 'vk', 'celeb', 'youtube']

    for rec in recs:
        mapped = map_block(rec.from_block) if rec.from_block else '-'
        rollup_block = 'all_paid' if mapped in paid_blocks else 'all_organic'

        # (value for mapped_from_block, changed_block flag)
        block_variants = ((mapped, 'no'), (rollup_block, 'yes'))

        # Records that keep the source the record arrived with.
        for block, changed_block in block_variants:
            yield Record(date=rec.date, source=rec.source, mapped_from_block=block,
                         from_block=rec.from_block, reqid=rec.reqid,
                         yandexuid=rec.yandexuid, changed_block=changed_block,
                         changed_source='no', hb=rec.hb)

        # Records re-attributed to the rollup source; touch and appsearch
        # sources share one rollup, everything else goes to the other.
        if 'touch' in rec.source or 'appsearch' in rec.source:
            rollup_source = u'Эфир-тач'
        else:
            rollup_source = u'Эфир'

        for block, changed_block in block_variants:
            yield Record(date=rec.date, source=rollup_source, mapped_from_block=block,
                         from_block=rec.from_block, reqid=rec.reqid,
                         yandexuid=rec.yandexuid, changed_block=changed_block,
                         changed_source='yes', hb=rec.hb)

def recs_combination(recs):
    """Mapper: add ``'_total_'`` rollup combinations for every record.

    ``from_block`` is always emitted both as-is and as ``'_total_'``.
    ``mapped_from_block`` and ``source`` additionally get a ``'_total_'``
    variant only when the ``changed_block`` / ``changed_source`` flags say the
    field still holds its original value:

    * ``('no', 'no')``   - both fields are expanded (8 records)
    * ``('yes', 'no')``  - only ``source`` is expanded (4 records)
    * ``('no', 'yes')``  - only ``mapped_from_block`` is expanded (4 records)
    * anything else      - neither is expanded (2 records)

    The input ``date`` field is emitted as ``fielddate``.
    """
    total = '_total_'

    for rec in recs:
        flags = (rec.changed_block, rec.changed_source)

        mapped_options = (rec.mapped_from_block,)
        source_options = (rec.source,)
        if flags == ('no', 'no'):
            mapped_options += (total,)
            source_options += (total,)
        elif flags == ('yes', 'no'):
            source_options += (total,)
        elif flags == ('no', 'yes'):
            mapped_options += (total,)

        combos = product((rec.from_block, total), mapped_options, source_options)
        for from_block, mapped_from_block, source in combos:
            yield Record(fielddate=rec.date,
                         from_block=from_block,
                         mapped_from_block=mapped_from_block,
                         source=source,
                         reqid=rec.reqid,
                         yandexuid=rec.yandexuid,
                         hb=rec.hb)


def redir_log_parse(date):
    """Build and run the job that produces the ``final_<date>`` table.

    Stages, in order:

    1. read ``raw_<date>`` and keep only the listed ``source`` values;
    2. inner-join with the channel metadata table on
       ``content_id`` == ``JoinKey``;
    3. replace falsy ``from_block`` / ``reqid`` with the string ``"None"``
       and fan records out with ``change_from_blocks`` (stored as
       ``$job_root/mapped_from_blocks``);
    4. expand ``'_total_'`` combinations with ``recs_combination`` and
       aggregate per (fielddate, mapped_from_block, from_block, source);
    5. write the result to ``$job_root/final_<date>``.

    :param date: string appended to the input and output table names.
    """
    hahn = clusters.yt.Hahn(pool='vika-pavlova')
    cluster = hahn.env(
        templates=dict(job_root='home/videolog/vika-pavlova/2848-mapped_from_blocks'),
        yt_spec_defaults=dict(
            pool_trees=["physical"],
            # Disabled alternative kept from the original: tentative_pool_trees=["cloud"]
            use_default_tentative_pool_trees=True,
        ),
        parallel_operations_limit=10,
    )

    job = cluster.job()

    raw_table = job.table('//home/videolog/vika-pavlova/one_parse_from_redir_log/raw_' + date)
    raw = raw_table.filter(
        sf.custom(
            lambda x: x in ['morda', 'morda_touch', 'videohub', 'videohub_touch', 'efir',
                            'efir_touch', 'streamhandler_other', 'streamhandler_appsearch'],
            'source',
        )
    )

    channels_table = job.table('home/videolog/strm_meta/iron_branch/concat')
    channels = channels_table.project('JoinKey', 'computed_channel', 'computed_program')

    joined = raw.join(channels, by_left='content_id', by_right='JoinKey', type='inner')
    prepared = joined.project(
        'date', 'hb', 'yandexuid', 'source',
        from_block=ne.custom(lambda x: x if x else "None", 'from_block'),
        reqid=ne.custom(lambda x: x if x else "None", 'reqid'),
    )
    fanned_out = prepared.map(change_from_blocks, memory_limit=4000)
    mapped_from_blocks = fanned_out.put('$job_root/mapped_from_blocks')

    combined = mapped_from_blocks.map(recs_combination, memory_limit=4000)
    grouped = combined.groupby('fielddate', 'mapped_from_block', 'from_block', 'source')
    aggregated = grouped.aggregate(
        hb_count=na.sum('hb'),
        reqid_count=na.count_distinct('reqid'),
        yuid_count=na.count_distinct('yandexuid'),
        # Distinct users counted only over rows with hb > 0.
        yuid_count_with_hb=na.count_distinct(
            'yandexuid',
            predicate=nf.custom(lambda x: x > 0, 'hb'),
        ),
    )
    report_rows = aggregated.project(
        'fielddate', 'mapped_from_block', 'from_block', 'source',
        'reqid_count', 'yuid_count',
        # NOTE(review): presumably one hb unit stands for 30 seconds - confirm.
        tvt=ne.custom(lambda x: x*30, 'hb_count'),
        # Falsy count (e.g. no row matched the predicate) is reported as 0.
        yuid_count_with_hb=ne.custom(lambda x: x if x else 0, 'yuid_count_with_hb'),
    )
    report_rows.put('$job_root/final_' + date)

    job.run()


def put_data_to_stat(date):
    """Publish the ``final_<date>`` table to the daily Statface report.

    The Statface token is read from the ``STAT_TOKEN`` environment variable;
    a missing variable raises ``KeyError``.

    :param date: string appended to the ``final_`` table name.
    """
    stat_client = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        token=os.environ['STAT_TOKEN'],
    )

    report = ns.StatfaceReport()
    report = report.path('Video.All/tvt_per_user_efir')
    report = report.scale('daily')
    report = report.client(stat_client)

    report.remote_publish(
        proxy='hahn',
        table_path='//home/videolog/vika-pavlova/2848-mapped_from_blocks/final_' + date,
        async_mode=False,
        upload_config=False,
    )


def main():
    """Parse ``--start_date`` / ``--end_date`` and process each day in the range.

    For every date produced by ``pd.date_range`` the YT job is run first and
    its result is then published to Statface.
    """
    arg_parser = argparse.ArgumentParser()
    for flag in ('--start_date', '--end_date'):
        arg_parser.add_argument(flag, type=str, required=True)
    options = arg_parser.parse_args()

    days = pd.date_range(start=options.start_date, end=options.end_date)
    for day in days:
        # The first 10 characters of the timestamp's string form are the date part.
        day_str = str(day)[:10]
        redir_log_parse(day_str)
        put_data_to_stat(day_str)


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
