# -*- coding: utf-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime
import uatraits, json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os

def map_block(block):
    """Collapse a raw ``from_block`` value into a coarse block group.

    Rules are applied in order; the first one that matches wins:

    1. An empty/falsy value or a value containing ``'%'`` maps to ``'-'``.
    2. A value containing one of the known markers maps to that marker's
       label (``'fb_'``, ``'mt_'`` and ``'vk_'`` lose the trailing underscore).
    3. A value starting with one of the known prefixes maps to that prefix.
    4. Anything else maps to its first two ``'_'``-separated parts.
    """
    # (substring to look for, label to return) -- order is significant.
    contains_rules = (
        ('adw', 'adw'),
        ('digital', 'digital'),
        ('efir', 'efir'),
        ('fb_', 'fb'),
        ('gdn', 'gdn'),
        ('media-episode', 'media-episode'),
        ('media-footer', 'media-footer'),
        ('morda_teaser', 'morda_teaser'),
        ('mt_', 'mt'),
        ('rsya', 'rsya'),
        ('serp_podteaser', 'serp_podteaser'),
        ('tv_online', 'tv_online'),
        ('vk_', 'vk'),
        ('youtube', 'youtube'),
        ('celeb', 'celeb'),
        ('searchapp_mordacard', 'searchapp_mordacard'),
    )
    # Prefixes are only consulted when no substring rule matched.
    prefix_rules = (
        'logo_partner_player',
        'partner_context_menu',
        'player_context_menu_yavideo',
        'player_share_button_zen:article',
    )

    if not block or '%' in block:
        return '-'

    for needle, label in contains_rules:
        if needle in block:
            return label

    for prefix in prefix_rules:
        if block.startswith(prefix):
            return prefix

    # With a single part the join simply reproduces the value itself.
    return "_".join(block.split('_')[:2])


def change_from_blocks(from_block):
    """Classify a mapped block group as ``'all_paid'`` or ``'all_organic'``.

    Groups in the fixed list below count as paid; every other value is
    treated as organic.
    """
    paid_groups = (
        'tv_online', 'rsya', 'gdn', 'morda_teaser', 'adw', 'digital', 'mt',
        'efir', 'fb', 'serp_podteaser', 'vk', 'celeb', 'youtube',
    )
    return 'all_paid' if from_block in paid_groups else 'all_organic'


def change_source(source):
    """Return the aggregated display name for a raw ``source`` value.

    Sources whose name contains ``'touch'`` or ``'appsearch'`` get the
    touch label; all other sources get the default label.
    """
    touch_markers = ('touch', 'appsearch')
    is_touch = any(marker in source for marker in touch_markers)
    if is_touch:
        return u'Эфир-тач'
    return u'Эфир'


def recs_combination(recs):
    """Expand each input record into every slice combination it belongs to.

    For each record four axes are built -- raw block, block group, source and
    channel -- each holding the record's own value(s) plus the ``'_total_'``
    marker. One output ``Record`` is yielded per element of the Cartesian
    product of those axes, carrying the record's ``reqid``, ``yandexuid``
    and ``hb`` unchanged.
    """
    total = '_total_'

    for rec in recs:
        block_group = map_block(rec.from_block)

        raw_block_axis = (rec.from_block, total)
        # Sets drop duplicates when the group/label coincides with another value.
        block_group_axis = {block_group, total, change_from_blocks(block_group)}
        source_axis = {rec.source, total, change_source(rec.source)}
        channel_axis = (rec.computed_channel, total)

        combos = product(raw_block_axis, block_group_axis, source_axis, channel_axis)
        for raw_block, group, source, channel in combos:
            yield Record(
                fielddate=rec.date,
                from_block=raw_block,
                mapped_from_block=group,
                source=source,
                computed_channel=channel,
                reqid=rec.reqid,
                yandexuid=rec.yandexuid,
                hb=rec.hb,
            )


def redir_log_parse(date, poll_interval=60):
    """Build the per-day aggregated table of views by block/source/channel.

    Waits until the raw input table for ``date`` exists and is non-empty,
    then runs a job that joins it with the channel metadata table, expands
    every row into its slice combinations (``recs_combination``), aggregates
    the counters per slice and writes ``$job_root/final_<date>``.

    :param date: day suffix of the table names, e.g. ``'2020-01-31'``
        (whatever string the caller appends to the ``raw_`` / ``final_`` paths).
    :param poll_interval: seconds to sleep between checks for the raw input
        table. Without a pause the wait loop would spin and issue requests
        to the cluster back to back.
    """
    # Local import keeps this block self-contained.
    import time

    raw_path = '//home/videolog/vika-pavlova/one_parse_from_redir_log/raw_' + date

    cluster = clusters.yt.Hahn(pool='vika-pavlova').env(
        templates=dict(job_root='home/videolog/vika-pavlova/3671-mapped_from_blocks'),
        yt_spec_defaults=dict(pool_trees=["physical"],
                              use_default_tentative_pool_trees=True),
        parallel_operations_limit=10
    )

    yt = cluster.driver.client

    def exists_and_not_empty(path, yt):
        return yt.exists(path) and not yt.is_empty(path)

    # Block until the upstream raw table is ready, pausing between polls.
    while not exists_and_not_empty(raw_path, yt):
        time.sleep(poll_interval)
    print('yes')

    job = cluster.job()

    # Keep only rows from the sources this report covers.
    raw = job.table(raw_path).filter(
        sf.custom(
            lambda x: x in ['morda', 'morda_touch', 'videohub', 'videohub_touch', 'efir',
                            'efir_touch', 'streamhandler_other', 'streamhandler_appsearch'],
            'source'
        )
    )

    channels = job.table('home/videolog/strm_meta/iron_branch/concat').project(
        'JoinKey', 'computed_channel', 'computed_program'
    )

    # Attach channel info; missing values are replaced with the string "None".
    mapped_from_blocks = raw.join(
        channels, by_left='content_id', by_right='JoinKey', type='inner'
    ).project(
        'date', 'hb', 'yandexuid', 'source',
        from_block=ne.custom(lambda x: urllib.unquote(x) if x else "None", 'from_block'),
        reqid=ne.custom(lambda x: x if x else "None", 'reqid'),
        computed_channel=ne.custom(lambda x: x if x else "None", 'computed_channel'),
    )

    aggregated = mapped_from_blocks.map(
        recs_combination, memory_limit=4000
    ).groupby(
        'fielddate', 'mapped_from_block', 'from_block', 'source', 'computed_channel'
    ).aggregate(
        hb_count=na.sum('hb'),
        reqid_count=na.count_distinct('reqid'),
        views_with_hb=na.count_distinct(
            'reqid', predicate=nf.custom(lambda x: x > 0, 'hb')
        ),
        yuid_count=na.count_distinct('yandexuid'),
        yuid_count_with_hb=na.count_distinct(
            'yandexuid', predicate=nf.custom(lambda x: x > 0, 'hb')
        )
    )

    aggregated.project(
        'fielddate', 'mapped_from_block', 'computed_channel',
        'from_block', 'source', 'reqid_count', 'yuid_count',
        # NOTE(review): presumably one hb unit stands for 30 seconds of viewing -- confirm.
        tvt=ne.custom(lambda x: x * 30, 'hb_count'),
        # Empty conditional counts are reported as 0.
        yuid_count_with_hb=ne.custom(lambda x: x if x else 0, 'yuid_count_with_hb'),
        views_with_hb=ne.custom(lambda x: x if x else 0, 'views_with_hb')
    ).put('$job_root/final_' + date)

    job.run()


def put_data_to_stat(date):
    """Publish the ``final_<date>`` table to the daily Statface report.

    The upload token is read from the ``STAT_TOKEN`` environment variable
    (``KeyError`` if it is not set).

    :param date: day suffix of the ``final_`` table to publish.
    """
    stat_client = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        token=os.environ['STAT_TOKEN'],
    )

    report = ns.StatfaceReport()
    report = report.path('Video.All/tvt_efir_new')
    report = report.scale('daily')
    report = report.client(stat_client)

    source_table = '//home/videolog/vika-pavlova/3671-mapped_from_blocks/final_' + date
    report.remote_publish(
        proxy='hahn',
        table_path=source_table,
        async_mode=False,
        upload_config=False,
    )


def main():
    """Process every day in the ``--start_date`` .. ``--end_date`` range.

    For each day the aggregated table is built and then published to the
    report, in that order.
    """
    arg_parser = argparse.ArgumentParser()
    for flag in ('--start_date', '--end_date'):
        arg_parser.add_argument(flag, type=str, required=True)
    options = arg_parser.parse_args()

    days = pd.date_range(start=options.start_date, end=options.end_date)
    for day in days:
        # First 10 characters of the timestamp's string form are the date part.
        day_str = str(day)[:10]
        redir_log_parse(day_str)
        put_data_to_stat(day_str)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
