# -*- coding: utf-8 -*-

from qb2.api.v1 import extractors as qe, filters as qf
from qb2.api.v1.typing import *
from urllib2 import unquote
from nile.api.v1 import Record, with_hints, aggregators as na, extractors as ne, cli
from collections import OrderedDict


# Output row schema for the records yielded by extract_daily_money_features;
# it is handed to @with_hints on that mapper.
# The type names (String, UInt64, Float) are not defined in this file —
# presumably they come from the `qb2.api.v1.typing` star import (TODO confirm).
table_schema_0 = {
    "key": String,
    "yuid": UInt64,
    "type": String,
    "page": String,
    "portalpage": String,
    "block": String,
    "value": Float,
    "ui": String,
}


# Nested lookup table: top-level keys match NAMES['types'], second-level keys
# match NAMES['portalpages'], and every leaf list contains names found in
# NAMES['blocks'] (presumably: traffic type -> portal page -> blocks to report).
# Within this file only the keys of the "nonstop" entry are read
# (see make_portalpage); the remaining entries are not referenced here.
METRICS_COMBINATIONS = OrderedDict([
    ('total', {'total': ['total']}),
    ('rsya', OrderedDict([
        ('total', ['total']),
        ('touch-phone-search', ['total', 'search', 'pane']),
        ('desktop-search', ['total', 'search', 'right', 'footer']),
        ('touch-phone-preview', ['total', 'pane', 'organic', 'first-organic', 'touch-under-player']),
        ('desktop-preview', ['total', 'organic', 'right-column', 'under-player', 'footer', 'video']),
        ])),
    ('distribution', {'total': ['total']}),
    ('nonstop', OrderedDict([
        ('total', ['total']),
        ('nonstop_common', ['total']),
        ('nonstop_adult', ['total']),
        ])),
    ('video_promo', {'total': ['total']}),
    ('videohosting', OrderedDict([
        ('total', ['total']),
        ('instream', ['total']),
        ('interstitial', ['total']),
        ('inpage', ['total']),
        ])),
])


# Display labels (mostly Russian) keyed by internal identifier, grouped by
# dimension. In this file only the KEY SETS of 'portalpages' and 'blocks' are
# used: extract_daily_money_features keeps a non-videohosting record only if
# its portalpage and block are keys of those two dicts. The label values and
# the 'types', 'players' and 'plus' groups are not read anywhere in this file
# (presumably consumed by a reporting layer elsewhere — TODO confirm).
NAMES = {
    # Keys mirror the top-level keys of METRICS_COMBINATIONS.
    'types': {
        'total': 'всего',
        'rsya': 'РСЯ',
        'distribution': 'дистрибуция',
        'nonstop': 'нонстоп',
        'video_promo': 'видеопродвижение',
        'videohosting': 'видеохостинг',
    },
    # Acts as the whitelist of accepted portalpage values.
    'portalpages': {
        'total': 'всего',
        'touch-phone-search': 'тач выдача',
        'touch-phone-preview': 'тач просмотрщик',
        'desktop-preview': 'десктоп просмотрщик',
        'desktop-search': 'десктоп выдача',
        'inpage': 'inpage',
        'instream': 'instream',
        'interstitial': 'interstitial',
        'nonstop_adult': 'порно',
        'nonstop_common': 'не-порно',
    },
    # Acts as the whitelist of accepted block values.
    'blocks': {
        'total': 'всего',
        'search': 'поиск',
        'under-player': 'под плеером',
        'organic': 'органика',
        'touch-organic': 'органика',
        'touch-under-player': 'под плеером',
        'first-organic': 'первый блок органики',
        'right': 'правая колонка',
        'right-column': 'правая колонка',
        'footer': 'футер',
        'pane': 'распашонка',
        'video': 'нонстоп',
        'player': 'плеер',
        # NOTE(review): placeholder label; the key still whitelists the block.
        'right-float': '???',
    },
    # Not referenced in this file.
    'players': {
        'serp': 'на выдаче',
        'related': 'в связанных',
        'total': 'всего',
        'jsapi': 'jsapi',
        'other': 'не jsapi',
        'youtube': 'youtube',
        'vh': 'видеохостинг',
        'ott': 'ott'
    },
    # Not referenced in this file.
    'plus' : {
        "total": "всего",
        "unknown": "unknown",
        "serp": "выдача",
        "viewer": "просмотрщик",
        "party": "совместный просмотр",
        "pane": "кнопка",
        "player": "плеер",
        "creative": "баннер",
        "plus": "Плюс",
        "amedia": "Амедиа",
        "moretv": "MoreTv",
        "tvod": "TVOD",
        "est": "EST",
        "desktop": "десктоп",
        "touch-phone": "тачи",
    }
}


# Known advertising page ids mapped to their attributes.
# In this file the dict is used in two ways only:
#   * membership (`pageid in PAGE_IDS`) decides whether a joined record is kept;
#   * the 'type' attribute becomes the traffic type of the emitted rows.
# The 'device', 'region', 'porn' and 'exp_only' attributes are not read here
# (presumably used by other jobs or kept as documentation — TODO confirm).
# Keys are ints, so the `pageid` field must arrive as an int for lookups to hit.
PAGE_IDS = {
    48058: {'device': 'desktop', 'region': 'RU', 'type': 'rsya'},
    113683: {'device': 'desktop', 'region': 'TR', 'type': 'rsya'},
    146642: {'device': 'touch', 'region': 'RU', 'type': 'rsya'},
    249026: {'device': 'tablet', 'region': 'RU', 'type': 'rsya'},
    249031: {'device': 'desktop', 'region': 'UA', 'type': 'rsya'},
    249037: {'device': 'desktop', 'region': 'BY', 'type': 'rsya'},
    249064: {'device': 'desktop', 'region': 'KZ', 'type': 'rsya'},
    249066: {'device': 'tablet', 'region': 'UA', 'type': 'rsya'},
    249067: {'device': 'tablet', 'region': 'BY', 'type': 'rsya'},
    249069: {'device': 'tablet', 'region': 'KZ', 'type': 'rsya'},
    249073: {'device': 'touch', 'region': 'UA', 'type': 'rsya'},
    249076: {'device': 'touch', 'region': 'BY', 'type': 'rsya'},
    249077: {'device': 'touch', 'region': 'KZ', 'type': 'rsya'},
    261275: {'device': 'tablet', 'region': 'TR', 'type': 'rsya'},
    261276: {'device': 'touch', 'region': 'TR', 'type': 'rsya'},
    267171: {'device': 'smart_tv', 'type': 'rsya'},
    277852: {'device': 'desktop', 'porn': 'porn', 'type': 'rsya'},
    287821: {'device': 'desktop', 'type': 'distribution'},
    289657: {'device': 'touch', 'type': 'distribution'},
    347423: {'device': 'tablet', 'region': 'COM', 'type': 'rsya'},
    347804: {'device': 'desktop', 'region': 'COM', 'type': 'rsya'},
    347807: {'device': 'desktop', 'region': 'UZ', 'type': 'rsya'},
    355611: {'device': 'desktop', 'type': 'video_promo'},
    381176: {'device': 'desktop', 'region': 'RU', 'type': 'nonstop'},
    400477: {'device': 'desktop', 'type': 'distribution'},
    411436: {'device': 'touch', 'porn': 'porn', 'type': 'rsya'},
    415283: {'device': 'touch', 'region': 'RU', 'type': 'nonstop'},
    463375: {'device': 'desktop', 'type': 'nonstop'},
    463379: {'device': 'desktop', 'type': 'nonstop'},
    463381: {'device': 'desktop', 'type': 'nonstop'},
    474654: {'device': 'touch', 'porn': 'porn', 'type': 'rsya'},
    474674: {'device': 'desktop', 'porn': 'porn', 'type': 'rsya'},
    613558: {'device': 'desktop', 'porn': 'porn', 'type': 'nonstop'},
    613559: {'device': 'touch', 'porn': 'porn', 'type': 'nonstop'},
    # Entries flagged 'exp_only' follow (presumably pages used only in
    # experiments — TODO confirm; the flag itself is not read in this file).
    539181: {'device': 'touch', 'type': 'nonstop', 'exp_only': True},
    539182: {'device': 'touch', 'type': 'nonstop', 'exp_only': True},
    539183: {'device': 'touch', 'type': 'nonstop', 'exp_only': True},
    539184: {'device': 'touch', 'type': 'nonstop', 'exp_only': True},
    539185: {'device': 'touch', 'type': 'nonstop', 'exp_only': True},
    539186: {'device': 'touch', 'type': 'nonstop', 'exp_only': True},
    539189: {'device': 'touch', 'type': 'nonstop', 'exp_only': True},
    539190: {'device': 'touch', 'type': 'nonstop', 'exp_only': True},
    539204: {'device': 'touch', 'type': 'nonstop', 'exp_only': True},
    539205: {'device': 'touch', 'type': 'nonstop', 'exp_only': True},
    539240: {'device': 'desktop', 'type': 'nonstop', 'exp_only': True},
    539241: {'device': 'desktop', 'type': 'nonstop', 'exp_only': True},
    539244: {'device': 'desktop', 'type': 'nonstop', 'exp_only': True},
    539246: {'device': 'desktop', 'type': 'nonstop', 'exp_only': True},
    539249: {'device': 'desktop', 'type': 'nonstop', 'exp_only': True},
    539250: {'device': 'desktop', 'type': 'nonstop', 'exp_only': True},
    539263: {'device': 'desktop', 'type': 'nonstop', 'exp_only': True},
    539272: {'device': 'desktop', 'type': 'nonstop', 'exp_only': True},
    539275: {'device': 'desktop', 'type': 'nonstop', 'exp_only': True},
    539280: {'device': 'desktop', 'type': 'nonstop', 'exp_only': True},
    580337: {'porn': 'porn', 'type': 'rsya', 'exp_only': True},
    # NOTE(review): 547969 sits between 5803xx ids — confirm it is not a typo.
    547969: {'porn': 'porn', 'type': 'rsya', 'exp_only': True},
    580341: {'porn': 'porn', 'type': 'rsya', 'exp_only': True},
    580343: {'porn': 'porn', 'type': 'rsya', 'exp_only': True},
    637165: {'device': 'desktop', 'porn': 'porn', 'type': 'nonstop', 'exp_only': True},
    637166: {'device': 'touch', 'porn': 'porn', 'type': 'nonstop', 'exp_only': True},
    694630: {'device': 'desktop', 'type': 'rsya', 'exp_only': True},
    694633: {'device': 'touch', 'type': 'rsya', 'exp_only': True},
}


def generate_all_keys(keys, index=0):
    """Yield every combination of ``keys[index:]`` with "_total_" roll-ups.

    Each position contributes its own value (or, when the entry is a list,
    each element of that list in turn) followed by the literal "_total_".
    The leftmost position varies fastest. Every yielded item is a fresh list
    whose length is ``len(keys) - index``; an exhausted ``keys`` yields a
    single empty list.
    """
    if index >= len(keys):
        yield []
        return

    for tail in generate_all_keys(keys, index + 1):
        head = keys[index]
        choices = head if isinstance(head, list) else [head]
        for choice in choices:
            yield [choice] + tail
        # After the concrete values, emit the aggregate slot for this position.
        yield ["_total_"] + tail
        

def convert_exps(exp_ids, user_id):
    """Render experiment ids as a ';'-joined "<exp_id>,0,<bucket>" string.

    ``user_id`` is first reduced to its digit characters (this file targets
    Python 2, where ``filter`` over a str returns a str); the bucket is
    ``hash(<digits>) % 100`` and is therefore the same for every experiment
    of one user. Returns "" when ``exp_ids`` is empty or None.
    """
    digits_only = filter(type(user_id).isdigit, user_id)
    if not exp_ids:
        return ""
    return ';'.join(
        exp_id + ",0," + str(hash(digits_only) % 100)
        for exp_id in exp_ids
    )


def extract_reqid(queryargs):
    """Return the URL-unquoted value of the first "303" argument.

    ``queryargs`` is an '&'-separated list of "key=value" items. The value is
    the text between the first and second '=' of the item. Returns "" when
    no "303" item with a value is present.
    """
    pairs = (chunk.split("=") for chunk in queryargs.split("&"))
    for pieces in pairs:
        if len(pieces) > 1 and pieces[0] == "303":
            return unquote(pieces[1])
    return ""


def make_portalpage(queryargs):
    """Derive the portal page name from the first "863" query argument.

    If the raw value is itself one of the keys of
    ``METRICS_COMBINATIONS["nonstop"]`` it is returned unchanged. Otherwise
    the value is URL-unquoted and read as ';'-separated "name:value" items,
    and the lower-cased value of the first "page" item is returned.
    Only the first "863" argument is inspected; None is returned when nothing
    matches.
    """
    for chunk in queryargs.split("&"):
        pieces = chunk.split("=")
        if len(pieces) < 2 or pieces[0] != "863":
            continue

        raw_value = pieces[1]
        if raw_value in METRICS_COMBINATIONS["nonstop"]:
            return raw_value

        for item in unquote(raw_value).split(";"):
            name_and_value = item.split(":")
            if len(name_and_value) > 1 and name_and_value[0] == "page":
                return name_and_value[1].lower()
        # Later "863" arguments are deliberately not examined.
        return None
    return None


def make_portalblock(queryargs):
    """Derive the block name from the first "863" query argument.

    The argument's value is URL-unquoted and read as ';'-separated
    "name:value" items; the lower-cased value of the first "block" item is
    returned. Only the first "863" argument is inspected; None is returned
    when it is absent or carries no "block" item.
    """
    for chunk in queryargs.split("&"):
        pieces = chunk.split("=")
        if len(pieces) < 2 or pieces[0] != "863":
            continue

        for item in unquote(pieces[1]).split(";"):
            name_and_value = item.split(":")
            if len(name_and_value) > 1 and name_and_value[0] == "block":
                return name_and_value[1].lower()
        # Later "863" arguments are deliberately not examined.
        return None
    return None


@with_hints(table_schema_0, ensure_optional_types=True)
def extract_daily_money_features(records):
    """Expand joined ad-log records into per-action metric rows.

    For every input record the mapper:

    1. resolves the traffic type from ``PAGE_IDS`` (falling back to
       "videohosting" when ``video_placement_name`` is set, else
       "unknown_type");
    2. picks portalpage/block from the video placement fields for
       videohosting and from ``portalpage``/``portalblock`` otherwise;
    3. derives the list of actions from the log source:
       "money" for chevent records, and "money"/"hits"/"wins"/"shows" for
       dsp records, according to ``placeid``/``countertype``/``win``/``dspid``;
    4. for records whose portalpage and block are both known to ``NAMES``
       (or whose type is videohosting) yields one ``Record`` per action and
       per key combination produced by ``generate_all_keys``.

    The "money" value is the record price divided by 1,000,000; every other
    action has value 1.

    :param records: iterable of records carrying the fields projected in
        ``add_date_to_job`` (``log_src`` is accessed by index and must exist).
    :return: generator of ``Record`` objects matching ``table_schema_0``.
    """
    for record in records:
        actions = []
        # Assigned together with the "money" action; unused otherwise.
        price = None
        ui = record.get("ui")
        page = record.get("pageid")

        # Only conversion failures mean "no usable yuid"; anything else
        # (e.g. KeyboardInterrupt) must propagate.
        try:
            yuid = int(record.get("yuid"))
        except (TypeError, ValueError, OverflowError):
            yuid = None

        if page in PAGE_IDS:
            traffic_type = PAGE_IDS[page].get("type")
        elif record.get("video_placement_name"):
            traffic_type = "videohosting"
        else:
            traffic_type = 'unknown_type'

        if traffic_type == "videohosting":
            portalpage = record.get("video_placement_name")
            block = record.get("video_type")
        else:
            portalpage = record.get("portalpage")
            block = record.get("portalblock")

        log_src = record["log_src"]
        if log_src == "chevent":
            placeid = record["placeid"]
            if (placeid == 542 and record["countertype"] == 2) or (placeid == 1542 and record["countertype"] == 1):
                price = 30.0 * record.get("eventcost", 0.0) / 1.18
                actions.append("money")
        elif log_src == "dsp":
            won = record["win"] == 1
            countertype = record["countertype"]
            # The order of the checks fixes the order of emitted actions.
            if won and countertype == 1 and record["dspid"] != 1:
                price = 1.0 * record.get("price", 0.0)
                actions.append("money")
            if countertype == 0:
                actions.append("hits")
            if won and countertype == 0 and record["dspid"] not in (5, 10):
                actions.append("wins")
            if won and countertype == 1:
                actions.append("shows")

        known_placement = portalpage in NAMES['portalpages'] and block in NAMES['blocks']
        if not (known_placement or traffic_type == 'videohosting'):
            continue

        cur_keys = [
            traffic_type,
            # The page id is kept as a dimension only for rsya traffic.
            str(page) if traffic_type == 'rsya' else None,
            portalpage,
            block,
            ui,
        ]
        for action in actions:
            value = price / 1000000 if action == "money" else 1
            # Dedicated names for the expanded keys so the per-record values
            # above are never overwritten while iterating.
            for key_type, key_page, key_portalpage, key_block, key_ui in generate_all_keys(cur_keys):
                yield Record(
                    key=action,
                    yuid=yuid,
                    type=key_type,
                    page=key_page,
                    portalpage=key_portalpage,
                    block=key_block,
                    value=value,
                    ui=key_ui,
                )
                    
                    
def add_date_to_job(job, date):
    """Attach the processing pipeline for one ``date`` to ``job``.

    Reads the per-date dsp, chevent, video and video-related tables plus the
    undated ``@vh_pages`` table (the ``@...`` names are path templates; the
    ones used here are supplied through ``job.env`` in ``make_job``), joins
    them, maps the result through ``extract_daily_money_features``,
    aggregates, and writes to ``@job_root/<date>``.

    :param job: job object exposing ``table`` and ``concat`` (presumably a
        Nile job — TODO confirm).
    :param date: value substituted into the table paths with ``%s`` and
        stored in the ``fielddate`` output column.
    :return: None; the output is registered on ``job`` through ``put``.
    """
    dsp_log = job.table('@dsp_log/%s' % date)
    chevent_log = job.table('@chevent_log/%s' % date)
    video_log = job.table('@video_log/%s' % date)
    video_related_log = job.table('@video_related_log/%s' % date)
    vh_pages = job.table('@vh_pages')

    # Video query records (main + related), restricted to rows that have
    # both `uid` and `ReqID`; `ReqID` is the join key used further down.
    # NOTE(review): `test_buckets` and most extracted fields are dropped by
    # the final `project` — of these only `ui` reaches the mapper.
    video_stream = job.concat(
        video_log,
        video_related_log,
    ).qb2(
        log='generic-yson-log',
        fields=[
            qe.log_fields('uid', 'ReqID', 'SearchPropsValues', 'RelevValues', 'testids', 'triggered_testids', 'ts', 'ui'),
            qe.custom('test_buckets', convert_exps, 'testids', 'uid').with_type(str),
        ],
        filters=[
            qf.defined('uid'),
            qf.defined('ReqID'),
        ],
        mode='records',
    )


    # Page/impression rows whose category contains 'VH:', collapsed to one
    # row per (pageid, impid) with an arbitrary video_type and
    # video_placement_name picked via `na.any`.
    vh_pages = vh_pages.qb2(
        log='generic-yson-log',
        fields=[
            qe.log_fields('pageid', 'impid', 'category', 'video_placement_name', 'video_type'),
        ],
        filters=[
            qf.custom(lambda x: 'VH:' in x, 'category'),
            qf.defined('pageid'),
        ],
        mode='records',
    ).groupby(
        'pageid', 
        'impid', 
    ).aggregate(
        video_type=na.any('video_type'),
        video_placement_name=na.any('video_placement_name'),
    )


    # DSP log: reqid / portalpage / portalblock are parsed out of
    # `queryargs`, `yuid` is the stringified `uniqid`, and rows are tagged
    # with log_src='dsp'. Kept rows have zero dspfraudbits and
    # dspeventflags, a defined non-zero bidreqid, and a reqid that is
    # neither empty, "-" nor 'undefined'.
    dsp_stream = dsp_log.qb2(
        log='generic-yson-log',
        fields=[
            qe.log_fields('uniqid', 'pageid', 'impid', 'queryargs', 'dspid', 'win', 'price', 'countertype', 'dspfraudbits', 'dspeventflags', 'bidreqid'),
            qe.custom('reqid', extract_reqid, 'queryargs').with_type(str),
            qe.custom('portalpage', make_portalpage, 'queryargs').with_type(str),
            qe.custom('portalblock', make_portalblock, 'queryargs').with_type(str),
            qe.custom('yuid', lambda x: str(x), 'uniqid').with_type(str),
            qe.const('log_src', 'dsp').with_type(str),
        ],
        filters=[
            qf.equals('dspfraudbits', 0),
            qf.equals('dspeventflags', 0),
            qf.defined('bidreqid'),
            qf.not_(qf.equals('bidreqid', 0)),
            qf.custom(lambda x: x and x != "-" and x != 'undefined', 'reqid'),
        ],
        mode='records',
    )


    # Chevent log: same derived fields as the DSP stream, tagged with
    # log_src='chevent'. Kept rows have zero fraudbits, placeid 542 or 1542,
    # and a usable reqid.
    chevent_stream = chevent_log.qb2(
        log='generic-yson-log',
        fields=[
            qe.log_fields('uniqid', 'queryargs', 'eventcost', 'pageid', 'impid', 'placeid', 'countertype', 'fraudbits'),
            qe.custom('reqid', extract_reqid, 'queryargs').with_type(str),
            qe.custom('portalpage', make_portalpage, 'queryargs').with_type(str),
            qe.custom('portalblock', make_portalblock, 'queryargs').with_type(str),
            qe.custom('yuid', lambda x: str(x), 'uniqid').with_type(str),
            qe.const('log_src', 'chevent').with_type(str),
        ],
        filters=[
            qf.equals('fraudbits', 0),
            qf.or_(qf.equals('placeid', 542), qf.equals('placeid', 1542)),
            qf.custom(lambda x: x and x != "-" and x != 'undefined', 'reqid'),
        ],
        mode='records',
    )


    # Ad events (dsp + chevent) are left-joined to the video records by
    # request id and to vh_pages by (pageid, impid). A row survives if its
    # pageid is a key of PAGE_IDS, or if it matched both joins
    # (video_placement_name, video_type and ReqID all defined).
    # NOTE: the name `video_stream` is rebound here to the final pipeline.
    # NOTE(review): both joins pass assume_unique_right=True — confirm that
    # ReqID is unique across the concatenated video logs.
    video_stream = job.concat(
        dsp_stream,
        chevent_stream,
    ).join(
        video_stream, 
        by_left="reqid", 
        by_right="ReqID", 
        type='left', 
        assume_unique_right=True
    ).join(
        vh_pages, 
        by_left=("pageid", "impid"), 
        by_right=("pageid", "impid"), 
        type='left', 
        assume_unique_right=True
    ).filter(
        qf.or_(
            qf.custom(lambda x: x in PAGE_IDS, 'pageid'),
            qf.and_(qf.defined('video_placement_name'), qf.defined('video_type'), qf.defined('ReqID')),
        ),
    ).project(
        # Exactly the fields read by extract_daily_money_features.
        "pageid",
        "video_placement_name",
        "portalpage",
        "portalblock",
        "log_src",
        "placeid",
        "countertype",
        "eventcost",
        "video_type",
        "win",
        "dspid",
        "price",
        "yuid",
        "ui",
    ).map(
        extract_daily_money_features,
    ).groupby(
        "key",
        "type",
        "page",
        "portalpage",
        "block",
        "ui",
    ).aggregate(
        # Per key combination: summed value and number of distinct yuids.
        value=na.sum("value"),
        yuids=na.count_distinct("yuid"),
    ).project(
        ne.all(),
        fielddate=ne.const(date),
    ).put(
        '@job_root/%s' % date,
    )
    
    
@cli.statinfra_job
def make_job(job, options, statface_client):
    """Configure ``job`` and attach one pipeline per requested date.

    Sets the path templates referenced as ``@name`` in ``add_date_to_job``
    together with the default YT spec, then calls ``add_date_to_job`` for
    every entry of ``options.dates``. ``statface_client`` is accepted but
    not used. Returns the configured job.
    """
    path_templates = {
        "job_root": "//home/videoquality/ndchikin/MMA-6374/daily",
        'dsp_log': '//statbox/cooked_logs/bs-dsp-cooked-log/v1/1d',
        'chevent_log': '//statbox/cooked_logs/bs-chevent-cooked-log/v2/1d',
        'video_log': '//home/videoquality/24julia/video_queries_cube',
        'video_related_log': '//home/videoquality/24julia/video_queries_cube.related',
        'vh_pages': '//home/bs/users/yabs-analytics/video/pageimp_ids_video_with_inapp',
    }
    spec_defaults = {
        "pool_trees": ["physical"],
        "use_default_tentative_pool_trees": True,
    }
    configured = job.env(
        templates=path_templates,
        yt_spec_defaults=spec_defaults,
    )

    for day in options.dates:
        add_date_to_job(configured, day)

    return configured


# Script entry point: when executed directly, hand control to the imported
# `cli` module's runner (presumably it dispatches to the job decorated with
# @cli.statinfra_job above — TODO confirm).
if __name__ == "__main__":
    cli.run()
