# -*- coding: utf-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    cli,
    with_hints,
    clusters,
    Record,
)

from qb2.api.v1.typing import *

import nile
import re

# Cache for the code-point -> space translation table.  It stays None until
# build_translation() is called for the first time and fills it in.
TRANSLATION = None

# Column name -> type mapping for the records yielded by parse_us; it is passed
# to with_hints as that mapper's output schema.
output_schema_0 = {
    "ui": String,
    "uid": String,
    "pos": UInt64,
    "pos1": UInt64,
    "pos3": UInt64,
    "query": String,
    "domain": String,
    "filmid": String,
    "clicks": UInt64,
    "wiztype": String,
    "platform": String,
    "uid_click": String,
}


# Named block-selection rules handed to tamus.check_rules_multiple_joiners_merged
# in get_wiz_info; the keys are the marker names that function looks up.
# NOTE(review): "#name" presumably refers to another rule in this dict and
# "//" to a descendant step -- confirm against the tamus rule syntax.
rules = {
    "get_thumb": "#video_wizard//big-video",
    "get_popup": "#get_thumb//fragment-popup",
    "video_wizard": '#wizard_result [@wizard_name = "videowiz" or @wizard_name = "video" or @wizard_name = "videoshort" or @wizard_name = "video-unisearch"]',
}


def build_translation():
    """Return the code-point -> space table used to blank out punctuation.

    The table maps the ordinal of every character whose Unicode category
    starts with "P", plus a fixed set of control and symbol characters, to a
    single space.  It is computed on the first call, stored in the
    module-level ``TRANSLATION`` and returned from there on later calls.

    Returns:
        dict: integer code point -> ``u" "``, suitable for
        ``unicode.translate``.
    """
    global TRANSLATION
    if TRANSLATION is not None:
        return TRANSLATION

    import sys
    import unicodedata

    table = {}
    for code_point in xrange(sys.maxunicode):
        # Categories beginning with "P" are the Unicode punctuation classes.
        if unicodedata.category(unichr(code_point)).startswith("P"):
            table[code_point] = u" "

    # Whitespace controls and symbols that are not classified as punctuation
    # but are blanked out as well.
    for symbol in u"\t\n\x0b\x0c\r$+<=>^`|~":
        table[ord(symbol)] = u" "

    TRANSLATION = table
    return TRANSLATION


def normalize_query(query):
    """Return a canonical form of a UTF-8 encoded search query.

    Characters listed in the table from ``build_translation`` are replaced
    with spaces, the text is lower-cased, surrounding whitespace is stripped
    and every run of whitespace is collapsed into one ASCII space.

    Args:
        query: byte string expected to contain UTF-8 text.

    Returns:
        The normalized query as a UTF-8 byte string, or ``None`` when the
        input is not valid UTF-8.
    """
    try:
        text = query.decode("utf8")
    except UnicodeDecodeError:
        # Undecodable input cannot be normalized; the caller receives None.
        return None

    text = text.translate(build_translation()).lower().strip()
    # Without re.UNICODE, Python 2 limits \s to ASCII whitespace, so
    # characters such as U+00A0 survived and split otherwise identical
    # queries.  Matching "\s+" (not "\s\s+") also turns a lone non-ASCII
    # whitespace character into a plain space.
    text = re.sub(r"\s+", u" ", text, flags=re.UNICODE)
    return text.encode("utf8")


def get_baobab_path(block):
    """Return the names from the topmost ancestor down to ``block``.

    The chain is followed through ``.parent`` until it reaches ``None``.
    Every name, including the last one, is followed by a ``"/"``.

    Args:
        block: object exposing ``name`` and ``parent``, or ``None``.

    Returns:
        The slash-terminated path, or ``""`` when ``block`` is ``None``.
    """
    names = []
    node = block
    while node is not None:
        names.append(node.name)
        node = node.parent
    # Names were gathered leaf-first; the path is written root-first.
    return "".join(name + "/" for name in reversed(names))


def get_wiz_info(r):
    """Yield one summary tuple per video wizard block found in request ``r``.

    The blocks are those marked ``"video_wizard"`` after applying the
    module-level ``rules`` to the trees returned by ``r.BaobabAllTrees()``.
    Nothing is yielded when that call returns an empty value.

    Args:
        r: request object exposing ``BaobabAllTrees()``.

    Yields:
        tuple: ``(wiz, wiz_pos, thumb_id, wiz_type, wiz_clicks,
        fragment_clicks, has_fragment_attr)`` where

        * ``wiz`` is the ``wizard_name`` attribute, with
          ``"video-unisearch"`` reported as ``"videowiz"``;
        * ``wiz_pos`` is the position given by the baobab result adapter;
        * ``thumb_id`` is the ``externalId`` id of a ``"get_thumb"`` block
          whose entity is ``"video"`` (``None`` if there is none);
        * ``wiz_type`` is the ``subtype`` attribute;
        * ``wiz_clicks`` is the number of Click events on the block plus
          those on every block visited by ``bfs_iterator``;
        * ``fragment_clicks`` is the number of clicks on ``"get_popup"``
          blocks under a thumb that has a ``fragment`` attribute, or
          ``None`` when no such thumb exists;
        * ``has_fragment_attr`` tells whether such a thumb was seen.
    """
    import baobab
    import tamus

    joiners = r.BaobabAllTrees()
    if not joiners:
        return

    def count_clicks(target):
        """Count Click events attached to ``target`` across all joiners."""
        hits = 0
        for joiner in joiners:
            for event in joiner.get_events_by_block(target):
                if isinstance(event, baobab.common.Click):
                    hits += 1
        return hits

    marks = tamus.check_rules_multiple_joiners_merged(rules, joiners)
    for block in marks.get_blocks("video_wizard"):
        wiz = block.attrs.get("wizard_name")
        if wiz == 'video-unisearch':
            # Reported under the same name as the regular video wizard.
            wiz = 'videowiz'
        wiz_pos = baobab.web.create_result_interface_adapter(block).position
        wiz_type = block.attrs.get("subtype")
        wiz_clicks = count_clicks(block)

        thumb_id = None
        fragment_clicks = None
        has_fragment_attr = False

        for child in baobab.common.bfs_iterator(block):
            wiz_clicks += count_clicks(child)
            if not marks.has_marker_in_block(child, "get_thumb"):
                continue

            external = child.attrs.get("externalId")
            if external and external.get("entity") == "video":
                thumb_id = external.get("id")

            if child.attrs.get("fragment"):
                has_fragment_attr = True
                # A later thumb with a fragment replaces the earlier count.
                fragment_clicks = sum(
                    count_clicks(popup)
                    for popup in baobab.common.bfs_iterator(child)
                    if marks.has_marker_in_block(popup, "get_popup")
                )

        yield wiz, wiz_pos, thumb_id, wiz_type, wiz_clicks, fragment_clicks, has_fragment_attr


@with_hints(output_schema=output_schema_0, ensure_optional_types=True)
def parse_us(sessions):
    """Turn user sessions into per-wizard-show records for aggregation.

    For every supported web request, each wizard reported by
    ``get_wiz_info`` is emitted once for every combination of the real value
    and the ``"_total_"`` roll-up in the wiztype / ui / platform / domain
    dimensions.  Wizards of subtype ``"xl"`` that carry a fragment produce
    additional ``"xl|fragment"`` records whose click count covers only the
    fragment popup.

    Args:
        sessions: iterable of ``(key, container)`` pairs; ``key`` is the user
            key string and ``container.GetRequests()`` yields the requests.

    Yields:
        Record: rows with the columns of ``output_schema_0``.
    """
    import itertools
    import uatraits

    total = "_total_"

    # Building the detector is loop-invariant, so it is done once instead of
    # once per request.  A failure keeps the best-effort behaviour: every
    # platform is then reported as "unknown".
    try:
        detector = uatraits.detector("/usr/share/uatraits/browser.xml")
    except Exception:
        detector = None

    def detect_platform(request):
        """Classify the request's user agent as pad/touch/desktop/unknown."""
        if detector is None:
            return "unknown"
        try:
            traits = detector.detect(request.UserAgent)
            if "isTablet" in traits and traits["isTablet"]:
                return "pad"
            if "isMobile" in traits and traits["isMobile"]:
                return "touch"
            return "desktop"
        except Exception:
            # Detection is best effort; a bad user agent must not fail the job.
            return "unknown"

    def expand(wiztypes, ui, platform, domain, uid, clicks, film_id, position, query):
        """Yield one Record per combination of real and roll-up dimension values."""
        in_top1 = 1 if position == 0 else 0
        # In Python 2 ``None < 3`` is True, so a show without a position
        # would otherwise be counted as a top-3 show.
        in_top3 = 1 if position is not None and position < 3 else 0
        clicked_uid = uid if clicks else None
        combos = itertools.product(wiztypes, (ui, total), (platform, total), (domain, total))
        for w, u, p, d in combos:
            yield Record(
                wiztype=w,
                ui=u,
                platform=p,
                domain=d,
                uid=uid,
                uid_click=clicked_uid,
                clicks=clicks,
                filmid=film_id,
                pos=position,
                pos1=in_top1,
                pos3=in_top3,
                query=query,
            )

    for key, rcont in sessions:
        session = rcont.GetRequests()
        # Strip the "y" / "uu/" markers from the session key.
        uid = key.replace("y", "").replace("uu/", "")

        for r in session:
            if r.IsA("TYandexWebRequest"):
                ui = "desktop"
            elif r.IsA("TMobileYandexWebRequest") or r.IsA("TTouchYandexWebRequest"):
                ui = "touch"
            elif r.IsA("TPadYandexWebRequest"):
                ui = "pad"
            elif r.IsA("TMobileAppYandexWebRequest"):
                ui = "app"
            else:
                # Other request types are not part of this report.
                continue

            platform = detect_platform(r)
            domain = (r.ServiceDomRegion or "unknown").replace(":443", "")
            query = normalize_query(str(r.Query))

            for wiz, wiz_pos, thumb_id, wiz_type, wiz_clicks, fragment_clicks, has_fragment_attr in get_wiz_info(r):
                if not wiz:
                    continue

                wiztypes = (str(wiz) + "|" + str(wiz_type), total)
                for record in expand(wiztypes, ui, platform, domain, uid, wiz_clicks, thumb_id, wiz_pos, query):
                    yield record

                if wiz_type == "xl" and has_fragment_attr:
                    # Fragment rows have no wiztype roll-up of their own.
                    for record in expand(
                        ("xl|fragment",), ui, platform, domain, uid, fragment_clicks, thumb_id, wiz_pos, query
                    ):
                        yield record


@cli.statinfra_job
def make_job(job, options, statface_client):
    """Assemble the daily video-wizard statistics job.

    The sessions table for ``options.dates[0]`` is mapped through
    ``parse_us`` and the result feeds three outputs:

    * aggregates per (wiztype, ui, platform, domain), published to the
      ``Video.All/videowiz/serp_stats`` Statface report;
    * aggregates per (wiztype, ui, domain, query) without roll-up rows,
      written to ``$job_root/<date>``;
    * raw rows without roll-ups that have a non-empty ``filmid``, appended
      to ``$job_root/xl_weekly_stats``.

    Args:
        job: Nile job to extend.
        options: CLI options; only ``options.dates[0]`` is read.
        statface_client: client attached to the Statface report.

    Returns:
        The configured job.
    """
    rollup_mark = "_total_"
    dimension_columns = ("wiztype", "ui", "platform", "domain")

    def exact_rows_only(*extra_conditions):
        """Build a filter rejecting any row that has a roll-up dimension."""
        conditions = [nf.not_(nf.equals(column, rollup_mark)) for column in dimension_columns]
        conditions.extend(extra_conditions)
        return nf.and_(*conditions)

    job = job.env(
        templates={"job_root": "//home/videolog/itajn/videowiz"},
        yt_spec_defaults={
            "pool_trees": ["physical"],
            "use_default_tentative_pool_trees": True,
        },
    )

    report = ns.StatfaceReport()
    report = report.path("Video.All/videowiz/serp_stats")
    report = report.scale("daily")
    report = report.client(statface_client)

    date = options.dates[0]

    source = job.table("user_sessions/pub/search/daily/%s/columns/clean" % date)
    processed = source.libra(
        parse_us,
        memory_limit=4000,
        libra_file=nile.files.RemoteFile('//statbox/resources/libra_nile_udf2.7.so'),
    )

    # Output 1: Statface report, roll-up rows included.
    by_dimensions = processed.groupby("wiztype", "ui", "platform", "domain")
    report_rows = by_dimensions.aggregate(
        shows=na.count(),
        clicks=na.sum("clicks"),
        users_seen=na.count_distinct("uid"),
        users_clicked=na.count_distinct("uid_click"),
        mean_pos=na.mean("pos"),
        median_pos=na.median("pos"),
        top1=na.sum("pos1"),
        top3=na.sum("pos3"),
    )
    report_rows = report_rows.project(
        ne.all(),
        fielddate=ne.const(date).with_type(String),
    )
    report_rows.publish(report)

    # Output 2: per-query statistics for the day, real dimension values only.
    by_query = processed.filter(exact_rows_only()).groupby("wiztype", "ui", "domain", "query")
    query_stats = by_query.aggregate(
        shows=na.count(),
        clicks=na.sum("clicks"),
        mean_pos=na.mean("pos"),
        median_pos=na.median("pos"),
        top1=na.sum("pos1"),
        top3=na.sum("pos3"),
    )
    query_stats.put("$job_root/%s" % date)

    # Output 3: raw rows that carry a film id, accumulated across runs.
    has_film_id = nf.custom(lambda film_id: film_id is not None and film_id != "", "filmid")
    film_rows = processed.filter(exact_rows_only(has_film_id))
    film_rows.put("$job_root/xl_weekly_stats", append=True)

    return job


# Script entry point: hand control to the Nile CLI runner when executed directly.
# NOTE(review): presumably cli.run() dispatches to the job registered above via
# @cli.statinfra_job -- confirm against the nile.api.v1.cli documentation.
if __name__ == "__main__":
    cli.run()
