# -*-coding: utf8 -*-

import urllib
import urlparse
from qb2.api.v1.typing import *
from qb2.api.v1 import extractors as qe, filters as qf
from nile.api.v1 import (
    aggregators as na,
    extractors as ne,
    statface as ns,
    with_hints,
    Record,
    cli,
)


def generate_all_keys(keys, index=0):
    """Lazily enumerate every key combination, including "_total_" rollups.

    Each position of ``keys`` (starting from ``index``) holds either a single
    value or a list of alternative values.  For every position the generator
    tries each alternative and then the literal ``"_total_"``.

    The first position varies fastest: all alternatives for ``keys[index]``
    are produced for one combination of the remaining positions before the
    remaining positions advance.

    Yields:
        list: one value per position from ``index`` to the end of ``keys``;
        a single empty list when ``index`` is past the end.
    """
    if index >= len(keys):
        yield []
        return

    head = keys[index]
    # A scalar is treated as a single-alternative list.
    choices = head if isinstance(head, list) else [head]

    for tail in generate_all_keys(keys, index + 1):
        for choice in choices:
            yield [choice] + tail
        yield ["_total_"] + tail


def safe_div(x, y):
    """Return ``x / y`` as a float, or ``0.0`` when ``y`` equals zero."""
    return float(x) / y if y != 0 else float(0)


# Schema hint passed to ``with_hints`` for ``map_t``:
# name of each field emitted by the mapper -> its qb2 type.
map_schema = {
    "fielddate": String,
    "film": Int64,  # 1 when the object type contains "Film/Film", else 0
    "full_req": String,  # URL-unquoted request URL
    "is_related": String,
    "lvt": Float,
    "lw": Int64,  # taken from the input field ``long_watches``
    "page": String,  # "first" or "2+" (or "_total_" in rollup rows)
    "porn": Int64,  # 1 when one of the porn search props matched, else 0
    "req": String,  # request kind derived from the URL
    "serial": Int64,  # 1 when vserial > 0 or object type contains "Film/Series"
    "theme": String,
    "tvt": Int64,
    "ui": String,
    "uid": String,
    # vh_*: sums over results whose host contains "kinopoisk.ru" or "frontend.vh"
    "vh_lvt": Float,
    "vh_lw": Int64,
    "vh_tvt": Int64,
    "yt_8_from_10": Int64,  # 1 when at least 8 results with pos < 10 are on youtube.com
    "yt_docs": UInt64,  # number of results on youtube.com
    "vh_docs": UInt64,  # number of results on kinopoisk.ru / frontend.vh
    "all_docs": UInt64,  # total number of results
}


def _result_host(url):
    """Return the host part of a result URL.

    Parses with ``urlparse``; if parsing raises, falls back to the raw URL
    with the query string and the ``http(s)://`` prefix removed.
    """
    try:
        return urlparse.urlparse(url).netloc
    except Exception:
        # Deliberate best effort: callers only do substring checks on the
        # host, so a roughly host-like string is good enough.
        return url.split("?")[0].replace("https://", "").replace("http://", "")


def _request_kind(full_req):
    """Derive the ``req`` cube key from an unquoted request URL.

    Known search / preview endpoints map to fixed labels; anything else maps
    to the last path segment (query string and trailing slashes dropped).
    The result is capped at 999 characters.
    """
    if "запрос/сериал" in full_req:
        req = "запрос/сериал"
    elif "/video/search" in full_req or "/touch/search" in full_req or "video/pad/search" in full_req:
        req = "search"
    elif "/video/preview" in full_req or "/touch/preview" in full_req:
        req = "preview"
    else:
        path = full_req.split("?")[0]
        # endswith() instead of indexing: the path may be empty (URL is empty,
        # starts with "?", or is just "//"), and path[-1] would raise IndexError.
        if path.endswith("//"):
            path = path[:-2]
        if path.endswith("/"):
            path = path[:-1]
        req = path.split("/")[-1]

    if len(req) >= 1000:
        req = req[:999]
    return req


@with_hints(map_schema, ensure_optional_types=True)
def map_t(recs):
    """Expand each request record into cube rows with "_total_" rollups.

    For every input record the mapper:

    * classifies the page ("first" / "2+"), the theme (serial / film / porn)
      and the request kind (see ``_request_kind``);
    * counts results by host (youtube.com, kinopoisk.ru / frontend.vh) and
      sums watch metrics over the kinopoisk.ru / frontend.vh results;
    * yields one ``Record`` per combination produced by ``generate_all_keys``
      over (ui, req, theme, is_related, page), so every dimension also gets
      a "_total_" row.

    Args:
        recs: iterable of input records exposing ``uid``, ``fielddate``,
            ``ui``, ``tvt``, ``lvt``, ``long_watches``, ``obj_type``,
            ``is_related``, ``SearchPropsValues``, ``RelevValues``,
            ``results``, ``url``, ``serpid`` and ``ReqID``.

    Yields:
        Record: rows matching ``map_schema``.
    """
    for rec in recs:
        uid = rec.uid
        fielddate = rec.fielddate
        tvt = rec.tvt
        lvt = rec.lvt
        lw = rec.long_watches

        obj = str(rec.obj_type)
        # A missing props dict is treated as "no props" instead of crashing on .get().
        spv = rec.SearchPropsValues or {}
        relev = rec.RelevValues
        full_req = urllib.unquote(rec.url)

        page = "first" if rec.serpid == rec.ReqID else "2+"

        # Theme is always a list of alternatives; generate_all_keys emits a
        # row for each alternative plus "_total_".
        themes = ["-", "not_porn"]
        serial = 0
        film = 0
        porn = 0

        try:
            vserial = float(relev["vserial"])
        except Exception:
            # Missing / malformed relevance value: treat as "not a serial".
            vserial = 0

        if vserial > 0 or "Film/Series" in obj:
            themes = ["serial", "not_porn"]
            serial = 1

        if "Film/Film" in obj:
            themes = ["film", "not_porn"]
            film = 1

        if (
            spv.get("VIDEO.VideoPorno.vidprn") == "ipq1"
            or spv.get("UPPER.VideoRecommenderRvb.VideoIsRelatedPornoQuery") == "1"
        ):
            themes = ["porn"]
            porn = 1

        yt10 = 0
        vh_tvt = 0
        vh_lvt = 0
        vh_lw = 0
        yt_docs = 0
        vh_docs = 0
        all_docs = 0
        for result in rec.results or []:
            host = _result_host(result.get("url", "-"))

            if "youtube.com" in host:
                yt_docs += 1
                # NOTE(review): a result without "pos" defaults to -1 and so
                # counts as a top-10 position - confirm this is intended.
                if result.get("pos", -1) < 10:
                    yt10 += 1

            if "kinopoisk.ru" in host or "frontend.vh" in host:
                vh_docs += 1
                vh_tvt += result.get("tvt", 0)
                vh_lvt += result.get("lvt", 0)
                vh_lw += result.get("long_watches", 0)

            all_docs += 1

        yt_8_from_10 = 1 if yt10 >= 8 else 0

        current_keys = [rec.ui, _request_kind(full_req), themes, str(rec.is_related), page]

        # Distinct loop names so the per-record values above are not shadowed.
        for key_ui, key_req, key_theme, key_related, key_page in generate_all_keys(current_keys):
            yield Record(
                uid=uid,
                fielddate=fielddate,
                ui=key_ui,
                req=key_req,
                theme=key_theme,
                is_related=key_related,
                page=key_page,
                serial=serial,
                film=film,
                porn=porn,
                tvt=tvt,
                lvt=lvt,
                lw=lw,
                yt_8_from_10=yt_8_from_10,
                vh_tvt=vh_tvt,
                vh_lvt=vh_lvt,
                vh_lw=vh_lw,
                full_req=full_req,
                yt_docs=yt_docs,
                vh_docs=vh_docs,
                all_docs=all_docs,
            )


@cli.statinfra_job
def make_job(job, options, statface_client):
    """Build the daily video-cube job and publish it to Statface.

    Reads the two per-date query tables, maps them through ``map_t``,
    attaches a per-uid ``ps`` value, aggregates by the cube dimensions,
    writes the result under ``@job_root/<date>`` and publishes it to the
    ``Video.All/videocube`` report.

    Args:
        job: job object supplied by ``cli.statinfra_job``.
        options: CLI options; only ``options.dates[0]`` is used.
        statface_client: client bound to the Statface report.

    Returns:
        The configured job.
    """

    def prism_only():
        # Fresh predicate per aggregator: rows with ps >= 4.
        return qf.compare("ps", ">=", 4)

    def not_porn_only():
        # Fresh predicate per aggregator: rows with porn == 0.
        return qf.compare("porn", "==", 0)

    job = job.env(
        yt_spec_defaults=dict(pool_trees=["physical"], use_default_tentative_pool_trees=True),
        templates=dict(job_root="//home/videolog/ensuetina/VIDEO_CUBE"),
    )

    report = ns.StatfaceReport()
    report = report.path("Video.All/videocube")
    report = report.scale("daily")
    report = report.client(statface_client)

    date = options.dates[0]

    main_table = job.table(
        "home/videoquality/24julia/video_queries_cube/{}".format(date),
        ignore_missing=True,
    )
    related_table = job.table(
        "home/videoquality/24julia/video_queries_cube.related/{}".format(date),
        ignore_missing=True,
    )
    data = job.concat(main_table, related_table)

    # One "ps" value per uid, taken from any record where it is defined.
    uid_ps = data.project(
        "uid",
        qe.integer_dictitem("UPPER.PrismBigBLog.prism_segment", "SearchPropsValues").rename("ps"),
    )
    ps_data = uid_ps.groupby("uid").aggregate(ps=na.any("ps", predicate=qf.defined("ps")))

    # Cube rows joined with "ps"; coalesce supplies -1 as the fallback value.
    mapped = data.map(map_t)
    joined = mapped.join(ps_data, "uid", type="left")
    with_ps = joined.project(
        ne.all("ps"),
        qe.coalesce("ps", "ps", -1).with_type(Int64),
    )

    grouped = with_ps.groupby("fielddate", "ui", "req", "theme", "is_related", "page")
    cube = grouped.aggregate(
        uids=na.count_distinct("uid"),
        reqs=na.count(),
        tvt=na.sum("tvt"),
        lvt=na.sum("lvt"),
        lw=na.sum("lw"),
        yt_8_from_10=na.sum("yt_8_from_10"),
        vh_tvt=na.sum("vh_tvt"),
        vh_lvt=na.sum("vh_lvt"),
        vh_lw=na.sum("vh_lw"),
        prism_tvt=na.sum("tvt", predicate=prism_only()),
        prism_lvt=na.sum("lvt", predicate=prism_only()),
        not_porn_tvt=na.sum("tvt", predicate=not_porn_only()),
        not_porn_lvt=na.sum("lvt", predicate=not_porn_only()),
        prism_uids=na.count_distinct("uid", predicate=prism_only()),
        yt_docs=na.sum("yt_docs"),
        vh_docs=na.sum("vh_docs"),
        all_docs=na.sum("all_docs"),
    )

    # Derive the doc shares and give the predicate-based metrics a 0 fallback.
    recomputed = [
        "prism_tvt",
        "prism_lvt",
        "not_porn_tvt",
        "not_porn_lvt",
        "prism_uids",
        "yt_docs",
        "vh_docs",
        "all_docs",
    ]
    final = cube.project(
        ne.all(recomputed),
        qe.custom("vh_share", safe_div, "vh_docs", "all_docs").with_type(Float),
        qe.custom("yt_share", safe_div, "yt_docs", "all_docs").with_type(Float),
        qe.coalesce("prism_tvt", "prism_tvt", 0).with_type(Int64),
        qe.coalesce("prism_lvt", "prism_lvt", 0).with_type(Float),
        qe.coalesce("not_porn_tvt", "not_porn_tvt", 0).with_type(Int64),
        qe.coalesce("not_porn_lvt", "not_porn_lvt", 0).with_type(Float),
        qe.coalesce("prism_uids", "prism_uids", 0).with_type(UInt64),
    )

    stored = final.put("@job_root/{}".format(date))
    stored.publish(report)

    return job


# Script entry point: when executed directly, hand control to nile's ``cli.run()``.
if __name__ == "__main__":
    cli.run()
