import re
import os
import nile
from nile.api.v1 import cli, with_hints
from nile.api.v1.files import LocalFile
from video_wizard_desktop_cube import (
    get_block_clicks_count,
    get_block_subtree_clicks_count,
    parse_keypoint_event,
    add_video_results_info_desktop,
)
from schema import OUTPUT_PATH, SCHEMA, SPV_FIELDS, RV_FIELDS


# Named block-selector rules. The whole dict is passed to
# tamus.check_rules_multiple_joiners_merged in parse_request, and matched
# blocks are then fetched by rule name through marks.get_blocks(...).
# Only "video_wizard" is looked up by name in this file; "get_popup" appears to
# build on "get_thumb" through its "#get_thumb" prefix.
# NOTE(review): parse_video_with_keypoints asks marks for
# "wizard_video_showcase_item_keypoints_items", which is not defined in this
# dict -- confirm where that rule is expected to come from.
rules = {
    "get_thumb": "#main_result//big-video",
    "get_popup": "#get_thumb//fragment-popup",
    # Matches wizard results whose wizard_name is one of the four video wizards.
    "video_wizard": '#wizard_result [@wizard_name = "videowiz" or @wizard_name = "video"'
                    ' or @wizard_name = "videoshort" or @wizard_name = "video-unisearch"]',
}

# Matches a run of two or more consecutive whitespace characters.
spaces_regexp = re.compile(r"\s\s+")


def normalize_query(query):
    """Return a lower-cased, trimmed query with whitespace runs collapsed.

    Every run of two or more whitespace characters is replaced by a single
    space; a lone whitespace character (for example a single tab) is kept
    unchanged.

    Args:
        query: query text (a string).

    Returns:
        The normalized query string.
    """
    lowered = query.lower()
    trimmed = lowered.strip()
    return spaces_regexp.sub(" ", trimmed)


def parse_attributes_from_request(key, r):
    """Collect the request-level fields shared by every wizard row.

    Args:
        key: value stored under "uid" (passed in by the caller).
        r: request object; its attributes are read as-is.

    Returns:
        dict with the request attributes, the test ids, and the subsets of
        SearchPropsValues / RelevValues restricted to SPV_FIELDS / RV_FIELDS
        (keys and values converted with str()).
    """
    import libra

    search_props = r.SearchPropsValues
    relev_values = r.RelevValues

    test_ids = [info.TestID for info in r.GetTestInfo()]

    # "REPORT.test-ids" is split on single spaces; empty pieces are dropped.
    raw_triggered = search_props.get("REPORT.test-ids", "")
    triggered_test_ids = [piece for piece in raw_triggered.split(" ") if piece]

    selected_search_props = {}
    for field in SPV_FIELDS:
        if field in search_props:
            selected_search_props[str(field)] = str(search_props[field])

    selected_relev_values = {}
    for field in RV_FIELDS:
        if field in relev_values:
            selected_relev_values[str(field)] = str(relev_values[field])

    return {
        "uid": key,
        "ts": r.Timestamp,
        "ReqID": r.ReqID,
        "query": r.Query,
        "norm_query": normalize_query(str(r.Query)),
        "ui": libra.GetUIForRequest(r),
        "domain": r.ServiceDomRegion,
        "url": r.FullRequest,
        "referer": r.Referer,
        "UserAgent": r.UserAgent,
        "UserRegion": r.UserRegion,
        "testids": test_ids,
        "triggered_testids": triggered_test_ids,
        "PassportUID": r.PassportUID,
        "ICookie": r.ICookie,
        "SearchPropsValues": selected_search_props,
        "RelevValues": selected_relev_values,
    }


def add_and_parse_block_info(res, block):
    """Write the wizard name, subtype and position of ``block`` into ``res``.

    Sets res["wiz_name"] and res["wiz_type"] from the block attributes
    "wizard_name" and "subtype" (None when absent) and res["wiz_pos"] from
    the position reported by the baobab result interface adapter.
    """
    import baobab

    block_attrs = block.attrs
    res["wiz_name"] = block_attrs.get("wizard_name")
    res["wiz_type"] = block_attrs.get("subtype")

    adapter = baobab.web.create_result_interface_adapter(block)
    res["wiz_pos"] = adapter.position


def parse_translations(translations_block, joiners):
    """Parse the "translations" sub-block of a wizard.

    Args:
        translations_block: block whose direct children are inspected.
        joiners: joiners forwarded to the click-counting helpers.

    Returns:
        (title_clicks, videos): clicks summed over "title" children, and the
        rows produced by parse_video for every "item" inside "showcase"
        children that carries a video externalId.
    """
    import baobab

    iter_children = baobab.common.children_iterator

    title_clicks = 0
    videos = []
    for section in iter_children(translations_block):
        if section.name == "title":
            title_clicks += get_block_clicks_count(section, joiners)
        elif section.name == "showcase":
            videos.extend(
                parse_video(item, joiners)
                for item in iter_children(section)
                if item.name == "item" and check_video_has_externalid(item)
            )
        # Any other child is ignored.

    return title_clicks, videos


def parse_video(video, joiners):
    """Build a result row for a single showcase video.

    Clicks are the clicks on ``video`` itself plus the clicks on each of its
    direct children. The id is taken from the last child whose "externalId"
    attribute has entity "video"; it stays 0 when no such child exists.

    Returns:
        dict with keys "id", "duplicates_ids", "pos" and "clicks".
    """
    import baobab

    total_clicks = get_block_clicks_count(video, joiners)
    external_video_id = 0
    for part in baobab.common.children_iterator(video):
        external = part.attrs.get("externalId")
        if external and external.get("entity") == "video":
            external_video_id = external["id"]
        total_clicks += get_block_clicks_count(part, joiners)

    return {
        "id": int(external_video_id),
        "duplicates_ids": [],
        "pos": int(video.attrs["item"]),
        "clicks": total_clicks,
    }


def parse_video_with_keypoints(video, joiners, marks):
    """Build a result row for a showcase video that has keypoints.

    Video clicks are the clicks on ``video`` itself plus the clicks on every
    direct child except the one named "keypoints". Keypoint data is filled by
    parse_keypoint_event from the events of the marked keypoint blocks.

    Returns:
        dict with keys "id", "duplicates_ids", "pos", "clicks" and
        "keypoints"; the latter always contains "clicks_pos" and
        "clicks_count".
    """
    import baobab

    total_clicks = get_block_clicks_count(video, joiners)
    # NOTE(review): this lookup does not depend on ``video``, so every video
    # processes the same set of keypoint blocks -- confirm this is intended.
    keypoint_blocks = marks.get_blocks("wizard_video_showcase_item_keypoints_items")
    keypoint_clicks = []

    external_video_id = 0
    for part in baobab.common.children_iterator(video):
        external = part.attrs.get("externalId")
        if external and external.get("entity") == "video":
            external_video_id = external["id"]
        if part.name == "keypoints":
            # Keypoint clicks are accounted for separately below.
            continue
        total_clicks += get_block_clicks_count(part, joiners)

    keypoints = {}
    for keypoint_block in keypoint_blocks:
        joiner = marks.get_joiner_by_block_id(keypoint_block.id)
        for event in joiner.get_events_by_block(keypoint_block):
            parse_keypoint_event(event, keypoints, keypoint_clicks)

    keypoints["clicks_pos"] = ";".join(keypoint_clicks)
    keypoints["clicks_count"] = len(keypoint_clicks)

    return {
        "id": int(external_video_id),
        "duplicates_ids": [],
        "pos": int(video.attrs["item"]),
        "clicks": total_clicks,
        "keypoints": keypoints,
    }


def check_video_has_keypoints(video):
    """Return True when ``video`` has a direct child named "keypoints"."""
    import baobab

    children = baobab.common.children_iterator(video)
    return any(child.name == "keypoints" for child in children)


def check_video_has_fragment(video):
    """Return True when ``video`` has a direct child named "fragment"."""
    import baobab

    children = baobab.common.children_iterator(video)
    return any(child.name == "fragment" for child in children)


def check_video_has_externalid(video):
    """Return True when a direct child of ``video`` has a video externalId.

    A child qualifies when its "externalId" attribute is truthy and its
    "entity" field equals "video".
    """
    import baobab

    def _is_video_reference(child):
        external = child.attrs.get("externalId")
        return bool(external) and external.get("entity") == "video"

    return any(
        _is_video_reference(child)
        for child in baobab.common.children_iterator(video)
    )


def parse_showcase_videos(showcase, joiners, marks):
    """Parse every video of a "showcase" block.

    Children without a video externalId are skipped. The remaining ones are
    parsed with parse_video_with_keypoints when they have a "keypoints"
    child, otherwise with parse_video.

    Returns:
        (video_clicks, keypoints_clicks, has_keypoints, has_fragments, rows)
        where ``rows`` is the list of per-video dicts in iteration order.
    """
    import baobab

    total_video_clicks = 0
    total_keypoint_clicks = 0
    saw_keypoints = False
    saw_fragments = False
    rows = []

    for video in baobab.common.children_iterator(showcase):
        if not check_video_has_externalid(video):
            continue

        if check_video_has_keypoints(video):
            saw_keypoints = True
            row = parse_video_with_keypoints(video, joiners, marks)
            total_keypoint_clicks += row["keypoints"].get("clicks_count", 0)
        else:
            # The fragment flag is only raised for videos without keypoints.
            if check_video_has_fragment(video):
                saw_fragments = True
            row = parse_video(video, joiners)

        total_video_clicks += row["clicks"]
        rows.append(row)

    return total_video_clicks, total_keypoint_clicks, saw_keypoints, saw_fragments, rows


def add_video_results_info(res, block, joiners, marks):
    """Fill ``res`` with click counters and video rows of a wizard block.

    Direct children of ``block`` are dispatched by name:
      * "title"        -- clicks added to title_clicks_count;
      * "showcase"     -- parsed by parse_showcase_videos and accumulated;
      * "translations" -- parsed by parse_translations (the values of the
                          last such child are the ones kept);
      * anything else  -- subtree clicks added to other_clicks_count.

    The results are written into ``res`` in place; nothing is returned.
    """
    import baobab

    title_clicks = 0
    video_clicks = 0
    keypoint_clicks = 0
    other_clicks = 0
    any_keypoints = False
    any_fragments = False
    any_translations = False
    rows = []
    translations_title_clicks = 0
    translations_rows = []

    for child in baobab.common.children_iterator(block):
        child_name = child.name
        if child_name == "title":
            title_clicks += get_block_clicks_count(child, joiners)
        elif child_name == "showcase":
            (
                showcase_video_clicks,
                showcase_keypoint_clicks,
                showcase_has_keypoints,
                showcase_has_fragments,
                showcase_rows,
            ) = parse_showcase_videos(child, joiners, marks)
            video_clicks += showcase_video_clicks
            keypoint_clicks += showcase_keypoint_clicks
            any_keypoints = any_keypoints or bool(showcase_has_keypoints)
            any_fragments = any_fragments or bool(showcase_has_fragments)
            rows.extend(showcase_rows)
        elif child_name == "translations":
            any_translations = True
            translations_title_clicks, translations_rows = parse_translations(child, joiners)
        else:
            other_clicks += get_block_subtree_clicks_count(child, joiners)

    res.update({
        "title_clicks_count": title_clicks,
        "video_clicks_count": video_clicks,
        "keypoints_clicks_count": keypoint_clicks,
        "other_clicks_count": other_clicks,
        "wiz_results": rows,
        "has_keypoints": any_keypoints,
        "has_fragment": any_fragments,
        "has_translations": any_translations,
        "translations_title_clicks_count": translations_title_clicks,
        "translations_videos": translations_rows,
    })


def is_desktop_request(res):
    """Return True when the row's "ui" field equals "desktop"."""
    ui = res["ui"]
    return ui == "desktop"


def parse_request(key, r):
    """Yield one output row per video wizard block found in request ``r``.

    Nothing is yielded when the request is not both a TBaobabProperties and
    a TWebRequestProperties request, or when it has no baobab trees.

    Each row is a shallow copy of the request attributes extended with the
    block info and the click statistics (desktop or non-desktop parser,
    chosen by the request's "ui"). For blocks without a subtype the
    "other" clicks are moved into "video_clicks_count".
    """
    import tamus

    if not (r.IsA("TBaobabProperties") and r.IsA("TWebRequestProperties")):
        return

    joiners = r.BaobabAllTrees()
    if not joiners:
        return

    marks = tamus.check_rules_multiple_joiners_merged(rules, joiners)
    request_attrs = parse_attributes_from_request(key, r)

    # The UI is a property of the request, so the parser is chosen once.
    if is_desktop_request(request_attrs):
        fill_results = add_video_results_info_desktop
    else:
        fill_results = add_video_results_info

    for block in marks.get_blocks("video_wizard"):
        row = request_attrs.copy()
        add_and_parse_block_info(row, block)
        fill_results(row, block, joiners, marks)
        if row["wiz_type"] is None:
            row["video_clicks_count"] = row["other_clicks_count"]
            row["other_clicks_count"] = 0
        yield row


@with_hints(SCHEMA, ensure_optional_types=True)
def reduce_libra(session):
    """Yield wizard rows for every request of every (key, container) pair.

    ``session`` is iterated as (key, request container) pairs; each request
    returned by GetRequests() is passed to parse_request together with the
    key, and all rows it produces are re-yielded.
    """
    for key, request_container in session:
        for request in request_container.GetRequests():
            for row in parse_request(key, request):
                yield row


@cli.statinfra_job(
    options=[
        cli.Option("use_sample", is_flag=True),
        cli.Option("out_path", default=OUTPUT_PATH),
    ]
)
def make_job(job, options):
    """Configure the job: one libra -> sort -> put chain per requested date.

    Args:
        job: job object supplied by the statinfra CLI wrapper.
        options: parsed options; ``dates``, ``use_sample`` and ``out_path``
            are read here.

    Returns:
        The configured job.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))

    # Ship the sibling modules imported by this script along with the job.
    attached_files = [
        LocalFile(os.path.join(script_dir, "schema.py")),
        LocalFile(os.path.join(script_dir, "video_wizard_desktop_cube.py")),
    ]
    job = job.env(
        yt_spec_defaults={
            "pool_trees": ["physical"],
            "use_default_tentative_pool_trees": True,
        },
        templates={
            "title": "Video Wizard cube",
            "job_root": options.out_path,
        },
        files=attached_files,
    )

    if options.use_sample:
        sessions_template = "//home/user_sessions/pub/sample_by_uid_1p/search/daily/{}/columns/clean"
    else:
        sessions_template = "//home/user_sessions/pub/search/daily/{}/columns/clean"

    for date in options.dates:
        sessions = job.table(sessions_template.format(date))
        wizard_rows = sessions.libra(
            reduce_libra,
            libra_file=nile.files.RemoteFile("//statbox/resources/libra_nile_udf2.7.so"),
            memory_limit=5 * 1024,
        )
        wizard_rows.sort("uid", "ts").put("$job_root/{}".format(date))

    return job


# Script entry point: hand control to the nile CLI runner only when this file
# is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    cli.run()
