# -*- coding: utf-8 -*-


import nile
import math
import datetime
import uatraits

from qb2.api.v1.typing import *

from nile.api.v1 import (
    cli,
    Record,
    with_hints,
    multischema,
    extractors as ne,
)


# Default output root of the job; used as the default of the `out_path` CLI option
# and exposed to the job as the `job_root` template in make_job.
OUTPUT_PATH = "//home/videoquality/video_queries_cube/queries"

# Search-prop keys that video_clicks copies as-is (when present in the request)
# into the `SearchPropsValues` output field.
SPV_FIELDS = [
    "UPPER.VideoKids.kids",
    "UPPER.VideoSeriesNavigator.viewed_episodes",
    "VIDEO.VideoPorno.vidprn",
    "REPORT.ya_plus",
    "REPORT.user_subscription",
    "UPPER.PrismBigBLog.prism_cluster",
    "UPPER.PrismBigBLog.prism_segment",
    "UPPER.VideoRecommenderRvb.VideoIsRelatedPornoQuery",
]

# Relev keys that video_clicks copies as-is (when present in the request)
# into the `RelevValues` output field.
RV_FIELDS = [
    "vserial",
    "vseason",
    "vepisode",
    "vsertitle",
    "vsexact",
    "is_memorandum_query",
    "mu",
    "mut",
    "is_music_ext",
    "ismusic",
]

# Layout of one element of the `results` list built by video_clicks.
# It is not referenced in the visible code: SCHEMA declares `results` as List[Json].
# Fields marked "only set ..." are absent from the dict otherwise.
RESULT_SHEMA = Struct[                  # currently unused for legacy reasons
    'tvt': Integer,                     # result total view time: max of the duration-based and heartbeat-based values
    'lvt': Float,                       # result log view time: log(tvt) when tvt > 30, else 0.0
    'long_watches': Integer,            # 1 if a 'SINGLE' video heartbeat is found for the result, else 0
    'pos': Integer,                     # position on serp (result.Position)
    'clicks': Integer,                  # number of clicks (len(result.GetClicks()))
    'url': String,                      # result url
    'title': Optional[String],          # title taken from the 'RandomLog' marker; None if absent or undecodable
    'duration': Float,                  # video duration (result.Duration)
    'player': String,                   # player ID (str(result.PlayerId))
    'ratio': Optional[Float],           # video ratio (result.CRatio)
    'width': Optional[UInt32],          # video width (result.CWidth)
    'height': Optional[UInt32],         # video height (result.CHeight)
    'group_id': Optional[String],       # video group ID; only set for TVideoGrouppedResult
    'group_pos': Optional[String],      # video pos in group; only set for TVideoGrouppedResult
    'group_type': Optional[String],     # video group type (name); only set for TVideoGrouppedResult
    'fragment_begin': Optional[UInt32], # '-t' value of an 'open-player.*' click on this url; only set if such a click exists
    'has_translation': Bool,            # True when the 'hasTranslation' marker equals '1'
    'keypoint_info': Optional[Struct['borders': List[UInt32], 'click_positions': List[UInt32]]], # keypoint clicks and show data (see get_keypoint_infos); only set if present for this url
]

# Output schema of one record emitted by video_clicks (one record per video request).
# The same schema is used for both output streams (main queries and related queries).
SCHEMA = {
    'uid': String,                       # yandexuid (the session key)
    'PassportUID': Optional[String],     # puid, if logged in (only set when the request has a non-empty PassportUID)
    'ts': Integer,                       # timestamp in seconds
    'ReqID': String,                     # request id
    'serpid': String,                    # serp id
    'parent_reqid': Optional[String],    # value of the 'parent-reqid=' URL parameter; only set for related requests
    'query': String,                     # user query
    'ui': String,                        # desktop|mobile|touch|app|pad|tv|tvapp|other
    'domain': String,                    # service domain (r.ServiceDomRegion)
    'lvt': Float,                        # LogViewTime: sum of per-result lvt
    'tvt': Integer,                      # TotalViewTime: sum of per-result tvt
    'long_watches': Integer,             # long watch count: sum of per-result long_watches
    'has_ontoid': Bool,                  # if matched with object answer (an object_type search prop is non-empty)
    'ontoid': Optional[String],          # object answer id, None when has_ontoid is False
    'obj_type': String,                  # 'type/subtype/tags' of the object answer, '' when has_ontoid is False
    'results': List[Json],               # one dict per video result on the serp (layout: see RESULT_SHEMA)
    'url': String,                       # full request url
    'referer': String,                   # referer url
    'SearchPropsValues': Json,           # select search props (see SPV_FIELDS) plus derived 'UPPER.VideoUgc*' entries
    'RelevValues': Dict[String, String], # select relevs (see RV_FIELDS)
    'is_portal': Bool,                   # if request is portal (TVideoPortalRequestProperties)
    'testids': List[String],             # list of active test-ids (from r.GetTestInfo())
    'triggered_testids': List[String],   # list of triggered test-ids (for CONDITION filters), from the 'REPORT.test-ids' search prop
    'UserAgent': Optional[String],       # unparsed user-agent
    'UserRegion': Integer,               # region in number format
    'is_related': Bool,                  # if query is related (TRelatedVideoRequestProperties)
    'ICookie': Optional[String],         # icookie
}


def get_fragment_infos(request):
    """Map clicked URLs to the fragment start carried by player-open clicks.

    Looks at every click of ``request`` whose path starts with
    ``'open-player.'`` and reads its ``'-t'`` variable as an integer.
    When a click carries several ``'-t'`` variables the last one that
    parses wins; values that are not integers are ignored.  Clicks
    without a parsed value or without a URL contribute nothing, and a
    later click on the same URL overwrites an earlier one.

    Returns:
        dict: ``{click url: fragment begin (int)}``.
    """
    begin_by_url = {}
    for click in request.GetClicks():
        if not click.Path.startswith('open-player.'):
            continue

        begin = None
        for name, raw in click.GetVars():
            if name != '-t':
                continue
            try:
                begin = int(raw)
            except ValueError:
                # Keep whatever value was parsed before this one.
                pass

        if begin is None or not click.Url:
            continue
        begin_by_url[click.Url] = begin
    return begin_by_url


def get_keypoint_infos(request):
    """Collect keypoint borders and keypoint click positions per URL.

    Two kinds of misc clicks of ``request`` are inspected (clicks without
    a URL are skipped):

    * path ``'2921.4086.3780'``: the ``'times'`` variable, a
      ``';'``-separated list of integers, is stored under ``'borders'``.
      NOTE(review): presumably this is the "keypoints shown" event - confirm.
    * path ``'2921.4086.882'``: every integer ``'-pos'`` variable is
      appended to ``'click_positions'``.
      NOTE(review): presumably this is the "keypoint clicked" event - confirm.

    The first successfully parsed ``borders`` value for a URL is kept.  An
    event whose ``'times'`` is missing or malformed still creates the
    ``'borders'`` key with ``None`` (as before), but no longer prevents a
    later event for the same URL from supplying real borders.

    Returns:
        dict: ``{url: info}`` where ``info`` may contain ``'borders'``
        (list of int or ``None``) and/or ``'click_positions'`` (list of int).
    """
    keypoint_infos = {}
    for click in request.GetMiscClicks():
        if click.Path == '2921.4086.3780':
            url = click.Url
            if not url:
                continue
            info = keypoint_infos.setdefault(url, {})
            # Borders are already known for this URL: keep the first parsed value.
            if info.get('borders') is not None:
                continue
            borders = None
            for key, value in click.GetVars():
                if key != 'times':
                    continue
                try:
                    borders = [int(part) for part in value.split(';')]
                except ValueError:
                    # Malformed list: keep any value parsed from an earlier 'times'.
                    continue
            info['borders'] = borders
        elif click.Path == '2921.4086.882':
            url = click.Url
            if not url:
                continue
            info = keypoint_infos.setdefault(url, {})
            for key, value in click.GetVars():
                if key != '-pos':
                    continue
                try:
                    pos = int(value)
                except ValueError:
                    continue
                info.setdefault('click_positions', []).append(pos)
    return keypoint_infos


# Schemas for the two output streams of video_clicks (main queries, related queries).
# NOTE(review): list multiplication repeats the reference, so both entries are the
# same dict object - fine as long as nothing mutates one of them; confirm.
m_schema = [dict(SCHEMA)] * 2


@with_hints(output_schema=multischema(*m_schema), outputs_count=2)
def video_clicks(session, main_query, related):
    """Flatten the video requests of user sessions into one record per request.

    For every request in ``session`` that is a ``TVideoRequestProperties``
    this collects the UI type, per-result watch statistics, selected search
    props / relev values and object-answer info.  The resulting ``Record`` is
    passed to ``related`` when the request is a
    ``TRelatedVideoRequestProperties`` and to ``main_query`` otherwise.

    Args:
        session: iterable of ``(key, request_container)`` pairs; ``key`` is
            written to the ``uid`` field.
        main_query: callable that receives records of non-related requests.
        related: callable that receives records of related requests.
    """
    # Not referenced by name below.
    # NOTE(review): presumably required so libra objects are usable inside the
    # job - confirm before removing.
    import libra

    detector = uatraits.detector('/usr/share/uatraits/browser.xml')

    for key, rcont in session:
        uid = key
        requests = rcont.GetRequests()
        for r in requests:
            if not r.IsA('TVideoRequestProperties'):
                continue

            # Base UI type, taken from the UI properties class of the request.
            if r.IsA('TDesktopUIProperties'):
                ui = 'desktop'
            elif r.IsA('TMobileUIProperties'):
                ui = 'mobile'
            elif r.IsA('TTouchUIProperties'):
                ui = 'touch'
            elif r.IsA('TMobileAppUIProperties'):
                ui = 'app'
            elif r.IsA('TPadUIProperties'):
                ui = 'pad'
            else:
                ui = 'other'

            # A user agent detected as a TV overrides the class-based UI type.
            if r.UserAgent:
                ua_res = detector.detect(r.UserAgent)
                if ua_res.get("isTV", False):
                    ui = 'tv'

            # TV-specific URL paths have the final say.  '/video/tvapp' must be
            # tested first because '/video/tv' is a prefix of it.
            if '/video/tvapp' in r.FullRequest:
                ui = 'tvapp'
            elif '/video/tv' in r.FullRequest:
                ui = 'tv'

            spv = r.SearchPropsValues
            rv = getattr(r, 'RelevValues', {})
            # The request matched an object answer if either source reports a type.
            has_ontoid = bool(
                spv.get('REPORT.object_type') or spv.get('UPPER.VideoExtraItems.object_type')
            )
            is_related = r.IsA("TRelatedVideoRequestProperties")

            # Request-level totals accumulated over all video results.
            tvt = 0
            lvt = 0.0
            results = []
            long_watches = 0

            fragment_infos = get_fragment_infos(r)
            keypoint_infos = get_keypoint_infos(r)

            for bl in r.GetMainBlocks():
                block_results = bl.GetChildren()
                for result in block_results:
                    is_video = result.IsA("TVideoResult")
                    is_video_group = result.IsA("TVideoGrouppedResult")

                    if not is_video and not is_video_group:
                        continue

                    # View time is the larger of the duration-based estimate
                    # (capped by the video duration) and the heartbeat ticks.
                    duration = r.FindVideoDurationInfo(result)
                    dt = min(duration.PlayingDuration, duration.Duration) if duration else 0
                    heartbeat = r.FindVideoHeartbeat(result, 'ANY')
                    ht = heartbeat.Ticks if heartbeat else 0
                    res_tvt = max(dt, ht)

                    res_long_watches = 1 if r.FindVideoHeartbeat(result, 'SINGLE') else 0

                    # Views of 30 or less contribute nothing to the log view time.
                    if res_tvt > 30:
                        res_lvt = math.log(res_tvt, math.e)
                    else:
                        res_lvt = 0.0

                    result_markers = result.Markers
                    title = None

                    if 'RandomLog' in result_markers:
                        markers = result_markers['RandomLog']
                        # Best effort: the title stays None when the marker
                        # cannot be decoded or evaluated.
                        try:
                            # NOTE(review): str.decode('base64') exists on Python 2 only.
                            eval_markers = markers.decode('base64')
                            markers = eval_markers.replace('true', '1').replace('false', '0')
                            # SECURITY NOTE(review): eval() executes whatever expression
                            # the logged marker contains; consider a real parser
                            # (json / ast.literal_eval) once the marker format is confirmed.
                            em = eval(markers)
                            # NOTE(review): a marker without 'Title' yields the literal
                            # string 'None' here, not None - confirm this is intended.
                            title = str(em.get('Title'))
                            title = title.replace("\\u0007[", "").replace("\\u0007]", "")
                        except Exception:
                            # Deliberately ignored, see above.  Unlike a bare `except:`
                            # this lets KeyboardInterrupt / SystemExit propagate.
                            pass

                    result_row = {
                        "tvt": res_tvt,
                        "lvt": res_lvt,
                        "url": result.Url,
                        "pos": result.Position,
                        "title": title,
                        "ratio": result.CRatio,
                        "width": result.CWidth,
                        "height": result.CHeight,
                        "player": str(result.PlayerId),
                        "clicks": len(result.GetClicks()),
                        "duration": result.Duration,
                        "long_watches": res_long_watches,
                        "has_translation": result_markers.get('hasTranslation') == '1',
                    }

                    # Optional fields are only added when they apply.
                    if is_video_group:
                        result_row["group_id"] = result.GroupId
                        result_row["group_pos"] = result.InGroupPos
                        result_row["group_type"] = result.GroupType

                    if result.Url in fragment_infos:
                        result_row["fragment_begin"] = fragment_infos[result.Url]

                    if result.Url in keypoint_infos:
                        result_row["keypoint_info"] = keypoint_infos[result.Url]

                    results.append(result_row)

                    tvt += res_tvt
                    lvt += res_lvt
                    long_watches += res_long_watches

            # Whitelisted search props, copied as-is.
            res_spv = {x: spv[x] for x in SPV_FIELDS if x in spv}
            # Middle part of every 'UPPER.VideoUgc.<name>_passed' search prop key.
            res_spv["UPPER.VideoUgc"] = [
                x[len('UPPER.VideoUgc.'):-len("_passed")]
                for x in spv
                if x.startswith('UPPER.VideoUgc.') and x.endswith("_passed")
            ]
            # Props under the MergeRtxCtrAware prefix whose value mentions 'ugc',
            # keyed by the remainder of the prop name.
            ugc_merge_prefix = "UPPER.VideoUgc.MergeRtxCtrAware"
            res_spv[ugc_merge_prefix] = {
                x[len(ugc_merge_prefix):]: spv[x]
                for x in spv
                if x.startswith(ugc_merge_prefix) and 'ugc' in spv[x]
            }

            res = {
                "ts": r.Timestamp,
                "ui": ui,
                "uid": uid,
                "lvt": lvt,
                "tvt": tvt,
                "url": r.FullRequest,
                "ReqID": r.ReqID,
                "serpid": r.SerpID,
                "query": r.Query,
                "domain": r.ServiceDomRegion,
                "results": results,
                "referer": r.Referer,
                "ICookie": r.ICookie,
                "testids": [test.TestID for test in r.GetTestInfo()],
                "UserAgent": r.UserAgent,
                "is_portal": r.IsA("TVideoPortalRequestProperties"),
                "UserRegion": r.UserRegion,
                "is_related": is_related,
                "has_ontoid": has_ontoid,
                "RelevValues": {x: rv[x] for x in RV_FIELDS if x in rv},
                "long_watches": long_watches,
                "SearchPropsValues": res_spv,
                # Non-empty tokens of the space-separated 'REPORT.test-ids' prop;
                # built as a real list so the value does not depend on what
                # filter() returns.
                "triggered_testids": [
                    testid for testid in spv.get("REPORT.test-ids", "").split(" ") if testid
                ],
            }

            # PassportUID is left out of the record entirely when it is empty.
            if r.PassportUID:
                res['PassportUID'] = r.PassportUID

            if has_ontoid:
                # Prefer the REPORT props; fall back to UPPER.VideoExtraItems.
                prefix = 'REPORT' if spv.get('REPORT.object_type') else 'UPPER.VideoExtraItems'
                object_id = spv.get(prefix + '.object_id', '')
                object_tags = spv.get(prefix + '.object_tags', '')
                object_type = spv.get(prefix + '.object_type', '')
                object_subtype = spv.get(prefix + '.object_subtype', '')

                res["ontoid"] = object_id
                res["obj_type"] = '/'.join([object_type, object_subtype, object_tags])
            else:
                res["ontoid"] = None
                res["obj_type"] = ''

            if is_related:
                # parent_reqid is the 'parent-reqid=' URL parameter, cut at the
                # next '&' or '#'; None when the URL carries no such parameter.
                full_request = r.FullRequest
                if 'parent-reqid=' in full_request:
                    tail = full_request.split('parent-reqid=', 1)[1]
                    res["parent_reqid"] = tail.split('&', 1)[0].split('#', 1)[0].strip()
                else:
                    res["parent_reqid"] = None
                related(Record(res))
            else:
                main_query(Record(res))


@cli.statinfra_job(
    options=[
        cli.Option('use_sample', is_flag=True),
        cli.Option('out_path', default=OUTPUT_PATH),
    ]
)
def make_job(job, options):
    """Build the "Video queries cube" job for every date in ``options.dates``.

    For each date the user sessions table is run through ``video_clicks``;
    both resulting streams get a ``fielddate`` column, are sorted by
    ``uid`` and ``ts`` and stored under ``$job_root/<date>`` (main queries)
    and ``$job_root.related/<date>`` (related queries), where ``$job_root``
    is ``options.out_path``.

    Args:
        job: job object to configure.
        options: parsed CLI options; ``dates``, ``use_sample`` and
            ``out_path`` are read.

    Returns:
        The configured job.
    """
    # Dates before this day are included in Search sessions.
    video_sessions_start = datetime.datetime(2020, 3, 4)
    # With --use_sample the sampled session tables are read instead of the full ones.
    sample_dir = 'sample_by_uid_1p/' if options.use_sample else ''

    job = job.env(
        yt_spec_defaults={
            "pool_trees": ["physical"],
            "use_default_tentative_pool_trees": True,
        },
        templates={
            "title": "Video queries cube",
            "job_root": options.out_path,
        },
    )

    for date in options.dates:
        day = datetime.datetime.strptime(date, "%Y-%m-%d")
        if day < video_sessions_start:
            source_pattern = '//user_sessions/pub/{}search/daily/{}/clean'
        else:
            source_pattern = '//user_sessions/pub/{}video/daily/{}/clean'
        usersessions = source_pattern.format(sample_dir, date)

        sessions = job.table(usersessions)
        main_query, related = sessions.libra(
            video_clicks,
            libra_file=nile.files.RemoteFile("//statbox/resources/libra_nile_udf2.7.so"),
            memory_limit=5 * 1024,
        )

        # Both streams are post-processed identically; only the target differs.
        targets = (
            (main_query, '$job_root/{}'),
            (related, '$job_root.related/{}'),
        )
        for stream, target_pattern in targets:
            dated = stream.project(ne.all(), fielddate=ne.const(date))
            dated.sort('uid', 'ts').put(target_pattern.format(date))

    return job


# Script entry point: hand control to the nile CLI runner when executed directly.
# NOTE(review): presumably cli.run() picks up the job registered by
# @cli.statinfra_job above - confirm against the nile CLI documentation.
if __name__ == '__main__':
    cli.run()
