#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import json
import re
import hashlib
import random
from collections import defaultdict, Counter
from qb2.api.v1 import typing as qt, resources as qr
from nile.api.v1 import Record
from pytils import optionalize_schema, parseparams, parse_cgi

# Compiled patterns used by the parsers below. Where a sample input is
# available it is given in the comment line directly above the pattern.
# /dvr/edatv/edatv0_169_576p-1482310130000.ts
request_re = re.compile(r"^\/dvr\/(.*)/.*-(.*)\.")
# /kal/ntv_cv/ntv_cv0_169_576p.json/seg-300660927-v1-a1.ts
request_kal_re = re.compile(r"^\/kal\/(.*)/(.*)/")
# /vh-ntv-converted/vod-content/138931_169_576p7.ts
vod_request_re = re.compile(r"vh-(.*)-converted/")
# Captures whatever sits between a slash and "-vod/".
zen_request_re = re.compile(r"/(.*)-vod/")
# "/key=value/" style fields searched for in tracking "init" URLs
# (see get_info_from_source_config).
re_yu = re.compile(r"/yandexuid=([0-9]+?)/")
# JSON-style variant of the same field: "yaUID":"<digits>".
re_yu2 = re.compile(r'"yaUID":"([0-9]+?)"')
re_reqid = re.compile(r"/reqid=([0-9\.\-]+?)/")
re_chid = re.compile(r"/channel_id=([0-9a-z]+?)/")
re_source = re.compile(r"/source=([0-9a-z]+?)/")
# Version segment of a player bundle URL on yastatic.net.
# NOTE(review): not a raw string and the dots in the host are unescaped, so
# they match any character — confirm whether that is intentional.
re_pv = re.compile(
    "yastatic.net/yandex-video-player-iframe-api-bundles/([0-9-.]+)/"
)
# Name of the record attribute read by StrmParser; also used as the
# add_info key the value is stored under.
RETRANS = "tcpinfo_total_retrans"


def tryint(x, default=0):
    """Convert *x* with ``int()``; return *default* if that is not possible.

    Only ``TypeError`` and ``ValueError`` are treated as "not convertible".
    """
    try:
        converted = int(x)
    except (TypeError, ValueError):
        converted = default
    return converted


def get_player_version(s):
    """Return the player bundle version found in *s* by ``re_pv``.

    Returns None when the pattern does not occur in *s*.
    """
    found = re_pv.search(s)
    return found.group(1) if found else None


def get_player_version_wrapper(referer, *params_list):
    """Determine the player version from the referer or from parameter dicts.

    The referer is searched with ``re_pv`` first. If it does not match, the
    first mapping in *params_list* that has a ``"player_version"`` key is
    used: its value is split on ":" and the last piece is returned.
    Returns None when neither source provides a version.
    """
    found = re_pv.search(referer)
    if found is not None:
        return found.group(1)
    # Lazily pick the first mapping that carries the key.
    holders = (p for p in params_list if "player_version" in p)
    for holder in holders:
        return holder["player_version"].split(":")[-1]
    return None


def get_resolution(page):
    """Extract the vertical resolution token from a request path.

    The token is the text before the first "." of *page*, after its last
    "_", and before the first "p" of that piece (e.g. ``..._169_576p-...ts``
    gives ``"576"``).

    Returns the token as a string when it is at most four characters long
    and accepted by ``int()``; otherwise returns None.
    """
    token = page.split(".")[0].split("_")[-1].split("p")[0]
    # Explicit check instead of ``assert``: assertions are removed when the
    # interpreter runs with -O, which would silently drop this limit.
    if len(token) > 4:
        return None
    try:
        int(token)
    except (ValueError, TypeError):
        return None
    return token


def parse_as(as_, asname_dict):
    """Map the entries of *as_* through *asname_dict* and return unique names.

    Each entry is looked up with ``asname_dict.get`` (so unknown entries map
    to None), the placeholder names "-" and "other" are dropped, and the
    remaining distinct values are returned sorted. A falsy *as_* yields [].
    """
    names = {asname_dict.get(code) for code in (as_ or [])}
    names.difference_update({"-", "other"})
    return sorted(names)


def get_as(ip, ip_origins):
    """Look up *ip* via ``ip_origins.region_by_ip`` and return the result.

    Returns an empty list when *ip* is falsy, when the lookup raises
    ``ValueError`` or ``TypeError``, or when the lookup result is falsy.
    """
    if not ip:
        return []
    try:
        found = ip_origins.region_by_ip(ip)
    except (ValueError, TypeError):
        return []
    return found or []


def process_match(m):
    """Return the part of *m* that precedes its first "/" (all of *m* if none)."""
    head, _, _ = m.partition("/")
    return head


def get_request_params_from_url(request):
    """Derive ``[channel, request_ts, view_type]`` from a request path.

    Rules are tried in this order:

    * falsy or shorter than 20 characters -> ``[None, 0, ""]``
    * contains "cdn1tvru/live" / "cdn1tvru/vod" -> ``["1tv", 0, "live"/"vod"]``
    * matches ``request_re`` (/dvr/...) -> channel from the path, timestamp
      taken from the file name and divided by 1000
    * matches ``request_kal_re`` (/kal/...) -> channel from the path,
      numeric segment id multiplied by 5
    * matches ``vod_request_re`` or ``zen_request_re`` -> channel, 0, "vod"
    * contains "/get-video-hosting" -> ``["ott", 0, "vod"]``
    * anything else -> ``[None, 0, ""]``

    The timestamp is always an integer; it is 0 whenever it cannot be
    parsed from the URL.
    """
    if not request or len(request) < 20:
        return [None, 0, ""]

    if "cdn1tvru/live" in request:
        return ["1tv", 0, "live"]

    if "cdn1tvru/vod" in request:
        return ["1tv", 0, "vod"]

    match = request_re.findall(request)
    if match:
        channel, raw_ts = match[0]
        try:
            request_ts = int(raw_ts) // 1000
        except (TypeError, ValueError):
            # A non-numeric suffix should not abort the whole record; fall
            # back to 0 the same way the /kal/ branch does.
            request_ts = 0
        return [process_match(channel), request_ts, ""]

    match = request_kal_re.findall(request)
    if match:
        request_ts = 0
        segment = re.search(r".*-([0-9]+)-.*\.", request)
        if segment is not None:
            # Convert before multiplying: multiplying the captured string by
            # 5 would repeat its digits five times instead of scaling it.
            request_ts = int(segment.group(1)) * 5
        return [process_match(match[0][0]), request_ts, ""]

    match = vod_request_re.findall(request)
    if match:
        return [process_match(match[0]), 0, "vod"]

    match = zen_request_re.findall(request)
    if match:
        return [process_match(match[0]), 0, "vod"]

    if "/get-video-hosting" in request:
        return ["ott", 0, "vod"]

    return [None, 0, ""]


def get_qs_param(params, name):
    """Return ``params[name]`` when present and truthy, otherwise ``""``.

    A falsy *params* is treated as an empty mapping. The stored value is
    returned as is, whether it is a list or a scalar.
    """
    lookup = params if params else {}
    if name not in lookup:
        return ""
    value = lookup[name]
    return value if value else ""


def get_qs_param_wrapper(*args, **kwargs):
    """Find the first truthy value of a parameter across several mappings.

    Positional arguments: the parameter name followed by any number of
    parameter mappings, which are consulted in order via ``get_qs_param``.

    Keyword arguments:
        return_source: when truthy, return ``(value, index)`` where *index*
            is the position of the mapping that supplied the value, or
            ``(None, -1)`` when nothing was found. Otherwise return just the
            value, or None when nothing was found.
    """
    name, sources = args[0], args[1:]
    with_source = bool(kwargs.get("return_source"))
    for index, params in enumerate(sources):
        value = get_qs_param(params, name)
        if not value:
            continue
        return (value, index) if with_source else value
    return (None, -1) if with_source else None


def get_detailed_error_id(error_id, details):
    """Append the ``"reason"`` from *details* to *error_id*.

    Returns ``"<error_id>_<reason>"`` when *details* is a dict with a truthy
    ``"reason"``; otherwise returns *error_id* unchanged.
    """
    reason = details.get("reason", "") if isinstance(details, dict) else ""
    if not reason:
        return error_id
    return "{}_{}".format(error_id, reason)


def get_category_id(parameters, referer_parameters):
    """Return the category id for a request.

    Prefers ``"video_category_id"`` from *parameters*; when that is empty,
    falls back to ``"category"`` from *referer_parameters* (which may itself
    be the empty string).
    """
    return (
        get_qs_param(parameters, "video_category_id")
        or get_qs_param(referer_parameters, "category")
    )


def get_hash(yandexuid, salt="e0440ebc0786e3d2cff6ef51319bc226"):
    """Return the hex MD5 digest of *yandexuid* concatenated with *salt*.

    Both arguments may be byte strings or text. Text is encoded as UTF-8
    before hashing, so byte-string and ASCII inputs produce the same digest
    as hashing their plain concatenation, while non-ASCII text and Python 3
    ``str`` values no longer raise.
    """

    def as_bytes(value):
        # md5 needs bytes; leave byte strings untouched, encode text.
        return value if isinstance(value, bytes) else value.encode("utf-8")

    return hashlib.md5(as_bytes(yandexuid) + as_bytes(salt)).hexdigest()


def get_error_details(params):
    """Decode the JSON ``"details"`` entry of *params*.

    On success the decoded value is returned; when *params* also carries a
    truthy ``"connection"`` entry it is stored under ``"connection"`` in the
    decoded value first.

    Returns ``{"unparsed": <raw details>}`` when decoding raises
    ``ValueError``, and ``{}`` on ``KeyError``, ``IndexError`` or
    ``TypeError`` (for example when ``"details"`` is missing).
    """
    try:
        parsed = json.loads(params["details"])
        link = params.get("connection", []) or []
        if link:
            parsed["connection"] = link
    except ValueError:
        return {"unparsed": params["details"]}
    except (KeyError, IndexError, TypeError):
        return {}
    return parsed


def get_error_id(parsed_parameters, error_details, fatal=None):
    """Build the normalized error identifier for a player log event.

    Args:
        parsed_parameters: mapping with optional ``"event_id"``,
            ``"error_id"`` and ``"fatal"`` entries.
        error_details: decoded error details, used for stalled events.
        fatal: explicit fatality flag; when None it is read from
            ``parsed_parameters["fatal"] == "true"``.

    The base id is the event id for a fixed set of buffer/heartbeat events,
    the event id plus stall reason for stalled events, otherwise the error
    id (or ""). "_fatal" is appended for fatal events and every
    "PlayedStalled" occurrence is rewritten to "Stalled".
    """
    if fatal is None:
        fatal = parsed_parameters.get("fatal", "false") == "true"
    event_id = parsed_parameters.get("event_id", "")
    error_id = parsed_parameters.get("error_id", "") or ""

    passthrough_events = {
        "Buffer.Empty",
        "InvalidFragDuration",
        "RecoverStreamError",
        "BufferEmptyCritical",
        "Buffer.Empty.Critical",
        "10SecWatched",
        "20SecWatched",
        "30SecHeartbeat",
    }
    stalled_events = {"PlayerStalled", "PlayedStalled", "Stalled"}

    if event_id in passthrough_events:
        base = event_id or ""
    elif event_id in stalled_events:
        base = get_detailed_error_id(event_id, error_details)
    elif error_id:
        base = error_id or ""
    else:
        base = ""

    if fatal:
        base += "_fatal"
    return (base or "").replace("PlayedStalled", "Stalled")


# https://proxy.video.yandex.net/get-master-hls/videodisk/39092/153bf4bcf70/fd1ecb09fcf6d1bf/hls_master_playlist.m3u8?sign=15b7d13548b.de5eb30c4c27dd23772e290190a5c2c3ed6a62e62960bb59cc33b46684830c1a&vsid=65e94c257325c050ee1f91df90368938a2e51a7204461ecb9b5b262a84652168


def get_error_vsid(parsed_parameters):
    """Return the vsid for an error event.

    Uses the ``"vsid"`` entry of *parsed_parameters* when it is truthy;
    otherwise parses the ``"src"`` entry with ``parse_cgi`` and returns its
    ``"vsid"`` value. Returns None when a ``TypeError``, ``ValueError`` or
    ``AttributeError`` is raised along the way.
    """
    try:
        direct = parsed_parameters.get("vsid", "")
        if direct:
            return direct
        return parse_cgi(parsed_parameters.get("src")).get("vsid")
    except (TypeError, ValueError, AttributeError):
        return None


# Field name -> qb2 type mapping for preprocessed event records, passed
# through pytils.optionalize_schema together with the qb2 typing module.
# NOTE(review): presumably this wraps every type as optional — confirm
# against optionalize_schema. "imp_id", which DspParser writes at the top
# level of its result, is not listed here; verify whether that is intended.
preprocessed_schema = optionalize_schema(
    qt,
    {
        "a_station": qt.String,
        "add_info": qt.Json,
        "adsid": qt.String,
        "bidreqid": qt.String,
        "browser_name": qt.String,
        "browser_version": qt.String,
        "category_id": qt.String,
        "channel_old": qt.String,
        "country": qt.String,
        "device_type": qt.String,
        "error_id": qt.String,
        "event": qt.String,
        "fatal": qt.Bool,
        "ip": qt.String,
        "os_family": qt.String,
        "page_id": qt.String,
        "player_version": qt.String,
        "provider": qt.String,
        "ref_from": qt.String,
        "ref_from_block": qt.String,
        "region": qt.Integer,
        "reqid": qt.String,
        "resolution": qt.String,
        "slots_arr": qt.Json,
        "source": qt.String,
        "source_data": qt.Json,
        "stream_block": qt.String,
        "timestamp": qt.Integer,
        "user_agent": qt.String,
        "user_id": qt.String,
        "video_content_id": qt.String,
        "view_type": qt.String,
        "vsid": qt.String,
        "yandexuid": qt.String,
        "yu_hash": qt.String,
    },
)


def parse_slots(slots):
    """Split a ``"a,x;b,y"`` style string into the first item of each pair.

    Returns [] for None or the empty string.

    Raises:
        Exception: carrying ``repr(slots)`` when *slots* cannot be split
            (``TypeError``, ``ValueError`` or ``AttributeError``).
    """
    if slots in (None, ""):
        return []

    try:
        pairs = slots.split(";")
        return [item.split(",")[0] for item in pairs]
    except (TypeError, ValueError, AttributeError):
        raise Exception(repr(slots))


def get_country(region):
    """Return ``short_name`` of the first entry in ``region.path`` whose
    ``type`` equals 3, or ``"UNK"`` when there is no such entry.
    """
    return next(
        (node.short_name for node in region.path if node.type == 3),
        "UNK",
    )


def process_browser(user_agent, detector, result):
    """Fill browser-related fields of *result* from a user agent string.

    *user_agent* is converted with ``str()`` and stored under
    ``"user_agent"``. If the converted string is empty nothing else is
    written. Otherwise ``detector.detect`` is called:

    * on ``AttributeError`` / ``ValueError`` the fields ``browser_name``,
      ``browser_version``, ``os_family`` and ``device_type`` are set to None;
    * on success the first three are filled with ``str()`` of the detected
      ``BrowserName`` / ``BrowserVersion`` / ``OSFamily`` and ``device_type``
      becomes "tv", "desktop", "tablet" or "phone" based on the ``isTV``,
      ``isMobile`` and ``isTablet`` flags.

    Returns None; *result* is modified in place.
    """
    ua_text = str(user_agent)
    result["user_agent"] = ua_text
    if not ua_text:
        return

    try:
        traits = detector.detect(ua_text)
    except (AttributeError, ValueError):
        for field in (
            "browser_name",
            "browser_version",
            "os_family",
            "device_type",
        ):
            result[field] = None
        return

    for field, trait in (
        ("browser_name", "BrowserName"),
        ("browser_version", "BrowserVersion"),
        ("os_family", "OSFamily"),
    ):
        result[field] = str(traits.get(trait))

    # TV wins over everything; non-mobile devices are desktops; mobile
    # devices are split into tablets and phones.
    if traits.get("isTV", False):
        device = "tv"
    elif traits.get("isMobile", False):
        device = "tablet" if traits.get("isTablet", False) else "phone"
    else:
        device = "desktop"
    result["device_type"] = device


def process_ip(ip, geobase, ip_origins, asname_dict, result):
    """Fill geo and network fields of *result* for a client IP.

    Commas in *ip* are replaced with dots before the lookup. On success
    ``region``, ``country``, ``ip``, ``a_station`` (comma-joined, sorted
    output of ``get_as``) and ``provider`` are written; ``provider`` is set
    only when ``parse_as`` yields exactly one name, otherwise it is None.

    If the geo lookup raises ``ValueError``, ``region``, ``ip``,
    ``a_station`` and ``provider`` are set to None and nothing else is
    written.

    Returns None; *result* is modified in place.
    """
    try:
        ip = ip.replace(",", ".")
        located = geobase.region_by_ip(ip)
        result["region"] = located.id
        country = get_country(located)
    except ValueError:
        result.update(region=None, ip=None, a_station=None, provider=None)
        return

    result["country"] = country
    result["ip"] = ip
    stations = sorted(get_as(ip, ip_origins))
    result["a_station"] = ",".join(stations)
    names = parse_as(stations, asname_dict)
    result["provider"] = names[0] if len(names) == 1 else None

class StrmParser(object):
    """Mapper that converts streaming access-log records into event records.

    Each accepted input record becomes one output ``Record`` whose ``event``
    is "chunk", "playlist", "error" or "heartbeat" and whose ``source`` is
    "strm". Records are dropped when they match one of the skip markers,
    have a status other than 200/206, are of an unknown request kind, have
    an unusable timestamp, or lack a 64-character vsid.
    """

    def __init__(self, asname_dict):
        # Mapping handed to parse_as (via process_ip) to name providers.
        self.asname_dict = asname_dict

    @staticmethod
    def get_common_params(result, *params):
        """Fill the fields shared by all event kinds from parameter mappings.

        *params* are consulted in order for every field; the index of the
        mapping that supplied each value (or -1) is recorded under
        ``result["add_info"]["extended_sources"]``. ``result["vsid"]`` must
        already be set, because it is the fallback for ``user_id``.
        """

        def getter(x):
            res, source = get_qs_param_wrapper(x, *params, return_source=True)
            result["add_info"]["extended_sources"][x] = source
            return res

        result["ref_from"] = getter("from")
        result["adsid"] = getter("adsid")
        result["ref_from_block"] = getter("from_block")
        result["stream_block"] = getter("stream_block")
        slots = getter("slots")
        result["slots_arr"] = parse_slots(slots)
        result["reqid"] = getter("reqid")
        result["page_id"] = getter("partner_id")
        result["yandexuid"] = getter("yandexuid")
        result["yu_hash"] = getter("hash")
        if result["yandexuid"] and not result["yu_hash"]:
            result["yu_hash"] = get_hash(result["yandexuid"])
        result["user_id"] = result["yu_hash"] or "vsid_{}".format(
            result["vsid"]
        )
        result["video_content_id"] = getter("video_content_id")
        result["add_info"]["vc_name"] = getter("video_content_name")

    def __call__(self, recs):
        """Yield one output Record per accepted input record in *recs*."""
        import uatraits

        detector = uatraits.detector("/usr/share/uatraits/browser.xml")
        geobase = qr.get("Geobase")
        ip_origins = qr.get("IpOrigins")
        # Substrings that mark requests which must not be counted.
        skip_markers = (
            "for-regional-cache=1",
            "monitoring=1",
            "hide_stub=1",
            "/get-video-an",
            "/timetail",
            "dvrpy=1",
            "/vh-bsvideo-converted/",
        )
        for rec in recs:
            request = getattr(rec, "request", "") or ""
            if any(marker in request for marker in skip_markers):
                continue
            if getattr(rec, "status", "") not in {"200", "206"}:
                continue
            request_page = request.split("?")[0].split("#")[0]
            if request_page.endswith((".ts", ".mp4", "m4s")):
                result = {"event": "chunk"}
                params_from_url = get_request_params_from_url(rec.request)
            elif request_page.endswith((".mpd", ".m3u8", ".ismc")):
                result = {"event": "playlist"}
                params_from_url = get_request_params_from_url(rec.request)
            elif request.startswith("/log/"):
                result = {"event": "error"}
            else:
                continue
            result["source"] = "strm"
            result["add_info"] = {}
            result["add_info"]["extended_sources"] = {}
            result["add_info"]["extension"] = request_page.split(".")[-1][:10]
            result["user_agent"] = rec.user_agent
            result["source_data"] = {
                "referer": rec.referer,
                "request": request,
                "user_agent": rec.user_agent,
            }
            try:
                result["timestamp"] = int(rec.timestamp)
            except (AttributeError, ValueError, TypeError):
                # TypeError covers a timestamp that is present but None.
                continue
            process_browser(rec.user_agent, detector, result)
            process_ip(rec.ip, geobase, ip_origins, self.asname_dict, result)
            # Both counters fall back to 0 when missing, None or non-numeric,
            # so a single malformed field cannot abort the whole job.
            result["add_info"][RETRANS] = tryint(getattr(rec, RETRANS, 0))
            result["add_info"]["bytes_sent"] = tryint(
                getattr(rec, "bytes_sent", 0)
            )
            request_params = parse_cgi(rec.request)
            referer_params = parse_cgi(rec.referer)
            stream_url_params = parse_cgi(referer_params.get("stream_url", ""))
            mq_url_params = parse_cgi(referer_params.get("mq_url", ""))
            add_params = referer_params.get("additional_params") or {}
            try:
                add_params = json.loads(add_params)
            except (TypeError, ValueError):
                add_params = {}
            if not isinstance(add_params, dict):
                # Valid JSON that is not an object (list, number, string)
                # cannot serve as a parameter mapping.
                add_params = {}
            if result["event"] in {"chunk", "playlist"}:
                result["vsid"], vsid_source = get_qs_param_wrapper(
                    "vsid",
                    request_params,
                    referer_params,
                    stream_url_params,
                    mq_url_params,
                    add_params,
                    return_source=True,
                )
                result["add_info"]["extended_sources"]["vsid"] = vsid_source
                if not result["vsid"] or len(result["vsid"]) != 64:
                    continue
                result["category_id"] = get_category_id(
                    request_params, referer_params
                )
                result["player_version"] = get_player_version_wrapper(
                    rec.referer,
                    request_params,
                    referer_params
                )
                result["resolution"] = get_resolution(rec.request)
                self.get_common_params(
                    result,
                    request_params,
                    referer_params,
                    stream_url_params,
                    mq_url_params,
                    add_params,
                )
                result["channel_old"] = params_from_url[0]
                result["add_info"]["request_ts"] = params_from_url[1]
                result["view_type"] = params_from_url[2]
            elif result["event"] == "error":
                src_params = {}
                if request_params.get("src", ""):
                    src_params = parse_cgi(request_params["src"])
                result["vsid"], vsid_source = get_qs_param_wrapper(
                    "vsid",
                    request_params,
                    src_params,
                    referer_params,
                    stream_url_params,
                    mq_url_params,
                    add_params,
                    return_source=True,
                )
                result["add_info"]["extended_sources"]["vsid"] = vsid_source
                if not result["vsid"] or len(result["vsid"]) != 64:
                    continue
                self.get_common_params(
                    result,
                    request_params,
                    referer_params,
                    mq_url_params,
                    src_params,
                    add_params,
                )
                result["add_info"]["error_details"] = get_error_details(
                    request_params
                )
                result["fatal"] = (
                    request_params.get("fatal", "false") == "true"
                )
                result["error_id"] = get_error_id(
                    request_params,
                    result["add_info"]["error_details"],
                    fatal=result["fatal"],
                )
                if result["error_id"] in {
                    "10SecWatched",
                    "20SecWatched",
                    "30SecHeartbeat",
                }:
                    # Watch-progress pings arrive through the error log but
                    # are emitted as heartbeat events without an error id.
                    result["event"] = "heartbeat"
                    result["add_info"]["heartbeat_src"] = result["error_id"]
                    result.pop("error_id")
                    yield Record.from_dict(result)
                    continue
                if not result["error_id"]:
                    continue
                result["add_info"]["error_id_raw"] = (
                    request_params.get("event_id")
                    or request_params.get("error_id")
                    or ""
                )
            # 1 only for .ts chunks whose vsid and video_content_id both came
            # from one of the first two parameter sources, else 0.
            result["add_info"]["myshell_marker"] = int(
                result["event"] == "chunk"
                and result["add_info"]["extension"] == "ts"
                and result["add_info"]["extended_sources"]["vsid"] <= 1
                and bool(result["video_content_id"])
                and result["add_info"]["extended_sources"]["video_content_id"]
                <= 1
            )
            yield Record.from_dict(result)


class RedirParser(object):
    """Mapper that turns redir-log heartbeat lines into heartbeat records.

    Only records whose parsed ``path`` is "player-events.heartbeat" and that
    carry a non-empty ``vsid`` and ``content_id`` as well as an
    integer-convertible ``timestamp`` are emitted.
    """

    def __init__(self, asname_dict):
        self.asname_dict = asname_dict

    def __call__(self, recs):
        """Yield one heartbeat Record per accepted input record."""
        geobase = qr.get("Geobase")
        ip_origins = qr.get("IpOrigins")
        heartbeat_path = "player-events.heartbeat"
        for rec in recs:
            raw_value = rec["value"]
            # Cheap substring test before the full parse.
            if heartbeat_path not in raw_value:
                continue
            params = parseparams(raw_value.decode("utf8", errors="replace"))
            if params.get("path") != heartbeat_path:
                continue
            if not (params["vsid"] and params["content_id"]):
                continue

            result = {"event": "heartbeat", "source": "redir", "add_info": {}}
            result["source_data"] = dict(params)
            try:
                result["timestamp"] = int(params["timestamp"])
            except (TypeError, ValueError, AttributeError):
                continue

            # Only the first address of a comma-separated list is used.
            client_ip = params["ip"].split(",")[0]
            located = geobase.region_by_ip(client_ip)
            country = get_country(located)
            result["region"] = located.id
            result["country"] = country
            result["ip"] = client_ip

            extra = result["add_info"]
            extra["a_station"] = get_as(client_ip, ip_origins)
            extra["provider"] = parse_as(extra["a_station"], self.asname_dict)

            if params.get("stream_block"):
                result["stream_block"] = params["stream_block"]

            uid = params.get("yandexuid") or params.get("yuid")
            result["yandexuid"] = uid
            result["yu_hash"] = get_hash(uid) if uid else ""
            result["vsid"] = params["vsid"]
            result["user_id"] = result["yu_hash"] or "vsid_{}".format(
                result["vsid"]
            )
            result["video_content_id"] = params["content_id"] or None
            extra["channel_id"] = params.get("channel_id")
            result["reqid"] = params.get("reqid")
            extra["heartbeat_src"] = "redir_heartbeat"
            yield Record.from_dict(result)


class DspParser(object):
    """Mapper that converts DSP log rows into "rtb-dsp" event records.

    Args:
        page_ids: container of page ids (as strings) to keep.
        dsp_ids: container of DSP ids (as ints) to keep.
        add_info: name of the result key that holds the nested details dict.
        process_rtb: when true, each record is additionally enriched by
            ``_process_rtb`` (browser, geo and vsid data).
        asname_dict: mapping passed to ``process_ip`` for provider names.
    """

    def __init__(
        self,
        page_ids,
        dsp_ids,
        add_info="add_info",
        process_rtb=False,
        asname_dict=None,
    ):
        self.asname_dict = asname_dict
        self.add_info = add_info
        self.process_rtb = process_rtb
        self.page_ids = page_ids
        self.dsp_ids = dsp_ids

    def _process_rtb(
        self, rec, detector, geobase, ip_origins, queryargs, result
    ):
        """Add vsid, content id, browser and geo fields to *result*.

        The vsid is taken from query args "769" or "362", then from the
        referer; when it is missing or "0" a synthetic
        ``"rtbdsp_<bidreqid>"`` value is used instead.
        """
        session_id = queryargs.get("769", "") or queryargs.get("362", "")
        result["bidreqid"] = str(rec["bidreqid"])
        referer_params = parse_cgi(rec.get("referer", "") or "")
        if not session_id:
            session_id = referer_params.get("vsid")
        result["video_content_id"] = queryargs.get("354")

        agent = rec["useragent"]
        client_ip = rec["clientip"]
        result["user_agent"] = agent
        if agent:
            process_browser(agent, detector, result)
        if client_ip:
            process_ip(
                client_ip, geobase, ip_origins, self.asname_dict, result
            )

        if not session_id or session_id == "0":
            if not client_ip:
                # NOTE(review): this random hash is computed but not used
                # afterwards — confirm whether it was meant to be part of
                # the synthetic vsid.
                client_ip = get_hash(str(self.rnd.randint(1, 1000000)))
            session_id = "rtbdsp_{}".format(result["bidreqid"])
        result["vsid"] = session_id

    @staticmethod
    def _process_dsp(result, rec, add_info):
        """Copy DSP log columns of *rec* into *result* and its details dict.

        Top-level fields and the nested ``result[add_info]`` dict are both
        filled; win/counter-type dependent metrics (Price, PartnerPrice,
        Hit, ShownHit) are derived at the end.
        """
        info = result[add_info]

        result["yandexuid"] = str(rec["uniqid"])
        result["yu_hash"] = get_hash(result["yandexuid"])
        info["DetailedDeviceType"] = rec["detaileddevicetype"]
        info["DeviceType"] = int(rec["devicetype"] or 0)
        info["BrowserName"] = rec["browsername"]
        result["os_family"] = info["DetailedDeviceType"]
        result["browser_name"] = info["BrowserName"]
        info["PageID"] = str(rec["pageid"])
        info["DspID"] = tryint(rec["dspid"])
        result["page_id"] = info["PageID"]
        info["RequestId"] = str(rec["dsplogid"])
        info["producttype"] = str(rec["producttype"])
        info["UniqID"] = int(result["yandexuid"])
        result["region"] = int(rec["regionid"])
        info["RegionId"] = result["region"]
        info["adsessionid"] = str(rec["adsessionid"])
        info["BidReqid"] = str(rec["bidreqid"])
        result["bidreqid"] = str(rec["bidreqid"])
        result["imp_id"] = str(rec["impid"])
        info["imp_id"] = str(rec["impid"])
        for column in (
            "maxadscount",
            "win",
            "price",
            "partnerprice",
            "countertype",
            "position",
        ):
            info[column] = tryint(rec[column])

        # A "shown hit" is a won auction with counter type 1; a plain "hit"
        # is a won auction with counter type 0. Prices count only when shown.
        won = str(rec["win"]) == "1"
        counter_type = str(rec["countertype"])
        shown = won and counter_type == "1"
        info["Price"] = tryint(rec["price"]) if shown else 0
        info["PartnerPrice"] = tryint(rec["partnerprice"]) if shown else 0
        info["Hit"] = 1 if won and counter_type == "0" else 0
        info["ShownHit"] = 1 if shown else 0

    def __call__(self, recs):
        """Yield one "rtb-dsp" Record per row of *recs* that passes filters."""
        add_info = self.add_info
        self.rnd = random.SystemRandom()
        if self.process_rtb:
            import uatraits

            detector = uatraits.detector("/usr/share/uatraits/browser.xml")
            geobase = qr.get("Geobase")
            ip_origins = qr.get("IpOrigins")
        for rec in recs:
            # Rows without a bid request id, with fraud bits or event flags
            # set, or from pages outside the configured set are skipped.
            if str(rec.get("bidreqid", "0")) == "0":
                continue
            if str(rec.get("dspfraudbits", "0")) != "0":
                continue
            if str(rec.get("dspeventflags", "0")) != "0":
                continue
            if str(rec.get("pageid", "")) not in self.page_ids:
                continue
            if tryint(rec.get("dspid", -1)) not in self.dsp_ids:
                # Rows from other DSPs are kept only when they are won hits
                # (counter type 0, win 1) of DSP 5 or 10.
                won_hit = (
                    tryint(rec.get("countertype", -1)) == 0
                    and tryint(rec.get("win", -1)) == 1
                    and tryint(rec.get("dspid"), -1) in {5, 10}
                )
                if not won_hit:
                    continue

            result = {"event": "rtb-dsp", "source": "rtb-dsp", add_info: {}}
            try:
                result["timestamp"] = int(rec["eventtime"])
            except (KeyError, ValueError, TypeError):
                result["timestamp"] = 0
            self._process_dsp(result, rec, add_info)
            if self.process_rtb:
                queryargs = parse_cgi(
                    rec["queryargs"], prepend="http://example.com/?"
                )
                self._process_rtb(
                    rec, detector, geobase, ip_origins, queryargs, result
                )
            yield Record.from_dict(result)


# vsid — 769, 362
# vcid — 354
class RtbParser(DspParser):
    """Mapper that converts RTB log rows into "rtb" event records.

    Reuses ``DspParser._process_rtb`` for the enrichment; only
    ``asname_dict`` is configured, the DSP-specific settings of the parent
    class are not used here.
    """

    def __init__(self, asname_dict):
        self.asname_dict = asname_dict

    def __call__(self, recs):
        """Yield one "rtb" Record per row with a bid request id and query args."""
        import uatraits

        detector = uatraits.detector("/usr/share/uatraits/browser.xml")
        geobase = qr.get("Geobase")
        ip_origins = qr.get("IpOrigins")
        self.rnd = random.SystemRandom()
        for rec in recs:
            # Compare the string form, as DspParser does, so that a numeric
            # 0 is recognized as "no bid request id" as well.
            if str(rec.get("bidreqid", "0")) == "0":
                continue
            raw_queryargs = rec.get("queryargs")
            if not raw_queryargs:
                continue
            result = {"event": "rtb", "source": "rtb-dsp", "add_info_b": {}}
            queryargs = parse_cgi(
                raw_queryargs, prepend="http://example.com/?"
            )
            self._process_rtb(
                rec, detector, geobase, ip_origins, queryargs, result
            )
            yield Record.from_dict(result)


def parse_location(location):
    """Extract common event fields from a page location URL.

    The URL's own query string is consulted first, then the query strings
    of its ``stream_url`` and ``mq_url`` parameters (when present). The
    player version is added only when ``get_player_version`` finds one.

    Returns a dict with an ``"add_info"`` sub-dict and the extracted fields.
    """
    qs = parse_cgi(location)
    result = {"add_info": {}}
    version = get_player_version(location)
    if version:
        result["player_version"] = version

    def nested_params(key):
        # Query string of a URL that is itself passed as a parameter;
        # empty when the parameter is absent.
        try:
            return parse_cgi(qs[key])
        except KeyError:
            return {}

    stream_url_qs = nested_params("stream_url")
    mq_qs = nested_params("mq_url")

    def getter(field):
        return get_qs_param_wrapper(field, qs, stream_url_qs, mq_qs)

    for out_key, field in (
        ("ref_from", "from"),
        ("ref_from_block", "from_block"),
        ("stream_block", "stream_block"),
        ("adsid", "adsid"),
    ):
        result[out_key] = getter(field)
    result["slots_arr"] = parse_slots(getter("slots"))
    for out_key, field in (
        ("reqid", "reqid"),
        ("page_id", "partner_id"),
        ("yandexuid", "yandexuid"),
        ("yu_hash", "hash"),
    ):
        result[out_key] = getter(field)
    if result["yandexuid"] and not result["yu_hash"]:
        result["yu_hash"] = get_hash(result["yandexuid"])
    result["video_content_id"] = getter("video_content_id")
    result["add_info"]["vc_name"] = getter("video_content_name")
    return result


def get_info_from_source_config(source_config):
    """Extract ids from a player source config.

    From ``source_config["adConfig"]`` (when present and non-empty) the
    ``videoContentId``, ``category`` and ``partnerId`` values are copied as
    strings into ``video_content_id``, ``category_id`` and ``page_id``.

    From the "init" tracking URLs under
    ``source_config["trackings"]["trackingEvents"]`` (a list, a JSON string
    or a dict) the yandexuid, reqid, source and channel id are extracted.
    For each field the first event that contains it wins.

    Returns a dict with an ``"add_info"`` sub-dict. When the tracking events
    are missing or malformed, only the ad-config fields are returned.
    """
    result = {"add_info": {}}

    try:
        ad_config = source_config["adConfig"]
    except KeyError:
        ad_config = {}
    if ad_config:
        for out_key, config_key in (
            ("video_content_id", "videoContentId"),
            ("category_id", "category"),
            ("page_id", "partnerId"),
        ):
            try:
                result[out_key] = str(ad_config[config_key])
            except KeyError:
                pass

    # ``basestring`` exists only on Python 2; fall back to ``str`` elsewhere.
    try:
        text_types = basestring
    except NameError:
        text_types = str

    try:
        tracking_events = source_config["trackings"]["trackingEvents"]
        if isinstance(tracking_events, list):
            init_lst = tracking_events[0]["init"]
        elif isinstance(tracking_events, text_types):
            init_lst = json.loads(tracking_events)["init"]
        elif isinstance(tracking_events, dict):
            init_lst = tracking_events["init"]
        else:
            raise TypeError(
                "noncompatible type: {}".format(type(tracking_events))
            )
        # Probe the first entry: an empty list or a non-string entry means
        # the config is unusable, and the resulting exception is handled
        # right below.
        init_lst[0].startswith("//mc.yandex.ru")
    except (KeyError, ValueError, IndexError, AttributeError, TypeError):
        return result

    def first_group(pattern, text):
        # First capture group of *pattern* in *text*, or None if no match.
        found = pattern.search(text)
        return found.group(1) if found else None

    yandexuid = None
    reqid = None
    jst_source = None
    channel_id = None
    for init_event in init_lst:
        if not yandexuid:
            # Both spellings are tried; the JSON-style one takes precedence
            # when a single event contains both.
            for pattern in (re_yu, re_yu2):
                yandexuid = first_group(pattern, init_event) or yandexuid
        if not reqid:
            reqid = first_group(re_reqid, init_event)
        if not jst_source:
            jst_source = first_group(re_source, init_event)
        if not channel_id:
            # Remember the match so that later events without a channel id
            # cannot reset a value that was already found.
            channel_id = first_group(re_chid, init_event)

    if yandexuid:
        result["yandexuid"] = yandexuid
        result["yu_hash"] = get_hash(yandexuid)
    if reqid:
        result["reqid"] = reqid
    if jst_source:
        result["add_info"]["jst_source"] = jst_source
    # The key is always present after the events were scanned; it is None
    # when no event carried a channel id.
    result["add_info"]["channel_id"] = channel_id
    return result


class JsTracerParser(object):
    """Mapper that turns raw JS-tracer rows into normalized event records.

    Every input row carries a JSON payload in ``Data`` (and, in "new" mode,
    the event name in ``EventName``).  A row is silently dropped when the
    payload is not a JSON object, the event is not one of the supported
    ones, or client ip / user agent, a timestamp or a session id is missing.

    Args:
        asname_dict: mapping forwarded to ``process_ip``.
        local: when true, the uatraits / Geobase / IpOrigins enrichment is
            skipped, so none of those resources is touched.
        new: when true, the event name is read from the ``EventName`` column
            instead of ``eventName`` inside the payload.
    """

    def __init__(self, asname_dict, local=False, new=False):
        self.asname_dict = asname_dict
        self.local = local
        self.new = new

    @staticmethod
    def _dig(container, *path):
        """Return ``container[path[0]][path[1]]...``; lookup errors propagate."""
        for step in path:
            container = container[step]
        return container

    def __call__(self, recs):
        """Yield one ``Record`` for every usable row of *recs*."""
        if not self.local:
            import uatraits

            detector = uatraits.detector("/usr/share/uatraits/browser.xml")
            geobase = qr.get("Geobase")
            ip_origins = qr.get("IpOrigins")
        # Raw player event name -> normalized event name.
        event_dict = {
            "10SecWatched": "heartbeat",
            "20SecWatched": "heartbeat",
            "30SecHeartbeat": "heartbeat",
            "Start": "start",
            "CreatePlayer": "create_player"
        }
        # Everything a missing or wrongly typed payload field can raise.
        # TypeError matters: ``null`` / non-dict nodes and ``int(None)`` used
        # to escape the handlers and abort the whole mapper.
        field_errors = (
            KeyError,
            ValueError,
            IndexError,
            AttributeError,
            TypeError,
            OverflowError,
        )
        for rec in recs:
            if self.new and rec["EventName"] not in event_dict:
                continue  # additional check only for new
            try:
                data = json.loads(rec["Data"])
            except Exception:
                # Best effort: any unreadable payload is skipped, but
                # KeyboardInterrupt / SystemExit are no longer swallowed.
                continue
            if not isinstance(data, dict):
                # Valid JSON that is not an object (list, null, number...).
                continue
            if self.new:
                event = rec["EventName"]
            else:
                event = data.get("eventName", "")
            try:
                normalized_event = event_dict[event]
            except (KeyError, TypeError):
                # Unknown event, or an unhashable value such as a list.
                continue
            try:
                tracer_info = data["jstracer_info"]
                ip = tracer_info["client_ip"].encode("utf8")
                user_agent = tracer_info["user_agent"].encode("utf8")
            except field_errors:
                continue
            # Prefer the server-side time; fall back to the client
            # timestamp, which is divided by 1000 before use.
            ts = None
            try:
                ts = int(tracer_info["server_time"])
            except field_errors:
                pass
            if not ts:
                try:
                    ts = int(round((data["timestamp"] or 0.0) / 1000.0))
                except field_errors:
                    pass
            if not ts:
                continue
            vsid = data.get("vsid") or data.get("sid")
            if not vsid:
                continue
            result = {
                "source": "js_tracer",
                "event": normalized_event,
                "add_info": {},
                "vsid": vsid,
                "source_data": {"Data": data},
                "timestamp": ts,
                "ip": ip,
                "user_agent": user_agent,
            }
            if not self.local:
                process_browser(user_agent, detector, result)
                process_ip(ip, geobase, ip_origins, self.asname_dict, result)

            source_config = None
            try:
                source_config = self._dig(data, "data", "source")
            except field_errors:
                pass
            if not source_config:
                try:
                    source_config = self._dig(
                        data, "data", "details", "sourceConfig"
                    )
                except field_errors:
                    pass
            if source_config:
                result.update(get_info_from_source_config(source_config))

            # Candidate stream URLs; every candidate that parses overrides
            # the previous one, so the last successful one wins.
            stream_url_parsed = {}
            for path in (
                ("data", "currentStream", "url"),
                ("data", "source", "streams", 0, "url"),
                ("data", "source", "streamUrl"),
            ):
                try:
                    stream_url_parsed = parse_location(
                        self._dig(data, *path)
                    )
                except field_errors:
                    pass
            parsed_locations = [stream_url_parsed]
            for field in ("location", "topLocation"):
                try:
                    parsed_locations.append(parse_location(data[field]))
                except field_errors:
                    pass
            # Fill only the fields that are still empty, in priority order
            # stream URL, location, topLocation.
            for parsed in parsed_locations:
                for key in parsed:
                    value = parsed[key]
                    if value and not result.get(key):
                        result[key] = value

            if "version" in data:
                result["player_version"] = data["version"]

            result["user_id"] = (
                result.get("yandexuid", "")
                or result.get("yu_hash", "")
                or "vsid_{}".format(result["vsid"])
            )

            if result["event"] == "heartbeat":
                # Keep the raw heartbeat name next to the normalized event.
                result["add_info"]["heartbeat_src"] = event

            if not result.get("video_content_id"):
                result["video_content_id"] = None

            yield Record(**result)


def get_counter():
    """Create an empty two-level tally: ``tally[key][value] -> count``."""
    nested_tally = defaultdict(Counter)
    return nested_tally


def new_microsession():
    """Build the empty accumulator dict for one microsession.

    Scalar totals start at zero, list/set collectors start empty and every
    ``*_counter`` field (as well as ``heartbeats`` and
    ``add_info["sources"]``) is a fresh ``get_counter()`` tally.
    """
    counter_fields = (
        "browser_name_counter",
        "browser_version_counter",
        "ip_counter",
        "user_agent_counter",
        "device_type_counter",
        "a_station_counter",
        "provider_counter",
        "os_family_counter",
        "yandexuid_counter",
        "yu_hash_counter",
        "user_id_counter",
        "reqid_counter",
        "category_id_counter",
        "page_id_counter",
        "region_counter",
        "country_counter",
        "ref_from_counter",
        "ref_from_block_counter",
        "stream_block_counter",
        "player_version_counter",
        "channel_old_counter",
    )
    session = {
        "heartbeats": get_counter(),
        "tcpinfo_total_retrans": 0,
        "bytes_sent": 0,
        "errors": [],
    }
    # Each field gets its own tally object; they must never be shared.
    for name in counter_fields:
        session[name] = get_counter()
    session["ad_events"] = []
    session["add_info"] = {
        "durations": {"chunks": 0},
        "chunks": 0,
        "chunks_types": Counter(),
        "resolutions": Counter(),
        "errors_additional": defaultdict(
            lambda: {"times": [], "resolutions": []}
        ),
        "sources": get_counter(),
        "myshell_marker": 0,
    }
    session["slots_arr"] = set()
    return session


# Schema of a microsession row: field name -> qb2 type (``qt``).
# NOTE(review): the keys look like the fields emitted by
# microsessions_reducer; where this schema is applied is not visible in this
# part of the file -- confirm against the job definition.
microsessions_schema = {
    # Disabled fields, kept commented out:
    # 'channel': qt.String,
    # 'heur_category': qt.String,
    # 'program': qt.String,
    # 'timetuple': qt.Json,
    "ad_events": qt.Json,
    "add_info": qt.Json,
    "browser_name": qt.String,
    "browser_version": qt.String,
    "bytes_sent": qt.Integer,
    "category_id": qt.String,
    "channel_old": qt.String,
    "country": qt.String,
    "device_type": qt.String,
    "errors": qt.Json,
    "heartbeats": qt.Json,
    "hits_block_good": qt.Integer,
    "hits_good": qt.Integer,
    "ip": qt.String,
    "os_family": qt.String,
    "page_id": qt.String,
    "partner_price": qt.Integer,
    "player_version": qt.String,
    "price": qt.Integer,
    "provider": qt.String,
    "ref_from": qt.String,
    "ref_from_block": qt.String,
    "region": qt.Integer,
    "reqid": qt.String,
    "shows_block_good": qt.Integer,
    "shows_good": qt.Integer,
    "slots_arr": qt.Json,
    "stream_block": qt.String,
    "tcpinfo_total_retrans": qt.Integer,
    "timestamp": qt.Integer,
    "user_agent": qt.String,
    "user_id": qt.String,
    "UUID": qt.String,
    "video_content_id": qt.String,
    "view_time": qt.Integer,
    "vsid": qt.String,
    "winhits_block_good": qt.Integer,
    "winhits_good": qt.Integer,
    "yandexuid": qt.String,
    "yu_hash": qt.String,
}


def try_up_ms(field, cur_ms, rec, counter_field_name=None):
    """Count the value of ``rec[field]`` in the microsession tallies.

    The value is counted under ``cur_ms[<counter>][rec["source"]]`` where
    ``<counter>`` is *counter_field_name* or ``"<field>_counter"``.  Empty or
    missing values are ignored.  When the value is not a string the tally is
    tagged with ``toint = True``.
    """
    origin = rec["source"]
    counter_key = counter_field_name or "{}_counter".format(field)
    if not rec.get(field, ""):
        return
    value = rec[field]
    bucket = cur_ms[counter_key][origin]
    if not isinstance(value, basestring):
        # Marker attribute stored on the Counter object itself.
        bucket.toint = True
    bucket[value] += 1


def common_error_chunk(cur_ms, rec):
    """Fold the identifying fields of *rec* into the microsession *cur_ms*.

    Every tracked field is counted through ``try_up_ms``; a non-empty list
    in ``rec["slots_arr"]`` is merged into ``cur_ms["slots_arr"]``.
    """
    tracked_fields = (
        "yandexuid",
        "yu_hash",
        "user_id",
        "page_id",
        "ref_from",
        "ref_from_block",
        "stream_block",
        "reqid",
        "category_id",
        "channel_old",
        "player_version",
        "browser_name",
        "browser_version",
        "os_family",
        "ip",
        "region",
        "country",
        "device_type",
        "user_agent",
        "a_station",
        "provider",
    )
    for name in tracked_fields:
        try_up_ms(name, cur_ms, rec)
    if not rec.get("slots_arr", []):
        return
    if isinstance(rec["slots_arr"], list):
        cur_ms["slots_arr"] |= set(rec["slots_arr"])


def make_string_keys(dct):
    """Return a new dict with every key passed through ``str``.

    Values are kept as they are and the input dict is not modified.
    """
    stringified = {}
    for key, value in dct.items():
        stringified[str(key)] = value
    return stringified


def normalify(dct):
    """Convert every non-``str`` key of *dct* to ``str``, recursively, in place.

    Nested dicts (including ``dict`` subclasses) are processed too.

    Args:
        dct: dictionary to normalize; it is modified in place.

    Returns:
        The same *dct* object, for call chaining.
    """
    # Iterate over a snapshot of the keys: the dict is mutated below, which
    # is not allowed while iterating a live dict view.
    for key in list(dct):
        value = dct[key]
        if isinstance(value, dict):
            normalify(value)
        if not isinstance(key, str):
            # Remove the old key BEFORE inserting the new one.  If
            # ``str(key)`` compares and hashes equal to ``key``, inserting
            # first would reuse the slot and the removal would then delete
            # the entry altogether.
            del dct[key]
            dct[str(key)] = value
    return dct


def get_counter_value(counter_collection, val=None):
    """Pick the representative value from a per-source tally.

    Sources are consulted in the fixed priority order ``rtb-dsp``, ``redir``,
    ``js_tracer``, ``strm``.  For each source the most common value is
    taken; if it is a placeholder (``"unknown"``, ``""``, ``"null"``,
    ``"none"``, case-insensitive) the next source is tried.

    Args:
        counter_collection: mapping ``source -> Counter``.  It is indexed
            with ``[]``, so a ``defaultdict`` gains an empty entry for every
            source that is consulted.
        val: ignored; kept so existing callers that pass it keep working.

    Returns:
        The chosen value, converted with ``int`` when the Counter carries a
        truthy ``toint`` attribute and the conversion succeeds, or ``None``
        when no source has a usable value.
    """
    placeholders = ("unknown", "", "null", "none")
    for source in ("rtb-dsp", "redir", "js_tracer", "strm"):
        cntr = counter_collection[source]
        top = cntr.most_common(1)
        if not top:
            continue
        candidate = top[0][0]
        # Non-string values (the ones flagged with ``toint``) have no
        # ``lower``; calling it unconditionally raised AttributeError.
        lower = getattr(candidate, "lower", None)
        if lower is not None and lower() in placeholders:
            continue
        if getattr(cntr, "toint", False):
            try:
                candidate = int(candidate)
            except (TypeError, ValueError):
                # Mixed tally whose winner is not numeric: keep it as is.
                pass
        return candidate
    return None


def get_sources(sources):
    """Render a two-level mapping as a sorted ``"outer_inner"`` list.

    Returns the unique ``"<outer key>_<inner key>"`` labels joined with
    commas, in sorted order (an empty string for an empty mapping).
    """
    labels = {
        "{}_{}".format(outer, inner)
        for outer in sources
        for inner in sources[outer]
    }
    ordered = list(labels)
    ordered.sort()
    return ",".join(ordered)


def ms_count_hits(ms_, ad_events):
    """Store ad hit totals for the won ``countertype == 0`` events in *ms_*.

    Events are grouped by ``BidReqid``.  ``hits_block_good`` is the number
    of distinct bid requests and ``hits_good`` is the sum of ``maxadscount``
    taken from the first event of every bid request.
    """
    first_maxads = {}
    for ad in ad_events:
        if ad["countertype"] != 0 or ad["win"] != 1:
            continue
        bid_reqid = ad["BidReqid"]
        maxads = ad["maxadscount"]
        # Only the first event seen for a bid request contributes.
        first_maxads.setdefault(bid_reqid, maxads)
    ms_["hits_good"] = sum(first_maxads.values())
    ms_["hits_block_good"] = len(first_maxads)


def ms_count_money(ms_, ad_events):
    """Store the ad hit, show and money totals of *ad_events* in *ms_*.

    Besides the fields set by ``ms_count_hits`` this writes:

    * ``winhits_good`` / ``winhits_block_good`` -- won ``countertype == 0``
      events (``*_block_*`` additionally requires ``position == 0``);
    * ``shows_good`` / ``shows_block_good`` -- the same for
      ``countertype == 1``;
    * ``price`` / ``partner_price`` -- sums over ``countertype == 1`` events
      (``win`` is not checked for these two).

    Events whose ``DspID`` is 5 or 10 are excluded from all of the above.
    """
    ms_count_hits(ms_, ad_events)

    skipped_dsps = {5, 10}

    def is_won(ad, countertype):
        return (
            ad["countertype"] == countertype
            and ad["win"] == 1
            and ad["DspID"] not in skipped_dsps
        )

    def is_billable(ad):
        return ad["countertype"] == 1 and ad["DspID"] not in skipped_dsps

    ms_["winhits_good"] = sum(1 for ad in ad_events if is_won(ad, 0))
    ms_["winhits_block_good"] = sum(
        1 for ad in ad_events if is_won(ad, 0) and ad["position"] == 0
    )
    ms_["shows_good"] = sum(1 for ad in ad_events if is_won(ad, 1))
    ms_["shows_block_good"] = sum(
        1 for ad in ad_events if is_won(ad, 1) and ad["position"] == 0
    )
    ms_["price"] = sum(ad["price"] for ad in ad_events if is_billable(ad))
    ms_["partner_price"] = sum(
        ad["partnerprice"] for ad in ad_events if is_billable(ad)
    )


def microsessions_process_recs(recs):
    """Split a stream of event records into microsessions and aggregate them.

    A new microsession starts when a record carries a ``UUID`` different
    from the current one, or when at least 15 minutes passed since the last
    processed record.  Each microsession is accumulated in a
    ``new_microsession()`` dict.

    Args:
        recs: iterable of records exposing ``UUID``, ``timestamp``,
            ``source``, ``event`` and ``add_info``.
            NOTE(review): presumably sorted by timestamp within one vsid --
            confirm against the caller.

    Returns:
        dict mapping ``(UUID, first_timestamp)`` to the accumulated
        microsession dict.
    """
    last_ts = None  # timestamp of the last record that was fully processed
    last_chunk_ts = None  # timestamp of the previous "chunk" event
    vcid = None  # UUID of the current microsession
    cur_ms = None  # accumulator of the current microsession
    msid_ts = None  # timestamp the current microsession started at
    bad_msid = False  # set once the current microsession hits the ad cap
    last_resolution = "start"  # resolution of the latest chunk seen
    ms = {}
    for rec in recs:
        # ``and`` binds tighter than ``or``: (UUID changed) or (time gap).
        # With last_ts unset the gap is measured from 0.
        if (
            rec["UUID"] and vcid and rec["UUID"] != vcid
            # for situations where a new session starts later without vcid
            or (rec["timestamp"] - (last_ts or 0)) >= (60 * 15)
        ):
            # start new microsession
            if cur_ms:
                cur_ms["add_info"]["last_timestamp"] = last_ts
                ms[(vcid, msid_ts)] = cur_ms
                cur_ms = None
            vcid = rec["UUID"]
            msid_ts = rec["timestamp"]
            last_chunk_ts = None
            last_resolution = "start"
            bad_msid = False
        if bad_msid:
            # Skipped records do not update last_ts either.
            continue
        # The microsession may have started without a UUID; adopt the first
        # one that shows up.
        if not vcid and rec["UUID"]:
            vcid = rec["UUID"]
        if not msid_ts:
            msid_ts = rec["timestamp"]
        if not cur_ms:
            cur_ms = new_microsession()
        cur_ms["add_info"]["sources"][rec["source"]][rec["event"]] += 1
        # Cap: after 5000 rtb-dsp events the rest of this microsession is
        # ignored (what was accumulated so far is still returned).
        if cur_ms["add_info"]["sources"]["rtb-dsp"]["rtb-dsp"] >= 5000:
            bad_msid = True
            continue
        if rec["event"] == "heartbeat":
            common_error_chunk(cur_ms, rec)
            hb = rec["add_info"]["heartbeat_src"]
            cur_ms["heartbeats"][rec["source"]][hb] += 1
        elif rec["event"] in {"start", "playlist", "create_player"}:
            common_error_chunk(cur_ms, rec)
        elif rec["event"] == "error":
            common_error_chunk(cur_ms, rec)
            # At most 5000 errors are stored per microsession.
            if len(cur_ms["errors"]) < 5000:
                error = {
                    "id": rec["error_id"],
                    "id_raw": rec["add_info"].get("error_id_raw") or "",
                    "resolution": last_resolution,
                    "rel_time": rec["timestamp"] - msid_ts,
                    "details": rec["add_info"].get("error_details") or {},
                    "source": rec["source"],
                }
                cur_ms["errors"].append(error)
            if rec["add_info"].get("myshell_marker") or 0:
                cur_ms["add_info"]["myshell_marker"] = 1
        elif rec["event"] == "chunk":
            common_error_chunk(cur_ms, rec)
            # Chunk duration = sum of the gaps between consecutive chunks.
            if rec["timestamp"] and last_chunk_ts:
                cur_ms["add_info"]["durations"]["chunks"] += (
                    rec["timestamp"] - last_chunk_ts
                )
            cur_ms["add_info"]["chunks"] += 1
            cur_ms["bytes_sent"] += rec["add_info"].get("bytes_sent", 0)
            cur_ms["tcpinfo_total_retrans"] += rec["add_info"].get(
                "tcpinfo_total_retrans", 0
            )
            last_chunk_ts = rec["timestamp"]
            if rec.get("resolution", 0):
                cur_ms["add_info"]["resolutions"][rec["resolution"]] += 1
                last_resolution = rec["resolution"]
            cur_ms["add_info"]["chunks_types"][
                rec["add_info"]["extension"]
            ] += 1
            if rec["add_info"].get("myshell_marker") or 0:
                cur_ms["add_info"]["myshell_marker"] = 1
        elif rec["event"] == "rtb-dsp":
            common_error_chunk(cur_ms, rec)
            cur_ms["ad_events"].append(rec["add_info"])
        last_ts = rec["timestamp"]
        if cur_ms:
            # Remember the two most recent event names.
            cur_ms["add_info"]["prev_event"] = cur_ms["add_info"].get(
                "last_event"
            )
            cur_ms["add_info"]["last_event"] = rec["event"]
    # Flush the microsession that was still open at the end of the input.
    if cur_ms:
        cur_ms["add_info"]["last_timestamp"] = last_ts
        ms[(vcid, msid_ts)] = cur_ms
    return ms


def _heartbeat_duration(source, hbdict):
    """Estimate watched seconds of one source from its heartbeat counts.

    Returns ``None`` when the source or its heartbeat names are not known.
    """
    if source in {"js_tracer", "strm"}:
        if "30SecHeartbeat" in hbdict:
            return 30 * hbdict["30SecHeartbeat"]
        if "20SecWatched" in hbdict:
            return 20
        if "10SecWatched" in hbdict:
            return 10
    elif source == "redir" and "redir_heartbeat" in hbdict:
        return 30 * hbdict["redir_heartbeat"]
    return None


def microsessions_reducer(groups):
    """Reduce the records of each group into finished microsession rows.

    Args:
        groups: iterable of ``(key, recs)`` pairs; ``key["vsid"]`` is copied
            into every produced row and ``recs`` is handed to
            ``microsessions_process_recs``.

    Yields:
        One ``Record`` per microsession, ordered by start timestamp within
        the group.  Every ``<name>_counter`` tally is collapsed into a
        single ``<name>`` value (the raw tally is kept under
        ``add_info``), ``view_time`` is derived from heartbeats or chunk
        gaps, and ad totals are added when the microsession has ad events.
    """
    for key, recs in groups:
        ms = microsessions_process_recs(recs)
        for msid in sorted(ms, key=lambda x: x[1]):
            session = ms[msid]
            add_info = session["add_info"]
            session["video_content_id"] = msid[0]
            session["timestamp"] = msid[1]
            session["vsid"] = key["vsid"]
            # Counter / defaultdict -> plain dicts.
            add_info["resolutions"] = dict(add_info["resolutions"])
            add_info["chunks_types"] = dict(add_info["chunks_types"])
            add_info["sources"] = {
                k: dict(v) for k, v in add_info["sources"].items()
            }
            counters = [x for x in session if x.endswith("_counter")]
            for c in counters:
                c_sp = c.split("_counter")[0]
                val = get_counter_value(session[c], val=c_sp)
                if c_sp == "region" and isinstance(val, basestring):
                    val = 0
                session[c_sp] = val
                add_info[c] = dict(session[c])
                session.pop(c)
            session["slots_arr"] = sorted(session["slots_arr"])
            new_hbdict = {}
            for source in session["heartbeats"]:
                hbdict = dict(session["heartbeats"][source])
                new_hbdict[source] = hbdict
                # The duration is computed per source from this source's
                # own heartbeats only; previously an unrecognised heartbeat
                # name reused the value of another source (or raised).
                source_duration = _heartbeat_duration(source, hbdict)
                if source_duration is not None:
                    add_info["durations"][source] = source_duration
            session["heartbeats"] = new_hbdict
            # view_time: the smallest heartbeat-based estimate, falling
            # back to the chunk-gap duration when there is none.
            duration = None
            durdict = add_info["durations"]
            good_durations = [
                durdict[k]
                for k in durdict
                if k in {"js_tracer", "redir", "strm"}
            ]
            if good_durations:
                duration = min(good_durations)
            if duration is None and "chunks" in durdict:
                duration = durdict["chunks"]
            session["view_time"] = duration or 0
            if session.get("yu_hash", ""):
                session["user_id"] = session["yu_hash"]
            add_info["errors_additional"] = dict(
                add_info["errors_additional"]
            )
            add_info = session["add_info"] = normalify(add_info)
            if session["ad_events"]:
                ms_count_money(session, session["ad_events"])
            add_info["sources_aggr"] = get_sources(add_info["sources"])
            yield Record(**session)


def add_session_markers(groups):
    """Annotate the microsession records of each group with session flags.

    For every record the following fields are added:

    * ``is_view_old`` -- the chunk-based duration in ``add_info`` is positive;
    * ``is_view_new`` -- ``view_time``, ``price`` or ``partner_price`` is
      positive;
    * ``new_ad_session_crutch`` -- 1 for the first record of each
      ``(vsid, channel)`` pair within the group, otherwise 0;
    * ``new_user_session`` -- 1 when the record starts at least 1800 seconds
      after the previous record ended (its timestamp plus view_time).
    """
    for _, group_recs in groups:
        seen_pairs = set()
        active_until = 0
        for rec in group_recs:
            pair = (rec["vsid"], rec["channel"])
            chunk_seconds = rec["add_info"]["durations"].get("chunks", 0)
            viewed_or_paid = (
                rec.get("view_time", 0) > 0
                or rec.get("price", 0) > 0
                or rec.get("partner_price", 0) > 0
            )
            first_for_pair = pair not in seen_pairs
            if first_for_pair:
                seen_pairs.add(pair)
            idle_seconds = rec["timestamp"] - active_until
            yield Record(
                rec,
                is_view_old=chunk_seconds > 0,
                is_view_new=viewed_or_paid,
                new_ad_session_crutch=1 if first_for_pair else 0,
                new_user_session=int(idle_seconds >= 1800),
            )
            active_until = rec["timestamp"] + rec["view_time"]
