#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Parse browser-metrika-mobile-log and superapp-metrika-mobile-log and prepare daily tables.
More info about the logs: https://wiki.yandex-team.ru/jandexmetrika/data/appmetricatables/metrika-mobile-log
"""
from crypta.profile.runners.log_parsing.lib.base_parser import LogParser
from crypta.profile.utils.config import config


# YQL query template that turns raw metrika mobile events into rows of the
# intermediate table.
#
# Template mechanics:
#   * `{intermediate_table}` is a placeholder and literal YQL braces are doubled
#     (`{{` / `}}`), so the text is presumably rendered with str.format() before
#     execution -- TODO confirm in LogParser.
#   * `$input` is not defined in this text; presumably the base parser supplies
#     it -- TODO confirm.
#
# What the query does:
#   * Keeps only events whose EventName is "url opened" or "history api".
#   * Parses EventValue as JSON (non-strict) into a string dict and passes it to
#     one of two lambdas:
#       - $parse_url_opened: yandexuid comes from the "yandexuid.ru" key; url and
#         referer are normalized; title is passed through; lemmas are computed
#         from the title only when it is non-NULL, shorter than 65536 and valid
#         UTF-8.
#       - $parse_history_api: the "nav" key becomes url/host/url_tail and the
#         "url" key becomes referer/referer_host. This struct does not define
#         yandexuid, title or lemmas.
#   * The outer SELECT renames DeviceID/CryptaID/EventTimestamp and replaces NULL
#     url, referer, host, referer_host and url_tail with empty strings.
mobile_query = """
PRAGMA yt.DefaultMemoryLimit = '2048M';
PRAGMA yt.AutoMerge = 'disabled';
PRAGMA yson.DisableStrict;

$parse_url_opened = ($p) -> {{
    RETURN AsStruct(
        CAST($p["yandexuid.ru"] AS Uint64) AS yandexuid,
        Url::NormalizeWithDefaultHttpScheme($p["url"]) ?? '' AS url,
        Url::NormalizeWithDefaultHttpScheme($p["referer"]) ?? '' AS referer,
        $p["title"] AS title,
        CryptaUrlUtils::ExtractHost($p["url"]) AS host,
        CryptaUrlUtils::ExtractHost($p["referer"]) AS referer_host,
        Url::GetTail(Url::NormalizeWithDefaultHttpScheme($p["url"]) ?? '') AS url_tail,
        IF(
            $p["title"] IS NOT NULL AND LENGTH($p["title"]) < 65536 AND Unicode::IsUtf($p["title"]),
            Crypta::GetNotUniqueLemmas(
                SearchRequest::NormalizeSimple(CAST($p["title"] AS Utf8)),
                "ru"
            )
        ) AS lemmas
    )
}};

$parse_history_api = ($p) -> {{
    RETURN AsStruct(
        Url::NormalizeWithDefaultHttpScheme($p["url"]) ?? '' AS referer,
        CryptaUrlUtils::ExtractHost($p["url"]) ?? '' AS referer_host,
        Url::NormalizeWithDefaultHttpScheme($p["nav"]) ?? '' AS url,
        CryptaUrlUtils::ExtractHost($p["nav"]) AS host,
        Url::GetTail(Url::NormalizeWithDefaultHttpScheme($p["nav"]) ?? '') AS url_tail
    )
}};

INSERT INTO `{intermediate_table}` WITH TRUNCATE
SELECT
    DeviceID AS mm_device_id,
    CryptaID AS crypta_id,
    CAST(Params.yandexuid AS Uint64) AS yandexuid,
    CAST(EventTimestamp AS Uint64) AS `timestamp`,
    Params.url ?? '' AS url,
    Params.referer ?? '' AS referer,
    Params.host ?? '' AS host,
    Params.referer_host ?? '' AS referer_host,
    Params.url_tail ?? '' AS url_tail,
    Params.title AS title,
    Params.lemmas AS lemmas,
FROM (
    SELECT
        DeviceID,
        CryptaID,
        CASE
            WHEN EventName == "url opened" THEN $parse_url_opened(Yson::ConvertToStringDict(Yson::ParseJson(EventValue, Yson::Options(false as Strict))))
            WHEN EventName == "history api" THEN $parse_history_api(Yson::ConvertToStringDict(Yson::ParseJson(EventValue, Yson::Options(false as Strict))))
            ELSE Null
        END AS Params,
        EventName,
        EventTimestamp
    FROM $input
    WHERE EventName == "url opened" OR EventName == "history api"
)
"""


class MobileParser(LogParser):
    """LogParser configured with ``mobile_query`` and its output schema."""

    def __init__(self, log_name, log_dir):
        """Set up the base parser for a single mobile log.

        Args:
            log_name: passed through to ``LogParser`` as ``log_name``.
            log_dir: passed through to ``LogParser`` as ``log_dir``.
        """
        # Column name -> type; the keys match the columns the query selects.
        schema = {
            'mm_device_id': 'string',
            'crypta_id': 'uint64',
            'yandexuid': 'uint64',
            'timestamp': 'uint64',
            'url': 'string',
            'referer': 'string',
            'host': 'string',
            'referer_host': 'string',
            'url_tail': 'string',
            'title': 'string',
            'lemmas': 'any',
        }
        # Uses the runtime class name, so a subclass gets its own title.
        operation_title = 'log_parsing {}'.format(self.__class__.__name__)
        # Presumably the library behind the CryptaUrlUtils:: calls in the
        # query -- TODO confirm.
        udfs = {
            'libcrypta_url_utils_udf.so': config.CRYPTA_URL_UTILS_UDF_URL,
        }

        super(MobileParser, self).__init__(
            log_name=log_name,
            log_dir=log_dir,
            output_schema=schema,
            query=mobile_query,
            title=operation_title,
            udf_url_dict=udfs,
        )


def mobile_bar_runner():
    """Build a MobileParser for the 'mobile_bar' log and run it.

    Uses ``config.BROWSER_METRIKA_MOBILE_LOG_5MIN`` as the log directory.
    """
    MobileParser(
        log_name='mobile_bar',
        log_dir=config.BROWSER_METRIKA_MOBILE_LOG_5MIN,
    ).run()


def pp_runner():
    """Build a MobileParser for the 'pp' log and run it.

    Uses ``config.SUPERAPP_METRIKA_MOBILE_LOG_5MIN`` as the log directory.
    """
    MobileParser(
        log_name='pp',
        log_dir=config.SUPERAPP_METRIKA_MOBILE_LOG_5MIN,
    ).run()
