#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
from __future__ import unicode_literals
import sys
import re
import os
import codecs
import argparse
import datetime
from datetime import timedelta
import math
import json
import traceback
from collections import Counter, defaultdict
from itertools import groupby, chain
import scipy.stats
import requests
import numpy as np
from yql.api.v1.client import YqlClient
from nile.api.v1 import clusters
from videolog_common import (
    get_cluster,
    get_driver,
    apply_replacements,
    YqlRunner,
)

# Python 2/3 compatibility shims: when the running interpreter does not define
# ``long`` or ``bytes`` (the bare name lookup raises NameError), bind the name
# to the closest built-in type so later code can use it unconditionally.
try:
    long
except NameError:
    long = int
try:
    bytes
except NameError:
    bytes = str

# Opening HTML fragment (<head> plus the start of <body>) of the report page.
# It loads the report JS/CSS bundle and defines the PROJECT, START_TS and
# END_TS JavaScript globals. ``{project}``, ``{start_ts}`` and ``{end_ts}`` are
# placeholders - presumably filled with ``str.format``; the call site is not
# visible in this part of the file.
html_stub = (
    '<html><head><meta charset="UTF-8">'
    '<script src="https://pcode-ci.s3.mds.yandex.net/'
    'nirvana-report/ims-player-fast-metrics/index.js"></script>'
    '<link rel="stylesheet" href="https://pcode-ci.s3.mds.yandex.net/'
    'nirvana-report/ims-player-fast-metrics/index.css">'
    '<script> var PROJECT="{project}"; var START_TS = {start_ts}; var END_TS = {end_ts};</script>'
    "</head><body>"
)
# Base URL for API requests (v2). Presumably used together with ``headers``
# below - the call sites are not visible in this part of the file.
api = "https://st-api.yandex-team.ru/v2"
# HTTP headers for JSON requests with OAuth authorization.
# NOTE: the token is read from the STARTREK_TOKEN environment variable when
# this module is imported, so the import raises KeyError if it is not set.
headers = {
    "Content-Type": "application/json",
    "Authorization": "OAuth {}".format(os.environ["STARTREK_TOKEN"]),
}
# Known ad block / product types. The names match the ``price_<type>`` and
# ``impressions_<type>`` column suffixes used in ``events_for_metrics_money``.
AD_TYPES = [
    "preroll",
    "midroll",
    "fullscreen",
    "in_app",
    "inpage",
    "interstitial",
    "pauseroll",
    "postroll",
    "overlay",
    "motion",  # product/creative type. TODO: strictly speaking, mixing block types with product types is bad; it may make sense to split these entities
    "interactive_viewer",  # product/creative type. Consists mostly of AWAPS creatives, so we look for them with the filter dspid!=1
]
# List of event names.
# NOTE(review): the constant's name suggests these events get an extra
# "per VSID with view" metric - usage is outside this part of the file; confirm.
PER_VSID_VIEW = [
    "30SecHeartbeat",
    "20SecWatched",
    "10SecWatched",
    "AD_IS_ALREADY_PLAYING",
    "AdStart",
    "AdEnd",
    "AdPodStart",
    "AdPodEnd",
    "BroadcastAdInit",
    "BroadcastAdReceived",
    "BROADCAST_AD_BLOCK_INIT_ERROR",
    "ReplacedBroadcastAdPodStart",
    "ReplacedBroadcastAdPodEnd",
    "NotReplacedBroadcastAdPodStart",
    "NotReplacedBroadcastAdPodEnd",
    "End",
]

# Maps an event name to one of "up", "down" or "unknown".
# NOTE(review): presumably the direction in which a change of the event's
# metric counts as an improvement ("up" for playback/ad progress events,
# "down" for errors and timeouts) - confirm against the code that reads it.
events_positive_directions = {
    "30SecHeartbeat": "up",
    "20SecWatched": "up",
    "10SecWatched": "up",
    "Start": "up",
    "End": "up",
    "CreatePlayer": "up",
    "AdPodStart": "up",
    "AdPodEnd": "up",
    "AdStart": "up",
    "AdEnd": "up",
    "FatalError": "down",
    "audioTrackLoadTimeOut": "down",
    "fragLoadTimeOut": "down",
    "manifestLoadTimeOut": "down",
    "levelLoadTimeOut": "down",
    "internalException": "down",
    "NoFragLoad": "down",
    "TabCrash": "down",
    "UNHANDLED_GLOBAL_ERROR": "down",
    "BROADCAST_AD_BLOCK_INIT_ERROR": "down",
    "REFRESH_SUBTITLES_ERROR": "down",
    "NO_PLAYABLE_STREAMS": "down",
    "MULTIROLL_INIT_ERROR": "down",
    "PIPELINE_ERROR_DECODE": "down",
    "POST_MESSAGE_GET_CONFIG_TIMEOUT": "down",
    "STRM_ABD_INVALID_RESPONSE": "down",
    "NATIVE_HLS_NOT_SUPPORTED_ERROR": "down",
    "MPD_SHAKA_INIT_ERROR": "down",
    "MEDIA_ERR_SRC_NOT_SUPPORTED": "down",
    "UNABLE_TO_LOAD_PLAYER_IFRAME": "down",
    "SAGA_ERROR": "down",
    "SCRIPT_INIT_ERROR": "down",
    "MEDIA_ERR_DECODE": "down",
    "UNSUPPORTED_MEDIA_TYPE": "down",
    "ReportLog": "unknown",
}


def daily_path_filter(path, from_, to_):
    """Tell whether a daily table lies inside the requested date range.

    Parameters
    ----------
    path : table path; only the part after the last "/" is compared
    from_ : range start; anything from the first "T" on is dropped
    to_ : range end; anything from the first "T" on is dropped
    Returns
    -------
    result : True when start <= table name <= end, compared as strings
    """
    first_day = from_.partition("T")[0]
    last_day = to_.partition("T")[0]
    table_name = path.rsplit("/", 1)[-1]
    return first_day <= table_name <= last_day

def all_tables(from_, to_, old):
    """List the table timestamps between ``from_`` and ``to_``, both inclusive.

    Parameters
    ----------
    from_ : first timestamp, a string formatted as %Y-%m-%dT%H:%M:%S
    to_ : last timestamp, same format
    old : truthy -> step of 30 minutes, falsy -> step of 1 hour
    Returns
    -------
    result : list of timestamp strings in the same format, starting at
    ``from_`` and advancing by the step while not later than ``to_``
    (empty when ``from_`` is later than ``to_``)
    Raises
    ------
    ValueError : when ``from_`` or ``to_`` does not match the format
    """
    fmt = "%Y-%m-%dT%H:%M:%S"
    current = datetime.datetime.strptime(from_, fmt)
    last = datetime.datetime.strptime(to_, fmt)
    # The only difference between the two modes is the step size.
    step = timedelta(minutes=30) if old else timedelta(hours=1)
    tables_ = []
    while current <= last:
        # strftime is a method of the datetime instance itself; the previous
        # ``i.datetime.strftime`` raised AttributeError whenever ``old`` was set.
        tables_.append(current.strftime(fmt))
        current += step
    return tables_



def fast_path_filter(path, from_, to_, dailies):
    """Tell whether a fast table should be read for the requested range.

    Parameters
    ----------
    path : table path; only the part after the last "/" is compared
    from_ : inclusive lower bound for the table name (string comparison)
    to_ : inclusive upper bound for the table name (string comparison)
    dailies : daily table paths; only the last one is used as a threshold,
    an empty/None value disables the threshold check
    Returns
    -------
    result : True when the table name is inside [from_, to_] and, if a daily
    threshold exists, the name sorts after the threshold and does not
    contain it as a substring
    """
    last_daily = ""
    if dailies:
        last_daily = dailies[-1].rsplit("/", 1)[-1]
    table_name = path.rsplit("/", 1)[-1]
    if not (from_ <= table_name <= to_):
        return False
    if not last_daily:
        return True
    return table_name > last_daily and last_daily not in table_name


def colname(s):
    """Turn an arbitrary string into a column-safe name.

    Every character of ``s`` other than an ASCII letter, a digit or an
    underscore is replaced with "_"; the length of the string is unchanged.
    """
    unsafe_chars = re.compile("[^a-zA-Z_0-9]")
    return unsafe_chars.sub("_", s)


class Moscow(datetime.tzinfo):
    """Fixed-offset tzinfo named "Moscow": always UTC+03:00, never any DST."""

    # timedelta objects are immutable, so one shared instance per value is enough.
    _UTC_OFFSET = datetime.timedelta(hours=3)
    _NO_DST = datetime.timedelta()

    def utcoffset(self, dt):
        """Return the constant +3 hour offset; ``dt`` is ignored."""
        return self._UTC_OFFSET

    def tzname(self, dt):
        """Return the zone label; ``dt`` is ignored."""
        return "Moscow"

    def dst(self, dt):
        """Return a zero DST adjustment; ``dt`` is ignored."""
        return self._NO_DST


# Shared instance of the fixed UTC+3 tzinfo defined above.
moscow = Moscow()
# Timestamp layout in strftime-style directives, e.g. 2020-01-31T23:59:59.
dtformat = "%Y-%m-%dT%H:%M:%S"


# YQL snippet defining two lambdas:
#   $getEventName  - builds "<eventType>_<eventName>"; a NULL type defaults to
#                    "event", dots in the name become underscores and a NULL
#                    name becomes "NULL".
#   $getBundlename - returns /bundleName of $data for the listed
#                    VastTracking_* event names and NULL for any other event.
get_eventname = """
$getEventName = ($en, $et) -> {
    $et = $et ?? "event";
    $en = String::ReplaceAll($en, ".", "_") ?? "NULL";
    RETURN $et || "_" || $en
};

$getBundlename = ($eventName, $data) -> {
    RETURN IF(
        $eventName in (
            "VastTracking_impression",
            "VastTracking_clickThrough",
            "VastTracking_mute",
            "VastTracking_unmute",
            "VastTracking_start",
            "VastTracking_firstQuartile",
            "VastTracking_midpoint",
            "VastTracking_thirdQuartile",
            "VastTracking_complete",
        ),
        Yson::ConvertToString(Yson::YPath($data, "/bundleName")),
        NULL
    )
};
"""

# YQL snippet: $VERSIONS is the control version followed by the comma-separated
# experiment versions; $VERSIONS_Re is a Pire::Grep matcher built as
# ".+(v1|v2|...).+", i.e. any listed version with at least one character on
# each side of it.
get_versions = """
$VERSIONS = ListExtend(
    AsList($CONTROL_VERSION),
    String::SplitToList($EXPERIMENT_VERSIONS, ",")
);
$VERSIONS_Re = Pire::Grep(".+("||String::JoinFromList($VERSIONS, '|')||").+");
"""


# YQL preamble: YT/YQL pragmas plus the query parameters $service,
# $CONTROL_VERSION, $EXPERIMENT_VERSIONS, $FROM and $TO. The @pool, @service,
# @control_version, @experiment_versions, @from and @to tokens are
# placeholders - presumably substituted before the query is submitted (the
# substitution code is not visible in this part of the file).
prefix = """
pragma yt.Pool = "@pool";
pragma yt.PoolTrees = "physical";
pragma yt.TentativePoolTrees = "cloud";
pragma yt.MaxRowWeight = "64M";
pragma DqEngine = "disable";
pragma SimpleColumns;
$service = '@service';
$CONTROL_VERSION = '@control_version';
$EXPERIMENT_VERSIONS = '@experiment_versions';
$FROM = '@from';
$TO = '@to';
pragma yson.DisableStrict;
pragma AnsiInForEmptyOrNullableItemsCollections;
"""
# YQL snippet: attaches quality_report_avglog_common.sql as a library and
# imports the avglog helper symbols referenced by ``parse_player_state`` and
# ``main_metrics_stub``.
avglogs_import = """
pragma library("quality_report_avglog_common.sql");
import quality_report_avglog_common symbols $parsePlayerState, $avglogProcessList, $avglogPRSkipZeroes, $getPlayerStateFromData;
"""

# YQL helper lambdas shared by the query templates:
#   $wrapTs / $checkTs - format a timestamp in Europe/Moscow (13-digit values
#       are divided by 1000 first) and test it against [$FROM, $TO];
#   $getBucket - CityHash of the id plus a fixed salt, modulo 100;
#   $nullwrap, $w, $wrapMinusOne - map invalid/sentinel values to NULL;
#   $getPlatformFromUA / $getPlatformDSP - platform label from a user agent
#       or from device-type fields;
#   $diffchars - number of distinct bytes in a string;
#   $getViewTime, $getViewTimeCat, $getViewTimeWithCat - view time selection
#       and its duration bucket ("20m" ... "90mplus").
# The /*StreamPlayer ... StreamPlayer*/ and /*VAS ... VAS*/ sections hold
# service-specific $getDataStruct definitions. As written they are SQL block
# comments - presumably the markers are stripped for the matching service;
# confirm at the call site.
prefix2 = """
$fielddateFormat = DateTime::Format("%Y-%m-%dT%H:%M:%S");

$wrapTs = ($ts) -> {
    $ts = cast(IF(length(cast($ts as String)) = 13, $ts / 1000, $ts) as Uint32);
    $tm = AddTimezone(DateTime::FromSeconds($ts), "Europe/Moscow");
    RETURN $fielddateFormat($tm)
};

$checkTs = ($ts) -> ($wrapTs($ts) between $FROM and $TO);

$getBucket = ($vsid) -> {
    RETURN Digest::CityHash($vsid || "my-awesome-salt") % 100
};
$nullwrap = ($x) -> {
    RETURN IF(
        not ($x >= 0) or cast($x as String) in ("-inf", "inf", "nan"),
        null,
        $x
    )
};
$w = ($x) -> (IF($x in ("", "-", "0", "undefined"), null, $x));
$getPlatformFromUA = ($useragent) -> {
    $parsed = UserAgent::Parse($useragent);
    RETURN CASE
    WHEN $parsed.isRobot == true THEN "other"
    WHEN $parsed.isMobile == false and $parsed.isTV == false THEN "Desktop"
    WHEN $parsed.isMobile == true and $parsed.OSFamily == "Android" then "Android"
    WHEN $parsed.isMobile == true and $parsed.OSFamily == "iOS" then "iOS"
    ELSE "other"
    END
};
$getPlatformDSP = ($devicetype, $detaileddevicetype) -> {
    RETURN CASE
    WHEN $devicetype == 5 THEN "Desktop"
    WHEN $detaileddevicetype == "Android" then "Android"
    WHEN $detaileddevicetype == "iOS" then "iOS"
    ELSE "Other"
    END
};
$diffchars  = ($s) -> (ListLength(ListUniq(String::ToByteList($s))));
$getViewTime = ($vt30, $vt20, $vt10) -> {
    $result = CASE
    WHEN ($vt30 ?? 0) > 0 THEN $vt30
    WHEN ($vt20 ?? 0) > 0 THEN $vt20
    WHEN ($vt10 ?? 0) > 0 THEN $vt10
    ELSE 0.0
    END;
    RETURN CAST($result as Double) ?? 0.0
};
$getViewTimeCat = ($x) -> {
    RETURN CASE
    WHEN $x <= 1200 THEN "20m"
    WHEN 1200 < $x and $x <= 2400 THEN "40m"
    WHEN 2400 < $x and $x <= 3600 THEN "60m"
    WHEN 3600 < $x and $x <= 5400 THEN "90m"
    ELSE "90mplus"
    END
};
$getViewTimeWithCat = ($vt30, $vt20, $vt10, $cat) -> {
    $vt = $getViewTime($vt30, $vt20, $vt10);
    $cat_true = $getViewTimeCat($vt);
    RETURN IF($cat == $cat_true, $vt, 0)
};
$wrapMinusOne = ($x) -> (IF($x == -1, NULL, $x));
/*StreamPlayer
$parseSwitch = ($data) -> {
    $prev_auto = Yson::YPathBool($data, "/data/previous/auto");
    $next_auto = Yson::YPathBool($data, "/data/next/auto");
    $prev_height = Yson::YPathInt64($data, "/data/previous/height");
    $next_height = Yson::YPathInt64($data, "/data/next/height");
    return case
    when not ($prev_auto and not $next_auto) then null
    when $next_height > $prev_height then "up"
    when $next_height < $prev_height then "down"
    else null
    end
};
$getDataStruct = (
    $eventName,
    $eventType,
    $stalledReason,
    $stalledDuration,
    $data
) -> {
    RETURN AsStruct(
        $eventType == 'fatal' as isFatal,
        $stalledReason as stalledReason,
        $stalledDuration as stalledDuration,
        IF($eventName = "SetVideoTrack", $parseSwitch($data)) as switch
    )
};
StreamPlayer*/
/*VAS
$getDataStruct = ($data, $eventName, $eventType) -> {
    RETURN AsStruct(
        IF(
            $eventName == "VastTracking_thirdQuartile",
            $wrapMinusOne(Yson::ConvertToDouble(Yson::YPath($data, "/data/videoFramesInfo/droppedVideoFramesRatio"))),
            NULL
        ) as droppedVideoFramesRatio,
        $wrapMinusOne(Yson::ConvertToDouble(Yson::YPath($data, "/data/trackingEventDetails/timeToStart"))) as timeToStart,
        $wrapMinusOne(Yson::ConvertToDouble(Yson::YPath($data, "/data/videoPixelRatio"))) as videoPixelRatio,
        IF(
            $eventName == "VastTracking_thirdQuartile",
            $wrapMinusOne(Yson::ConvertToDouble(Yson::YPath($data, "/data/bufferingTimeRatio"))),
            NULL
        ) as bufferingTimeRatio,
        IF(
            $eventName in (
                "VastTracking_impression",
                "VastTracking_clickThrough",
                "VastTracking_mute",
                "VastTracking_unmute",
                "VastTracking_start",
                "VastTracking_firstQuartile",
                "VastTracking_midpoint",
                "VastTracking_thirdQuartile",
                "VastTracking_complete",
            ),
            Yson::ConvertToString(Yson::YPath($data, "/bundleName")),
            NULL
        ) as bundleName,
        IF(
            $eventType == "values",
            ListFilter(
                ListMap(
                    DictItems(Yson::ConvertToDict(Yson::YPath($data, "/tags"))),
                    ($x)->(AsTuple($x.0, Yson::ConvertToDouble($x.1)))
                ), ($x)->($x.0 != ("values_" || $eventName) and String::IsAscii($x.0))
            )
        ) as values_tags --sensors
    )
};
VAS*/
"""

# YQL fragment that starts with a comma (so it continues a field list) and adds
# a `sensors` member built from the /sensors dict of $data.
# NOTE(review): presumably substituted for the `--sensors` marker inside the
# VAS $getDataStruct in ``prefix2`` - confirm at the call site.
sensors_fragment = """,
        DictItems(Yson::ConvertToStringDict(Yson::YPath($data, "/sensors"))) as sensors"""

# YQL SELECT template reading one RANGE of event tables. It parses the data
# column as JSON and keeps rows of the requested service whose client timestamp
# is inside the window, whose event name is ASCII, whose user agent is not
# AdsBot-Google and (for VAS) whose event is not EnormousLogDetected.
# @basedir/@from/@to and the @Data/@Service/@EventName/@UserAgent/@Version
# tokens are placeholders (the latter presumably column names - confirm).
# The /*with_testids_join ... with_testids_join*/ and
# /*without_testids_join ... without_testids_join*/ sections are SQL block
# comments until their markers are removed; ``make_source`` removes the
# with_testids_join markers via ``apply_replacements`` for some modes.
source_stub = """
    SELECT
        Yson::ParseJson(@Data) as @Data, src.* without @Data
    FROM RANGE(
        `@basedir`, `@from`, `@to`
    ) as src
    WHERE
        @Service == $service
        and $checkTs(clientTimestamp)
        and String::IsAscii(@EventName)
        /*with_testids_join
        and $VERSIONS_Re(@Data)
        with_testids_join*/
        /*without_testids_join
        and IF(
            ListLength(
                $VERSIONS) > 0,
                ListHas($VERSIONS, @Version ?? "BAD"),
                True
            )
        without_testids_join*/
        and @UserAgent NOT LIKE "%AdsBot-Google%"
        and IF(
            @Service = 'VAS',
            @EventName != 'EnormousLogDetected',
            True
        )
"""

# YQL select-list fragment producing the `state` column: the parsed player
# state for 'PlayerAlive' events and null for every other event. It relies on
# the symbols imported by ``avglogs_import``.
parse_player_state = """\
IF(@[eventName] == 'PlayerAlive', $parsePlayerState(
    $getPlayerStateFromData(Yson::SerializeJson(data))
), null) as state,
"""

# YQL snippet defining $totalizer: for one row it returns a list holding the
# row itself plus, when dataStruct.bundleName is not null, a copy whose
# EventName is prefixed with "<bundleName>_".
# The /*sensors ... sensors*/ sections are SQL block comments as written; with
# the markers removed they additionally emit, for every sensor key/value pair,
# copies of those rows with EventName prefixed by "sensor_<key>_<value>_".
adsdk_totalize = """
$totalizer = ($row)->{
    /*sensors
    $sensors = ListNotNull(ListMap($row.dataStruct.sensors, ($s)->( 
        "sensor_" || ($s.0 ?? "-") || "_" || ($s.1 ?? "-")
    )));
    sensors*/
    $result = ListNotNull([
        $row,
        IF($row.dataStruct.bundleName is not null, AddMember(
            RemoveMember($row, "EventName"),
            "EventName",
            $row.dataStruct.bundleName || "_" || $row.EventName
        ))
    ]);
    /*sensors
    $add_sensors = ListNotNull(ListFlatten(ListMap($sensors, ($s)->(ListMap(
        $result, ($r)->(AddMember(RemoveMember($r, "EventName"), "EventName", $s || "_" || $r.EventName))
    )))));
    $result = ListExtend($result, $add_sensors);
    sensors*/
    return $result
};
"""

# YQL template for the first map stage:
#   $getTestid     - parses a ";"-separated list of ","-separated test entries
#                    and returns (testid, bucket) when exactly one of the ids
#                    is in $VERSIONS, otherwise null;
#   $map_events__  - projects $source rows to VSID / ClientTimestamp / Version /
#                    EventName / UserAgent / Service (plus service-specific
#                    dataStruct inside the /*VAS*/ and /*StreamPlayer*/ markers);
#   $vsid_to_testid - two alternative definitions inside the /*sessions*/ and
#                    /*testids_self_join*/ markers, mapping a VSID to its
#                    Version, Bucket and FakeBucket;
#   $map_events_   - filters by service and, depending on which of the
#                    with/without_testids_join sections is enabled, either joins
#                    $vsid_to_testid or keeps rows whose Version is in $VERSIONS.
# All marker-delimited sections are SQL block comments as written; @[...]
# tokens and @money are placeholders. Presumably both are resolved by the
# query builder before submission - that code is not visible here.
map1 = """
$getTestid = ($test_buckets) -> {
    $testids_wb = ListMap(
        String::SplitToList($test_buckets, ";"),
        ($x)->(unwrap(String::SplitToList($x, ",")))
    );
    $testids_wb = ListMap($testids_wb, ($x)->(
        AsTuple($x[0], cast($x[2] as Int32))
    ));
    $testids = ListMap($testids_wb, ($x)->($x.0));
    $intersection = DictKeys(SetIntersection(
        ToSet($VERSIONS),
        ToSet($testids)
    ));
    RETURN IF(
        listlength($intersection) == 1,
        unwrap(ListFilter($testids_wb, ($x)->($x.0 in $intersection))[0]),
        null
    )
};
$map_events__ = (
    select
        @[vsid] as VSID,
        @[clientTimestamp] as ClientTimestamp,
        @[ADSDK]
        @[version] as Version,
        @[bucket],
        @[fake_bucket],
        $getEventName(@[eventName], @[eventType]) as EventName,
        @[userAgent] as UserAgent,
        /*VAS
        $getDataStruct(
            @[data], @[eventName], @[eventType]
        ) as dataStruct,
        VAS*/
        /*StreamPlayer
        $getDataStruct(
            eventName, eventType, labels_reason, data_stalledDuration, data
        ) as dataStruct,
        StreamPlayer*/
        @[service] as Service,
        @[parsePlayerState]
    from $source
);
/*sessions
$vsid_to_testid = (
    select
        VSID, max(testid) as Version, max(Bucket) as Bucket,
        $getBucket(
            max(puid)
            ?? max(icookie)
            ?? max(device_id)
            ?? max(yandexuid)
            ?? VSID
        ) as FakeBucket
    from (
        select
            vsid as VSID,
            $w(puid) as puid,
            $w(device_id) as device_id,
            $w(icookie) as icookie,
            $w(yandexuid) as yandexuid,
            $getTestid(test_buckets).0 as testid,
            $getTestid(test_buckets).1 as Bucket,
        from range(
            `cubes/video-strm`, `@[date_from]`, `@[date_to]`, `sessions`
        )
        where $getTestid(test_buckets) is not null
    )
    group by VSID
);
sessions*/
/*testids_self_join
$vsid_to_testid = (
    select
        VSID,
        max(testid) as Version,
        max(Bucket) as Bucket,
        $getBucket(
            max(puid)
            ?? max(device_id)
            ?? max(icookie)
            ?? max(yandexuid)
            ?? VSID
        ) as FakeBucket
    from (
        select
            vsid as VSID,
            $w(puid) as puid,
            $w(device_id) as device_id,
            $w(xYandexICookie) as icookie,
            $w(yandexuid) as yandexuid,
            $getTestid(@[testids_source]).0 as testid,
            $getTestid(@[testids_source]).1 as Bucket,
        from $source
        where $getTestid(@[testids_source]) is not null
    )
    group by VSID
);
testids_self_join*/
$map_events_ = (
    select
        /*without_testids_join
        m.Bucket @[fake_bucket_fallback] as Bucket,
        m.* without m.Bucket
        without_testids_join*/
        /*with_testids_join
        s.Bucket @[fake_bucket_fallback_s] as Bucket, s.Version as Version,
        m.* without m.Version, m.Bucket
        with_testids_join*/
    from $map_events__ as m
    /*with_testids_join
    inner join any $vsid_to_testid as s using (VSID)
    with_testids_join*/
    where
        Service == $service
        /*without_testids_join
        and ListHas(
            $VERSIONS,
            Version ?? "BAD"
        )
        without_testids_join*/
        @[VSIDNOTNULL]
);
@[adsdk_totalize]
$map_events = @[map_events];
-- @money
"""

# YQL template for the second map stage: $map_events_2 turns every row of
# $map_events_ into per-event metric inputs. Inside the /*StreamPlayer*/
# markers these are 0/1 flags for stalls (overall and per stalled reason),
# quality switches and fatal events, plus fixed view-time increments
# (30/20/10) for the heartbeat/watched events; inside the /*VAS*/ markers they
# are the numeric fields of dataStruct. @VSID and @state are placeholders.
map2 = """
$map_events_2 = (
    select
        @VSID,
        Version,
        UserAgent,
        Bucket,
        ClientTimestamp,
        /*StreamPlayer
        IF(EventName == "event_Stalled", 1, 0) AS Stalled,
        IF(EventName == "event_Stalled" and dataStruct.stalledReason == "Other", 1, 0) AS StalledOther,
        IF(EventName == "event_Stalled" and dataStruct.stalledReason == "Init", 1, 0) AS StalledInit,
        IF(EventName == "event_Stalled" and dataStruct.stalledReason == "SetSource", 1, 0) AS StalledSetSource,
        IF(EventName == "event_Stalled" and dataStruct.stalledReason == "Seek", 1, 0) AS StalledSeek,
        IF(EventName == "event_Stalled" and dataStruct.stalledReason == "AdEnd", 1, 0) AS StalledAdEnd,
        IF(EventName == "event_Stalled" and dataStruct.stalledReason == "Recover", 1, 0) AS StalledRecover,
        IF(EventName == "event_Stalled" and dataStruct.stalledReason == "VideoTrackChange", 1, 0) AS StalledVideoTrackChange,
        IF(dataStruct.switch = "up", 1, 0) AS switchUp,
        IF(dataStruct.switch = "down", 1, 0) AS switchDown,
        IF(dataStruct.isFatal == true, 1, 0) as Fatal,
        IF(EventName == "event_30SecHeartbeat", 30, 0) as viewTime,
        IF(EventName == "event_20SecWatched", 20, 0) as viewTime20,
        IF(EventName == "event_10SecWatched", 10, 0) as viewTime10,
        dataStruct.stalledDuration as stalledDuration,
        StreamPlayer*/
        /*VAS
        dataStruct.droppedVideoFramesRatio as droppedVideoFramesRatio,
        dataStruct.timeToStart as timeToStart,
        dataStruct.videoPixelRatio as videoPixelRatio,
        dataStruct.bufferingTimeRatio as bufferingTimeRatio,
        VAS*/
        @state
    from $map_events_
);
"""

# YQL select-list fragment of per-group aggregates over the columns produced
# by ``map2``: stall counters and matching has* flags (1.0 when the sum is
# positive), fatal counters, view time (heartbeat-based and timestamp-diff
# based) and its duration buckets, quality switches, stalled durations and
# share, and avglog statistics computed from the timestamp-sorted `state` list.
# NOTE(review): presumably substituted for @main_metrics in ``reduce1`` -
# confirm at the call site.
main_metrics_stub = """CAST(SUM(Stalled) ?? 0 AS Double) as Stalled,
        CAST((SUM(Stalled) ?? 0) > 0 AS Double) as hasStalled,
        CAST(SUM(StalledOther) ?? 0 as Double) as StalledOther,
        CAST((SUM(StalledOther) ?? 0) > 0 as Double) as hasStalledOther,
        CAST(SUM(StalledInit) ?? 0 as Double) as StalledInit,
        CAST((SUM(StalledInit) ?? 0) > 0 as Double) as hasStalledInit,
        CAST(SUM(StalledSetSource) ?? 0 as Double) as StalledSetSource,
        CAST((SUM(StalledSetSource) ?? 0) > 0 as Double) as hasStalledSetSource,
        CAST(SUM(StalledSeek) ?? 0 as Double) as StalledSeek,
        CAST((SUM(StalledSeek) ?? 0) > 0 as Double) as hasStalledSeek,
        CAST(SUM(StalledAdEnd) ?? 0 as Double) as StalledAdEnd,
        CAST((SUM(StalledAdEnd) ?? 0) > 0 as Double) as hasStalledAdEnd,
        CAST(SUM(StalledRecover) ?? 0 as Double) as StalledRecover,
        CAST((SUM(StalledRecover) ?? 0) > 0 as Double) as hasStalledRecover,
        CAST(SUM(StalledVideoTrackChange) ?? 0 as Double) as StalledVideoTrackChange,
        CAST((SUM(StalledVideoTrackChange) ?? 0) > 0 as Double) as hasStalledVideoTrackChange,
        CAST((SUM(Fatal) ?? 0) as Double) as Fatal,
        CAST((SUM(Fatal) ?? 0) > 0 as Double) as hasFatal,
        (MAX(ClientTimestamp) - MIN(ClientTimestamp)) / 1000.0 as viewTime_tsdiff,
        $getViewTime(SUM(viewTime), MAX(viewTime20), MAX(viewTime10)) as viewTime,
        $getViewTimeWithCat(
            SUM(viewTime), MAX(viewTime20), MAX(viewTime10),
            "20m"
        ) as viewTimeCat_20m,
        $getViewTimeWithCat(
            SUM(viewTime), MAX(viewTime20), MAX(viewTime10),
            "40m"
        ) as viewTimeCat_40m,
        $getViewTimeWithCat(
            SUM(viewTime), MAX(viewTime20), MAX(viewTime10),
            "60m"
        ) as viewTimeCat_60m,
        $getViewTimeWithCat(
            SUM(viewTime), MAX(viewTime20), MAX(viewTime10),
            "90m"
        ) as viewTimeCat_90m,
        $getViewTimeWithCat(
            SUM(viewTime), MAX(viewTime20), MAX(viewTime10),
            "90mplus"
        ) as viewTimeCat_90mplus,
        cast(SUM(switchUp) ?? 0 as Double) as switchUp,
        cast(SUM(switchDown) ?? 0 as Double) as switchDown,
        CAST((SUM(stalledDuration) ?? 0) as Double) as stalledDuration,
        CAST((SUM(IF(StalledInit > 0, stalledDuration, 0)) ?? 0) as Double) as stalledInitDuration,
        CAST((SUM(IF(StalledOther > 0, stalledDuration, 0)) ?? 0) as Double) as stalledOtherDuration,
        CAST((SUM(IF(StalledSeek > 0, stalledDuration, 0)) ?? 0) as Double) as stalledSeekDuration,
        CAST((SUM(IF(StalledSetSource > 0, stalledDuration, 0)) ?? 0) as Double) as stalledSetSourceDuration,
        CAST((SUM(IF(StalledAdEnd > 0, stalledDuration, 0)) ?? 0) as Double) as stalledAdEndDuration,
        (CAST(SUM(stalledDuration) ?? 0 as Double) / $getViewTime(SUM(viewTime), MAX(viewTime20), MAX(viewTime10))) ?? 0.0 as stalledDurationShare,
        (
            ListSum($avglogProcessList(
                listsort(aggregate_list(state), ($x)->($x.`timestamp`))
            ).avglogs) / CAST(
                ListLength($avglogProcessList(
                listsort(aggregate_list(state), ($x)->($x.`timestamp`))
            ).avglogs) as Double
            )
        ) as avglogs,
        $avglogPRSkipZeroes($avglogProcessList(
            listsort(aggregate_list(state), ($x)->($x.`timestamp`))
        ).resolutions).p_ld_stalled_time as p_ld_stalled_time,
        $avglogPRSkipZeroes($avglogProcessList(
            listsort(aggregate_list(state), ($x)->($x.`timestamp`))
        ).resolutions).p_ld_stalled_count as p_ld_stalled_count,
        $avglogPRSkipZeroes($avglogProcessList(
            listsort(aggregate_list(state), ($x)->($x.`timestamp`))
        ).resolutions).p_ld_watched_time as p_ld_watched_time,
        $avglogPRSkipZeroes($avglogProcessList(
            listsort(aggregate_list(state), ($x)->($x.`timestamp`))
        ).resolutions).p_sd_stalled_time as p_sd_stalled_time,
        $avglogPRSkipZeroes($avglogProcessList(
            listsort(aggregate_list(state), ($x)->($x.`timestamp`))
        ).resolutions).p_sd_stalled_count as p_sd_stalled_count,
        $avglogPRSkipZeroes($avglogProcessList(
            listsort(aggregate_list(state), ($x)->($x.`timestamp`))
        ).resolutions).p_sd_watched_time as p_sd_watched_time,
        $avglogPRSkipZeroes($avglogProcessList(
            listsort(aggregate_list(state), ($x)->($x.`timestamp`))
        ).resolutions).p_hd_stalled_time as p_hd_stalled_time,
        $avglogPRSkipZeroes($avglogProcessList(
            listsort(aggregate_list(state), ($x)->($x.`timestamp`))
        ).resolutions).p_hd_stalled_count as p_hd_stalled_count,
        $avglogPRSkipZeroes($avglogProcessList(
            listsort(aggregate_list(state), ($x)->($x.`timestamp`))
        ).resolutions).p_hd_watched_time as p_hd_watched_time,
        $avglogPRSkipZeroes($avglogProcessList(
            listsort(aggregate_list(state), ($x)->($x.`timestamp`))
        ).resolutions).p_fhd_stalled_time as p_fhd_stalled_time,
        $avglogPRSkipZeroes($avglogProcessList(
            listsort(aggregate_list(state), ($x)->($x.`timestamp`))
        ).resolutions).p_fhd_stalled_count as p_fhd_stalled_count,
        $avglogPRSkipZeroes($avglogProcessList(
            listsort(aggregate_list(state), ($x)->($x.`timestamp`))
        ).resolutions).p_fhd_watched_time as p_fhd_watched_time,
"""

# YQL select-list fragment: takes SOME() value per group of the four numeric
# columns that ``map2`` emits inside its /*VAS*/ section.
# NOTE(review): presumably the VAS counterpart of ``main_metrics_stub`` -
# confirm at the call site.
adsdk_mm_stub = """
        SOME(timeToStart) as timeToStart,
        SOME(videoPixelRatio) as videoPixelRatio,
        SOME(droppedVideoFramesRatio) as droppedVideoFramesRatio,
        SOME(bufferingTimeRatio) as bufferingTimeRatio,
"""

# YQL select-list fragment: Vsid_View is the VSID when the group's computed
# view time is positive and null otherwise.
vsid_view_stub = """IF($getViewTime(SUM(viewTime), MAX(viewTime20), MAX(viewTime10)) > 0, VSID, null) as Vsid_View,"""

# YQL template for the first reduce stage:
#   $reduce_events           - aggregates $map_events_2 per (@VSID, Version),
#                              keeping only groups with fewer than 300000 rows;
#   $reduce_dynamic_events_1 - event counts per (@VSID, EventName) from
#                              $map_events;
#   $reduce_dynamic_events_2 - total event count and estimated number of
#                              distinct @VSID per Platform/Version/Bucket/
#                              EventName.
# @VSID, @vsidView and @main_metrics are placeholders.
reduce1 = """
$reduce_events = (
    select
        @VSID,
        @vsidView
        SOME(Bucket) as Bucket,
        Version,
        $getPlatformFromUA(MAX(UserAgent)) as Platform,
        @main_metrics
    from $map_events_2
    group by @VSID, Version
    having count(*) < 300000
);
$reduce_dynamic_events_1 = (
    select
        @VSID,
        SOME(Bucket) as Bucket,
        MAX(Version) as Version,
        $getPlatformFromUA(MAX(UserAgent)) as Platform,
        EventName,
        COUNT(*) as `count`
    from $map_events
    group by @VSID, EventName
);
$reduce_dynamic_events_2 = (
    select
        Platform,
        Version,
        Bucket,
        EventName,
        sum(`count`) as EventCount,
        CountDistinctEstimate(@VSID) as vsids
    from $reduce_dynamic_events_1
    group by Platform,
        Version,
        Bucket,
        EventName
);
"""

# YQL template for "values" tags: flattens dataStruct.values_tags of
# $map_events into (tagName, tagValue) rows, then computes the row count and
# the p50/p75/p90/p95 of tagValue per Platform/Version/Bucket/tagName.
# $reduce_dynamic_values_2 keeps only groups where all four percentiles are
# not null.
values_reduce = """

$map_dynamic_values_1 = (
    select
        Bucket,
        Version,
        $getPlatformFromUA(UserAgent) as Platform,
        dataStruct.values_tags as values_tags
    from $map_events
);
$map_dynamic_values_2 = (
    select
        Bucket,
        Version,
        Platform,
        values_tags.0 as tagName,
        values_tags.1 as tagValue
    from $map_dynamic_values_1
    flatten list by values_tags
);
$reduce_dynamic_values = (
    select
        Platform,
        Version,
        Bucket,
        tagName,
        count(*) as tagValueCount,
        PERCENTILE(tagValue, 0.50) as tagValue_p50,
        PERCENTILE(tagValue, 0.75) as tagValue_p75,
        PERCENTILE(tagValue, 0.90) as tagValue_p90,
        PERCENTILE(tagValue, 0.95) as tagValue_p95,
    from $map_dynamic_values_2
    group by Platform,
        Version,
        Bucket,
        tagName
);
$reduce_dynamic_values_2 = (
    select * from $reduce_dynamic_values
    where tagValue_p50 is not null
    and tagValue_p75 is not null
    and tagValue_p90 is not null
    and tagValue_p95 is not null
);
"""

# YQL template: $reduce_events_joined left-joins $reduce_events with
# $money_reduced using (@SID). Missing price, impressions and partner_price
# become 0.0 and missing fraud_events becomes 0. The /*chtracking*/ section
# (direct_events) is an SQL block comment as written; @SID and @price_madness
# are placeholders. $money_reduced is not defined in this part of the file.
reduce1_join = """
$reduce_events_joined = (
    select
        r.*,
        m.adsids as adsids,
        CAST(m.price AS Double) ?? 0.0 as price,
        CAST(m.impressions AS Double) ?? 0.0 as impressions,
        CAST(m.partner_price as Double) ?? 0.0 as partner_price,
        m.fraud_events ?? 0 as fraud_events,
        m.fraud_vsid as fraud_vsid,
        /*chtracking
        direct_events,
        chtracking*/
        @price_madness
    from $reduce_events as r
    left join $money_reduced as m
    using (@SID)
);
"""

# YQL select-list fragment of group-level aggregates over the per-session
# columns named in ``main_metrics_stub``: sums of view time, switches, stalled
# durations and stall/fatal counters (the *_vsids columns sum the 0/1 has*
# flags), avglog percentiles, and sums / 95th percentiles of the per-resolution
# p_* metrics. $nullwrap turns negative, NaN and infinite results into null.
# NOTE(review): presumably substituted for @events_for_metrics_main in
# ``events_for_metrics`` - confirm at the call site.
events_for_metrics_main = """
        SUM(viewTime_tsdiff) as TVT_tsdiff,
        SUM(viewTime) as TVT,
        SUM(viewTimeCat_20m) as TVT_20m,
        SUM(viewTimeCat_40m) as TVT_40m,
        SUM(viewTimeCat_60m) as TVT_60m,
        SUM(viewTimeCat_90m) as TVT_90m,
        SUM(viewTimeCat_90mplus) as TVT_90mplus,
        SUM(switchUp) as switchUp,
        SUM(switchDown) as switchDown,
        SUM(stalledDuration) as TotalStalledDuration,
        SUM(stalledInitDuration) as stalledInitDuration,
        SUM(stalledOtherDuration) as stalledOtherDuration,
        SUM(stalledSeekDuration) as stalledSeekDuration,
        SUM(stalledSetSourceDuration) as stalledSetSourceDuration,
        SUM(stalledAdEndDuration) as stalledAdEndDuration,
        SUM(Stalled) as Stalled_total,
        SUM(hasStalled) as Stalled_vsids,
        SUM(StalledOther) as StalledOther_total,
        SUM(hasStalledOther) as StalledOther_vsids,
        SUM(StalledInit) as StalledInit_total,
        SUM(hasStalledInit) as StalledInit_vsids,
        SUM(StalledSetSource) as StalledSetSource_total,
        SUM(hasStalledSetSource) as StalledSetSource_vsids,
        SUM(StalledSeek) as StalledSeek_total,
        SUM(hasStalledSeek) as StalledSeek_vsids,
        SUM(StalledAdEnd) as StalledAdEnd_total,
        SUM(hasStalledAdEnd) as StalledAdEnd_vsids,
        SUM(StalledRecover) as StalledRecover_total,
        SUM(hasStalledRecover) as StalledRecover_vsids,
        SUM(StalledVideoTrackChange) as StalledVideoTrackChange_total,
        SUM(hasStalledVideoTrackChange) as StalledVideoTrackChange_vsids,
        SUM(Fatal) as Fatal_total,
        SUM(hasFatal) as Fatal_vsids,
        $nullwrap(percentile(avglogs, 0.25)) as avglog_p25,
        $nullwrap(percentile(avglogs, 0.50)) as avglog_p50,
        $nullwrap(percentile(avglogs, 0.75)) as avglog_p75,
        $nullwrap(percentile(avglogs, 0.95)) as avglog_p95,
        $nullwrap(percentile(avglogs, 0.99)) as avglog_p99,
        $nullwrap(SUM(p_fhd_watched_time)) as p_fhd_watched_time,
        $nullwrap(SUM(p_hd_watched_time)) as p_hd_watched_time,
        $nullwrap(SUM(p_sd_watched_time)) as p_sd_watched_time,
        $nullwrap(SUM(p_ld_watched_time)) as p_ld_watched_time,
        $nullwrap(SUM(p_fhd_stalled_time)) as p_fhd_stalled_time,
        $nullwrap(SUM(p_hd_stalled_time)) as p_hd_stalled_time,
        $nullwrap(SUM(p_sd_stalled_time)) as p_sd_stalled_time,
        $nullwrap(SUM(p_ld_stalled_time)) as p_ld_stalled_time,
        $nullwrap(SUM(p_fhd_stalled_count)) as p_fhd_stalled_count,
        $nullwrap(SUM(p_hd_stalled_count)) as p_hd_stalled_count,
        $nullwrap(SUM(p_sd_stalled_count)) as p_sd_stalled_count,
        $nullwrap(SUM(p_ld_stalled_count)) as p_ld_stalled_count,
        $nullwrap(percentile(p_ld_stalled_count, 0.95)) as p_ld_stalled_count_p95,
        $nullwrap(percentile(p_sd_stalled_count, 0.95)) as p_sd_stalled_count_p95,
        $nullwrap(percentile(p_hd_stalled_count, 0.95)) as p_hd_stalled_count_p95,
        $nullwrap(percentile(p_fhd_stalled_count, 0.95)) as p_fhd_stalled_count_p95,
        $nullwrap(percentile(p_ld_stalled_time, 0.95)) as p_ld_stalled_time_p95,
        $nullwrap(percentile(p_sd_stalled_time, 0.95)) as p_sd_stalled_time_p95,
        $nullwrap(percentile(p_hd_stalled_time, 0.95)) as p_hd_stalled_time_p95,
        $nullwrap(percentile(p_fhd_stalled_time, 0.95)) as p_fhd_stalled_time_p95,
"""

# YQL select-list fragment: p50/p75/p90/p95 percentiles of
# droppedVideoFramesRatio, timeToStart, videoPixelRatio and bufferingTimeRatio.
# NOTE(review): presumably the VAS counterpart of ``events_for_metrics_main`` -
# confirm at the call site.
adsdk_percentiles_stub = """
        PERCENTILE(droppedVideoFramesRatio, 0.50) as droppedVideoFramesRatio_p50,
        PERCENTILE(droppedVideoFramesRatio, 0.75) as droppedVideoFramesRatio_p75,
        PERCENTILE(droppedVideoFramesRatio, 0.90) as droppedVideoFramesRatio_p90,
        PERCENTILE(droppedVideoFramesRatio, 0.95) as droppedVideoFramesRatio_p95,
        PERCENTILE(timeToStart, 0.50) as timeToStart_p50,
        PERCENTILE(videoPixelRatio, 0.50) as videoPixelRatio_p50,
        PERCENTILE(bufferingTimeRatio, 0.50) as bufferingTimeRatio_p50,
        PERCENTILE(timeToStart, 0.75) as timeToStart_p75,
        PERCENTILE(videoPixelRatio, 0.75) as videoPixelRatio_p75,
        PERCENTILE(bufferingTimeRatio, 0.75) as bufferingTimeRatio_p75,
        PERCENTILE(timeToStart, 0.90) as timeToStart_p90,
        PERCENTILE(videoPixelRatio, 0.90) as videoPixelRatio_p90,
        PERCENTILE(bufferingTimeRatio, 0.90) as bufferingTimeRatio_p90,
        PERCENTILE(timeToStart, 0.95) as timeToStart_p95,
        PERCENTILE(videoPixelRatio, 0.95) as videoPixelRatio_p95,
        PERCENTILE(bufferingTimeRatio, 0.95) as bufferingTimeRatio_p95,
"""

# YQL select-list fragment with the money aggregates: ad session count, fraud
# counters, total/partner price and impressions, plus one price and one
# impressions column per entry of AD_TYPES. Price sums are divided by
# 1000000.0 and exposed with a Rub suffix - presumably the source values are
# in millionths of a ruble; confirm against the money source.
# The last column has no trailing comma, so this fragment has to be the final
# part of the select list it is substituted into.
events_for_metrics_money = """
        SUM(adsids) as adsids,
        SUM(price) / 1000000.0 AS PriceRub,
        CountDistinctEstimate(fraud_vsid) as fraud_vsids,
        SUM(fraud_events) as fraud_events,
        SUM(partner_price) / 1000000.0 as PartnerPriceRub,
        SUM(price_preroll) / 1000000.0 AS price_preroll_Rub,
        SUM(price_midroll) / 1000000.0 AS price_midroll_Rub,
        SUM(price_fullscreen) / 1000000.0 AS price_fullscreen_Rub,
        SUM(price_in_app) / 1000000.0 AS price_in_app_Rub,
        SUM(price_inpage) / 1000000.0 AS price_inpage_Rub,
        SUM(price_interstitial) / 1000000.0 AS price_interstitial_Rub,
        SUM(price_pauseroll) / 1000000.0 AS price_pauseroll_Rub,
        SUM(price_postroll) / 1000000.0 AS price_postroll_Rub,
        SUM(price_overlay) / 1000000.0 AS price_overlay_Rub,
        SUM(price_motion) / 1000000.0 AS price_motion_Rub,
        SUM(price_interactive_viewer) / 1000000.0 as price_interactive_viewer_Rub,
        SUM(impressions) AS impressions,
        SUM(impressions_preroll) AS impressions_preroll,
        SUM(impressions_midroll) AS impressions_midroll,
        SUM(impressions_fullscreen) AS impressions_fullscreen,
        SUM(impressions_in_app) AS impressions_in_app,
        SUM(impressions_inpage) AS impressions_inpage,
        SUM(impressions_interstitial) AS impressions_interstitial,
        SUM(impressions_pauseroll) AS impressions_pauseroll,
        SUM(impressions_postroll) AS impressions_postroll,
        SUM(impressions_overlay) AS impressions_overlay,
        SUM(impressions_motion) AS impressions_motion,
        SUM(impressions_interactive_viewer) as impressions_interactive_viewer
"""

# YQL template of the final stage: $events_for_metrics aggregates
# @reduce_source per @grouping_gr (distinct @sid count plus the substituted
# main/money fragments), then the template selects the result sets:
# $events_for_metrics_postprocess (expected to be defined by @postfix, see
# ``postprocess``), $reduce_dynamic_events_2 and - inside the /*VAS*/ and
# /*chtracking*/ markers, which are SQL block comments as written -
# $reduce_dynamic_values_2 and the per-event direct_events aggregation.
# All @name tokens are placeholders.
events_for_metrics = """
$events_for_metrics = (
    select
        @grouping_sel,
        COUNT(DISTINCT @sid) as Vsids,
        @vsidView
        @events_for_metrics_main
        @events_for_metrics_money
    from @reduce_source
    group by @grouping_gr
);
@postfix
select * from $events_for_metrics_postprocess;
select * from $reduce_dynamic_events_2;
/*VAS
select * from $reduce_dynamic_values_2;
VAS*/
/*chtracking
$direct_events_1 = (
    select
        Version,
        Bucket,
        Platform,
        @sid,
        direct_events.0 as EventName,
        direct_events.1 as EventCount
    from @reduce_source flatten list by direct_events
);
$reduce_direct_events = (
    select
        Version,
        Bucket,
        Platform,
        EventName,
        CountDistinctEstimate(@sid) as vsids,
        SUM(EventCount) as EventCount,
    from $direct_events_1
    group by Version, Bucket, Platform, EventName
);
select * from $reduce_direct_events;
chtracking*/
"""

# Template that defines $events_for_metrics_postprocess on top of
# $events_for_metrics. make_events_for_metrics currently fills both
# placeholders (@without, @joins) with empty strings, which turns this into a
# plain "select * from $events_for_metrics as r".
postprocess = """
$events_for_metrics_postprocess = (
    select
        * @without
    from $events_for_metrics as r
    @joins
);
"""

# Query suffix that stores the rows of @reduce_events into the table
# @tmp_table, truncating it first. Both placeholders are filled outside of
# this part of the file.
tmp_tables_insertion_postfix = """
insert into `@tmp_table` with truncate
select * from @reduce_events;
"""

# Human-readable title of this job.
# NOTE(review): presumably used as the YQL query title - confirm at the use site.
TITLE = "PCODE Fast Metrics | YQL"


def make_source(fields, fasts, dailies, args):
    """
    Build the YQL text that defines the ``$source`` subquery.

    Parameters
    ----------
    fields : dict that maps column-name placeholders to their replacements
    fasts : paths of the "fast" event tables (may be empty)
    dailies : paths of the daily event tables (may be empty)
    args : arguments from main; ``memory_limit``, ``memory_reserve_factor``,
        ``mode`` and ``service`` are read here
    Returns
    -------
    result : the two memory pragmas followed by ``$source = ( ... );`` where
    the body is ``source_stub`` rendered for the dailies and/or the fasts,
    joined with ``UNION ALL`` when both are present
    """
    header = (
        'pragma yt.DefaultMemoryLimit = "@[memory_limit]";\n'
        'pragma yt.DefaultMemoryReserveFactor = "@[memory_reserve_factor]";\n'
        "$source = (\n"
    )
    header = header.replace("@[memory_limit]", args.memory_limit)
    header = header.replace("@[memory_reserve_factor]", args.memory_reserve_factor)

    # source_stub carries two mutually exclusive sections wrapped into SQL
    # comments; removing the markers of one of them activates that section.
    if args.mode in ("sessions", "testids_self_join", "testids_each_row"):
        markers = [
            ("/*with_testids_join", ""),
            ("with_testids_join*/", ""),
        ]
    else:
        markers = [
            ("/*without_testids_join", ""),
            ("without_testids_join*/", ""),
        ]
    stub = apply_replacements(source_stub, markers)

    def render(tables):
        # The directory comes from the first path; the range is given by the
        # last path component of the first and the last table.
        first_path, last_path = tables[0], tables[-1]
        replacements = fields.copy()
        replacements.update({
            "@basedir": "/".join(first_path.split("/")[:-1]),
            "@service": args.service,
            "@from": first_path.split("/")[-1],
            "@to": last_path.split("/")[-1],
        })
        return apply_replacements(stub, replacements)

    selects = [render(tables) for tables in (dailies, fasts) if tables]
    return header + "\nUNION ALL\n".join(selects) + "\n);\n\n"


def make_map2(args):
    """
    Render the ``map2`` query template for the given service.

    Parameters
    ----------
    args : arguments from main; only ``service`` is read
    Returns
    -------
    ``map2`` with ``@VSID`` replaced by ``VSID`` and ``@state`` replaced by
    ``state,`` for StreamPlayer/AndroidPlayer or by an empty string otherwise
    (per the original note: a transposed chunk of n columns, one column per
    event)
    """
    if args.service in ("StreamPlayer", "AndroidPlayer"):
        state_column = "state,"
    else:
        state_column = ""
    replacements = {
        "@VSID": "VSID",
        "@state": state_column,
    }
    return apply_replacements(map2, replacements)


def make_reduce1(args):
    """
    Render the first reduce stage (events and money counted per VSID).

    Parameters
    ----------
    args : arguments from main; only ``service`` is read
    Returns
    -------
    the rendered ``reduce1`` template (plus ``values_reduce`` for non-player
    services) and, for every service except VAS, the rendered
    ``reduce1_join`` template with the per-ad-type price/impressions columns
    """
    service = args.service
    is_player = service in ("StreamPlayer", "AndroidPlayer")

    # NOTE(review): a service that is neither a player nor "VAS" leaves
    # main_metrics_ unbound and fails below with UnboundLocalError.
    if is_player:
        main_metrics_ = main_metrics_stub
    elif service == "VAS":
        main_metrics_ = adsdk_mm_stub

    template = reduce1 if is_player else reduce1 + values_reduce
    replacements = {
        "@VSID": "VSID",
        "@2VSID": "VSID",
        "@main_metrics": main_metrics_,
        "@vsidView": "" if service == "VAS" else vsid_view_stub,
    }
    result = apply_replacements(template, replacements)
    if service == "VAS":
        return result

    # One NULL-safe Double column per ad type: first all prices, then all
    # impressions, joined so that they line up with the template indentation.
    separator = ",\n" + " " * 8
    price_columns = [
        "CAST(m.price_{x} as Double) ?? 0.0 as price_{x}".format(x=ad_type)
        for ad_type in AD_TYPES
    ]
    impression_columns = [
        "CAST(m.impressions_{x} as Double) ?? 0.0 as impressions_{x}".format(x=ad_type)
        for ad_type in AD_TYPES
    ]
    price_madness = (
        separator.join(price_columns) + separator + separator.join(impression_columns)
    )
    return result + apply_replacements(
        reduce1_join,
        {
            "@price_madness": price_madness,
            "@SID": "VSID",
        },
    )


def make_events_for_metrics(args):
    """
    Render the ``events_for_metrics`` template for the given service.

    Parameters
    ----------
    args : arguments from main; only ``service`` is read
    Returns
    -------
    the final aggregation query text with all placeholders substituted
    """
    sid = "VSID"
    grouping = "Version, Bucket, Platform"
    # post-processing is a no-op for now, but it may become useful again
    postprocess_ = apply_replacements(postprocess, {"@without": "", "@joins": ""})

    columns = ["Vsids"]
    if args.service in ("StreamPlayer", "AndroidPlayer"):
        reduce_source = "$reduce_events_joined"
        events_for_metrics_main_ = apply_replacements(
            events_for_metrics_main, {"@sid": sid}
        )
        events_for_metrics_money_ = events_for_metrics_money + ","
        columns.extend([
            "PriceRub",
            "impressions",
            "PartnerPriceRub",
            "adsids",
            "Vsids_View",
            "TVT",
            "TVT_tsdiff",
            "TVT_20m",
            "TVT_40m",
            "TVT_60m",
            "TVT_90m",
            "TVT_90mplus",
            "TotalStalledDuration",
            "Stalled_total",
            "Stalled_vsids",
            "switchUp",
            "switchDown",
        ])
        duration_kinds = ["Other", "Init", "SetSource", "Seek", "AdEnd"]
        counter_kinds = duration_kinds + ["Recover", "VideoTrackChange"]
        columns.extend("stalled{}Duration".format(kind) for kind in duration_kinds)
        for kind in counter_kinds:
            columns.append("Stalled{}_total".format(kind))
            columns.append("Stalled{}_vsids".format(kind))
        columns.extend(["Fatal_total", "Fatal_vsids"])
    else:
        reduce_source = "$reduce_events"
        events_for_metrics_main_ = adsdk_percentiles_stub
        events_for_metrics_money_ = ''
        percentile_metrics = [
            "timeToStart",
            "videoPixelRatio",
            "bufferingTimeRatio",
            "droppedVideoFramesRatio",
        ]
        columns.extend(
            "{}_p{}".format(metric, percentile)
            for metric in percentile_metrics
            for percentile in ["50", "75", "90", "95"]
        )

    aggregate_list_madness = (",\n" + " " * 8).join(
        "aggregate_list(AsStruct(Bucket as Bucket, {x} as Value)) as {x}".format(x=column)
        for column in columns
    )
    vsidView = "" if args.service == "VAS" else "COUNT(DISTINCT Vsid_View) as Vsids_View,"
    return apply_replacements(
        events_for_metrics,
        {
            "@grouping_sel": grouping,
            "@grouping_gr": grouping,
            "@sid": sid,
            "@reduce_source": reduce_source,
            "@events_for_metrics_main": events_for_metrics_main_,
            "@events_for_metrics_money": events_for_metrics_money_,
            "@postfix": postprocess_,
            "@vsidView": vsidView,
            "@aggregate_list_madness": aggregate_list_madness,
        },
    )


def parse_date(s):
    """
    Parse a date string and attach the ``moscow`` tzinfo to it.

    A string that contains "T" is parsed with the module-level ``dtformat``,
    anything else is parsed as ``%Y-%m-%d``.
    """
    fmt = dtformat if "T" in s else "%Y-%m-%d"
    return datetime.datetime.strptime(s, fmt).replace(tzinfo=moscow)


def hist_to_counter(hist):
    """
    Convert a histogram dict into a Counter.

    :param hist: dict with a "Bins" list; every bin has "Position" and
        "Frequency"
    :return: Counter that maps Position to int(Frequency); when a position
        occurs more than once the last bin wins
    """
    frequencies = {}
    for bin_ in hist["Bins"]:
        frequencies[bin_["Position"]] = int(bin_["Frequency"])
    return Counter(frequencies)


def _get_result(table):
    table.fetch_full_data()
    colnames = table.column_names
    result = []
    for row in table.rows:
        result.append(dict(list(zip(colnames, row))))
    return result


def get_results(query):
    """
    Collect the results of a YQL query.

    :param query: query object whose ``get_results()`` yields result tables
    :return: list with one entry per result table; every entry is a list of
        row dicts (see ``_get_result``)
    """
    # Materialize the table list first, then fetch the tables one by one.
    tables = list(query.get_results())
    fetched = []
    for table in tables:
        fetched.append(_get_result(table))
    return fetched


def calc_ranksum(counter, value_to_rank):
    """
    Sum the ranks of all observations of a sample.

    :param counter: mapping value -> number of observations with that value
    :param value_to_rank: mapping value -> rank assigned to that value
    :return: sum of rank * count over all values of ``counter``
    """
    total = 0
    for value, count in counter.items():
        total += value_to_rank[value] * count
    return total


def calc_u(counter, value_to_rank):
    """
    Compute the U statistic of one sample from its rank sum.

    :param counter: mapping value -> number of observations with that value
    :param value_to_rank: mapping value -> rank assigned to that value
    :return: R - n * (n + 1) / 2, where R is the rank sum of the sample and
        n is its size
    """
    sample_size = sum(counter.values())
    rank_sum = calc_ranksum(counter, value_to_rank)
    return rank_sum - (sample_size * (sample_size + 1) / 2)


def calc_vtr_and_ties(counter1, counter2):
    """
    Rank the pooled observations of two samples and compute the tie term.

    Values are ranked in ascending order (the largest value receives the
    highest ranks); all observations that share a value get the mean of the
    ranks they occupy.

    :param counter1: Counter value -> number of observations (sample 1)
    :param counter2: Counter value -> number of observations (sample 2)
    :return: tuple (value_to_rank, ties) where value_to_rank maps every
        pooled value to its mean rank and ties is the sum of n ** 3 - n over
        the pooled counts n
    """
    pooled = counter1 + counter2
    remaining = sum(pooled.values())
    value_to_rank = {}
    ties = 0
    # Walk from the largest value down, handing out the highest free ranks.
    for value in sorted(pooled, reverse=True):
        n = pooled[value]
        # The group occupies ranks (remaining - n + 1) .. remaining; its mean
        # is the average of the first and the last rank. The closed form
        # avoids summing a range of n elements for every distinct value.
        value_to_rank[value] = (2 * remaining - n + 1) / 2.0
        remaining -= n
        ties += n ** 3 - n
    return value_to_rank, ties


def calc_ties(counter1, counter2):
    """
    Compute a tie term over the values that occur in both samples.

    :param counter1: mapping value -> number of observations (sample 1)
    :param counter2: mapping value -> number of observations (sample 2)
    :return: sum of t ** 2 - t, where t is the pooled count of a value
        present in both mappings

    NOTE(review): the usual Mann-Whitney tie correction (the one
    calc_vtr_and_ties computes) uses t ** 3 - t over all pooled values;
    confirm that this variant is intended.
    """
    shared_values = set(counter1).intersection(counter2)
    pooled_counts = [counter1[value] + counter2[value] for value in shared_values]
    return sum(t ** 2 - t for t in pooled_counts)


def calc_sd(n1, n2, ties):
    """
    Standard deviation of the U statistic with tie correction.

    :param n1: size of the first sample
    :param n2: size of the second sample
    :param ties: tie term (sum of t ** 3 - t over the tied groups)
    :return: sqrt(n1 * n2 / (N * (N - 1)) * (N ** 3 - N - ties) / 12),
        where N = n1 + n2
    :raises ZeroDivisionError: when N is 0 or 1
    """
    total = n1 + n2
    pair_ratio = (n1 * n2) / (total * (total - 1))
    variance = pair_ratio * (total ** 3 - total - ties) / 12.0
    return math.sqrt(variance)


def mw_from_counter(counter1, counter2, alternative="less"):
    """
    Mann-Whitney U test (normal approximation) on two value histograms.

    :param counter1: Counter value -> number of observations (sample 1)
    :param counter2: Counter value -> number of observations (sample 2)
    :param alternative: "less", "greater" or "two-sided"
    :return: tuple (u2, p); (0, 1) when the test is degenerate (fewer than
        two pooled observations, or zero standard deviation because all
        observations are tied)
    :raises ValueError: when ``alternative`` is not one of the three
        supported values

    NOTE(review): for "two-sided" the returned p is the one-sided tail of
    the larger U and is not doubled; SciPy's two-sided p-value is twice that
    tail. Confirm with the callers before changing it.
    """
    if alternative not in ("less", "greater", "two-sided"):
        raise ValueError(
            "alternative should be 'less', 'greater' or 'two-sided', got {!r}".format(
                alternative
            )
        )
    value_to_rank, ties = calc_vtr_and_ties(counter1, counter2)
    n1 = sum(counter1.values())
    n2 = sum(counter2.values())
    # U derived from the rank sum of sample 1. The u1/u2 naming appears to
    # follow SciPy's legacy mannwhitneyu implementation, where this value
    # is called u2.
    u2 = calc_u(counter1, value_to_rank)
    u1 = n1 * n2 - u2
    if alternative == "less":
        bigu = u1
    elif alternative == "greater":
        bigu = u2
    else:
        bigu = max(u1, u2)
    # Mean of U plus the 0.5 continuity correction.
    meanrank = n1 * n2 / 2.0 + 0.5
    try:
        # calc_sd divides by N * (N - 1) and therefore fails for N < 2;
        # the division below fails when sd == 0.
        sd = calc_sd(n1, n2, ties)
        z = (bigu - meanrank) / sd
    except ZeroDivisionError:
        return (0, 1)
    p = scipy.stats.distributions.norm.sf(z)
    return (u2, p)


def try_div(num, den):
    """
    Divide ``num`` by ``den``, treating falsy operands as 0.

    :param num: numerator (None/0/empty is treated as 0)
    :param den: denominator (None/0/empty is treated as 0)
    :return: num / float(den), or 0 when the denominator is zero
    """
    numerator = num if num else 0
    denominator = den if den else 0
    try:
        quotient = numerator / float(denominator)
    except ZeroDivisionError:
        return 0
    return quotient


def sget(obj, value):
    """
    Read ``value`` from ``obj`` by key, falling back to attribute access.

    :param obj: a mapping-like object or a plain object
    :param value: key / attribute name
    :return: obj[value]; when indexing raises TypeError,
        getattr(obj, value, 0) is returned instead
    :raises KeyError: when ``obj`` supports indexing but has no such key
    """
    try:
        item = obj[value]
    except TypeError:
        # obj is not subscriptable (or rejects this key type): use attributes
        return getattr(obj, value, 0)
    return item


def get_comparison(row, metric_field, share_field=None, percentile_correction=False):
    """
    Collapse the per-bucket values of a metric into a list of numbers and a
    single aggregated value.

    :param row: result row; row[field] is a list of items that expose
        "Bucket" and "Value"
    :param metric_field: name of the metric (numerator) field
    :param share_field: optional name of the denominator field; when given,
        the metric is computed as a ratio
    :param percentile_correction: divide the aggregated value by 100.0
    :return: tuple (row_bucketed, row_val)
        row_bucketed - per-bucket values, padded with zeros to at least 100
            entries; with share_field these are numerator / denominator per
            bucket, both ordered by Bucket
        row_val - sum(row_bucketed) without share_field, otherwise
            sum(numerator) / sum(denominator); divided by 100.0 when
            percentile_correction is set
    """
    if share_field:
        def values_by_bucket(field):
            # Values ordered by bucket so that numerator and denominator
            # line up; an empty list is replaced by 100 zeros.
            items = sorted(row[field], key=lambda item: sget(item, "Bucket"))
            return [sget(item, "Value") or 0 for item in items] or [0] * 100

        numerator = values_by_bucket(metric_field)
        denominator = values_by_bucket(share_field)
        row_bucketed = [try_div(num, den) for num, den in zip(numerator, denominator)]
        row_val = try_div(sum(numerator), sum(denominator)) or 0
    else:
        # Without a denominator the source order of the buckets is kept.
        row_bucketed = [sget(item, "Value") or 0 for item in row[metric_field]]
        if not row_bucketed:
            row_bucketed = [0] * 100
        row_val = sum(row_bucketed) or 0

    if percentile_correction:
        row_val /= 100.0

    missing = 100 - len(row_bucketed)
    if missing > 0:
        row_bucketed.extend([0] * missing)
    return row_bucketed, row_val


def mw_wrapper(*args, **kwargs):
    """
    Call ``scipy.stats.mannwhitneyu`` and map its ValueError to a neutral
    result.

    :return: whatever mannwhitneyu returns, or (0, 1) when it raises
        ValueError
    """
    try:
        outcome = scipy.stats.mannwhitneyu(*args, **kwargs)
    except ValueError:
        outcome = (0, 1)
    return outcome


def calc_metric(
    cnt,
    exps,
    metric_field,
    share_field=None,
    metric_name=None,
    positive_direction=None,
    percentile_correction=False,
):
    """
    Compare one metric between the control and every experiment.

    :param cnt: dict with the metrics of the control
    :param exps: list of dicts with the metrics of the experiments
    :param metric_field: metric name (key in the dicts)
    :param share_field: name of the denominator field, if one is needed
    :param metric_name: display name; defaults to metric_field
    :param positive_direction: "up" or "down" decides which change is
        coloured green and which red; when not set, significant changes are
        coloured yellow
    :param percentile_correction: average a percentile over the buckets
        (the value is always divided by 100, the number of buckets)
    :return: dict describing the control value and, under "exps", one dict
        per experiment with value, diff, perc_diff, one-sided p-values,
        direction, std and range; the report generators consume it
    """
    control_buckets, control_value = get_comparison(
        cnt, metric_field, share_field, percentile_correction=percentile_correction
    )
    row = {
        "value": control_value,
        "version_control": cnt["Version"],
        "platform": cnt["Platform"],
        "exps": [],
    }
    row["metric"] = metric_name or metric_field
    print("calculating {}".format(row["metric"]))
    row["positive_direction"] = positive_direction
    row["std"] = np.std(control_buckets)
    row["range"] = max(control_buckets) - min(control_buckets)

    for exp in exps:
        exp_buckets, exp_value = get_comparison(
            exp, metric_field, share_field, percentile_correction=percentile_correction
        )
        exp_row = {
            "value": exp_value or 0,
            "version_experiment": exp["Version"],
        }
        diff = (exp_row["value"] or 0) - (row["value"] or 0)
        exp_row["diff"] = diff
        try:
            exp_row["perc_diff"] = diff / float(row["value"])
        except ZeroDivisionError:
            # a zero control value is reported as a 100% difference
            exp_row["perc_diff"] = 1

        # Two one-sided tests; the smaller p-value gives both the reported
        # p-value and the direction of the change.
        p_less = mw_wrapper(control_buckets, exp_buckets, alternative="less")[1]
        p_greater = mw_wrapper(control_buckets, exp_buckets, alternative="greater")[1]
        exp_row["pvalue_less"] = p_less
        exp_row["pvalue_greater"] = p_greater
        exp_row["pvalue"] = min(p_less, p_greater)
        if p_less < p_greater:
            direction = "up"
        elif p_greater < p_less:
            direction = "down"
        else:
            direction = "none"
        exp_row["direction"] = direction

        exp_row["std"] = np.std(exp_buckets)
        exp_row["range"] = max(exp_buckets) - min(exp_buckets)
        row["exps"].append(exp_row)

    return row


class MetricFormatter(object):
    """
    Formats calculated metric rows (the dicts built by ``calc_metric``) as
    Startrek markup, HTML or JSON and publishes them to a Startrek task.

    Attributes set by ``__init__``:
        cnt - name of the control version
        exps - sorted list of experiment version names
        task - Startrek task key used for uploads and comments
        additional_data - dict with run metadata; the methods below read
            "args", "platform", "from_", "to_", "start_ts", "share_urls"
    The metric groups (main_metrics, event_metrics, quality_metrics,
    values_tags_metrics) are attached by ``add_metrics_groups``.
    """

    def __init__(self, control=None, experiments=None, task=None, additional_data=None):
        """
        :param control: name of the control version
        :param experiments: comma-separated experiment version names; None
            means no experiments
        :param task: Startrek task key
        :param additional_data: dict with run metadata
        """
        self.cnt = control
        # The default (None) used to crash on .split(); treat it as "no
        # experiments". Any string, including "", is split as before.
        self.exps = sorted(experiments.split(",")) if experiments is not None else []
        self.task = task
        self.additional_data = additional_data or {}

    def add_metrics_groups(self, metrics_groups, replace=False):
        """
        Attach metric groups to the formatter.

        :param metrics_groups: sequence of four lists in the order
            main, event, quality, values-tags
        :param replace: overwrite the stored groups instead of extending them
        """
        dct = {
            "main_metrics": metrics_groups[0],
            "event_metrics": metrics_groups[1],
            "quality_metrics": metrics_groups[2],
            "values_tags_metrics": metrics_groups[3],
        }
        for k in sorted(dct):
            if replace:
                setattr(self, k, dct[k])
            else:
                if not getattr(self, k, None):
                    setattr(self, k, [])
                getattr(self, k).extend(dct[k])

    @staticmethod
    def format_diff(diff):
        """Format a numeric difference with an explicit sign and 4 decimals;
        non-numeric input gives an en dash."""
        if not isinstance(diff, (int, float, long)):
            return "–"
        return "{sign}{diff:.04f}".format(
            sign="+" if diff > 0 else "", diff=diff
        ).replace("-", "–")

    @staticmethod
    def _format_pvalue(perc_diff, pvalue, positive_direction="unknown", format_="ST"):
        """
        Pick the colour of a p-value cell.

        :param perc_diff: relative difference; its sign gives the direction
        :param pvalue: p-value of the comparison
        :param positive_direction: "up", "down" or "unknown"; None is
            treated as "unknown"
        :param format_: "ST" returns Startrek markup, "raw_color" returns
            only the colour name; any other value returns None
        """
        # Metrics without a preferred direction carry None (see calc_metric);
        # they must be coloured yellow, not red, when significant.
        positive_direction = positive_direction or "unknown"
        real_direction = "up" if (perc_diff is None or perc_diff > 0) else "down"
        if pvalue >= 0.01 or (perc_diff is not None and perc_diff == 0):
            color = "gray"
        elif positive_direction == "unknown":
            color = "yellow"
        elif positive_direction == real_direction:
            color = "green"
        else:
            color = "red"
        if format_ == "ST":
            return "!!({color}){pvalue:.04f}!!".format(color=color, pvalue=pvalue)
        elif format_ == "raw_color":
            return color

    def format_pvalue(self, metric, positive_direction="unknown"):
        """Startrek markup for the p-value of an experiment dict; an en dash
        when the dict has no "pvalue"."""
        if "pvalue" not in metric:
            return "–"
        return self._format_pvalue(
            metric["perc_diff"],
            metric["pvalue"],
            positive_direction=positive_direction,
        )

    @staticmethod
    def get_abstract_pvalue(metric, positive_direction="unknown"):
        """
        Build a format-independent description of a p-value cell.

        :param metric: experiment dict with "pvalue", "perc_diff" and
            "direction"
        :param positive_direction: "up", "down" or "unknown"; None is
            treated as "unknown"
        :return: dict with "type", "color", "value" and "direction", or "-"
            when the dict has no "pvalue"
        """
        if "pvalue" not in metric:
            return "-"
        positive_direction = positive_direction or "unknown"
        perc_diff = metric["perc_diff"]
        pvalue = metric["pvalue"]
        real_direction = metric["direction"]
        result = {
            "type": "pvalue",
            "color": "gray",
            "value": pvalue,
            "direction": real_direction,
        }
        if pvalue >= 0.01 or (perc_diff is not None and perc_diff == 0):
            return result
        if positive_direction == "unknown":
            result["color"] = "yellow"
        elif positive_direction == real_direction:
            result["color"] = "green"
        elif real_direction == "none":
            result["color"] = "gray"
        else:
            result["color"] = "red"
        return result

    @staticmethod
    def st_format_elem(elem):
        """Render one table cell as Startrek markup (dict cells are p-values)."""
        if isinstance(elem, dict):
            return "!!({color}){pvalue:.04f}!!".format(
                color=elem["color"], pvalue=elem["value"]
            )
        return format(elem)

    def st_format_row(self, row):
        """Render one table row as Startrek markup."""
        return "|| " + " | ".join(map(self.st_format_elem, row)) + " ||"

    @staticmethod
    def html_format_elem(elem, tag="td"):
        """Render one table cell as an HTML element (dict cells are p-values)."""
        if isinstance(elem, dict):
            return '<{tag} class="{color}">{pvalue:.04f} ({direction})</{tag}>'.format(
                color=elem["color"],
                pvalue=elem["value"],
                tag=tag,
                direction=elem["direction"],
            )
        if isinstance(elem, bytes):
            elem = elem.decode("utf8", errors="replace")
        return "<{tag}>{elem}</{tag}>".format(elem=elem, tag=tag)

    def html_format_row(self, row, header=False):
        """Render one table row as HTML; the first cell of a body row and
        every cell of a header row is a <th>."""
        if not header:
            formatted_row = [self.html_format_elem(row[0], tag="th")] + list(
                map(self.html_format_elem, row[1:])
            )
        else:
            formatted_row = [self.html_format_elem(x, tag="th") for x in row]
        return "<tr>" + "".join(formatted_row) + "</tr>"

    def st_format_table(self, rows):
        """Render rows as a Startrek table."""
        rows = ["#|"] + [self.st_format_row(row) for row in rows] + ["|#"]
        return "\n".join(rows)

    def html_format_table(self, rows):
        """Render rows as an HTML table; rows[0] is the header."""
        header, rows = rows[0], rows[1:]
        return (
            "<table>\n"
            + "<thead>\n"
            + self.html_format_row(header, header=True)
            + "</thead>\n<tbody>\n"
            + "\n".join(map(self.html_format_row, rows))
            + "\n</tbody>\n</table>"
        )

    def get_add_data_by_row(self, dct):
        """Text for the optional "add_data" column: std and range of a row."""
        res = "std = " + str(dct["std"]) + " |range = " + str(dct["range"])
        return res

    def generate_metric_table(self, metrics, exp_id=None):
        """
        Build the table (list of rows, header first) for a list of metrics.

        :param metrics: metric dicts as built by calc_metric
        :param exp_id: index of the single experiment to show; None shows
            all experiments
        """
        show_stddev = self.additional_data["args"].stddev
        result = []
        # 1. build the header
        row = ["Metric", "{} value".format(self.cnt)]
        if show_stddev:
            row.append("{} add_data".format(self.cnt))
        for exp in self.exps if exp_id is None else [self.exps[exp_id]]:
            row += [
                "{} value".format(exp),
                "{} diff".format(exp),
                "{} Percent diff".format(exp),
                "{} pValue".format(exp),
            ]
            if show_stddev:
                row.append("{} add_data".format(exp))
        result.append(row)
        # 2. build the body of the table
        for metric in metrics:
            row = [metric["metric"], metric["value"]]
            if show_stddev:
                row.append(self.get_add_data_by_row(metric))
            if exp_id is not None and exp_id >= len(metric["exps"]):
                sys.stderr.write("unable to calculate {}\n".format(metric["metric"]))
                continue
            for exp in metric["exps"] if exp_id is None else [metric["exps"][exp_id]]:
                row += [
                    exp["value"],
                    self.format_diff(exp["diff"]),
                    self.format_diff(exp["perc_diff"]),
                    self.get_abstract_pvalue(
                        exp,
                        positive_direction=metric.get("positive_direction")
                        or "unknown",
                    ),
                ]
                if show_stddev:
                    row.append(self.get_add_data_by_row(exp))
            result.append(row)
        return result

    def generate_st_comment(self):
        """Build the text of the Startrek comment describing the run."""
        ad = self.additional_data
        result = [
            "**Запуск стартовал**: {}".format(ad.get("start_ts")),
            "**От**: {}".format(ad.get("from_")),
            "**До**: {}".format(ad.get("to_")),
            "**Контроль**: {}".format(self.cnt),
            "**Эксперименты**: {}".format(",".join(map(str, self.exps))),
            "<{{Строчка запуска для дебага\n%%{}%%\n}}>".format(" ".join(sys.argv)),
        ]
        if ad.get("share_urls"):
            result.append(
                "**Ссылки на запросы**: {}".format(", ".join(ad["share_urls"]))
            )
        result.append("\nМетрики см. в html-файлах ниже.")
        return "\n".join(result)

    def _get_platform_obj(self, platform):
        """Return the JSON slice object of a platform, creating it on first use."""
        if platform not in self.platform_dict:
            self.platform_dict[platform] = {
                "slice_values": {"platform": platform},
                "metrics": [],
            }
        return self.platform_dict[platform]

    def _wrap_exp_data(self, exp, metric):
        """Convert one experiment dict of a metric into its JSON form."""
        return {
            "version_experiment": exp["version_experiment"],
            "value_experiment": exp["value"],
            "diff_abs": exp["diff"],
            "diff_perc": exp["perc_diff"],
            "pvalue": exp["pvalue"],
            "color": self._format_pvalue(
                exp["perc_diff"],
                exp["pvalue"],
                # same fallback as in generate_metric_table
                metric["positive_direction"] or "unknown",
                format_="raw_color",
            ),
            "direction": exp["direction"],
            "stddev": exp["std"],
            "range": exp["range"],
        }

    def _json_process_metric(self, metric, mg):
        """Append the JSON form of a metric to the slice of its platform."""
        po = self._get_platform_obj(metric["platform"])
        metric_json = {
            "metric_group": mg,
            "metric_name": metric["metric"],
            "value_control": metric["value"],
            "positive_direction": metric["positive_direction"],
            "exp_data": [self._wrap_exp_data(x, metric) for x in metric["exps"]],
        }
        po["metrics"].append(metric_json)

    @staticmethod
    def _epoch_ms_str(timestamp):
        """
        Convert a "%Y-%m-%dT%H:%M:%S" string into epoch milliseconds (as a
        string of digits).

        NOTE(review): "%s" is a platform-specific strftime directive (not
        listed in the Python documentation) and interprets the naive
        datetime in the local timezone of the process.
        """
        parsed = datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S")
        return parsed.strftime("%s") + "000"

    def generate_json(self):
        """Build the JSON-serializable report with one slice per platform."""
        self.platform_dict = {}

        result = {
            "start_ts": int(self._epoch_ms_str(self.additional_data["from_"])),
            "end_ts": int(self._epoch_ms_str(self.additional_data["to_"])),
            "service": self.additional_data["args"].service,
            "version_control": self.cnt,
            "versions_experiments": self.exps,
        }
        for mg in (
            "main_metrics",
            "event_metrics",
            "quality_metrics",
            "values_tags_metrics",
        ):
            for metric in getattr(self, mg):
                self._json_process_metric(metric, mg)
        result["slices"] = sorted(
            self.platform_dict.values(),
            key=lambda x: x["slice_values"]["platform"],
        )
        return result

    @staticmethod
    def is_regular_event(event_name):
        """True for event names without a warning_/error_/fatal_ prefix."""
        return (
            not event_name.startswith("warning_")
            and not event_name.startswith("error_")
            and not event_name.startswith("fatal_")
        )

    def generate_html(self, exp=None):
        """
        Build the HTML report.

        :param exp: index of the single experiment to show; None shows all
        :return: the complete HTML document as a string
        """
        result = [
            html_stub.format(
                start_ts=self._epoch_ms_str(self.additional_data["from_"]),
                end_ts=self._epoch_ms_str(self.additional_data["to_"]),
                project=self.additional_data["args"].service,
            )
        ]

        def add_section(title, metrics):
            # heading first, then the table rendered for the chosen experiment
            result.append("<h1>{}</h1>".format(title))
            result.append(
                self.html_format_table(
                    self.generate_metric_table(metrics, exp_id=exp)
                )
            )

        def events_containing(marker):
            return [x for x in self.event_metrics if marker in x["metric"]]

        add_section(
            "Main metrics ({})".format(self.additional_data["platform"]),
            self.main_metrics,
        )
        if self.quality_metrics:
            add_section("Quality metrics", self.quality_metrics)
        add_section(
            "Event metrics",
            [x for x in self.event_metrics if self.is_regular_event(x["metric"])],
        )
        if self.values_tags_metrics:
            add_section("Tags percentile metrics", self.values_tags_metrics)
        add_section("Warning metrics", events_containing("warning_"))
        add_section("Error metrics", events_containing("error_"))
        add_section("Fatal metrics", events_containing("fatal_"))
        add_section("Tracking metrics", events_containing("DirectTracking_"))
        result.append("</body></html>")
        return "\n".join(result)

    @staticmethod
    def _post_comment(text, task, attachments=None):
        """POST a comment (optionally with attachment ids) to a Startrek task
        and return the response."""
        json_ = {"text": text}
        if attachments:
            json_["attachmentIds"] = attachments
        # NOTE(review): verify=False disables TLS certificate verification.
        kwargs = dict(headers=headers, json=json_, verify=False)
        req = requests.post("{}/issues/{}/comments".format(api, task), **kwargs)
        return req

    @staticmethod
    def _st_upload_file(text, task, filename=None):
        """
        Upload ``text`` to Startrek as an attachment and return the response.

        :param text: file content
        :param task: task key, used only to build the default file name
        :param filename: attachment name; defaults to "<task>-<epoch>.html"
        """
        if not filename:
            filename = "{}-{}.html".format(task, datetime.datetime.now().strftime("%s"))
        headers_ = headers.copy()
        # requests sets the multipart Content-Type (with boundary) itself
        headers_.pop("Content-Type")
        # Send the payload from memory: no temporary file in the working
        # directory, so nothing to leak or clean up when the request fails.
        # The multipart part keeps the name the temporary file used to have.
        payload = text if isinstance(text, bytes) else text.encode("utf8")
        # NOTE(review): verify=False disables TLS certificate verification.
        req = requests.post(
            "{api}/attachments?filename={filename}".format(api=api, filename=filename),
            files={"file": ("tmpfile", payload)},
            headers=headers_,
            verify=False,
        )
        return req

    def st_upload_file(self, content, filename):
        """
        Upload a file and return the id of the created attachment.

        :raises Exception: when the response carries no attachment id
        """
        req = self._st_upload_file(content, self.task, filename=filename)
        try:
            id_ = req.json()["id"]
        except Exception:
            raise Exception(
                "unsuccessful query: {} {}".format(req.status_code, req.text)
            )
        return id_

    def generate_comment_and_attachments(self):
        """
        Render the report in the format given by args.output_type ("html" -
        one file per experiment, "json" - a single file) and upload it.

        :return: tuple (comment text, attachment ids, rendered file contents)
        """
        comment = self.generate_st_comment()
        attachments = []
        output_files = []
        output_type = self.additional_data["args"].output_type
        if output_type == "html":
            for i, exp in enumerate(self.exps):
                html = self.generate_html(exp=i)
                output_files.append(html)
                exp_id = exp.split("-")[-1]
                filename = "{}_{}_{}.html".format(
                    exp_id, self.task, self.additional_data["platform"]
                )
                attachments.append(self.st_upload_file(html, filename))
        elif output_type == "json":
            json_dumps = json.dumps(self.generate_json(), indent=2)
            filename = "metrics_{}.json".format(self.task)
            attachments.append(self.st_upload_file(json_dumps, filename))
            output_files.append(json_dumps)
        return (comment, attachments, output_files)

    def post_comment(self, data=None):
        """
        Post the report comment to the task.

        :param data: optional precomputed result of
            generate_comment_and_attachments
        :return: tuple (response, comment text, rendered file contents)
        """
        if not data:
            data = self.generate_comment_and_attachments()
        comment, attachments, output_files = data
        req = self._post_comment(comment, self.task, attachments=attachments)
        return req, comment, output_files


def calc_metrics(cnt, exps, events, values_tags, additional_data):
    """
    Compute every metric for one control/experiments slice.

    Each metric is produced by ``calc_metric(cnt, exps, numerator,
    denominator, ...)``.  A ``None`` denominator is used for plain totals; a
    column name is passed for the "per vsid"/"share" style metrics.

    Which metrics are computed depends on ``additional_data["args"].service``:

    * ``StreamPlayer`` / ``AndroidPlayer`` - view, TVT, stall, fatal,
      quality-switch and per-quality metrics;
    * any other service - percentile metrics (timeToStart, videoPixelRatio,
      bufferingTimeRatio, droppedVideoFramesRatios);
    * every service except ``VAS`` - ad/money/fraud metrics on top.

    :param cnt: control group data, passed to ``calc_metric`` as is
    :param exps: experiments data (list), passed to ``calc_metric`` as is
    :param events: names of the dynamic event metrics to compute
    :param values_tags: names of the percentile tags to compute
    :param additional_data: dict of extra data; only ``["args"].service`` is
        read here
    :return: four lists of computed metric objects:
        ``(main, event, quality, values_tags)``
    """
    service = additional_data["args"].service
    is_player = service in ("StreamPlayer", "AndroidPlayer")
    main_metrics = []
    quality_metrics = []

    def compute(numerator, denominator=None, *extra, **options):
        # All metrics share the same control/experiment data; only the
        # columns and the presentation options differ.  Options are forwarded
        # untouched so that calc_metric's own defaults still apply when an
        # option is not given.
        return calc_metric(cnt, exps, numerator, denominator, *extra, **options)

    def add_main(numerator, denominator=None, **options):
        main_metrics.append(compute(numerator, denominator, **options))

    # 1. Total number of VSIDs.
    add_main("Vsids")

    if is_player:
        add_main("Vsids_View")
        add_main("TVT", positive_direction="up")
        add_main("TVT_tsdiff", positive_direction="up")
        for column, label in (
            ("TVT_20m", "TVT 0–20m"),
            ("TVT_40m", "TVT 21–40m"),
            ("TVT_60m", "TVT 41–60m"),
            ("TVT_90m", "TVT 61–90m"),
            ("TVT_90mplus", "TVT 91m and up"),
        ):
            add_main(column, positive_direction="up", metric_name=label)
        add_main(
            "TVT", "Vsids", metric_name="TVT per vsid", positive_direction="up"
        )
        add_main(
            "TVT",
            "Vsids_View",
            metric_name="TVT per vsid_view",
            positive_direction="up",
        )
        add_main("Stalled_total", positive_direction="down")
        add_main("Stalled_vsids", positive_direction="down")
        add_main("TotalStalledDuration", positive_direction="down")
        for type_ in ("Other", "Init", "SetSource", "Seek", "AdEnd"):
            add_main("stalled{}Duration".format(type_), positive_direction="down")
        add_main(
            "TotalStalledDuration",
            "Vsids",
            metric_name="Stalled duration per vsid",
            positive_direction="down",
        )
        add_main(
            "TotalStalledDuration",
            "TVT",
            metric_name="Stalled duration share",
            positive_direction="down",
        )
        for type_ in (
            "Other",
            "Init",
            "SetSource",
            "Seek",
            "AdEnd",
            "Recover",
            "VideoTrackChange",
        ):
            add_main("Stalled{}_total".format(type_), positive_direction="down")
            add_main("Stalled{}_vsids".format(type_), positive_direction="down")
        # Ratio metrics for all stalls and for "Other" stalls.  Every label is
        # unique: the Vsids_View variant of StalledOther_total is labelled
        # "per vsid_view" so it cannot be confused with the Vsids variant.
        for column, denominator, label in (
            ("Stalled_total", "Vsids", "Stalled per vsid"),
            ("Stalled_total", "Vsids_View", "Stalled per vsid_view"),
            ("Stalled_vsids", "Vsids", "Stalled vsid share"),
            ("Stalled_vsids", "Vsids_View", "Stalled vsid_view share"),
            ("StalledOther_total", "Vsids", "StalledOther per vsid"),
            ("StalledOther_total", "Vsids_View", "StalledOther per vsid_view"),
            ("StalledOther_vsids", "Vsids", "StalledOther vsid share"),
            ("StalledOther_vsids", "Vsids_View", "StalledOther vsid_view share"),
        ):
            add_main(
                column, denominator, metric_name=label, positive_direction="down"
            )
        add_main("Fatal_total", positive_direction="down")
        add_main(
            "Fatal_total",
            "Vsids",
            metric_name="Fatal per vsid",
            positive_direction="down",
        )
        add_main(
            "Fatal_vsids",
            "Vsids",
            metric_name="Fatal vsid share",
            positive_direction="down",
        )
        for switch in ("Up", "Down"):
            column = "switch{}".format(switch)
            add_main(column, positive_direction="down")
            add_main(
                column,
                "Vsids",
                metric_name="switch{} per vsid".format(switch),
                positive_direction="down",
            )
        for quality in ("fhd", "hd", "sd", "ld"):
            for metric in ("watched_time", "stalled_time", "stalled_count"):
                quality_metrics.append(
                    compute(
                        "p_{}_{}".format(quality, metric),
                        None,
                        "{} {}".format(quality.upper(), metric.replace("_", " ")),
                        positive_direction="down" if "stalled" in metric else None,
                    )
                )
            for metric in ("stalled_time", "stalled_count"):
                quality_metrics.append(
                    compute(
                        "p_{}_{}_p95".format(quality, metric),
                        None,
                        "{} {} (95th percentile)".format(
                            quality.upper(), metric.replace("_", " ")
                        ),
                        percentile_correction=True,
                        positive_direction="down",
                    )
                )
    else:
        for metric in (
            "timeToStart",
            "videoPixelRatio",
            "bufferingTimeRatio",
            "droppedVideoFramesRatios",
        ):
            for percentile in ("50", "75", "90", "95"):
                column = "{}_p{}".format(metric, percentile)
                add_main(column, metric_name=column, percentile_correction=True)

    if service != "VAS":
        add_main("adsids", metric_name="total adsids")
        add_main("adsids", "Vsids", metric_name="adsids per (v)sid")
        add_main("PriceRub", metric_name="Price in rubles", positive_direction="up")
        add_main(
            "impressions",
            metric_name="Ad impressions from dsp-logs",
            positive_direction="up",
        )
        add_main(
            "PriceRub",
            "Vsids",
            metric_name="Price per vsid in rubles",
            positive_direction="up",
        )
        add_main("PartnerPriceRub", metric_name="Partner Price in rubles")
        add_main(
            "PartnerPriceRub",
            "Vsids",
            metric_name="Partner Price per vsid in rubles",
        )
        add_main(
            "fraud_events",
            metric_name="Total fraud events",
            positive_direction="down",
        )
        add_main(
            "fraud_vsids",
            metric_name="Vsids with fraud events",
            positive_direction="down",
        )
        add_main(
            "fraud_events",
            "Vsids",
            metric_name="Fraud events per vsid",
            positive_direction="down",
        )

    event_metrics = []
    for event in events:
        positive_direction = events_positive_directions.get(event)
        # Events listed in PER_VSID_VIEW are normalised by Vsids_View, but
        # only for the player services.
        use_vsid_view = event in PER_VSID_VIEW and is_player
        denominator = "Vsids_View" if use_vsid_view else "Vsids"
        per = "vsid_view" if use_vsid_view else "vsid"
        try:
            column = colname(event)
            total_column = "{}_total".format(column)
            vsids_column = "{}_vsids".format(column)
            event_metrics.append(
                compute(
                    total_column,
                    metric_name=event,
                    positive_direction=positive_direction,
                )
            )
            event_metrics.append(
                compute(
                    total_column,
                    denominator,
                    metric_name="{} per {}".format(event, per),
                    positive_direction=positive_direction,
                )
            )
            event_metrics.append(
                compute(
                    vsids_column,
                    metric_name="{} (vsids)".format(event),
                    positive_direction=positive_direction,
                )
            )
            event_metrics.append(
                compute(
                    vsids_column,
                    denominator,
                    metric_name="{} ({} share)".format(event, per),
                    positive_direction=positive_direction,
                )
            )
        except Exception:
            # Best effort: a failing event is reported on stderr and skipped
            # (metrics already appended for it are kept).  format_exc() takes
            # no exception argument - its first parameter is ``limit``, and
            # passing the exception there raises TypeError on Python 3.
            sys.stderr.write(traceback.format_exc() + "\n\n")

    values_tags_metrics = []
    for tag in values_tags:
        values_tags_metrics.append(
            compute(
                "tagValue_{}_count".format(tag),
                metric_name="{}_count".format(tag),
            )
        )
        # "sensor_" tags only get the count metric, no percentiles.
        if tag.startswith("sensor_"):
            continue
        for percentile in ("50", "75", "90", "95"):
            values_tags_metrics.append(
                compute(
                    "tagValue_{}_p{}".format(tag, percentile),
                    metric_name="{}_p{}".format(tag, percentile),
                    percentile_correction=True,
                )
            )
    return main_metrics, event_metrics, quality_metrics, values_tags_metrics


def wrap_buckets(
    raw_results, raw_event_results, raw_values_results, raw_tracking_results
):
    """
    Pivot raw query rows into one record per (Version, Platform) pair.

    Every record is a ``defaultdict(list)`` holding ``"Version"`` and
    ``"Platform"`` plus, for each metric column, a list of
    ``{"Bucket": ..., "Value": ...}`` points.

    :param raw_results: main metric rows; each numeric column of a row becomes
        a point of the series named after the column
    :param raw_event_results: event rows (``EventName``, ``EventCount``,
        ``vsids``); they feed the ``<event>_total`` and ``<event>_vsids``
        series
    :param raw_values_results: tag value rows (``tagName``, ``tagValueCount``,
        ``tagValue_p50/75/90/95``) or ``None``; they feed the
        ``tagValue_<tag>_count`` and ``tagValue_<tag>_p<N>`` series
    :param raw_tracking_results: tracking rows or ``None``; handled like event
        rows, but their ``EventName`` is first rewritten IN PLACE to
        ``DirectTracking_<name>``
    :return: tuple ``(records, event names, tag names)``; rows with an empty
        Version/Platform are skipped, and so are event/value rows whose
        (Version, Platform) pair has no main row
    """

    def as_text(value):
        # Byte strings are decoded as UTF-8; anything else passes through.
        if not isinstance(value, bytes):
            return value
        return value.decode("utf8", errors="replace")

    pivot = {}
    for row in raw_results:
        if not (row["Version"] and row["Platform"]):
            continue
        key = (row["Version"], row["Platform"])
        if key not in pivot:
            entry = defaultdict(list)
            entry["Version"] = row["Version"]
            entry["Platform"] = row["Platform"]
            pivot[key] = entry
        series = pivot[key]
        for column in row.keys():
            value = row[column]
            if isinstance(value, (int, float, long)):
                series[column].append({"Bucket": row["Bucket"], "Value": value})

    tracking_rows = raw_tracking_results or []
    for row in tracking_rows:
        row["EventName"] = "DirectTracking_{}".format(as_text(row["EventName"]))

    events = set()
    for row in raw_event_results + tracking_rows:
        if not (row["Version"] and row["Platform"]):
            continue
        series = pivot.get((row["Version"], row["Platform"]))
        if series is None:
            continue
        event_name = as_text(row["EventName"])
        events.add(event_name)
        series["{}_total".format(event_name)].append(
            {"Bucket": row["Bucket"], "Value": row["EventCount"]}
        )
        series["{}_vsids".format(event_name)].append(
            {"Bucket": row["Bucket"], "Value": row["vsids"]}
        )

    # Tag values are only needed for AdSDK so far; empty for the player.
    values_tags = set()
    for row in raw_values_results or []:
        if not (row["Version"] and row["Platform"]):
            continue
        series = pivot.get((row["Version"], row["Platform"]))
        if series is None:
            continue
        tag_name = as_text(row["tagName"])
        values_tags.add(tag_name)
        series["tagValue_{}_count".format(tag_name)].append(
            {"Bucket": row["Bucket"], "Value": row["tagValueCount"]}
        )
        for percentile in ("50", "75", "90", "95"):
            series["tagValue_{}_p{}".format(tag_name, percentile)].append(
                {
                    "Bucket": row["Bucket"],
                    "Value": row["tagValue_p{}".format(percentile)],
                }
            )
    return pivot.values(), events, values_tags


def process_results(req, control, experiments, task=None, additional_data=None):
    """
    Compute the metrics per platform and render (and optionally post) the report.

    :param req: query object; its results are fetched with ``get_results``
    :param control: id of the control version
    :param experiments: ids of the experiment versions
    :param task: id of the tracker task to post the result to; nothing is
        posted when it is empty
    :param additional_data: dict of extra parameters; ``["args"]`` is read
        (``output_type``, ``chtracking``) and ``["platform"]`` is overwritten
        for every processed platform
    :return: tuple ``(comment, response of the posted comment or None,
        output files)``
    """
    if additional_data is None:
        additional_data = {}

    # 1. Fetch the query results and pivot them into a convenient shape.
    # The first two result sets are always the main and the event rows; the
    # optional ones are values and/or tracking rows.
    query_results = get_results(req)
    main_res, event_res = query_results[0], query_results[1]
    values_res = tracking_res = None
    result_count = len(query_results)
    if result_count == 4:
        values_res, tracking_res = query_results[2], query_results[3]
    elif result_count == 3:
        if additional_data["args"].chtracking:
            tracking_res = query_results[2]
        else:
            values_res = query_results[2]
    res, events, values_tags = wrap_buckets(
        main_res, event_res, values_res, tracking_res
    )

    # Stays None unless a comment is posted; callers most likely check the
    # returned value for None / not None.
    post_response = None
    attachments, output_files, comment = [], [], ""
    output_type = additional_data["args"].output_type
    as_html = output_type == "html"
    formatter = MetricFormatter(
        control=control,
        experiments=experiments,
        task=task,
        additional_data=additional_data,
    )

    # 2. One pass per platform (html output is rendered per platform).
    for platform in ("Desktop", "Android", "iOS", "iPad", "iPhone", "MacOS", "other"):
        additional_data["platform"] = platform
        # Control data for this platform; without it the platform is skipped.
        cnt = next(
            (
                x
                for x in res
                if x["Version"] == control and x["Platform"] == platform
            ),
            None,
        )
        if cnt is None:
            print(
                "data on version {} and platform {} is not present. skipping".format(
                    control, platform
                )
            )
            continue

        # Experiment data for this platform, ordered by version.
        exps = [
            x for x in res if x["Version"] != control and x["Platform"] == platform
        ]
        exps.sort(key=lambda x: x["Version"])

        # 3. Compute the p-value for every metric.
        metrics_groups = calc_metrics(
            cnt, exps, events, values_tags, additional_data=additional_data
        )
        formatter.add_metrics_groups(metrics_groups, replace=as_html)
        if as_html:
            comment, uploaded, rendered = formatter.generate_comment_and_attachments()
            attachments.extend(uploaded)
            output_files.extend(rendered)

    # json output is rendered once, after all platforms have been added.
    if output_type == "json":
        comment, attachments, output_files = (
            formatter.generate_comment_and_attachments()
        )
    if task:
        post_response, comment, output_files = formatter.post_comment(
            data=(comment, attachments, output_files)
        )
    return (comment, post_response, output_files)


# YQL prelude of the money query (make_money_map starts its result with it).
# It defines:
#   $pageimps_table - path of the page/imp reference table;
#   $page_imp       - one row per (pageid, impid), both cast to Int64, with
#                     SOME(video_type) taken for the pair;
#   $dsp_ids        - the distinct dspid values found in that table.
dspids_pageids_stub = """
$pageimps_table = "//home/bs/users/yabs-analytics/video/pageimp_ids_video_with_inapp";
$page_imp = (
    SELECT CAST(pageid as Int64) as pageid, CAST(impid as Int64) as impid, SOME(video_type) as video_type
    FROM $pageimps_table
    group by pageid, impid
);
$dsp_ids = (SELECT DISTINCT dspid FROM $pageimps_table);
"""


# YQL template for money data taken from the hourly ("1h") log ranges.
# make_money_map fills the placeholders through apply_replacements:
#   @rtb_date_from / @rtb_date_to - bounds of the bs-rtb-log range;
#   @dsp_date_from / @dsp_date_to - bounds of the bs-dsp-checked-log range
#                                   (and of the chtracking/chevent ranges);
#   @769         - number of the CGI parameter whose value must be 64
#                  characters long for a row to be kept;
#   @adsdkfilter - extra WHERE condition (make_money_map currently passes "").
# The template defines $rtb_map, $dsp_map and their join $dsp_rtb_joined,
# where the price is additionally split into one price_<type> column per ad
# type.  Rows flagged as fraud (dspfraudbits/dspeventflags != "0") get a price
# and partner price of 0.0.
# It references $checkTs, $page_imp and $dsp_ids, which are not defined here;
# $page_imp/$dsp_ids come from dspids_pageids_stub, $checkTs must be defined
# elsewhere in the final query.
# The /*chtracking ... chtracking*/ sections are plain SQL comments as
# written; presumably the markers are stripped elsewhere to enable them -
# TODO confirm.
fast_money_map = """
$rtb_map = (
    SELECT
        CAST(bidreqid As Int64) as bidreqid,
        Url::GetCGIParam("?" || queryargs, "769") as VSID,
        Url::GetCGIParam("?" || queryargs, "864") as SID,
        Url::GetCGIParam("?" || queryargs, "782") as adsid,
        Url::GetCGIParam("?" || queryargs, "757") as adsdkver
    FROM RANGE(
        `logs/bs-rtb-log/1h`, `@rtb_date_from`, `@rtb_date_to`
    )
    WHERE Length(Url::GetCGIParam("?" || queryargs, "@769")) == 64
    and $checkTs(cast(unixtime as Uint32))
    @adsdkfilter
);
$dsp_map = (
    SELECT
        CAST(bidreqid As Int64) AS bidreqid,
        CAST(devicetype AS Int64) AS devicetype,
        /*chtracking
        cast(bidid as Uint64) as bidid,
        cast(position as Uint64) as position,
        chtracking*/
        detaileddevicetype,
        CAST(dspid AS Int64) AS dspid,
        CAST(pageid AS Int64) AS pageid,
        CAST(impid AS Int64) AS impid,
        dspfraudbits != "0" or dspeventflags != "0" as is_fraud,
        IF(
            dspfraudbits != "0" or dspeventflags != "0",
            0.0,
            CAST(price as Double)
        ) as price,
        IF(
            dspfraudbits != "0" or dspeventflags != "0",
            0.0,
            CAST(partnerprice as Double)
        ) as partner_price,
        producttype
    FROM RANGE(
        `logs/bs-dsp-checked-log/1h`, `@dsp_date_from`, `@dsp_date_to`
    )
    WHERE countertype == "1"
    and not ListHas(AsList("5", "10"), dspid ?? "")
    and $checkTs(cast(unixtime as Uint32))
);
/*chtracking
$chtracking_fast_0 = (
    select
        cast(hitlogid as Uint64) as hitlogid,
        cast(position as Uint64) as position,
        `action`,
    from range(`//logs/bs-chtracking-log/1h`, `@dsp_date_from`, `@dsp_date_to`)
    where fraudbits = "0"
    and $checkTs(cast(unixtime as Uint32))
);
$chtracking_fast_1 = (
    select
        hitlogid,
        position,
        `action`,
        count(*) as `count`
    from $chtracking_fast_0
    group by hitlogid, position, `action`
);
$chtracking_fast = (
    select
        hitlogid,
        position,
        AGGREGATE_LIST(AsTuple(`action`, `count`)) as chtracking_events
    from $chtracking_fast_1
    group by hitlogid, position
);
$chevent_fast_0 = (
    select
        cast(hitlogid as Uint64) as hitlogid,
        cast(position as Uint64) as position,
    from range(`//logs/bs-chevent-log/1h`, `@dsp_date_from`, `@dsp_date_to`)
    where fraudbits = "0" and countertype = "2" and $checkTs(cast(unixtime as Uint32))
);
$chevent_fast = (
    select
        hitlogid, position, [AsTuple("click", count(*))] as chevent_events
    from $chevent_fast_0
    group by hitlogid, position
);
chtracking*/
$dsp_rtb_joined = (
    SELECT
        VSID,
        SID,
        dsp.bidreqid as bidreqid,
        adsid,
        adsdkver,
        devicetype,
        detaileddevicetype,
        price,
        /*chtracking
        ListExtend(
            chtracking_events ?? ListCreate(Tuple<String?, Uint64>),
            chevent_events ?? ListCreate(Tuple<String?, Uint64>),
        ) as direct_events,
        chtracking*/
        IF(video_type == "preroll", price, 0) as price_preroll,
        IF(video_type == "midroll", price, 0) as price_midroll,
        IF(video_type == "fullscreen", price, 0) as price_fullscreen,
        IF(video_type == "in_app", price, 0) as price_in_app,
        IF(video_type == "inpage", price, 0) as price_inpage,
        IF(video_type == "interstitial", price, 0) as price_interstitial,
        IF(video_type == "pauseroll", price, 0) as price_pauseroll,
        IF(video_type == "postroll", price, 0) as price_postroll,
        IF(video_type == "overlay", price, 0) as price_overlay,
        IF(producttype like '%motion%', price, 0) as price_motion,
        IF(dspid!=1, price, 0) as price_interactive_viewer,
        partner_price,
        IF(is_fraud, 1, 0) as is_fraud
    FROM $dsp_map as dsp
    INNER JOIN ANY $rtb_map as rtb
    ON (rtb.bidreqid == dsp.bidreqid)
    LEFT SEMI JOIN $dsp_ids as di
    ON (dsp.dspid == di.dspid)
    LEFT SEMI JOIN $page_imp as pi
    ON (dsp.pageid == pi.pageid)
    LEFT JOIN $page_imp as pi2
    ON (dsp.pageid == pi2.pageid and dsp.impid == pi2.impid)
    /*chtracking
    LEFT JOIN ANY $chtracking_fast as cf
    ON (dsp.bidid = cf.hitlogid and dsp.position = cf.position)
    LEFT JOIN ANY $chevent_fast as cef
    ON (dsp.bidid = cef.hitlogid and dsp.position = cef.position)
    chtracking*/
);
"""

# YQL template for money data taken from the daily ("1d") cooked DSP log.
# make_money_map fills the placeholders through apply_replacements:
#   @date_from / @date_to - bounds of the log range;
#   @VSID        - column selected as the session id in $rtb_dsp_filtered;
#   @769         - number of the CGI parameter whose value must be 64
#                  characters long for a row to be kept;
#   @adsdkfilter - extra WHERE condition (make_money_map currently passes "").
# The template defines $rtb_dsp_map and the filtered/joined $rtb_dsp_filtered,
# where the price is additionally split into one price_<type> column per ad
# type.  Rows flagged as fraud (dspfraudbits/dspeventflags != 0) get a price
# and partner price of 0.0.
# It references $checkTs, $page_imp and $dsp_ids, which are not defined here;
# $page_imp/$dsp_ids come from dspids_pageids_stub, $checkTs must be defined
# elsewhere in the final query.
# The /*chtracking ... chtracking*/ sections are plain SQL comments as
# written; presumably the markers are stripped elsewhere to enable them -
# TODO confirm.
daily_money_map = """
$rtb_dsp_map = (
    SELECT
        dspid,
        bidreqid,
        impid,
        pageid,
        devicetype,
        detaileddevicetype,
        /*chtracking
        bidid,
        position,
        chtracking*/
        dspfraudbits != 0 or dspeventflags != 0 as is_fraud,
        Url::GetCGIParam("?" || queryargs, "769") as VSID,
        Url::GetCGIParam("?" || queryargs, "864") as SID,
        Url::GetCGIParam("?" || queryargs, "782") as adsid,
        Url::GetCGIParam("?" || queryargs, "757") as adsdkver,
        producttype,
        IF(
            dspfraudbits != 0 or dspeventflags != 0,
            0.0,
            CAST(price as Double)
        ) as price,
        IF(
            dspfraudbits != 0 or dspeventflags != 0,
            0.0,
            CAST(partnerprice as Double)
        ) as partner_price,
    FROM RANGE(
        `//statbox/cooked_logs/bs-dsp-cooked-log/v1/1d`,
        `@date_from`, `@date_to`
    )
    WHERE countertype == 1
    and $checkTs(cast(unixtime as Uint32))
    and Length(Url::GetCGIParam("?" || queryargs, "@769")) == 64
    @adsdkfilter
    and not ListHas(AsList(5, 10), CAST(dspid ?? -22 AS Int32))
);
/*chtracking
$chtracking_daily_0 = (
    select
        cast(hitlogid as Uint64) as hitlogid,
        cast(position as Uint64) as position,
        `action`,
    from range(`//logs/bs-chtracking-log/1d`, `@date_from`, `@date_to`)
    where fraudbits = "0" and $checkTs(cast(unixtime as Uint32))
);
$chtracking_daily_1 = (
    select
        hitlogid,
        position,
        `action`,
        count(*) as `count`
    from $chtracking_daily_0
    group by hitlogid, position, `action`
);
$chtracking_daily = (
    select
        hitlogid,
        position,
        AGGREGATE_LIST(AsTuple(`action`, `count`)) as chtracking_events
    from $chtracking_daily_1
    group by hitlogid, position
);
$chevent_daily_0 = (
    select
        cast(hitlogid as Uint64) as hitlogid,
        cast(position as Uint64) as position,
    from range(`//cooked_logs/bs-chevent-cooked-log/1d`, `@date_from`, `@date_to`)
    where fraudbits = 0 and countertype = 2 and $checkTs(cast(unixtime as Uint32))
);
$chevent_daily = (
    select
        hitlogid, position, [AsTuple("click", count(*))] as chevent_events
    from $chevent_daily_0
    group by hitlogid, position
);
chtracking*/
$rtb_dsp_filtered = (
    SELECT
        @VSID,
        adsid,
        bidreqid,
        adsdkver,
        devicetype,
        detaileddevicetype,
        price,
        /*chtracking
        ListExtend(
            chtracking_events ?? ListCreate(Tuple<String?, Uint64>),
            chevent_events ?? ListCreate(Tuple<String?, Uint64>),
        ) as direct_events,
        chtracking*/
        IF(is_fraud, 1, 0) as is_fraud,
        IF(video_type == "preroll", price, 0) as price_preroll,
        IF(video_type == "midroll", price, 0) as price_midroll,
        IF(video_type == "fullscreen", price, 0) as price_fullscreen,
        IF(video_type == "in_app", price, 0) as price_in_app,
        IF(video_type == "inpage", price, 0) as price_inpage,
        IF(video_type == "interstitial", price, 0) as price_interstitial,
        IF(video_type == "pauseroll", price, 0) as price_pauseroll,
        IF(video_type == "postroll", price, 0) as price_postroll,
        IF(video_type == "overlay", price, 0) as price_overlay,
        IF(producttype like '%motion%', price, 0) as price_motion,
        IF(dspid!=1, price, 0) as price_interactive_viewer,
        partner_price
    FROM $rtb_dsp_map as rdmap
    LEFT SEMI JOIN $dsp_ids as di
    ON (rdmap.dspid == di.dspid)
    LEFT SEMI JOIN $page_imp as pi
    ON (rdmap.pageid == pi.pageid)
    LEFT JOIN $page_imp as page_imp
    ON (rdmap.pageid == page_imp.pageid and rdmap.impid == page_imp.impid)
    /*chtracking
    LEFT JOIN ANY $chtracking_daily as cd
    ON (rdmap.bidid = cd.hitlogid and rdmap.position = cd.position)
    LEFT JOIN ANY $chevent_daily as ced
    ON (rdmap.bidid = ced.hitlogid and rdmap.position = ced.position)
    chtracking*/
);
"""


# YQL template that aggregates the money rows into $money_reduced.
# Placeholders (filled through apply_replacements in make_money_map):
#   @money_map_source - SELECT over $dsp_rtb_joined and/or $rtb_dsp_filtered;
#   @VSID_sel         - expression selected as the group id (also used as the
#                       fraud_vsid value when the group has fraud events);
#   @VSID_gr          - GROUP BY expression.
#   NOTE(review): the values substituted for @VSID_sel/@VSID_gr are not
#   visible next to this template - confirm against make_money_map.
# Per group it yields: the number of distinct adsids, MAX of the device type
# columns, impressions (rows with is_fraud == 0), fraud_events, the summed
# price and partner price, and per ad type an impressions_<type> count and a
# summed price_<type>.
# The /*chtracking ... chtracking*/ section is a plain SQL comment as
# written; presumably the markers are stripped elsewhere to enable it -
# TODO confirm.
money_reduce = """
$money_map_source = (@money_map_source);
$AG_FL = AggregateFlatten(AggregationFactory("AGGREGATE_LIST"));
$money_reduced = (
    SELECT
        @VSID_sel,
        COUNT(DISTINCT adsid) as adsids,
        MAX(devicetype) as devicetype,
        MAX(detaileddevicetype) as detaileddevicetype,
        IF((SUM(is_fraud) ?? 0) > 0, @VSID_sel, null) as fraud_vsid,
        COUNT_IF(is_fraud == 0) as impressions,
        SUM(is_fraud) as fraud_events,
        SUM(price) as price,
        /*chtracking
        AggregateBy(direct_events, $AG_FL) as direct_events,
        chtracking*/
        COUNT_IF(is_fraud = 0 and price_preroll > 0) as impressions_preroll,
        COUNT_IF(is_fraud = 0 and price_midroll > 0) as impressions_midroll,
        COUNT_IF(is_fraud = 0 and price_fullscreen > 0) as impressions_fullscreen,
        COUNT_IF(is_fraud = 0 and price_in_app > 0) as impressions_in_app,
        COUNT_IF(is_fraud = 0 and price_inpage > 0) as impressions_inpage,
        COUNT_IF(is_fraud = 0 and price_interstitial > 0) as impressions_interstitial,
        COUNT_IF(is_fraud = 0 and price_pauseroll > 0) as impressions_pauseroll,
        COUNT_IF(is_fraud = 0 and price_postroll > 0) as impressions_postroll,
        COUNT_IF(is_fraud = 0 and price_overlay > 0) as impressions_overlay,
        COUNT_IF(is_fraud = 0 and price_motion > 0) as impressions_motion,
        COUNT_IF(is_fraud = 0 and price_interactive_viewer > 0) as impressions_interactive_viewer,
        SUM(price_preroll) as price_preroll,
        SUM(price_midroll) as price_midroll,
        SUM(price_fullscreen) as price_fullscreen,
        SUM(price_in_app) as price_in_app,
        SUM(price_inpage) as price_inpage,
        SUM(price_interstitial) as price_interstitial,
        SUM(price_pauseroll) as price_pauseroll,
        SUM(price_postroll) as price_postroll,
        SUM(price_overlay) as price_overlay,
        SUM(price_motion) as price_motion,
        SUM(price_interactive_viewer) as price_interactive_viewer,
        SUM(partner_price) as partner_price
    FROM $money_map_source
    GROUP BY @VSID_gr
);
"""


def make_money_map(money_fasts, money_dailies, args):
    """Build the part of the YQL query that prepares and reduces money logs.

    The result starts with ``dspids_pageids_stub``, followed by the fast
    and/or daily money map templates (only for non-empty table lists) and
    ends with the ``money_reduce`` template reading from whichever of the
    two sources were included.

    Parameters
    ----------
    money_fasts : list of str
        Fast money tables; the last ``/``-separated component of the first
        and of the last entry is substituted as the date range bounds.
    money_dailies : list of str
        Daily money tables, used in the same way.
    args : argparse.Namespace
        Command-line arguments. Not read at the moment because the ad SDK
        filter is disabled (see the TODO in the body).

    Returns
    -------
    str
        The money-related fragment of the YQL query.

    Raises
    ------
    Exception
        If both ``money_fasts`` and ``money_dailies`` are empty.
    """
    session_id = "VSID"
    session_id_qa = "769"
    # The service-dependent ad SDK filter is switched off for now:
    # if args.service == "VAS":
    #     adsdkfilter = 'and (Url::GetCGIParam("?" || queryargs, "757")?? "") in ListExtend(AsList($CONTROL_VERSION),String::SplitToList($EXPERIMENT_VERSIONS, ","))'
    # else:
    #     adsdkfilter = ""
    adsdkfilter = ""  # TODO: BSDEV-80021

    def table_name(path):
        # Last path component of a table path.
        return path.split("/")[-1]

    query = dspids_pageids_stub
    # One SELECT per money source that actually made it into the query.
    sources = []

    if money_fasts:
        fast_from = table_name(money_fasts[0])
        fast_to = table_name(money_fasts[-1])
        query = query + apply_replacements(
            fast_money_map,
            {
                "@rtb_date_from": fast_from,
                "@dsp_date_from": fast_from,
                "@rtb_date_to": fast_to,
                "@dsp_date_to": fast_to,
                "@VSID": session_id,
                "@bucket": session_id,
                "@769": session_id_qa,
                "@adsdkfilter": adsdkfilter,
            },
        )
        sources.append("SELECT * FROM $dsp_rtb_joined")

    if money_dailies:
        query = query + apply_replacements(
            daily_money_map,
            {
                "@date_from": table_name(money_dailies[0]),
                "@date_to": table_name(money_dailies[-1]),
                "@VSID": session_id,
                "@769": session_id_qa,
                "@adsdkfilter": adsdkfilter,
            },
        )
        sources.append("SELECT * FROM $rtb_dsp_filtered")

    if not sources:
        raise Exception("no money tables")

    # With both sources present they are combined with UNION ALL.
    money_map_source = " \nUNION ALL ".join(sources)

    return query + apply_replacements(
        money_reduce,
        {
            "@money_map_source": money_map_source,
            "@VSID_sel": session_id,
            "@VSID_gr": session_id,
            "@bucket": session_id,
        },
    )


def get_version(args, gogol, before_parsing=False):
    """Return the YQL expression that yields the version of a log row.

    Parameters
    ----------
    args : object with a ``mode`` attribute
        Selects which expression is produced.
    gogol : bool
        Truthy selects the lower-case column names (``data``/``version``),
        falsy the capitalised ones (``Data``/``Version``).
    before_parsing : bool
        When true the data column is wrapped in ``Yson::ParseJson(...)``.

    Returns
    -------
    str
        For the modes "testids_each_row", "adsdk_content_player_version"
        and "adsdk_pcode_version" an expression extracting the value from
        the data column; otherwise the plain version column name.
    """
    source = "data" if gogol else "Data"
    if before_parsing:
        source = "Yson::ParseJson({})".format(source)

    # Modes whose version is extracted from the data column.
    data_based_modes = (
        ("testids_each_row", '$getTestid(Yson::YPathString({}, "/testIds")).0'),
        (
            "adsdk_content_player_version",
            'Yson::YPathString({}, "/contentPlayerVersion")',
        ),
        ("adsdk_pcode_version", 'Yson::YPathString({}, "/pcodeVersion")'),
    )
    for mode_name, template in data_based_modes:
        if args.mode == mode_name:
            return template.format(source)

    # Every other mode reads the dedicated version column.
    return "version" if gogol else "Version"


def get_bucket(args, gogol):
    """Return the YQL select item that produces the ``Bucket`` column.

    In "testids_each_row" mode the bucket is the second element of
    ``$getTestid`` applied to the "/testIds" path of the data column;
    in every other mode it is ``$getBucket`` of the session id column
    (``vsid`` when ``gogol`` is truthy, ``SID`` otherwise).
    """
    from_testids = args.mode == "testids_each_row"
    if from_testids:
        column = "data" if gogol else "Data"
        template = '$getTestid(Yson::YPathString({}, "/testIds")).1 as Bucket'
        return template.format(column)

    id_column = "vsid" if gogol else "SID"
    return "$getBucket({}) as Bucket".format(id_column)


def get_fake_bucket(_, gogol):
    """Return the YQL select item that produces the ``FakeBucket`` column.

    The expression is ``$getBucket`` over the first non-null of several
    ``$w(...)``-wrapped identifier columns; the list of columns depends on
    ``gogol``. The first argument is ignored.
    """
    if gogol:
        id_columns = ("puid", "device_id", "xYandexICookie", "yandexuid", "vsid")
    else:
        id_columns = ("puid", "Yandexuid", "SID")
    # Identifiers are tried left to right via the ?? (coalesce) operator.
    fallbacks = " ?? ".join("$w({})".format(name) for name in id_columns)
    return "$getBucket({}) as FakeBucket".format(fallbacks)


def generate_query(args, fasts, dailies, money_fasts, money_dailies):
    """Code-generate the full YQL query from the module's templates.

    Parameters
    ----------
    args : argparse.Namespace
        Arguments parsed in ``main``.
    fasts : list of str
        Fast event tables.
    dailies : list of str
        Daily event tables.
    money_fasts : list of str
        Fast money tables.
    money_dailies : list of str
        Daily money tables.

    Returns
    -------
    str
        The generated YQL query.

    Raises
    ------
    Exception
        If both ``dailies`` and ``fasts`` are empty.
    ValueError
        If ``args.tmp_tables_root`` is set and ``args.service`` is not one
        of "StreamPlayer", "AndroidPlayer" or "VAS".
    """
    # The log flavour is detected from the path of the first event table:
    # a path containing "gogol" selects the lower-case column names.
    try:
        gogol = "gogol" in (dailies + fasts)[0]
    except IndexError:
        raise Exception("no gogol/jstracer tables")

    if gogol:
        fields_mapping = {
            "@Data": "data",
            "@Service": "service",
            "@EventName": "eventName",
            "@Version": get_version(args, gogol, before_parsing=True),
            "@UserAgent": "userAgent",
        }
    else:
        fields_mapping = {
            "@Data": "Data",
            "@Service": "Service",
            "@EventName": "EventName",
            "@Version": get_version(args, gogol, before_parsing=True),
            "@UserAgent": "UserAgent",
        }

    thequery = (
        apply_replacements(
            prefix,
            {
                "@pool": args.pool,
                "@service": args.service,
                "@control_version": args.control,
                "@experiment_versions": args.experiment,
                "@from": getattr(args, "from").split("+")[0],
                "@to": getattr(args, "to").split("+")[0],
            },
        )
        + avglogs_import
        + prefix2
        + get_eventname
        + get_versions
    )
    if args.no_cache:
        thequery += '\npragma yt.QueryCacheMode = "disable";\n'
    if args.sensors:
        thequery = thequery.replace(" --sensors", sensors_fragment)

    # All the tables the events will be read from.
    source = make_source(fields_mapping, fasts, dailies, args)

    thequery += source

    # Take all the events in a fixed format.
    map1_ = apply_replacements(
        map1,
        [
            (
                "@[ADSDK]",
                (
                    'vsid as SID,'
                    if args.service == "VAS"
                    else ""
                ),
            ),
            (
                "@[VSIDNOTNULL]",
                (
                    "and m.VSID is not null and LENGTH(m.VSID) == 64 and $diffchars(unwrap(m.VSID)) >= 5"
                    if args.service in ("StreamPlayer", "AndroidPlayer")
                    else ""
                ),
            ),
            (
                "@[fake_bucket_fallback]",
                "" if args.disable_fake_buckets else "?? m.FakeBucket",
            ),
            (
                "@[fake_bucket_fallback_s]",
                "" if args.disable_fake_buckets else "?? s.FakeBucket",
            ),
        ],
    )
    # Only the date part (before "T") of the period bounds is used here.
    from_ = getattr(args, "from").split("T")[0]
    to_ = getattr(args, "to").split("T")[0]
    map1_ = apply_replacements(
        map1_,
        [
            ("@[date_from]", from_),
            ("@[date_to]", to_),
        ],
    )
    # Uncomment the template sections wrapped in /*<mode> ... <mode>*/.
    map1_ = apply_replacements(
        map1_,
        [
            ("/*{}".format(args.mode), ""),
            ("{}*/".format(args.mode), ""),
        ],
    )
    if args.testids_column == "data":
        map1_ = apply_replacements(
            map1_,
            [
                (
                    "@[testids_source]",
                    'Yson::YPathString(Yson::ParseJson(data), "/testIds")',
                ),
            ],
        )
    else:
        map1_ = apply_replacements(
            map1_,
            [
                ("@[testids_source]", args.testids_column),
            ],
        )
    if args.mode in ("sessions", "testids_self_join"):
        map1_ = apply_replacements(
            map1_,
            [
                ("/*with_testids_join", ""),
                ("with_testids_join*/", ""),
            ],
        )
    else:
        map1_ = apply_replacements(
            map1_,
            [
                ("/*without_testids_join", ""),
                ("without_testids_join*/", ""),
            ],
        )
    map1_ = apply_replacements(
        map1_,
        [
            (
                "@[adsdk_totalize]",
                adsdk_totalize if args.service == "VAS" else "",
            ),
            (
                "@[map_events]",
                "select * from (process $map_events_ using $totalizer(TableRow()))"
                if args.service == "VAS"
                else "select * from $map_events_",
            ),
        ],
    )
    if args.sensors:
        map1_ = map1_.replace("/*sensors", "").replace("sensors*/", "")
    # Extract one more column for the player.
    map1_ = apply_replacements(
        map1_, [("@[parsePlayerState]", parse_player_state if gogol else "")]
    )
    map1_ = apply_replacements(
        map1_,
        [
            ("@[vsid]", "vsid" if gogol else "VSID"),
            (
                "@[clientTimestamp]",
                "clientTimestamp" if gogol else "ClientTimestamp",
            ),
            ("@[version]", get_version(args, gogol)),
            ("@[bucket]", get_bucket(args, gogol)),
            ("@[fake_bucket]", get_fake_bucket(args, gogol)),
            ("@[eventName]", "eventName" if gogol else "EventName"),
            ("@[eventType]", "eventType" if gogol else "EventType"),
            ("@[data]", "data" if gogol else "Data"),
            ("@[userAgent]", "userAgent" if gogol else "UserAgent"),
            ("@[service]", "service" if gogol else "Service"),
        ],
    )
    thequery += map1_
    # At this point we have only filtered the event logs: from the data
    # column we kept only what we are interested in.

    # Next we start working with the money logs.
    if args.service != "VAS":
        thequery += make_money_map(money_fasts, money_dailies, args)
    # At this point we have only filtered the money logs: we kept only
    # what we are interested in. Everything is grouped by VSID.

    # Pivot over events.
    map2_ = make_map2(args)
    thequery += map2_

    # Reduce over events + join with money.
    reduce1_ = make_reduce1(args)
    thequery += reduce1_
    if args.sensors:
        thequery = thequery.replace("/*sensors", "").replace("sensors*/", "")

    # Group by platform and buckets.
    events_for_metrics_ = make_events_for_metrics(args)
    thequery += events_for_metrics_

    if args.tmp_tables_root:
        if args.service in ("StreamPlayer", "AndroidPlayer"):
            reduce_events = "$reduce_events_joined"
        elif args.service == "VAS":
            reduce_events = "$reduce_events"
        else:
            # Fail with a clear message instead of an UnboundLocalError on
            # ``reduce_events`` a few lines below.
            raise ValueError(
                "tmp tables are not supported for service {}".format(args.service)
            )
        tmp_table = "{}/{}_{}_{}_{}".format(
            args.tmp_tables_root,
            args.control,
            args.experiment,
            getattr(args, "from"),
            getattr(args, "to"),
        )
        # Replace every character other than letters, digits, "_", "-"
        # and "/" with "_".
        tmp_table = re.sub("[^a-zA-Z_0-9-/]", "_", tmp_table)
        thequery += apply_replacements(
            tmp_tables_insertion_postfix,
            {"@tmp_table": tmp_table, "@reduce_events": reduce_events},
        )
    # Uncomment the template sections wrapped in /*<service> ... <service>*/.
    thequery = apply_replacements(
        thequery,
        {"/*{}".format(args.service): "", "{}*/".format(args.service): ""},
    )
    if args.service == "AndroidPlayer":
        # AndroidPlayer additionally enables the StreamPlayer sections.
        thequery = apply_replacements(
            thequery,
            {
                "/*StreamPlayer": "",
                "StreamPlayer*/": "",
            },
        )

    if args.chtracking:
        thequery = thequery.replace("/*chtracking", "").replace("chtracking*/", "")

    return thequery


def ensure_utf8(s):
    """Return ``s`` as text that is guaranteed to be valid UTF-8.

    ``bytes`` input is decoded as UTF-8. Any other input is first encoded
    to UTF-8 with the "surrogateescape" handler and then decoded the same
    way. Undecodable bytes are replaced during decoding.
    """
    if isinstance(s, bytes):
        raw = s
    else:
        raw = s.encode("utf8", errors="surrogateescape")
    return raw.decode("utf8", errors="replace")


def _parse_bool_flag(value):
    """Convert a command-line string into a bool.

    ``type=bool`` turns every non-empty string, including "False" and "0",
    into True. Here the empty string and the usual spellings of "false"
    give False; any other value gives True.
    """
    return value.strip().lower() not in ("", "0", "false", "f", "no", "n", "off")


def main():
    """Run the report: parse arguments, build and run the YQL query, save outputs.

    Steps, in order:

    1. Parse the command-line arguments and create the YT/YQL clients
       (the YQL token is read from the ``YQL_TOKEN`` environment variable).
    2. Collect the names of the event and money tables for the period.
    3. Build the query (or reuse cached results / explicit result tables),
       run it and wait for completion.
    4. Pass the finished request to ``process_results``.
    5. Write the text output to ``--output_file`` and the HTML to
       ``--output_html``.

    Raises
    ------
    Exception
        If the YQL request does not finish with status "COMPLETED".
    """
    ####################################################################################################################
    # 1. Preliminary work: arguments, dates, YQL setup.
    # 1.1 Read the arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--mode",
        choices=[
            "default",
            "sessions",
            "testids_each_row",
            "testids_self_join",
            "adsdk_content_player_version",
            "adsdk_pcode_version",
        ],
        default="default",
    )
    parser.add_argument("--disable-fake-buckets", action="store_true")
    parser.add_argument("--control")  # ID of the control build (a single one)
    parser.add_argument(
        "--experiment"
    )  # comma-separated IDs of exp (may be several) and RC (may be several)
    parser.add_argument(
        "--from"
    )  # start of the analysed period, YYYY-MM-DDTHH:MM:SS
    parser.add_argument("--to")  # end of the analysed period, YYYY-MM-DDTHH:MM:SS
    parser.add_argument("--task")  # ID of the tracker task to post the results to
    parser.add_argument("--memory_limit", default="1G")  # YQL settings
    parser.add_argument("--memory_reserve_factor", default="1.0")  # YQL settings
    parser.add_argument(
        "--old", action="store_true"
    )  # use jstracer-log for StreamPlayer statistics instead of gogol-log
    parser.add_argument("--service", default="StreamPlayer")  # StreamPlayer or VAS
    parser.add_argument("--proxy", default="hahn")  #
    parser.add_argument(
        "--output_file", default="output.json"
    )  # name of the Nirvana output file
    parser.add_argument(
        "--output_html", default="output.html"
    )  # name of the Nirvana output file
    parser.add_argument(
        "--title", default="Experiments"
    )  # seems to be unused
    parser.add_argument(
        "--tmp_tables_root"
    )  # override the root folder of the temporary tables
    parser.add_argument("--pool", default="search-research_pecheny")  # YT pool
    # Still takes a value (``--force_hourly VALUE``), but "False", "0" and
    # similar now really disable the flag.
    parser.add_argument("--force_hourly", type=_parse_bool_flag, default=False)
    parser.add_argument(
        "--force_events_query"
    )  # id of the YQL query for events, to take it from the cache
    parser.add_argument(
        "--force_results_query"
    )  # id of the YQL query for statistics, to take it from the cache
    parser.add_argument(
        "--force_results_tables"
    )  # list of tables to force reading the results from
    parser.add_argument(
        "--stddev",
        action="store_true"
        # output the standard deviation and other additional indicators
    )
    parser.add_argument(
        "--no-cache",
        action="store_true"
        # do not use the cache
    )
    parser.add_argument("--output_type", default="html", choices=["html", "json"])
    parser.add_argument("--chtracking", action="store_true")
    parser.add_argument("--testids_column", default="data")
    parser.add_argument("--sensors", action="store_true")
    args = parser.parse_args()
    # For VAS the "sessions" mode is replaced by "testids_each_row".
    if args.mode == "sessions" and args.service == "VAS":
        args.mode = "testids_each_row"

    # 1.2 Preliminary work to set up the YT/YQL environment
    cluster = get_cluster(clusters, args)
    client = YqlClient(db=args.proxy, token=os.environ["YQL_TOKEN"])
    yr = YqlRunner(client=client, title=TITLE)
    yt = get_driver(cluster).client

    start_ts = datetime.datetime.now()
    print("started at: {}".format(start_ts.strftime("%Y-%m-%dT%H:%M:%S")))

    # 1.3 Dates
    force_hourly = args.force_hourly
    # Everything after "+" in the bounds is dropped.
    from_ = getattr(args, "from").split("+")[0]
    to_ = getattr(args, "to").split("+")[0]
    if not to_ or to_ == "now":
        to_d = datetime.datetime.now(moscow)
        to_ = to_d.strftime(dtformat)
    print("from: {}".format(from_))
    print("to: {}".format(to_))

    ####################################################################################################################
    # 2. Preliminary work with the logs
    # 2.1 Event logs: collect the table names
    source_daily_root = (
        "//logs/jstracer-log/1d"
        if args.old
        else "//logs/strm-gogol-log/1d"
    )
    source_fast_root = (
        "//logs/jstracer-log/30min"
        if args.old
        else "//logs/strm-gogol-log/1h"
    )
    # If the force_hourly flag is set, take the hourly tables.
    # Take the table names that match the date range.
    # Sorting is needed to take the last date and to use the fast data only
    # after that date.
    if force_hourly:
        dailies = []
        fasts = []
        list_name_tables = all_tables(from_, to_, args.old)
        list_name_tables.sort()
        # Group the sorted names by their first 10 characters.
        day_tables_groups = [
            list(day_tables)
            for _day, day_tables in groupby(list_name_tables, key=lambda i: i[:10])
        ]
        for dtg in day_tables_groups:
            dailies_tmp = []
            fasts_tmp = []
            for d in dtg:
                if yt.exists(source_fast_root + '/' + d):
                    fasts_tmp.append(source_fast_root + '/' + d)
                else:
                    # A fast table of the group is missing: use the daily
                    # table instead and drop the group's fast tables.
                    dailies_tmp.append(source_daily_root + '/' + d.split('T')[0])
                    fasts_tmp = []
                    break
            dailies = dailies + dailies_tmp
            fasts = fasts + fasts_tmp
        print("daily jstracer tables: {}".format(dailies))
        print("fast jstracer tables: {}".format(fasts))

    else:
        dailies = sorted(
            yt.search(
                root=source_daily_root,
                path_filter=lambda x: daily_path_filter(x, from_, to_),
            )
        )
        print("daily jstracer tables: {}".format(dailies))

        # Names of the tables with the fast data.
        fasts = sorted(
            yt.search(
                root=source_fast_root,
                path_filter=lambda x: fast_path_filter(x, from_, to_, dailies),
            )
        )
        print("fast jstracer tables: {}".format(fasts))

    # 2.2 Money logs: collect the table names (none are used for VAS)
    if args.service == "VAS":
        money_dailies = []
    else:
        money_dailies = sorted(
            yt.search(
                root="//statbox/cooked_logs/bs-dsp-cooked-log/v1/1d",
                path_filter=lambda x: daily_path_filter(x, from_, to_),
            )
        )
    print("daily money tables: {}".format(money_dailies))

    # Names of the tables with the fast data.
    if args.service == "VAS":
        money_fasts = []
    else:
        money_fasts = sorted(
            yt.search(
                root="//logs/bs-dsp-checked-log/1h",
                path_filter=lambda x: fast_path_filter(x, from_, to_, money_dailies),
            )
        )
    print("fast money tables: {}".format(money_fasts))
    share_urls = []

    ####################################################################################################################
    # 4. Get the quantitative values of all the metrics
    # 4.1 Code generation of the YQL script that computes ONLY the quantitative
    # values of the metrics (event counters and money).
    if args.force_results_query:
        # Take the results from the cache when requested.
        thequery = "SHOW RESULTS {}".format(args.force_results_query)
        print("getting results from query {}".format(args.force_results_query))
    elif args.force_results_tables:
        tables = args.force_results_tables.split(",")
        thequery = "use hahn;\n"
        for t in tables:
            thequery += "select * from `{}`;\n".format(t)
    else:
        thequery = generate_query(args, fasts, dailies, money_fasts, money_dailies)
        # No cached query id exists in this branch, so do not print one.
        print("getting results from a newly generated query")
    # 4.2 Run the YQL script generated above
    req = yr.run(
        thequery,
        wait=False,
        attachments=[
            {
                "path": "analytics/videolog/strm-stats/strm_cube_2/stability/quality_report_avglog/quality_report_avglog_common.sql",
                "rev": 6774521,
            },
            {
                "path": "analytics/videolog/strm-stats/strm_cube_2/stability/quality_report_avglog/list_processor.py",
                "rev": 7183066,
            },
        ],
    )
    print("share url is {}".format(req.share_url))
    if req.share_url:
        share_urls.append(req.share_url)
    # Wait until the request finishes.
    req.wait_progress()
    if req.status != "COMPLETED":
        raise Exception("request {} has failed".format(req.share_url))

    ####################################################################################################################
    # 5. Compute p-values, generate the HTML and post the results to Startrek
    comment, req, htmls = process_results(
        req,
        args.control,
        args.experiment,
        task=args.task,
        additional_data=dict(
            share_urls=share_urls,
            start_ts=start_ts,
            from_=from_,
            to_=to_,
            dailies=dailies,
            fasts=fasts,
            money_dailies=money_dailies,
            money_fasts=money_fasts,
            args=args,
        ),
    )

    ####################################################################################################################
    # 6. Everything below relates to Nirvana: reports etc.
    # ``req`` here is the second value returned by process_results.
    output = "comment posted at: {}\n\n{}".format(
        "https://st.yandex-team.ru/{}#{}".format(
            args.task, req.json().get("longId", "ERROR") if req else None
        ),
        comment,
    )

    with codecs.open(args.output_file, "w", "utf8") as f:
        f.write(output)

    # ``htmls`` may be a list of fragments or a single string.
    if isinstance(htmls, list):
        html = "\n\n".join(htmls)
    else:
        html = htmls
    with codecs.open(args.output_html, "w", "utf8") as f:
        f.write(html)


# Run the report only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
