USE hahn;
pragma yt.Pool = '@[pool]';
-- pragma yt.DefaultOperationWeight = "100";
-- pragma yt.MaxJobCount = "99999";
pragma yt.DataSizePerJob = "256M";
pragma AnsiInForEmptyOrNullableItemsCollections;
PRAGMA LIBRARY('providers.sql');
IMPORT providers SYMBOLS $providers, $mobile_whitelist, $only_mobile;
-- PRAGMA LIBRARY('formulae.sql');
-- IMPORT formulae SYMBOLS @[formula];

-- Run parameters. The @[...] markers are presumably replaced by an external
-- templating step before the query is submitted — TODO confirm.
-- $date_from / $date_to bound the table-name range read from $map_root;
-- $date_to also names the output tables.
$date_from = "@[date_from]";
$date_to = "@[date_to]";
-- Directory holding the input tables read via RANGE.
$map_root = "@[map_root]";
-- Directories that receive the output tables.
$json_root = "@[json_root]";
$research_root = "@[research_root]";
-- Output table paths: each one lives under its root and is named after $date_to.
$date_suffix = "/" || $date_to;
$json_table = $json_root || $date_suffix;
$research_table = $research_root || $date_suffix;
-- Companion table holding the score distribution for the same date.
$research_table_distribution = $research_table || "_distribution";

-- Input for the research tables: all tables under $map_root whose names fall
-- between $date_from and $date_to.
DEFINE SUBQUERY $research_source() AS
    SELECT *
    FROM RANGE($map_root, $date_from, $date_to);
END DEFINE;

-- Input for the JSON export: RANGE over $map_root with $date_to as both the
-- lower and the upper bound.
DEFINE SUBQUERY $json_source() AS
    SELECT *
    FROM RANGE($map_root, $date_to, $date_to);
END DEFINE;

-- $formula = @[formula];
-- Scoring UDF: the Python function calc_score taken from formula.py, declared
-- as (Double?, Double?) -> Double?. It is called with (downlink, rtt) in
-- $aggregate_by_user.
$formula_script = FileContent("formula.py");
$formula = Python::calc_score(
    Callable<(Double?, Double?)->Double?>,
    $formula_script
);

-- Dictionary region_id -> leading part of that region's `parents` list, cut
-- off at the first element equal to 10001 (that element and everything after
-- it are dropped).
-- NOTE(review): 10001 is presumably a top-level node of the region tree — confirm.
$rft = (
    SELECT
        ToDict(
            AGGREGATE_LIST(
                AsTuple(
                    region_id,
                    ListTakeWhile(parents, ($parent) -> ($parent != 10001))
                )
            )
        )
    FROM `//home/videoquality/vh_analytics/mma-4376/regions_for_total_fix2`
);

-- Returns the list stored in $rft for the given region id, or an empty Int32
-- list when the id is not a key of $rft.
$regionsForTotal = ($region_id) -> {
    $is_known = DictContains($rft, $region_id);
    RETURN IF(
        $is_known,
        Unwrap($rft[$region_id]),
        ListCreate(Int32)
    );
};

-- Expands one row into a list of rows:
--   * one copy per region returned by $regionsForTotal for the row's
--     region_id (cast to Int32), with region_id replaced by that region;
--   * for each such copy, a second copy whose isp is replaced by "_total_".
-- When $regionsForTotal returns an empty list the result is empty as well.
$totalize = ($row) -> {
    $target_regions = $regionsForTotal(CAST($row.region_id AS Int32));
    $per_region = ListMap(
        $target_regions,
        ($region) -> (
            AddMember(RemoveMember($row, "region_id"), "region_id", $region)
        )
    );
    $all_providers = ListMap(
        $per_region,
        ($regional_row) -> (
            AddMember(RemoveMember($regional_row, "isp"), "isp", "_total_")
        )
    );
    RETURN ListUnionAll($per_region, $all_providers);
};

-- True when the parsed user agent is flagged neither as a tablet, nor as a TV,
-- nor as a robot.
$userAgentCheck = ($ua) -> {
    $info = UserAgent::Parse($ua);
    RETURN NOT ($info.isTablet OR $info.isTV OR $info.isRobot);
};

-- Collapses the raw rows of $source into one row per
-- (region_id, yandexuid, isp, is_mobile, geo_is_mobile, connection_type)
-- and keeps only the desktop and mobile segments defined below.
-- $source: parameterless subquery that yields the raw rows.
DEFINE SUBQUERY $aggregate_by_user($source) AS
    -- Per-request rows: only user agents accepted by $userAgentCheck and rows
    -- with a non-empty operator; each request is scored with $formula.
    $requests = (
        SELECT
            region_id,
            connection_type,
            UserAgent::Parse(user_agent).isMobile AS is_mobile,
            Geo::IsMobile(ip) AS geo_is_mobile,
            ip,
            operator AS isp,
            downlink,
            rtt,
            ect,
            yandexuid,
            $formula(downlink, rtt) AS req_score_2
        FROM $source()
        WHERE $userAgentCheck(user_agent)
            AND operator IS NOT NULL
            AND operator != ''
    );

    -- One row per grouping key: averages for the numeric metrics, the first
    -- MODE entry for ip and ect, plus the number of requests in the group.
    $per_user = (
        SELECT
            region_id,
            yandexuid,
            isp,
            is_mobile,
            geo_is_mobile,
            connection_type,
            MODE(ip)[0].Value AS ip,
            AVG(downlink) AS downlink,
            AVG(rtt) AS rtt,
            MODE(ect)[0].Value AS ect,
            AVG(req_score_2) AS req_score_2,
            MEDIAN(req_score_2) AS req_score_2_median,
            COUNT(*) AS requests
        FROM $requests
        GROUP BY
            region_id,
            yandexuid,
            isp,
            is_mobile,
            geo_is_mobile,
            connection_type
    );

    -- Desktop segment: both mobile flags are false, the operator is a key of
    -- $providers and is not listed in $only_mobile.
    $desktop = (
        SELECT *
        FROM $per_user
        WHERE is_mobile == false
            AND geo_is_mobile == false
            AND DictContains($providers, isp)
            AND isp NOT IN $only_mobile
    );

    -- Mobile segment: both mobile flags are true, the connection type is
    -- 'CONN_CELL', the operator is a key of $providers and is in $mobile_whitelist.
    $mobile = (
        SELECT *
        FROM $per_user
        WHERE is_mobile == true
            AND geo_is_mobile == true
            AND connection_type == 'CONN_CELL'
            AND DictContains($providers, isp)
            AND isp IN $mobile_whitelist
    );

    $segments = (
        SELECT * FROM $desktop
        UNION ALL
        SELECT * FROM $mobile
    );

    SELECT * FROM $segments;
END DEFINE;

-- Builds the report rows per (region_id, provider, is_mobile) from $source.
-- $source: parameterless subquery that yields the raw rows; it is passed on
-- to $aggregate_by_user.
DEFINE SUBQUERY $aggregate_data($source) AS
    $user_rows = SELECT * FROM $aggregate_by_user($source);

    -- Fan each row out through $totalize (target regions plus "_total_" isp copies).
    $expanded = PROCESS $user_rows USING $totalize(TableRow());

    -- Aggregates per group. Every score is rounded to one decimal place and
    -- floored at 0.0; p2g/p3g/p4g are the percentage of rows in the group whose
    -- ect equals '2g'/'3g'/'4g'. The isp key is the $providers lookup of the
    -- raw isp, except for "_total_" rows, which keep "_total_".
    -- score_30 and score_30_median_median are computed by the same expression.
    $grouped = (
        SELECT
            region_id,
            isp,
            is_mobile,
            MAX_OF(0.0, Math::Round(MEDIAN(req_score_2_median) * 10.0) / 10.0) AS score_30,
            MAX_OF(0.0, Math::Round(MEDIAN(req_score_2) * 10.0) / 10.0) AS score_30_avg_median,
            MAX_OF(0.0, Math::Round(AVG(req_score_2) * 10.0) / 10.0) AS score_30_avg_avg,
            MAX_OF(0.0, Math::Round(AVG(req_score_2_median) * 10.0) / 10.0) AS score_30_median_avg,
            MAX_OF(0.0, Math::Round(MEDIAN(req_score_2_median) * 10.0) / 10.0) AS score_30_median_median,
            SUM(IF(ect == '2g', 100.0, 0.0)) / SUM(1.0) AS p2g,
            SUM(IF(ect == '3g', 100.0, 0.0)) / SUM(1.0) AS p3g,
            SUM(IF(ect == '4g', 100.0, 0.0)) / SUM(1.0) AS p4g,
            SUM(requests) AS requests,
            COUNT(*) AS users
        FROM $expanded AS t
        GROUP BY
            t.region_id AS region_id,
            IF(isp == "_total_", isp, $providers[t.isp]) AS isp,
            t.is_mobile AS is_mobile
    );

    -- Final shape: the aggregates plus the report date ($date_to) as fielddate.
    SELECT
        region_id,
        $date_to AS fielddate,
        isp,
        is_mobile,
        requests,
        users,
        score_30,
        score_30_avg_median,
        score_30_avg_avg,
        score_30_median_avg,
        score_30_median_median,
        p2g,
        p3g,
        p4g
    FROM $grouped;
END DEFINE;

-- JSON export: report rows computed over $json_source, restricted to groups
-- with more than 10 user rows; replaces the previous contents of $json_table.
INSERT INTO $json_table WITH TRUNCATE
SELECT *
FROM $aggregate_data($json_source)
WHERE users > 10
ORDER BY
    region_id,
    fielddate,
    is_mobile,
    users DESC;

-- Research table: the same report computed over $research_source, restricted
-- to groups with more than 10 user rows; replaces the previous contents of
-- $research_table.
INSERT INTO $research_table WITH TRUNCATE
SELECT *
FROM $aggregate_data($research_source)
WHERE users > 10
ORDER BY
    region_id,
    fielddate,
    is_mobile,
    users DESC;


-- Score distribution over $research_source: number of $aggregate_by_user rows
-- per (is_mobile, score_round), where score_round is req_score_2_median passed
-- through Math::Round with -1 as the second argument.
INSERT INTO $research_table_distribution WITH TRUNCATE
SELECT
    is_mobile,
    score_round,
    COUNT(*) AS users
FROM $aggregate_by_user($research_source)
GROUP BY
    is_mobile,
    Math::Round(req_score_2_median, -1) AS score_round
ORDER BY
    is_mobile,
    score_round;
