use hahn;
pragma yt.Pool = "@[pool]";
pragma yson.DisableStrict;
pragma yt.DataSizePerJob = "512M";
PRAGMA AnsiInForEmptyOrNullableItemsCollections;
pragma library("stability_common.sql");
import stability_common symbols $remappedWrapRefFrom, $shiftDate;
pragma config.flags("LLVM", "OFF");
PRAGMA Library("yandex_shows.sql");
IMPORT yandex_shows SYMBOLS $is_yandex_show, $is_good_session;
PRAGMA Library("vh_cube_2_common.sql");
IMPORT vh_cube_2_common SYMBOLS $getChannelsList, $getChannel;
PRAGMA File(
    "content_ids_mapping.tsv",
    'https://proxy.sandbox.yandex-team.ru/last/VH_TO_UGC_BLOGGERS_MAPPING?attrs={"released":"stable"}&salt=1593700687.52'
);

-- Report period bounds. The "@[...]" placeholders are presumably substituted by the
-- launcher before the query is executed (TODO confirm).
$date_from = "@[date_from]";
-- Start of the lookback window for the prism tables: $date_from shifted by -7 with the
-- imported $shiftDate helper (presumably 7 days back — confirm the unit in stability_common.sql).
$date_from_prism = $shiftDate($date_from, -7);
$date_to = "@[date_to]";


-- Content metadata lookup (UUID -> path, channel, program) read from the iron_branch
-- concat table, restricted to rows whose UUID equals their JoinKey.
$ib = (
    SELECT
        `UUID`,
        `path`,
        computed_channel AS channel,
        computed_program AS program
    FROM `//home/videolog/strm_meta/iron_branch/concat`
    WHERE `UUID` == `JoinKey`
);

-- Prefixes the resolved channel $ch with "YANDEXSHOW__" when the imported
-- $is_yandex_show predicate accepts the original channel and the program;
-- otherwise $ch is returned unchanged.
$wrap_yandex_show = ($ch, $ch_orig, $program) -> (
    IF(
        $is_yandex_show($ch_orig, $program),
        "YANDEXSHOW__" || $ch,
        $ch
    )
);

-- Normalizes a device type to one of "desktop", "tv", "touch" or "other":
-- "phone" and "tablet" are folded into "touch", already-normalized values pass through,
-- and anything else (including NULL) becomes "other".
$wrapDeviceType = ($dt) -> (
    CASE
        WHEN $dt IN ("phone", "tablet") THEN "touch"
        WHEN $dt IN ("desktop", "tv", "touch") THEN $dt
        ELSE "other"
    END
);

-- Effective view time: when the regular view time $vt is 0 and the player-alive view time
-- $vtpl is at most 10, $vtpl (cast to Int64) is used instead; otherwise $vt is returned.
$countVT = ($vt, $vtpl) -> {
    $use_player_alive = ($vt == 0 AND $vtpl <= 10);
    RETURN IF($use_player_alive, CAST($vtpl AS Int64), $vt)
};

-- Predicate over a single ad event (Yson node): true when countertype == 1, win == 1
-- and DspID is not one of (5, 10).
$isShow = ($x) -> {
    $is_counter = Yson::LookupInt64($x, "countertype") == 1;
    $is_win = Yson::LookupInt64($x, "win") == 1;
    $is_allowed_dsp = Yson::LookupInt64($x, "DspID") NOT IN (5, 10);
    RETURN $is_counter AND $is_win AND $is_allowed_dsp
};

-- Aggregates ad prices from a session's ad_events Yson list.
-- Only events accepted by $isShow are taken into account. Returns a struct with the summed
-- "price" of overlay / motion / preroll / midroll events and the number of prerolls and midrolls.
$getSpecialPrices = ($ae) -> {
    $events = Yson::ConvertToList($ae);
    $shown = ListFilter($events, $isShow);

    -- Shown events whose "video_type" equals the given value.
    $of_video_type = ($type) -> (
        ListFilter($shown, ($ev) -> (Yson::LookupString($ev, "video_type") == $type))
    );
    -- "price" values of the given events.
    $prices_of = ($items) -> (
        ListMap($items, ($ev) -> (Yson::LookupInt64($ev, "price")))
    );

    $preroll_prices = $prices_of($of_video_type("preroll"));
    $midroll_prices = $prices_of($of_video_type("midroll"));
    $overlay_prices = $prices_of($of_video_type("overlay"));
    -- Motion ads are recognized by "producttype" rather than by "video_type".
    $motion_prices = $prices_of(
        ListFilter($shown, ($ev) -> (Yson::LookupString($ev, "producttype") LIKE "%motion%"))
    );

    RETURN <|
        overlay_price: ListSum($overlay_prices),
        motion_price: ListSum($motion_prices),
        prerolls_count: ListLength($preroll_prices),
        prerolls_price: ListSum($preroll_prices),
        midrolls_count: ListLength($midroll_prices),
        midrolls_price: ListSum($midroll_prices),
    |>
};

-- Python UDF that turns a numeric video meta id into a "v..." content id string.
-- The id is serialized as 8 little-endian bytes, URL-safe base64 encoded, stripped of "="
-- padding and prefixed with "v". A NULL id yields NULL instead of failing inside the UDF.
$generate_vcid = Python::generate_vcid(@@#py
from base64 import urlsafe_b64encode
from yql.typing import *


def generate_vcid(video_meta_id: Optional[Uint64]) -> Optional[String]:
    # The argument is declared Optional, so None (e.g. a failed CAST on the caller side)
    # can arrive here; pass it through rather than raising AttributeError on .to_bytes.
    if video_meta_id is None:
        return None
    encoded_meta_id = urlsafe_b64encode(video_meta_id.to_bytes(8, "little"))
    video_url_id = encoded_meta_id.decode().strip("=")
    return f"v{video_url_id}"
@@);
-- Parses TSV text into a dict: first column -> vcid generated from the second column.
-- Lines without a tab are skipped. The second column is cast to UInt64 and passed through
-- $generate_vcid; both the key and the generated value are unwrapped, so a missing or
-- NULL value fails the query.
$parseContentIdsMapping = ($mapping) -> {
    $lines = String::SplitToList($mapping, "\n");
    $tabbed_lines = ListFilter($lines, ($line) -> (FIND($line, "\t") IS NOT NULL));
    $pairs = ListMap($tabbed_lines, ($line) -> {
        $fields = String::SplitToList($line, "\t");
        RETURN AsTuple(
            unwrap($fields[0]),
            unwrap($generate_vcid(CAST($fields[1] AS UInt64)))
        )
    });
    RETURN ToDict($pairs)
};

-- Dict parsed from the attached "content_ids_mapping.tsv" file by $parseContentIdsMapping.
$content_ids_mapping = $parseContentIdsMapping(FileContent("content_ids_mapping.tsv"));

-- Replaces a UUID with its mapped value when $content_ids_mapping contains it;
-- otherwise the UUID is returned as is.
$wrapUUID = ($uuid) -> {
    $is_mapped = DictContains($content_ids_mapping, $uuid);
    $mapped = $content_ids_mapping[$uuid];
    RETURN IF($is_mapped, unwrap($mapped), $uuid)
};

-- Session rows from the `sessions` tables of `cubes/video-strm` between $date_from and $date_to,
-- with normalized UUID / ref_from / platform and derived view-time and ad-price columns.
-- Sessions whose vsid starts with "rtbdsp" are excluded, and a row is kept only if it has
-- view time, a "start" marker in sources_aggr, revenue, or sent bytes.
$source = (
    SELECT
        $wrapUUID(`UUID`) AS `UUID`,
        fielddate,
        `timestamp`,
        video_content_id,
        shows_good,
        -- page_id is kept only for sessions with good ad shows or a positive price.
        IF(shows_good > 0 OR price > 0, page_id) AS page_id,
        channel,
        program,
        yandexuid,
        yandexuid ?? puid AS user_id,
        (fraud IS NOT NULL AND fraud == 1) AS fraud,
        $remappedWrapRefFrom(ref_from) AS ref_from,
        $wrapDeviceType(device_type) AS platform,
        channel_old,
        channel_id,
        ParentUUID,
        license,
        IF($is_good_session(TableRow()), 1, 0) AS is_good_session,
        $countVT(view_time, view_time_player_alive) AS view_time,
        -- Log view time; zeroed when the effective view time is below 30.
        IF(
            $countVT(view_time, view_time_player_alive) < 30,
            0,
            Math::Log(CAST($countVT(view_time, view_time_player_alive) AS Double))
        ) AS lvt,
        -- Same for non-muted view time, with thresholds of 30 and 120.
        IF(
            view_time_non_muted < 30,
            0,
            Math::Log(CAST(view_time_non_muted AS Double))
        ) AS lvt_non_muted,
        IF(
            view_time_non_muted < 120,
            0,
            Math::Log(CAST(view_time_non_muted AS Double))
        ) AS lvt_non_muted_2m,
        view_time_non_muted,
        price,
        $getSpecialPrices(ad_events).overlay_price AS overlay_price,
        $getSpecialPrices(ad_events).motion_price AS motion_price,
        $getSpecialPrices(ad_events).prerolls_price AS prerolls_price,
        $getSpecialPrices(ad_events).midrolls_price AS midrolls_price,
        $getSpecialPrices(ad_events).prerolls_count AS prerolls_count,
        $getSpecialPrices(ad_events).midrolls_count AS midrolls_count,
        ad_tracking_events,
        bytes_sent,
        -- Flag for the older microsession definition, which does not count bytes_sent.
        IF(view_time > 0 OR sources_aggr LIKE "%start%" OR price > 0, 1, 0) AS old_ms
    FROM RANGE(
        `cubes/video-strm`,
        $date_from,
        $date_to,
        `sessions`
    ) AS s
    WHERE
        vsid NOT LIKE 'rtbdsp%'
        AND (
            view_time > 0
            OR sources_aggr LIKE "%start%"
            OR price > 0
            OR bytes_sent > 0
        )
);

-- Source rows that have no UUID; they cannot be joined to content metadata,
-- so their path is set to the "-" placeholder.
$without_uuid = (
    SELECT
        s.*,
        "-" AS path
    FROM $source AS s
    WHERE `UUID` IS NULL
);

-- Returns $x: unwrapped when it is not NULL, unchanged otherwise.
-- NOTE(review): both branches carry the same value, so this looks like a type-level helper — confirm intent.
$w = ($x) -> (IF($x is not null, unwrap($x), $x));

-- Source rows that have a UUID, enriched with path / channel / program from $ib.
-- The session's own channel and program are dropped and replaced by the $ib values;
-- a missing path becomes "-". LEFT JOIN ANY keeps every session row.
$with_uuid_ = (
    SELECT
        path ?? "-" AS path,
        channel,
        program,
        s.*
    FROM (
        SELECT
            src.* WITHOUT channel, program
        FROM $source AS src
        WHERE `UUID` IS NOT NULL
    ) AS s
    LEFT JOIN ANY $ib AS i USING (`UUID`)
);
-- Recomputes the channel of UUID-bearing rows: $getChannel resolves it from the row's
-- identifiers, then $wrap_yandex_show may add its prefix based on the joined channel and program.
$with_uuid = (
    SELECT
        $wrap_yandex_show(
            $getChannel(channel, channel_id, video_content_id, ParentUUID, program),
            channel,
            program
        ) AS channel,
        s.* WITHOUT channel
    FROM $with_uuid_ AS s
);

-- All sessions again (with and without UUID), with a NULL channel replaced by "-".
$concat = (
    SELECT
        channel ?? "-" AS channel,
        s.* WITHOUT channel
    FROM $without_uuid AS s
    UNION ALL
    SELECT
        channel ?? "-" AS channel,
        s.* WITHOUT channel
    FROM $with_uuid AS s
);

-- One prism segment per yandexuid: the value taken from the table with the greatest name
-- among `home/prism/user_weights` tables between $date_from_prism and $date_to.
$prism_segments = (
    SELECT
        yandexuid,
        MAX_BY(prism_segment, source_table) AS prism_segment
    FROM (
        SELECT
            yandexuid,
            prism_segment,
            TableName() AS source_table
        FROM RANGE(`home/prism/user_weights`, $date_from_prism, $date_to)
    )
    GROUP BY
        yandexuid
);

-- Split of $concat by presence of yandexuid: only rows that have one can be joined
-- to prism segments.
$with_yandexuid = (
    SELECT *
    FROM $concat
    WHERE yandexuid IS NOT NULL
);

$without_yandexuid = (
    SELECT *
    FROM $concat
    WHERE yandexuid IS NULL
);

-- Attaches prism_segment to the rows that have a yandexuid; rows without a match keep NULL.
$join_prism_segments = (
    SELECT
        s.*,
        prism_segment
    FROM $with_yandexuid AS s
    LEFT JOIN ANY $prism_segments AS p USING (yandexuid)
);

-- Reunites rows with and without yandexuid after the prism join and passes channel through $w.
$concat2 = (
    SELECT
        $w(channel) AS channel,
        s.* WITHOUT channel
    FROM (
        SELECT *
        FROM $join_prism_segments
        UNION ALL
        SELECT *
        FROM $without_yandexuid
    ) AS s
);

-- Expands one session row into the list of rows it contributes to, adding "_total_" slices.
-- Result: the row itself, plus copies with ref_from and/or platform replaced by "_total_",
-- plus a copy of each of those for every additional channel returned by $getChannelsList.
$totalize = ($row) -> {
    -- Channel labels for this row come from the imported $getChannelsList
    -- (presumably the row's own channel first, then aggregate channels — confirm in vh_cube_2_common.sql).
    $channelsList = $getChannelsList($row);
    -- The first label becomes the row's channel; fails if the list is empty (unwrap).
    $row = AddMember(RemoveMember($row, "channel"), "channel", unwrap($channelsList[0]));
    -- Remaining labels are applied further below.
    $channelsList = ListSkip($channelsList, 1);
    $result = AsList($row);
    -- Add a copy with ref_from = "_total_" (1 row -> 2 rows).
    $add = ListCollect(ListMap($result, ($x)->(AddMember(RemoveMember($x, "ref_from"), "ref_from", "_total_"))));
    $result = ListCollect(ListUnionAll($result, $add));
    -- Add copies of both with platform = "_total_" (2 rows -> 4 rows).
    $add = ListCollect(ListMap($result, ($x)->(AddMember(RemoveMember($x, "platform"), "platform", "_total_"))));
    $result = ListCollect(ListUnionAll($result, $add));
    -- For every row still carrying the primary channel, produce one copy per remaining channel label.
    $add = ListCollect(ListFlatMap(
        $result, ($x)->(IF(
            $x.channel == $row.channel,
            ListMap($channelsList, ($f)->(AddMember(RemoveMember($x, "channel"), "channel", $f))),
            NULL
        ))
    ));
    -- Second flattening pass; presumably needed because the lambda above returns an optional list,
    -- leaving one extra nesting level — confirm.
    $add = ListCollect(ListFlatMap($add, ($x)->($x)));
    $result = ListUnionAll($result, $add);
    RETURN $result
};

-- Applies $totalize to every row of $concat2, yielding the rows with all "_total_" and extra-channel copies.
$totalized = process $concat2 using $totalize(TableRow());

-- Projection of $totalized down to the columns listed in $input_struct_type,
-- i.e. the input of the ad-load reducer.
$for_ad_load = (
    SELECT
        fielddate,
        ref_from,
        platform,
        channel,
        user_id,
        view_time_non_muted,
        view_time,
        `timestamp`,
        price,
        ad_tracking_events
    FROM $totalized
);

-- Row type consumed by the ad-load reducer; mirrors the column list of $for_ad_load.
$input_struct_type = Struct<
    fielddate:String?,
    ref_from:String?,
    platform:String?,
    channel:String?,
    user_id:String?,
    view_time_non_muted:Int64?,
    view_time:Int64?,
    `timestamp`:Int64?,
    price:Int64?,
    ad_tracking_events:Yson?,
>;

-- Row type produced by the ad-load reducer: the slice dimensions plus ad-load metrics.
$output_struct_type = Struct<
    fielddate:String?,
    ref_from:String?,
    platform:String?,
    channel:String?,
    ad_load:Double?,
    ad_load_wo_leaves:Double?,
    ad_load_per_vtnm:Double?,
    ad_load_wo_leaves_per_vtnm:Double?,
    total_adv_time:Double?
>;

-- Python reducer `ad_load_counter` loaded from the attached "ad_load_counter.py" file.
-- Signature: (5-element key tuple of optional strings, stream of $input_struct_type rows)
-- -> stream of $output_struct_type rows.
-- NOTE(review): the file is not attached by a PRAGMA File in the visible part of this script;
-- presumably it is attached by the launcher — confirm.
$ad_load_counter = Python::ad_load_counter(
    Callable<(
        Tuple<String?, String?, String?, String?, String?>,
        Stream<$input_struct_type>
    )->Stream<$output_struct_type>>,
    FileContent("ad_load_counter.py")
);

-- Runs the ad-load reducer once per (user_id, fielddate, ref_from, platform, channel) group,
-- feeding it that group's rows ordered by timestamp.
$ad_load_reduce = (
    REDUCE $for_ad_load
    PRESORT `timestamp`
    ON user_id, fielddate, ref_from, platform, channel
    USING $ad_load_counter(TableRow())
);

-- Collapses reducer output to one row per (fielddate, ref_from, platform, channel):
-- ad-load metrics are averaged over reducer rows, total_adv_time is summed.
$for_ad_load_grouped = (
    SELECT
        fielddate,
        ref_from,
        platform,
        channel,
        COUNT(*) AS ad_load_count,
        AVG(ad_load) AS ad_load,
        AVG(ad_load_wo_leaves) AS ad_load_wo_leaves,
        AVG(ad_load_per_vtnm) AS ad_load_per_vtnm,
        AVG(ad_load_wo_leaves_per_vtnm) AS ad_load_wo_leaves_per_vtnm,
        SUM(total_adv_time) AS total_adv_time
    FROM $ad_load_reduce
    GROUP BY
        fielddate,
        ref_from,
        platform,
        channel
);

-- True when the license is absent (NULL) or equals "avod" ignoring ASCII case.
$isAvod = ($license) -> (
    $license IS NULL OR String::AsciiToLower($license) == "avod"
);


-- Main metrics per (fielddate, ref_from, platform, channel) slice, computed over $totalized.
-- User counts are approximate (CountDistinctEstimate). Prices are divided by 1000000.0
-- (columns are named *_rub; presumably a micro-unit to ruble conversion — confirm).
-- NOTE(review): tvt_avod has no "?? 0" default unlike the other sums — confirm whether NULL is intended.
$grouped = (
    SELECT
        fielddate,
        ref_from,
        platform,
        channel,

        -- AVOD-only totals (license is NULL or "avod").
        SUM_IF(view_time, $isAvod(license)) AS tvt_avod,
        (SUM_IF(price, $isAvod(license)) / 1000000.0) ?? 0 AS price_rub_avod,

        -- Users, including the prism p4 / p5 segment breakdown.
        CountDistinctEstimate(user_id) AS user_ids,
        CountDistinctEstimate(IF(prism_segment == "p4", user_id)) AS user_ids_prism_p4,
        CountDistinctEstimate(IF(prism_segment == "p5", user_id)) AS user_ids_prism_p5,
        CountDistinctEstimate(
            IF(view_time >= 30 AND prism_segment == "p4", user_id)
        ) AS user_ids_30s_prism_p4,
        CountDistinctEstimate(
            IF(view_time >= 30 AND prism_segment == "p5", user_id)
        ) AS user_ids_30s_prism_p5,
        SUM_IF(view_time, prism_segment == "p4") ?? 0 AS tvt_prism_p4,
        SUM_IF(view_time, prism_segment == "p5") ?? 0 AS tvt_prism_p5,
        (SUM_IF(price, prism_segment == "p4") / 1000000.0) ?? 0 AS price_prism_p4,
        (SUM_IF(price, prism_segment == "p5") / 1000000.0) ?? 0 AS price_prism_p5,
        CountDistinctEstimate(IF(NOT fraud, user_id)) AS user_ids_not_fraud,
        CountDistinctEstimate(IF(view_time >= 30, user_id, NULL)) AS user_ids_30s,
        CountDistinctEstimate(IF(shows_good > 0, user_id, NULL)) AS user_ids_ad_show,
        CountDistinctEstimate(IF(price > 0, user_id, NULL)) AS user_ids_money,

        -- Session counters.
        SUM(old_ms) ?? 0 AS microsessions_with_start_or_money,
        SUM(is_good_session) AS good_sessions,
        SUM(shows_good) AS ad_shows,
        COUNT(*) AS microsessions,
        COUNT_IF(NOT fraud) AS microsessions_not_fraud,
        COUNT_IF(view_time >= 30) AS microsessions_30s,
        COUNT_IF(view_time_non_muted >= 30) AS microsessions_30s_non_muted,

        -- Revenue.
        (SUM(price) / 1000000.0) ?? 0.0 AS price_rub,
        (SUM(overlay_price) / 1000000.0) ?? 0.0 AS overlay_price_rub,
        (SUM(motion_price) / 1000000.0) ?? 0.0 AS motion_price_rub,
        (SUM(prerolls_price) / 1000000.0) ?? 0.0 AS prerolls_price_rub,
        (SUM(midrolls_price) / 1000000.0) ?? 0.0 AS midrolls_price_rub,
        SUM(prerolls_count) ?? 0.0 AS prerolls_count,
        SUM(midrolls_count) ?? 0.0 AS midrolls_count,

        -- View time.
        SUM(view_time) ?? 0 AS tvt,
        SUM_IF(view_time, NOT fraud) ?? 0 AS tvt_not_fraud,
        SUM(lvt) ?? 0 AS lvt,
        SUM(view_time_non_muted) ?? 0 AS tvt_non_muted,
        SUM_IF(view_time_non_muted, NOT fraud) ?? 0 AS tvt_non_muted_not_fraud,
        SUM(lvt_non_muted) ?? 0 AS lvt_non_muted,
        SUM(lvt_non_muted_2m) ?? 0 AS lvt_non_muted_2m,

        -- Content / page variety and traffic.
        CountDistinctEstimate(video_content_id) AS unique_content_ids,
        CountDistinctEstimate(page_id) AS unique_page_ids,
        (SUM(bytes_sent) / Math::Pow(1024, 3)) ?? 0.0 AS gbytes_sent_sessions
    FROM $totalized
    GROUP BY
        fielddate,
        ref_from,
        platform,
        channel
);

-- Adds the ad-load metrics to the main metrics of the same slice;
-- slices without reducer output keep NULL ad-load columns.
$join_ad_load = (
    SELECT
        g.*,
        ad_load_count,
        ad_load,
        ad_load_wo_leaves,
        ad_load_per_vtnm,
        ad_load_wo_leaves_per_vtnm,
        total_adv_time
    FROM $grouped AS g
    LEFT JOIN ANY $for_ad_load_grouped AS a USING (
        fielddate,
        ref_from,
        platform,
        channel
    )
);

-- Final shape of the report rows: NULL platform becomes "-", NULL ref_from becomes "UNMAPPED".
$transformed = (
    SELECT
        platform ?? "-" AS platform,
        ref_from ?? "UNMAPPED" AS ref_from,
        s.* WITHOUT platform, ref_from
    FROM $join_ad_load AS s
);

-- Publishes the final rows to the stat report `Video/Others/Strm/remapped_cube/daily`.
-- "erase by (fielddate)" presumably clears previously uploaded rows for the same fielddate
-- values before writing — confirm against the stat upload documentation.
upsert into stat.`Video/Others/Strm/remapped_cube/daily` erase by (fielddate)
select * from $transformed;
