use hahn;
pragma yson.DisableStrict;
pragma yt.Pool = "@[pool]";

-- Run parameters. The "@[...]" placeholders are presumably substituted by an
-- external templating step before the query is executed -- TODO confirm.
$date_from = "@[date_from]";
$date_to = "@[date_to]";
$output_root = "@[output_root]";

-- Output tables; both names end with "<date_from>_<date_to>".
-- $tmp_table receives the per-request rows ($add_bads),
-- $out_table receives the aggregated statistics ($for_stat_joined).
$tmp_table = $output_root || "/tmp/" || $date_from || "_" || $date_to;
$out_table = $output_root || "/stat/" || $date_from || "_" || $date_to;

-- Date helpers working on "%Y-%m-%d" strings.
-- $parseDate:  string -> date value.
$parseDate = ($x) -> (
    DateTime::MakeDate(DateTime::Parse("%Y-%m-%d")($x))
);
-- $formatDate: date value -> string.
$formatDate = DateTime::Format("%Y-%m-%d");
-- $shiftDate:  moves a date string by $days days (negative = into the past)
--              and returns the result as a string again.
$shiftDate = ($date, $days) -> {
    $offset = DateTime::IntervalFromDays(cast($days as Int16));
    $shifted = $parseDate($date) + $offset;
    return $formatDate($shifted);
};

-- The day before $date_from. It is used as the start of the log range so that
-- the first reported day has a previous day to compare recommendations with;
-- rows of this extra day are filtered out again in $add_bads.
$date_from_for_shift = $shiftDate($date_from, -1);

-- $channelFromChain = ($chain) -> {
--     $chain = Yson::ConvertToList($chain);
--     $channels = ListFilter(ListReverse($chain), ($x)->(Yson::LookupInt64($x, "ContentTypeID") == 2));
--     return IF(
--         ListLength($channels) == 0,
--         null,
--         unwrap(Yson::LookupString(unwrap($channels[0]), "Name"))
--     )
-- };

-- Turns a Yson dict into a (key, value) tuple using the first key reported by
-- DictKeys. The value is converted to a string; the unwrap calls fail when the
-- value is missing or the conversion yields null.
$dictToTuple = ($dict) -> {
    $parsed = Yson::ConvertToDict($dict);
    $first_key = DictKeys($parsed)[0];
    $value = unwrap(Yson::ConvertToString(unwrap($parsed[$first_key])));
    return AsTuple($first_key, $value);
};

-- Converts a Yson list of dicts into a single dict: every list element is
-- mapped to a (key, value) tuple by $dictToTuple and the tuples are collected
-- with ToDict. Presumably each element holds exactly one CGI parameter --
-- TODO confirm against the log format.
$convertCgi = ($ysonlist) -> (
    ToDict(
        ListMap(Yson::ConvertToList($ysonlist), $dictToTuple)
    )
);

-- Extracts the "yandexuid" cookie value from request headers.
-- The "cookie" header (empty string when absent) is parsed by Dsv::Parse with
-- "; " as the separator; the result is null when there is no yandexuid entry.
$yandexuidFromHeaders = ($headers) -> {
    $cookie = Yson::ConvertToString($headers["cookie"]) ?? "";
    $cookie_fields = Dsv::Parse($cookie, "; ");
    return $cookie_fields["yandexuid"];
};

-- Converts a Yson list of ids into a list of structs (position, vcid),
-- where position is the 1-based index of the id in the list.
$parseUuids = ($uuids) -> {
    $vcids = ListMap(Yson::ConvertToList($uuids), Yson::ConvertToString);
    -- ListEnumerate yields (index, value) pairs with a 0-based index.
    $toItem = ($pair) -> (AsStruct(
        $pair.0 + 1 as position,
        $pair.1 as vcid
    ));
    return ListMap(ListEnumerate($vcids), $toItem);
};


-- Maps an empty string to null; any other value (including null) is returned as is.
$emptyWrap = ($x) -> (IF($x != "", $x, null));

-- Carousel requests with from=morda, one row per log record.
-- Reads the daily apphost logs from $date_from_for_shift to $date_to and keeps
-- only records of "public/carousel_videohub.json" that have a non-empty
-- carousel_id and a yandexuid cookie.
--
-- Output columns:
--   fielddate   - name of the daily log table the record came from
--   yandexuid   - value of the yandexuid cookie
--   carousel_id - "carousel_id" CGI parameter
--   user_reqid  - request id (see the fallback chain below)
--   uuids_list  - list of (position, vcid) structs built from `uuids`
$preparse = (
    SELECT
        TableName() AS fielddate,
        $yandexuidFromHeaders(headers) AS yandexuid,
        $convertCgi(cgi)["carousel_id"] AS carousel_id,
        -- Request id fallback chain: CGI "reqid" -> user_reqid column ->
        -- "<yandexuid>_<timestamp>". An empty CGI reqid is treated as missing
        -- (same as for the user_reqid column); otherwise all such requests of
        -- a user would share the id "" and be merged into one request later.
        $emptyWrap($convertCgi(cgi)["reqid"])
            ?? $emptyWrap(user_reqid)
            ?? ($yandexuidFromHeaders(headers) || "_" || cast(`timestamp` as String)) AS user_reqid,
        $parseUuids(uuids) AS uuids_list
    FROM range(
        `//logs/vh-apphost-logs/1d`, $date_from_for_shift, $date_to
    )
    WHERE url == "public/carousel_videohub.json"
    AND $emptyWrap($convertCgi(cgi)["carousel_id"]) IS NOT NULL
    AND $convertCgi(cgi)["from"] == "morda"
    AND $yandexuidFromHeaders(headers) IS NOT NULL
);

-- Explodes uuids_list: one output row per recommended item of every request,
-- with the struct members exposed as plain `position` and `vcid` columns.
$flatten = (
    SELECT
        fielddate,
        yandexuid,
        carousel_id,
        user_reqid,
        uuids_list.position AS position,
        uuids_list.vcid AS vcid
    FROM $preparse
    FLATTEN LIST BY uuids_list
);

-- Channel label for every JoinKey of the iron_branch metadata table.
-- A computed_channel starting with "UGC." or "Youtube." is used as is;
-- otherwise the label is "<computed_channel>.<Name>", where a null or empty
-- part is replaced by the word "empty".
$channels_from_ib = (
    SELECT
        JoinKey,
        CASE
            WHEN computed_channel LIKE 'UGC.%' OR computed_channel LIKE 'Youtube.%'
                THEN computed_channel
            ELSE ($emptyWrap(computed_channel) ?? "empty") || "." || ($emptyWrap(Name) ?? "empty")
        END AS channel
    FROM `//home/videolog/strm_meta/iron_branch/concat`
);

-- Materialize both join inputs into temporary (@) tables, each sorted by the
-- column it is joined on (JoinKey / vcid), then commit so that the following
-- statements can read them.
INSERT INTO @channels_from_ib WITH TRUNCATE
SELECT *
FROM $channels_from_ib
ORDER BY JoinKey;

INSERT INTO @flatten WITH TRUNCATE
SELECT *
FROM $flatten
ORDER BY vcid;

COMMIT;

-- Attach the channel label to every recommended item. LEFT JOIN ANY keeps all
-- items (channel is null when the vcid has no metadata row) and uses at most
-- one metadata row per item.
$joined = (
    SELECT
        s.*,
        ch.channel AS channel
    FROM @flatten AS s
    LEFT JOIN ANY @channels_from_ib AS ch
        ON (s.vcid == ch.JoinKey)
);

-- Store the join result in a temporary table; it is read twice below.
INSERT INTO @joined WITH TRUNCATE
SELECT *
FROM $joined;

COMMIT;

-- Back to one row per request (fielddate, yandexuid, user_reqid, carousel_id):
-- the items are collected into uuids_list, ordered by position and cut to the
-- first 50 entries.
$regrouped = (
    SELECT
        fielddate,
        yandexuid,
        user_reqid,
        carousel_id,
        ListTake(
            ListSort(
                AGGREGATE_LIST(AsStruct(
                    position AS position,
                    vcid AS vcid,
                    channel AS channel
                )),
                ($item) -> ($item.position)
            ),
            50
        ) AS uuids_list
    FROM @joined
    GROUP BY fielddate, yandexuid, user_reqid, carousel_id
);

-- Per day, user and carousel: the distinct vcids shown, sorted by vcid value
-- and limited to the first 50 of that order.
$for_same_recoms = (
    SELECT
        fielddate,
        yandexuid,
        carousel_id,
        ListTake(
            ListSort(AGGREGATE_LIST_DISTINCT(vcid)),
            50
        ) AS vcids
    FROM @joined
    GROUP BY fielddate, yandexuid, carousel_id
);

-- The same rows with fielddate moved one day forward, so that a row of day D
-- can be joined to the row of day D+1 as "yesterday".
$for_same_recoms_shifted = (
    SELECT
        $shiftDate(fielddate, 1) AS fielddate,
        f.* WITHOUT f.fielddate
    FROM $for_same_recoms AS f
);

-- Today's vcids next to the previous day's vcids of the same user and
-- carousel; vcids_yesterday is null when there is no previous-day row.
$for_same_recoms_joined = (
    SELECT
        f.*,
        y.vcids AS vcids_yesterday
    FROM $for_same_recoms AS f
    LEFT JOIN $for_same_recoms_shifted AS y
        USING (fielddate, yandexuid, carousel_id)
);

-- Share of today's items that were also shown yesterday.
--   $today, $yesterday - lists of vcids
--   returns            - |today ∩ yesterday| / ListLength($today) as Double
-- The caller is expected to pass a non-empty $today (the division is not
-- guarded here).
$countSameness = ($today, $yesterday) -> {
    $common = SetIntersection(ToSet($today), ToSet($yesterday));
    -- DictLength counts the set entries directly, without building a key list.
    return DictLength($common) / cast(ListLength($today) as Double);
};

-- Adds same_vcids_share (see $countSameness) to every user/carousel/day row
-- that has a previous-day row and non-empty vcid lists on both days.
$for_same_recoms_same_share = (
    SELECT
        f.*,
        $countSameness(vcids, vcids_yesterday) AS same_vcids_share
    FROM $for_same_recoms_joined AS f
    WHERE vcids_yesterday IS NOT NULL
    AND ListLength(vcids) > 0
    AND ListLength(vcids_yesterday) > 0
);

-- Per day and carousel: number of user rows that had a previous-day row, plus
-- the mean and the 50th/75th/90th percentiles of same_vcids_share.
$for_same_recoms_grouped = (
    SELECT
        fielddate,
        carousel_id,
        COUNT(*) AS yesterday_yandexuid_count,
        AVG(same_vcids_share) AS same_vcids_share_avg,
        PERCENTILE(same_vcids_share, 0.5) AS same_vcids_share_p50,
        PERCENTILE(same_vcids_share, 0.75) AS same_vcids_share_p75,
        PERCENTILE(same_vcids_share, 0.9) AS same_vcids_share_p90
    FROM $for_same_recoms_same_share
    GROUP BY fielddate, carousel_id
);

-- select fielddate, yandexuid, user_reqid, count(*) as `count`
-- from $joined
-- group by fielddate, yandexuid, user_reqid
-- order by `count` desc
-- limit 20;

-- Frequency table of a list: returns (value, count) tuples, one per distinct
-- value, sorted by count in descending order.
$makeCounter = ($list) -> {
    -- Number of elements of $list equal to $value.
    $countOf = ($value) -> (
        ListLength(ListFilter($list, ($item) -> ($item == $value)))
    );
    $pairs = ListMap(
        ListUniq($list),
        ($value) -> (AsTuple($value, $countOf($value)))
    );
    return ListSortDesc($pairs, ($pair) -> ($pair.1));
};

-- Returns 1 when the most frequent channel accounts for more than half of the
-- items in $uuids_list, otherwise 0.
$dominatingChannel = ($uuids_list) -> {
    $channels = ListMap($uuids_list, ($item) -> ($item.channel));
    $top = $makeCounter($channels)[0];
    $top_share = $top.1 / cast(ListLength($uuids_list) as Double);
    return IF($top_share > 0.5, 1, 0);
};

-- Returns 1 when the items at positions 1..10 contain exactly one distinct
-- vcid, otherwise 0.
$dominatingUuid = ($uuids_list) -> {
    $top10_vcids = ListMap(
        ListFilter($uuids_list, ($item) -> ($item.position <= 10)),
        ($item) -> ($item.vcid)
    );
    return IF(ListLength(ListUniq($top10_vcids)) == 1, 1, 0);
};

-- Number of distinct channels among the items at positions 1..10.
$uniqChannelsInTop10 = ($uuids_list) -> {
    $top10_channels = ListMap(
        ListFilter($uuids_list, ($item) -> ($item.position <= 10)),
        ($item) -> ($item.channel)
    );
    return ListLength(ListUniq($top10_channels)) ?? 0;
};

-- Number of items at positions 1..10 whose channel matches "ott.%" or
-- "# VOD.%" (NOTE(review): the "# VOD." prefix is taken verbatim from the
-- channel labels -- confirm it is still the right spelling).
$ottInTop10 = ($uuids_list) -> {
    $isOtt = ($item) -> ($item.channel like "ott.%" or $item.channel like "# VOD.%");
    $top10 = ListFilter($uuids_list, ($item) -> ($item.position <= 10));
    return ListLength(ListFilter($top10, $isOtt)) ?? 0;
};

-- Per-request quality flags on top of $regrouped. The extra leading day
-- ($date_from_for_shift) is dropped here: it is only needed for the
-- day-over-day comparison.
$add_bads = (
    SELECT
        s.*,
        $dominatingChannel(uuids_list) AS dominating_channel,
        $dominatingUuid(uuids_list) AS dominating_uuid,
        $uniqChannelsInTop10(uuids_list) AS uniq_channels_in_top10,
        $ottInTop10(uuids_list) AS ott_in_top10
    FROM $regrouped AS s
    WHERE fielddate > $date_from_for_shift
);

-- Per day, user and carousel:
--   uniq_channels_in_top10_min - smallest top-10 channel variety over the
--                                user's requests
--   ott_in_top10_share         - OTT items in the top 10 divided by
--                                (number of requests * 10)
$pregroup_uniq = (
    SELECT
        fielddate,
        yandexuid,
        carousel_id,
        MIN(uniq_channels_in_top10) AS uniq_channels_in_top10_min,
        SUM(ott_in_top10) / CAST(COUNT(*) * 10 AS Double) AS ott_in_top10_share
    FROM $add_bads
    GROUP BY fielddate, yandexuid, carousel_id
);

-- Per day and carousel: approximate number of distinct users in total and of
-- users with at least one request flagged dominating_channel / dominating_uuid.
$for_stat = (
    SELECT
        fielddate,
        carousel_id,
        CountDistinctEstimate(yandexuid) AS users_total,
        CountDistinctEstimate(
            IF(dominating_channel == 1, yandexuid, null)
        ) ?? 0 AS users_with_dominating_channel,
        CountDistinctEstimate(
            IF(dominating_uuid == 1, yandexuid, null)
        ) ?? 0 AS users_with_dominating_uuid
    FROM $add_bads
    GROUP BY fielddate, carousel_id
);

-- Per day and carousel: mean OTT share in the top 10 and, for N = 1..10, the
-- approximate number of users whose minimal top-10 channel variety equals N
-- (column p_NN_channels_in_top10).
$for_stat_2 = (
    SELECT
        fielddate,
        carousel_id,
        AVG(ott_in_top10_share) AS ott_in_top10_share_avg,
        CountDistinctEstimate(IF(uniq_channels_in_top10_min == 1, yandexuid, null)) AS p_01_channels_in_top10,
        CountDistinctEstimate(IF(uniq_channels_in_top10_min == 2, yandexuid, null)) AS p_02_channels_in_top10,
        CountDistinctEstimate(IF(uniq_channels_in_top10_min == 3, yandexuid, null)) AS p_03_channels_in_top10,
        CountDistinctEstimate(IF(uniq_channels_in_top10_min == 4, yandexuid, null)) AS p_04_channels_in_top10,
        CountDistinctEstimate(IF(uniq_channels_in_top10_min == 5, yandexuid, null)) AS p_05_channels_in_top10,
        CountDistinctEstimate(IF(uniq_channels_in_top10_min == 6, yandexuid, null)) AS p_06_channels_in_top10,
        CountDistinctEstimate(IF(uniq_channels_in_top10_min == 7, yandexuid, null)) AS p_07_channels_in_top10,
        CountDistinctEstimate(IF(uniq_channels_in_top10_min == 8, yandexuid, null)) AS p_08_channels_in_top10,
        CountDistinctEstimate(IF(uniq_channels_in_top10_min == 9, yandexuid, null)) AS p_09_channels_in_top10,
        CountDistinctEstimate(IF(uniq_channels_in_top10_min == 10, yandexuid, null)) AS p_10_channels_in_top10
    FROM $pregroup_uniq
    GROUP BY fielddate, carousel_id
);

-- Final report row per day and carousel: user counts from $for_stat, channel
-- variety buckets and OTT share from $for_stat_2 (missing buckets become 0),
-- and the day-over-day repetition metrics from $for_same_recoms_grouped.
$for_stat_joined = (
    SELECT
        f1.*,
        p_01_channels_in_top10 ?? 0 AS p_01_channels_in_top10,
        p_02_channels_in_top10 ?? 0 AS p_02_channels_in_top10,
        p_03_channels_in_top10 ?? 0 AS p_03_channels_in_top10,
        p_04_channels_in_top10 ?? 0 AS p_04_channels_in_top10,
        p_05_channels_in_top10 ?? 0 AS p_05_channels_in_top10,
        p_06_channels_in_top10 ?? 0 AS p_06_channels_in_top10,
        p_07_channels_in_top10 ?? 0 AS p_07_channels_in_top10,
        p_08_channels_in_top10 ?? 0 AS p_08_channels_in_top10,
        p_09_channels_in_top10 ?? 0 AS p_09_channels_in_top10,
        p_10_channels_in_top10 ?? 0 AS p_10_channels_in_top10,
        ott_in_top10_share_avg,
        yesterday_yandexuid_count,
        same_vcids_share_avg,
        same_vcids_share_p50,
        same_vcids_share_p75,
        same_vcids_share_p90
    FROM $for_stat AS f1
    LEFT JOIN $for_stat_2 AS f2
        ON (
            f1.fielddate == f2.fielddate
            AND f1.carousel_id == f2.carousel_id
        )
    LEFT JOIN $for_same_recoms_grouped AS f3
        ON (
            f1.fielddate == f3.fielddate
            AND f1.carousel_id == f3.carousel_id
        )
);

-- Write the results: per-request rows with quality flags go to $tmp_table,
-- the aggregated per-day/per-carousel statistics go to $out_table.
-- Both tables are overwritten.
INSERT INTO $tmp_table WITH TRUNCATE
SELECT *
FROM $add_bads;

INSERT INTO $out_table WITH TRUNCATE
SELECT *
FROM $for_stat_joined;
