use hahn;
pragma yt.Pool = "@[pool]";
pragma yt.DataSizePerJob = "256M";
pragma yt.MaxJobCount = "99999";
pragma config.flags("LLVM", "OFF");
pragma library("stability_common.sql");
import stability_common symbols $parseDate, $shiftDate, $dateRange;
pragma library("lib_humans_0.sql");
pragma library("lib_humans_1.sql");
import lib_humans_0 symbols $get_human_cryptaids;

-- Inclusive bounds of the reporting period; the "@[...]" placeholders are
-- substituted by the launcher before the query is executed.
$date_from = "@[date_from]";
$date_to = "@[date_to]";
-- Lower bound of the input range read in $source: $date_from shifted by -7
-- (presumably days -- confirm against $shiftDate in stability_common.sql), so
-- that the 7-day windows have data preceding $date_from.
$date_from_shifted = $shiftDate($date_from, -7);
-- Path of the table the final result is written to (with truncate).
$output_table = "@[output_table]";


-- Distinct (pageid, impid) pairs whose video placement is anything other than
-- "instream". Rows with a NULL video_placement_name do not pass the filter.
$outstream = (
    select distinct
        pageid,
        impid
    from `//home/bs/users/yabs-analytics/video/pageimp_ids_video_with_inapp`
    where video_placement_name != "instream"
);

-- Input rows for [$date_from_shifted, $date_to], restricted to the
-- (pageid, impid) pairs present in $outstream.
--   * pageid, impid and yandexuid are unwrapped, so a NULL in any of them
--     aborts the query instead of silently flowing downstream.
--   * The original category is preserved as category_old; the new category is
--     the partner name for "other", the category itself when it contains "VH",
--     and "yandex" for everything else (including a NULL category).
$source = (
    select
        unwrap(s.pageid) as pageid,
        unwrap(s.impid) as impid,
        unwrap(s.yandexuid) as yandexuid,
        s.category as category_old,
        case
            when s.category == "other" then s.partner_name
            when s.category like "%VH%" then s.category
            else "yandex"
        end as category,
        s.* without s.pageid, s.impid, s.yandexuid, s.category
    from range(`@[input_root]`, $date_from_shifted, $date_to) as s
    left semi join $outstream as o
        using (pageid, impid)
);

-- select * from $source limit 50

-- List of up to 100 advertiser names with the largest total price over the
-- whole of $source (i.e. including the 7 extra days before $date_from).
$top100_advs = (
    select
        TOP_BY(advertiser_name, total_price, 100)
    from (
        select
            advertiser_name,
            sum(price) as total_price
        from $source
        group by advertiser_name
    )
);

-- List of up to 100 (already remapped) categories with the largest total
-- price over the whole of $source.
$top100_categories = (
    select
        TOP_BY(category, total_price, 100)
    from (
        select
            category,
            sum(price) as total_price
        from $source
        group by category
    )
);



-- Returns the tail of $s starting at offset Length($s) - 10, i.e. its last
-- 10 bytes when the string is at least 10 bytes long.
$get_last_10 = ($s) -> (
    substring($s, cast(Length($s) - 10 as Uint32))
);
-- Returns $yu unless it is "fresh" relative to the report date $fd, in which
-- case NULL is returned.
--   $yu - yandexuid; its last 10 characters are read as a Unix timestamp in
--         seconds (presumably the moment the uid was issued -- confirm).
--   $fd - report date, parsed with $parseDate.
-- The uid is nulled when that timestamp cannot be turned into a date or when
-- its date is on or after the report date.
$nullify_fresh = ($yu, $fd) -> {
    $yu_tail = $get_last_10(CAST($yu as String));
    $issued_ts = CAST($yu_tail as Uint32);
    $issued_date = DateTime::MakeDate(DateTime::FromSeconds($issued_ts));
    $report_date = $parseDate($fd);
    return IF(
        $issued_date is null or $issued_date >= $report_date,
        null,
        $yu
    )
};
-- Maps 0 to NULL and passes every other value (including NULL) through.
$nullify_zero = ($c) -> {
    return IF($c == 0, NULL, $c)
};
-- Per-row projection of $source used by all later steps.
--   * Rows without a campaign or advertiser name are dropped.
--   * Advertisers outside $top100_advs are collapsed into "Other".
--   * Categories are kept when they are in $top100_categories, start with
--     "VH", or equal "yandex"; everything else becomes "other".
--     NOTE(review): $source keeps categories that merely *contain* "VH"
--     (like "%VH%"), while this check requires a "VH" prefix -- confirm the
--     difference is intended.
--   * Missing gender / age_segment are reported as "unknown".
--   * yandexuid is replaced by its "non fresh" variant (see $nullify_fresh).
--   * campaign_name is passed through as is (it is not collapsed to "Other").
$tmp = (
    select
        fielddate,
        IF(
            ListHas($top100_advs, advertiser_name),
            advertiser_name,
            "Other"
        ) as advertiser_name,
        campaign_name,
        producttype,
        IF(
            ListHas($top100_categories, category)
                or category like "VH%"
                or category == "yandex",
            category,
            "other"
        ) as category,
        video_type,
        coalesce(gender, "unknown") as gender,
        coalesce(age_segment, "unknown") as age_segment,
        crypta_id,
        $nullify_fresh(yandexuid, fielddate) as yandexuid_non_fresh,
        shows,
        winhits,
        hits,
        price,
        partnerprice,
        selfmincpm,
        dspmincpm,
        effective_mincpm
    from $source
    where
        campaign_name is not null
        and advertiser_name is not null
);
-- $top_hosts = (
--     select unwrap(ToSet(ListMap(TOPFREQ(host, 100), ($x)->(unwrap($x.Value)))))
--     from $tmp_
-- );
-- $tmp = (
--     select
--         IF(DictContains($top_hosts, host), host, "other") as host,
--         t.* without host
--     from $tmp_ as t
-- );
-- Rows of $tmp that carry no usable crypta_id (NULL or 0); they bypass the
-- join with the human crypta ids.
$without_crypta_ids = (
    select *
    from $tmp
    where crypta_id == 0 or crypta_id is null
);
-- Rows of $tmp that carry a usable crypta_id (neither NULL nor 0).
-- This must be the exact complement of $without_crypta_ids: the two sets are
-- concatenated again with UNION ALL in $after_humans_join, so any row matched
-- by both predicates would be duplicated and its shows / winhits / price
-- counted twice. The conditions therefore have to be combined with AND
-- ("is not null OR != 0" is also true for crypta_id == 0).
$with_crypta_id = (
    select *
    from $tmp
    where crypta_id is not null and crypta_id != 0
);
-- True when both dates parse and the last-active date $lad is at most 30 days
-- before the report date $fd. There is no lower bound, so a $lad later than
-- $fd also passes. Returns false when either date fails to parse.
$lastDateCheck = ($fd, $lad) -> {
    $report_date = $parseDate($fd);
    $active_date = $parseDate($lad);
    $gap_days = DateTime::ToDays($report_date - $active_date);
    return $report_date is not null
        and $active_date is not null
        and $gap_days is not null
        and $gap_days <= 30
};
-- Rows with a crypta_id, enriched with the matching "human" crypta id and its
-- last active date. Left join: rows without a match keep NULL in both added
-- columns.
$tmp_plus_humans = (
    select
        t.*,
        h.cryptaId as crypta_id_human,
        h.last_active_date as last_active_date
    from $with_crypta_id as t
    left join $get_human_cryptaids() as h
        on t.crypta_id == CAST(h.cryptaId as UInt64)
);
-- Recombines the enriched rows with the rows that had no crypta_id.
-- The second branch lacks crypta_id_human / last_active_date; this relies on
-- YQL's UNION ALL matching columns by name and filling absent ones with NULL.
$after_humans_join = (
    select *
    from $tmp_plus_humans
    union all
    select *
    from $without_crypta_ids
);
-- Expands one row into the list of rows needed for "_total_" roll-ups.
-- For each of advertiser_name, video_type, category and producttype (in that
-- order) every row produced so far is duplicated with that field replaced by
-- "_total_", so a single input row yields 2^4 = 16 output rows.
-- gender, age_segment and campaign_name are intentionally not totalized.
$totalize = ($row) -> {
    $base = AsList($row);

    $with_advertiser = ListUnionAll(
        $base,
        ListMap(
            $base,
            ($r) -> (AddMember(RemoveMember($r, "advertiser_name"), "advertiser_name", "_total_"))
        )
    );

    $with_video_type = ListUnionAll(
        $with_advertiser,
        ListMap(
            $with_advertiser,
            ($r) -> (AddMember(RemoveMember($r, "video_type"), "video_type", "_total_"))
        )
    );

    $with_category = ListUnionAll(
        $with_video_type,
        ListMap(
            $with_video_type,
            ($r) -> (AddMember(RemoveMember($r, "category"), "category", "_total_"))
        )
    );

    $with_producttype = ListUnionAll(
        $with_category,
        ListMap(
            $with_category,
            ($r) -> (AddMember(RemoveMember($r, "producttype"), "producttype", "_total_"))
        )
    );

    return $with_producttype
};
-- Applies $totalize to every row of $after_humans_join; each input row is
-- replaced by the list of rows the lambda returns.
$totalized =
    process $after_humans_join
    using $totalize(TableRow());
-- Single-day slice: rows whose own fielddate lies inside the reporting period
-- (both bounds inclusive), tagged with window_size = "1day".
$dim_1day = (
    select
        t.*,
        "1day" as window_size
    from $totalized as t
    where fielddate between $date_from and $date_to
);
-- Sliding-window slice: every row is attributed to each report date produced
-- by $dateRange(fielddate, fielddate + 7) that falls inside the reporting
-- period, then flattened to one output row per such date and tagged with
-- window_size = "7days".
-- NOTE(review): if $dateRange includes both ends this covers 8 dates, not 7;
-- confirm against stability_common.sql.
$dim_7days = (
    select *
    from (
        select
            "7days" as window_size,
            ListFilter(
                $dateRange(fielddate, $shiftDate(fielddate, 7)),
                ($d) -> ($d between $date_from and $date_to)
            ) as fielddate,
            t.* without t.fielddate
        from $totalized as t
        -- $date_from_shifted is $shiftDate($date_from, -7), defined at the top.
        where fielddate between $date_from_shifted and $date_to
    )
    flatten list by fielddate
);


-- Input of the final aggregation: the 1-day and 7-day slices stacked
-- together; window_size tells them apart.
$grouping1_source = (
    select *
    from $dim_1day
    union all
    select *
    from $dim_7days
);

-- Final aggregation per (window_size, fielddate, video_type, advertiser_name,
-- category, producttype).
--   crypta_ids        - approximate number of distinct crypta_id values
--   crypta_ids_human  - approximate number of distinct human crypta ids that
--                       pass $lastDateCheck against the row's fielddate
--   yandexuids        - approximate number of distinct non-fresh yandexuids
--   shows / winhits / price / partnerprice - plain sums
--   hits              - sum of hits, reported only on rows where
--                       advertiser_name is "_total_" (NULL otherwise)
$grouped1 = (
    select
        window_size,
        fielddate,
        video_type,
        advertiser_name,
        category,
        producttype,
        CountDistinctEstimate(crypta_id) as crypta_ids,
        CountDistinctEstimate(
            IF(
                crypta_id_human is not null and $lastDateCheck(fielddate, last_active_date),
                crypta_id_human
            )
        ) as crypta_ids_human,
        CountDistinctEstimate(yandexuid_non_fresh) as yandexuids,
        sum(shows) as shows,
        sum(winhits) as winhits,
        sum(price) as price,
        sum(partnerprice) as partnerprice,
        IF(advertiser_name == "_total_", sum(hits), null) as hits
    from $grouping1_source
    group by
        window_size,
        fielddate,
        video_type,
        advertiser_name,
        category,
        producttype
);

-- Replace the contents of the output table with the aggregated rows that have
-- at least 1000 winhits.
insert into $output_table with truncate
select *
from $grouped1
where winhits >= 1000;
