use hahn;
pragma yt.Pool = "@[pool]";
-- pragma yt.DataSizePerJob = "256M";
-- pragma yt.MaxJobCount = "99999";
-- pragma config.flags("LLVM", "OFF");
pragma library("stability_common.sql");
pragma yt.DefaultMemoryLimit = "1G";
import stability_common symbols $parseDate, $shiftDate, $dateRange;
pragma library("lib_humans_0.sql");
pragma library("lib_humans_1.sql");
import lib_humans_0 symbols $get_human_cryptaids;

-- Inclusive bounds of the reporting period.
-- NOTE(review): the "@[...]" values look like template placeholders filled in by the launcher — confirm.
$date_from = "@[date_from]";
$date_to = "@[date_to]";
-- Lower bound used when reading source tables: $date_from shifted by -7
-- (presumably days — verify against $shiftDate in stability_common.sql).
$date_from_shifted = $shiftDate($date_from, -7);

-- Path of the table that receives the final result.
$output_table = "@[output_table]";

-- Ad squeeze rows read from the tables in [$date_from_shifted, $date_to],
-- restricted to product types whose name contains "awaps" or "reach".
-- pageid, impid and yandexuid are unwrapped, so a NULL in any of them
-- is a runtime error rather than a silently propagated NULL.
$source = (
    select
        unwrap(pageid) as pageid,
        unwrap(impid) as impid,
        unwrap(yandexuid) as yandexuid,
        squeeze.* without pageid, impid, yandexuid
    from range(
        `//home/videoquality/vh_analytics/ad_squeeze_campaigns`,
        $date_from_shifted,
        $date_to
    ) as squeeze
    where
        producttype like "%awaps%"
        or producttype like "%reach%"
);

-- List of (up to) 100 advertiser names with the largest total price in $source.
-- Used as a scalar list (via ListHas) to decide which advertisers keep their own
-- name and which are folded into "Other".
-- Rows without an advertiser are excluded from the ranking: such rows are dropped
-- before aggregation anyway, so a NULL "advertiser" must not occupy one of the slots.
$top100_advs = (
    select
        TOP_BY(advertiser_name, price, 100)
    from (
        select
            advertiser_name,
            sum(price) as price
        from $source
        where advertiser_name is not null
        group by advertiser_name
    )
);

-- Returns the tail of $s that starts 10 positions before its end,
-- i.e. the last 10 characters for inputs of length >= 10.
-- NOTE(review): for shorter inputs the result depends on how the negative
-- offset survives the cast to Uint32 — confirm if short values can occur.
$get_last_10 = ($s) -> (
    substring($s, cast(Length($s) - 10 as Uint32))
);
-- Returns $yu unless it looks "fresh" relative to the event date $fd, in which case NULL.
-- The last 10 characters of $yu are read as a Unix timestamp in seconds
-- (presumably the moment the yandexuid was issued — confirm) and converted to a date.
-- NULL is returned when that date cannot be derived or is on/after the parsed $fd.
$nullify_fresh = ($yu, $fd) -> {
    $yu_seconds = CAST($get_last_10(CAST($yu as String)) as Uint32);
    $yu_date = DateTime::MakeDate(DateTime::FromSeconds($yu_seconds));
    $event_date = $parseDate($fd);
    $kept = case
        when $yu_date is null or $yu_date >= $event_date then null
        else $yu
    end;
    return $kept
};
-- Maps 0 to NULL and passes every other value (including NULL) through unchanged.
$nullify_zero = ($c) -> (
    case
        when $c == 0 then NULL
        else $c
    end
);
-- Row-level working set built from $source:
--   * advertisers outside $top100_advs are renamed to "Other";
--   * category is collapsed to "other" / "vh" / "yandex";
--   * missing gender / age_segment become "unknown";
--   * yandexuid is replaced by yandexuid_non_fresh (NULL for fresh ids).
-- Rows without a campaign or advertiser name are dropped.
$tmp = (
    select
        fielddate,
        case
            when ListHas($top100_advs, advertiser_name) then advertiser_name
            else "Other"
        end as advertiser_name,
        campaign_name,
        producttype,
        case
            when category == "other" then "other"
            when category like "%VH%" then "vh"
            else "yandex"
        end as category,
        video_type,
        coalesce(gender, "unknown") as gender,
        coalesce(age_segment, "unknown") as age_segment,
        crypta_id,
        $nullify_fresh(yandexuid, fielddate) as yandexuid_non_fresh,
        shows,
        winhits,
        hits,
        price,
        partnerprice,
        selfmincpm,
        dspmincpm,
        effective_mincpm
    from $source
    where
        campaign_name is not null
        and advertiser_name is not null
);
-- $top_hosts = (
--     select unwrap(ToSet(ListMap(TOPFREQ(host, 100), ($x)->(unwrap($x.Value)))))
--     from $tmp_
-- );
-- $tmp = (
--     select
--         IF(DictContains($top_hosts, host), host, "other") as host,
--         t.* without host
--     from $tmp_ as t
-- );
-- Rows of $tmp that carry no usable crypta_id (NULL or 0).
-- These rows bypass the join with the human crypta ids.
$without_crypta_ids = (
    select *
    from $tmp
    where
        crypta_id is null
        or crypta_id == 0
);
-- Rows of $tmp that carry a usable crypta_id (neither NULL nor 0).
-- This must be the exact complement of the "crypta_id is null or crypta_id == 0"
-- partition: both parts are later concatenated with UNION ALL, so the conditions
-- are combined with AND — with OR, rows having crypta_id == 0 would fall into
-- both partitions and be counted twice.
$with_crypta_id = (
    select *
    from $tmp
    where
        crypta_id is not null
        and crypta_id != 0
);
-- True when both dates parse and $lad is at most 30 days before $fd.
-- The difference is not bounded from below, so a $lad later than $fd also passes.
--   $fd  - event date (as accepted by $parseDate)
--   $lad - last active date (as accepted by $parseDate)
$lastDateCheck = ($fd, $lad) -> {
    $event_date = $parseDate($fd);
    $last_active = $parseDate($lad);
    $days_between = DateTime::ToDays($event_date - $last_active);
    return $event_date is not null
        and $last_active is not null
        and $days_between is not null
        and $days_between <= 30
};
-- Rows with a crypta_id, enriched with the matching entry from $get_human_cryptaids():
-- crypta_id_human and last_active_date stay NULL when there is no match.
-- NOTE(review): if the humans source can hold several rows per cryptaId,
-- this join multiplies event rows — confirm uniqueness in lib_humans_0.sql.
$tmp_plus_humans = (
    select
        events.*,
        humans.cryptaId as crypta_id_human,
        humans.last_active_date as last_active_date
    from $with_crypta_id as events
    left join $get_human_cryptaids() as humans
        on (events.crypta_id == CAST(humans.cryptaId as UInt64))
);
-- All working rows again: the enriched rows plus the rows that skipped the join.
-- The second branch has no crypta_id_human / last_active_date columns.
$after_humans_join = (
    select *
    from $tmp_plus_humans

    union all

    select *
    from $without_crypta_ids
);
-- Expands one row (a struct) into the list of its roll-up variants.
-- For gender, age_segment, video_type and category in turn, every row collected so far
-- is duplicated with that field set to "_total_" (doubling the list each time).
-- Finally, for producttype, every row gets two extra copies: one with "_total_" and one
-- with "_awaps_" (original value contains "awaps") or "_direct_" (anything else).
-- One input row therefore yields 16 * 3 = 48 rows.
$totalize = ($row) -> {
    $base = AsList($row);
    $plus_gender = ListUnionAll(
        $base,
        ListMap($base, ($r) -> (AddMember(RemoveMember($r, "gender"), "gender", "_total_")))
    );
    $plus_age = ListUnionAll(
        $plus_gender,
        ListMap($plus_gender, ($r) -> (AddMember(RemoveMember($r, "age_segment"), "age_segment", "_total_")))
    );
    $plus_video_type = ListUnionAll(
        $plus_age,
        ListMap($plus_age, ($r) -> (AddMember(RemoveMember($r, "video_type"), "video_type", "_total_")))
    );
    $plus_category = ListUnionAll(
        $plus_video_type,
        ListMap($plus_video_type, ($r) -> (AddMember(RemoveMember($r, "category"), "category", "_total_")))
    );
    $producttype_rollups = ListFlatMap(
        $plus_category,
        ($r) -> ([
            AddMember(RemoveMember($r, "producttype"), "producttype", "_total_"),
            AddMember(
                RemoveMember($r, "producttype"),
                "producttype",
                IF($r.producttype like "%awaps%", "_awaps_", "_direct_")
            )
        ])
    );
    return ListUnionAll($plus_category, $producttype_rollups)
};
-- Run $totalize over every row of $after_humans_join, so each input row is replaced
-- by the list of its roll-up variants.
$totalized = process $after_humans_join using $totalize(TableRow());
-- Single-day window: rows of the reporting period as they are, tagged "1day".
$dim_1day = (
    select
        rolled.*,
        "1day" as window_size
    from $totalized as rolled
    where fielddate between $date_from and $date_to
);
-- Sliding window tagged "7days": every row is copied to each report date returned by
-- $dateRange(fielddate, fielddate shifted by +7) that lies inside [$date_from, $date_to].
-- NOTE(review): if $dateRange includes both ends, a row contributes to 8 report dates,
-- not 7 — confirm against stability_common.sql.
$dim_7days = (
    select *
    from (
        select
            "7days" as window_size,
            ListFilter(
                $dateRange(fielddate, $shiftDate(fielddate, 7)),
                ($d) -> ($d between $date_from and $date_to)
            ) as fielddate,
            rolled.* without rolled.fielddate
        from $totalized as rolled
        -- $date_from_shifted is $shiftDate($date_from, -7)
        where fielddate between $date_from_shifted and $date_to
    )
    flatten list by fielddate
);
-- $dim_30days = (
--     select * from (
--         select
--             "30days" as window_size,
--             ListFilter($dateRange(
--                 fielddate,
--                 $shiftDate(fielddate, 30)
--             ), ($x)->($x >= $date_from and $x <= $date_to)) as fielddate,
--             t.* without t.fielddate
--         from $totalized as t
--         where fielddate >= $shiftDate($date_from, -30) and fielddate <= $date_to
--     )
--     flatten list by fielddate
-- );
-- Input of all aggregations below: the "1day" and "7days" row sets stacked together.
$grouping1_source = (
    select *
    from $dim_1day

    union all

    select *
    from $dim_7days
);
-- Hits and winhits per campaign inside every report slice
-- (window, date, video type, advertiser, gender, age segment, product type, category).
-- Rows whose campaign_name is the "_total_" marker are excluded.
$grouped_by_adv_1 = (
    select
        window_size,
        fielddate,
        video_type,
        advertiser_name,
        gender,
        age_segment,
        campaign_name,
        producttype,
        category,
        sum(hits) as hits,
        sum(winhits) as winhits
    from $grouping1_source
    where campaign_name != "_total_"
    group by
        window_size, fielddate, video_type, advertiser_name,
        gender, age_segment, campaign_name, producttype, category
);

-- Approximate campaign counts per report slice, built on the per-campaign sums:
--   campaigns                       - all campaigns in the slice
--   campaigns_with_10_plus_hits     - campaigns with at least 10 hits
--   campaigns_with_10_plus_winhits  - campaigns with at least 10 winhits
-- campaign_name itself is replaced by the "_total_" marker.
$grouped_by_adv_2 = (
    select
        window_size,
        fielddate,
        video_type,
        advertiser_name,
        "_total_" as campaign_name,
        gender,
        age_segment,
        producttype,
        category,
        CountDistinctEstimate(campaign_name) as campaigns,
        CountDistinctEstimate(IF(hits >= 10, campaign_name, null)) as campaigns_with_10_plus_hits,
        CountDistinctEstimate(IF(winhits >= 10, campaign_name, null)) as campaigns_with_10_plus_winhits
    from $grouped_by_adv_1
    group by
        window_size, fielddate, video_type, advertiser_name,
        gender, age_segment, producttype, category
);
-- Main aggregate per report slice: approximate audience sizes, summed counters and
-- money, and medians of the mincpm columns.
-- crypta_ids_human counts only human crypta ids that pass $lastDateCheck
-- for the slice date.
$grouped1 = (
    select
        window_size,
        fielddate,
        video_type,
        advertiser_name,
        gender,
        category,
        producttype,
        age_segment,
        CountDistinctEstimate(crypta_id) as crypta_ids,
        CountDistinctEstimate(
            IF(
                crypta_id_human is not null and $lastDateCheck(fielddate, last_active_date),
                crypta_id_human,
                null
            )
        ) as crypta_ids_human,
        CountDistinctEstimate(yandexuid_non_fresh) as yandexuids,
        sum(shows) as shows,
        sum(winhits) as winhits,
        sum(price) as price,
        sum(partnerprice) as partnerprice,
        sum(hits) as hits,
        median(dspmincpm) as dspmincpm,
        median(selfmincpm) as selfmincpm,
        median(effective_mincpm) as effective_mincpm
    from $grouping1_source
    group by
        window_size, fielddate, video_type, advertiser_name,
        producttype, gender, category, age_segment
);
-- Audience by ad exposure (original note: users with 1, 2-3, 4-10, 10+ ad shows);
-- only the "2 or more" and "10 or more" thresholds are computed here.

-- Step 1: total shows and hits of every non-fresh yandexuid inside a report slice.
$shows_hits_per_yandexuid = (
    select
        window_size,
        fielddate,
        video_type,
        advertiser_name,
        gender,
        category,
        producttype,
        age_segment,
        yandexuid_non_fresh,
        sum(shows) as shows,
        sum(hits) as hits
    from $grouping1_source
    group by
        window_size, fielddate, video_type, advertiser_name,
        gender, category, producttype, age_segment,
        yandexuid_non_fresh
);
-- Step 2: approximate number of yandexuids per slice that reach each threshold.
$yandexuids_shows_group = (
    select
        window_size,
        fielddate,
        video_type,
        advertiser_name,
        gender,
        category,
        producttype,
        age_segment,
        CountDistinctEstimate(IF(shows >= 2, yandexuid_non_fresh, null)) as yandexuids_with_2_plus_shows,
        CountDistinctEstimate(IF(shows >= 10, yandexuid_non_fresh, null)) as yandexuids_with_10_plus_shows,
        CountDistinctEstimate(IF(hits >= 2, yandexuid_non_fresh, null)) as yandexuids_with_2_plus_hits,
        CountDistinctEstimate(IF(hits >= 10, yandexuid_non_fresh, null)) as yandexuids_with_10_plus_hits
    from $shows_hits_per_yandexuid
    group by
        window_size, fielddate, video_type, advertiser_name,
        gender, category, producttype, age_segment
);
-- Same exposure thresholds as for yandexuids, but counted over crypta_id_human.
-- NOTE(review): no $lastDateCheck filter is applied to crypta_id_human here,
-- unlike crypta_ids_human in $grouped1 — confirm this is intended.

-- Step 1: total shows and hits of every human crypta id inside a report slice.
$shows_hits_per_crypta_id_human = (
    select
        window_size,
        fielddate,
        video_type,
        advertiser_name,
        gender,
        category,
        producttype,
        age_segment,
        crypta_id_human,
        sum(shows) as shows,
        sum(hits) as hits
    from $grouping1_source
    group by
        window_size, fielddate, video_type, advertiser_name,
        gender, category, producttype, age_segment,
        crypta_id_human
);
-- Step 2: approximate number of human crypta ids per slice that reach each threshold.
$crypta_ids_shows_group = (
    select
        window_size,
        fielddate,
        video_type,
        advertiser_name,
        gender,
        category,
        producttype,
        age_segment,
        CountDistinctEstimate(IF(shows >= 2, crypta_id_human, null)) as crypta_ids_with_2_plus_shows,
        CountDistinctEstimate(IF(shows >= 10, crypta_id_human, null)) as crypta_ids_with_10_plus_shows,
        CountDistinctEstimate(IF(hits >= 2, crypta_id_human, null)) as crypta_ids_with_2_plus_hits,
        CountDistinctEstimate(IF(hits >= 10, crypta_id_human, null)) as crypta_ids_with_10_plus_hits
    from $shows_hits_per_crypta_id_human
    group by
        window_size, fielddate, video_type, advertiser_name,
        gender, category, producttype, age_segment
);
-- Final wide row per report slice: the main aggregate plus the exposure-threshold
-- audiences (missing matches become 0) and the campaign counts.
-- All three joins use the same eight slice keys.
-- NOTE(review): campaigns_with_10_plus_winhits is computed in $grouped_by_adv_2
-- but not selected here — confirm whether it should be part of the output.
$grouped_join = (
    select
        base.*,
        coalesce(yu.yandexuids_with_2_plus_shows, 0) as yandexuids_with_2_plus_shows,
        coalesce(cr.crypta_ids_with_2_plus_shows, 0) as crypta_ids_with_2_plus_shows,
        coalesce(yu.yandexuids_with_10_plus_shows, 0) as yandexuids_with_10_plus_shows,
        coalesce(cr.crypta_ids_with_10_plus_shows, 0) as crypta_ids_with_10_plus_shows,
        coalesce(yu.yandexuids_with_2_plus_hits, 0) as yandexuids_with_2_plus_hits,
        coalesce(cr.crypta_ids_with_2_plus_hits, 0) as crypta_ids_with_2_plus_hits,
        coalesce(yu.yandexuids_with_10_plus_hits, 0) as yandexuids_with_10_plus_hits,
        coalesce(cr.crypta_ids_with_10_plus_hits, 0) as crypta_ids_with_10_plus_hits,
        adv.campaigns as campaigns,
        adv.campaigns_with_10_plus_hits as campaigns_with_10_plus_hits
    from $grouped1 as base
    left join $yandexuids_shows_group as yu on (
        base.window_size == yu.window_size
        and base.fielddate == yu.fielddate
        and base.video_type == yu.video_type
        and base.advertiser_name == yu.advertiser_name
        and base.gender == yu.gender
        and base.age_segment == yu.age_segment
        and base.category == yu.category
        and base.producttype == yu.producttype
    )
    left join $crypta_ids_shows_group as cr on (
        base.window_size == cr.window_size
        and base.fielddate == cr.fielddate
        and base.video_type == cr.video_type
        and base.advertiser_name == cr.advertiser_name
        and base.gender == cr.gender
        and base.age_segment == cr.age_segment
        and base.category == cr.category
        and base.producttype == cr.producttype
    )
    left join $grouped_by_adv_2 as adv on (
        base.window_size == adv.window_size
        and base.fielddate == adv.fielddate
        and base.video_type == adv.video_type
        and base.advertiser_name == adv.advertiser_name
        and base.gender == adv.gender
        and base.age_segment == adv.age_segment
        and base.category == adv.category
        and base.producttype == adv.producttype
    )
);
-- Output rows with non-optional dimension columns.
-- Rows with a NULL in any filtered dimension are removed first, then every dimension
-- is unwrapped; all remaining (measure) columns are passed through unchanged.
$unwrap_dimensions = (
    select
        unwrap(window_size) as window_size,
        unwrap(fielddate) as fielddate,
        unwrap(video_type) as video_type,
        unwrap(advertiser_name) as advertiser_name,
        unwrap(gender) as gender,
        unwrap(category) as category,
        unwrap(age_segment) as age_segment,
        unwrap(producttype) as producttype,
        -- everything except the dimensions re-declared above
        joined.* without
            window_size,
            fielddate,
            video_type,
            advertiser_name,
            category,
            gender,
            age_segment,
            producttype
    from $grouped_join as joined
    where
        fielddate is not null
        and video_type is not null
        and advertiser_name is not null
        and gender is not null
        and age_segment is not null
        and category is not null
        and producttype is not null
);

-- Write the final rows to $output_table; "with truncate" replaces any existing
-- table contents instead of appending.
insert into $output_table with truncate
select * from $unwrap_dimensions;

/*upsert
upsert into stat.`@[report]/daily`
select * from $unwrap_dimensions;
upsert*/