use hahn;
PRAGMA File('bigb.so', 'https://proxy.sandbox.yandex-team.ru/last/BIGB_UDF?attrs={"released":"stable"}&cache=1235435');
PRAGMA udf('bigb.so');
pragma AnsiInForEmptyOrNullableItemsCollections;
pragma yt.Pool = "@[pool]";

-- Report date. `@[date]` is presumably a placeholder filled in by whatever
-- launches this script — confirm. The value is used as the suffix of every
-- dated table path below and as the `fielddate` column of every report.
$date = "@[date]";
-- Input tables for the report date.
$chevent_table = "//cooked_logs/bs-chevent-cooked-log/1d/" || $date;
$phrase_targeting_table = "//home/videoquality/vh_analytics/targeting/" || $date;
$phrase_stat_table = "//home/videoquality/vh_analytics/phrases_stat/" || $date;

-- Output tables for the report date (each is rewritten by an
-- `insert into ... with truncate` further down).
$pages_noselect_grouped_join_domains_table = "//home/videoquality/vh_analytics/cpmadv-122/noselect_pages/" || $date;
$pages_zero_urlmd5_grouped_join_domains_table = "//home/videoquality/vh_analytics/cpmadv-122/zero_urlmd5_pages/" || $date;
$categories_report_table = "//home/videoquality/vh_analytics/cpmadv-122/categories_report/" || $date;

-- Distinct IDs of pages whose PageSelect value has bit 3 (mask 8) set.
-- NOTE(review): the result label of the report built from this set reads
-- "(PageSelect & (1 << 3)) != 8", while this filter keeps "= 8" — confirm
-- which of the two is intended.
$pages_noselect = (
    select distinct
        PageID
    from `home/yabs/dict/Page`
    where (PageSelect & (1 << 3)) = 8
);

-- Rows of the daily chevent table with fraudbits = 0 and a "reach" product
-- type, narrowed to the columns used below and tagged with a derived ad_type.
$chevent = (
    select
        pageid,
        impid,
        producttype,
        -- ad_type: the first matching rule wins.
        case
            when producttype not like "%video%"
                then "media"
            when impressionoptions_preroll or impressionoptions_midroll
                or impressionoptions_postroll or impressionoptions_pauseroll
                then "video_instream"
            when impressionoptions_inpage
                then "video_inpage"
            else "video_other"
        end as ad_type,
        eventcost,
        countertype,
        urlmd5,
        bannerid,
        phraseid,
        orderid,
        clientid
    from $chevent_table
    where fraudbits = 0
        and producttype like "%reach%"
);

-- Share of $x in $y expressed as a percentage, rounded to two decimal places
-- (Math::Round with precision -2).
-- Both arguments are cast to Double before dividing. A zero denominator yields
-- NULL instead of the inf/NaN that floating-point division would produce, so
-- reports never receive non-finite values; a NULL denominator also yields NULL.
$perc = ($x, $y) -> (
    IF(
        $y != 0,
        Math::Round(cast($x as Double) / cast($y as Double) * 100.0, -2)
    )
);

-- Number of events with countertype = 1 (called "shows" here) per ad_type.
-- Serves as the denominator of the per-page show_rate columns.
$by_ad_type = (
    select
        ad_type,
        count_if(countertype = 1) as shows_by_ad_type
    from $chevent
    group by
        ad_type
);

-- Per (pageid, ad_type) totals restricted to pages listed in $pages_noselect.
$pages_noselect_grouped = (
    select
        pageid,
        ad_type,
        count_if(countertype = 1) as shows,
        -- presumably converts micro-units to roubles (x30) net of 18% VAT — TODO confirm
        Math::Round(sum(eventcost / 1000000.0 / 1.18 * 30.0)) as eventcost_rub
    from $chevent
    where pageid in $pages_noselect
    group by
        pageid,
        ad_type
);

-- $pages_noselect_grouped enriched with the report date, a printable page name
-- ("<pageid> <Name>", "-" when the dictionary has no name) and the page's share
-- of all shows of the same ad_type.
$pages_noselect_grouped_join_domains = (
    select
        g.*,
        $date as fielddate,
        (cast(g.pageid as string) ?? "") || " " || (p.Name ?? "-") as page_name,
        $perc(g.shows, at.shows_by_ad_type) as show_rate
    from $pages_noselect_grouped as g
    left join any `home/yabs/dict/Page` as p
        on g.pageid = p.PageID
    left join any $by_ad_type as at
        on g.ad_type = at.ad_type
);

-- Full per-page breakdown, ordered by shows descending, exposed as a named result.
select * from $pages_noselect_grouped_join_domains
order by shows desc
into result `Пейджи с (PageSelect & (1 << 3)) != 8`;

-- The 200 rows with the most shows are written to the dated output table;
-- `with truncate` replaces whatever the table held before.
insert into $pages_noselect_grouped_join_domains_table with truncate
select * from $pages_noselect_grouped_join_domains
order by shows desc
limit 200;

-- The same 200 rows are uploaded to the stat report.
-- NOTE(review): `erase by (fielddate)` presumably removes previously uploaded
-- rows for this fielddate before the upload — confirm.
upsert into stat.`Video/Others/CPMADV-122/noselect_pages/daily` erase by (fielddate)
select * from $pages_noselect_grouped_join_domains
order by shows desc
limit 200;

-- Per (pageid, ad_type) totals for pages that are NOT in $pages_noselect,
-- including how many of the shows carry urlmd5 = 0.
-- NOTE(review): unlike the eventcost_rub sums in $pages_noselect_grouped and
-- $phrase_grouped, this one is not wrapped in Math::Round — confirm that the
-- difference is intended.
$pages_zero_urlmd5_grouped = (
    select
        pageid,
        ad_type,
        count_if(countertype = 1) as shows,
        count_if(countertype = 1 and urlmd5 = 0) as shows_zero_urlmd5,
        sum(eventcost / 1000000.0 / 1.18 * 30.0) as eventcost_rub
    from $chevent
    where pageid not in $pages_noselect
    group by
        pageid,
        ad_type
);

-- Pages with at least one zero-urlmd5 show, enriched with the report date, a
-- printable page name and two percentages: the zero-urlmd5 share of the page's
-- own shows and the page's share of all shows of the same ad_type.
-- Pages absent from the Page dictionary are dropped by the inner join.
$pages_zero_urlmd5_grouped_join_domains = (
    select
        g.*,
        $date as fielddate,
        (cast(g.pageid as string) ?? "") || " " || (p.Name ?? "-") as page_name,
        $perc(g.shows_zero_urlmd5, g.shows) as zero_urlmd5_shows_perc,
        $perc(g.shows, at.shows_by_ad_type) as show_rate
    from (
        select *
        from $pages_zero_urlmd5_grouped
        where shows_zero_urlmd5 > 0
    ) as g
    inner join any (
        -- A filter "not OptionsYandexPage and not OptionsBusinessUnit" used to
        -- be sketched here; it is currently disabled.
        select *
        from `home/yabs/dict/Page`
    ) as p
        on g.pageid = p.PageID
    left join any $by_ad_type as at
        on g.ad_type = at.ad_type
);

-- Full list of pages with zero-urlmd5 shows, worst offenders first, exposed as
-- a named result.
select * from $pages_zero_urlmd5_grouped_join_domains
order by shows_zero_urlmd5 desc
into result `Пейджи с нулевым urlmd5`;

-- All rows (no limit here) are written to the dated output table;
-- `with truncate` replaces whatever the table held before.
insert into $pages_zero_urlmd5_grouped_join_domains_table with truncate
select * from $pages_zero_urlmd5_grouped_join_domains
order by shows_zero_urlmd5 desc;

-- Only the 200 rows with the most zero-urlmd5 shows go to the stat report.
-- NOTE(review): `ERASE BY (fielddate)` presumably removes previously uploaded
-- rows for this fielddate before the upload — confirm.
upsert into stat.`Video/Others/CPMADV-122/zero_urlmd5_pages/daily` ERASE BY (fielddate)
select * from $pages_zero_urlmd5_grouped_join_domains
order by shows_zero_urlmd5 desc
limit 200;

-- Scalar totals that feed the monitoring row.
-- Shows on pages listed in $pages_noselect.
$shows_pages_noselect = (
    select sum(shows)
    from $pages_noselect_grouped
);
-- Shows with urlmd5 = 0 on the remaining pages.
$shows_zero_urlmd5 = (
    select sum(shows_zero_urlmd5)
    from $pages_zero_urlmd5_grouped
);
-- All shows in the filtered chevent set.
$shows_total = (
    select count_if(countertype = 1)
    from $chevent
);

-- Single-row summary for the report date: the share of all shows that fall on
-- $pages_noselect pages and the share that carry a zero urlmd5.
$monitoring = (
    select
        $date as fielddate,
        $perc($shows_pages_noselect, $shows_total) as no_category_targeting_perc,
        $perc($shows_zero_urlmd5, $shows_total) as zero_urlmd5_perc
);

-- Expose the monitoring row as a named result.
select * from $monitoring 
into result `Мониторинг по отсутсвию кат. таргентинга и нулевому urlmd5`;

-- Upload the monitoring row to the stat report.
-- NOTE(review): `ERASE BY (fielddate)` presumably removes previously uploaded
-- rows for this fielddate before the upload — confirm.
upsert into stat.`Video/Others/CPMADV-122/monitoring/daily` ERASE BY (fielddate)
select * from $monitoring;


-- Shows and spend per (phraseid, ad_type, producttype).
-- orderid and clientid are taken from an arbitrary row of each group (some()).
$phrase_grouped = (
    select
        phraseid,
        ad_type,
        producttype,
        some(orderid) as orderid,
        some(clientid) as clientid,
        count_if(countertype = 1) as shows,
        -- presumably converts micro-units to roubles (x30) net of 18% VAT — TODO confirm
        Math::Round(sum(eventcost / 1000000.0 / 1.18 * 30.0)) as eventcost_rub
    from $chevent
    group by
        phraseid,
        ad_type,
        producttype
);

-- Targeting rows that have at least one content category. The synthetic
-- "_total_" category is appended to each list so that a later flatten also
-- produces an across-all-categories row.
$pids_with_categories_ = (
    select
        pid,
        ListExtend(content_category_names, ["_total_"]) as categories
    from $phrase_targeting_table
    where ListLength(content_category_names) > 0
);

-- Attaches to every categorised pid the PhraseIDs recorded for the same
-- GroupExportID in the phrase stat table (deduplicated by the inner group by).
$pids_with_categories = (
    select
        p.*,
        PhraseID
    from $pids_with_categories_ as p
    inner join (
        select
            pid,
            PhraseID
        from $phrase_stat_table
        group by
            GroupExportID as pid,
            PhraseID
    ) as ph
    using (pid)
);

-- Latest full dump of order profiles.
$caesar = '//home/bs/logs/AdsCaesarOrdersFullDump/latest';

-- One row per dump record: OrderID plus a two-way bucket derived from the
-- parsed profile's OrderType — 1 or 8 is "commerce", everything else is
-- "non-commerce".
$order_info = (
    select
        case
            when Bigb::ParseOrderProfile(TableRow()).Resources.DirectBannersLogFields.OrderType in (1, 8)
                then "commerce"
            else "non-commerce"
        end as order_type,
        OrderID
    from $caesar
);

-- Per-phrase totals combined with the phrase's category list and the order's
-- commerce bucket. Phrases without categories are dropped (inner join);
-- order_type stays NULL when the order is missing from $order_info (left join).
$join_commerce_and_categories = (
    select
        pc.categories,
        oi.order_type,
        p.*
    from $phrase_grouped as p
    inner join any $pids_with_categories as pc
        on p.phraseid = pc.PhraseID
    left join any $order_info as oi
        on p.orderid = oi.OrderID
);

-- Expands one row into every combination of "actual value" / "_total_" for the
-- producttype, ad_type and order_type members, i.e. 2 * 2 * 2 = 8 rows.
-- Each step doubles the list: it appends a copy of every row collected so far
-- with one more member replaced by "_total_".
$totalize = ($row) -> {
    $source_rows = [$row];
    $producttype_totals = ListMap($source_rows, ($r) -> (AddMember(RemoveMember($r, "producttype"), "producttype", "_total_")));
    $after_producttype = ListUnionAll($source_rows, $producttype_totals);
    $ad_type_totals = ListMap($after_producttype, ($r) -> (AddMember(RemoveMember($r, "ad_type"), "ad_type", "_total_")));
    $after_ad_type = ListUnionAll($after_producttype, $ad_type_totals);
    $order_type_totals = ListMap($after_ad_type, ($r) -> (AddMember(RemoveMember($r, "order_type"), "order_type", "_total_")));
    $after_order_type = ListUnionAll($after_ad_type, $order_type_totals);
    return $after_order_type
};

-- Every joined row expanded by $totalize, then exploded to one row per entry
-- of its categories list.
$totalized = (
    select *
    from (
        process $join_commerce_and_categories
        using $totalize(TableRow())
    )
    flatten list by categories
);

-- Final categories report: shows, spend and estimated distinct counts of
-- orders, clients and phrases per (producttype, ad_type, order_type, category).
-- NULL dimension values are replaced with "-" inside the grouping keys.
$grouped = (
    select
        $date as fielddate,
        producttype,
        ad_type,
        order_type,
        category,
        sum(shows) as shows,
        sum(eventcost_rub) as eventcost_rub,
        CountDistinctEstimate(orderid) as campaigns,
        CountDistinctEstimate(clientid) as clients,
        CountDistinctEstimate(phraseid) as pids
    from $totalized
    group by
        producttype ?? "-" as producttype,
        ad_type ?? "-" as ad_type,
        order_type ?? "-" as order_type,
        categories ?? "-" as category
);

-- Write the full categories report to the dated output table;
-- `with truncate` replaces whatever the table held before.
insert into $categories_report_table with truncate
select * from $grouped;

-- Upload the same rows to the stat report.
-- NOTE(review): `ERASE BY (fielddate)` presumably removes previously uploaded
-- rows for this fielddate before the upload — confirm.
upsert into stat.`Video/Others/CPMADV-122/categories_money_report/daily` ERASE BY (fielddate)
select * from $grouped;
