use hahn;
pragma AnsiInForEmptyOrNullableItemsCollections;
pragma yt.PoolTrees = "physical";
pragma yt.TentativePoolTrees = "cloud";

-- Directory holding the hourly JoinedEFH log tables read by this script.
$root = "//home/bs/logs/JoinedEFH/1h";
-- Lower and upper table names for RANGE(): $date concatenated with a time-of-day suffix.
-- NOTE(review): $date is not defined in the visible part of this file; presumably it is
-- supplied from outside as a date string -- confirm its format against the caller.
$range_start = $date || "T00:00:00";
$range_end = $date || "T23:30:00";

-- Converts the raw brand-safety category string of a hit into a list of tags.
--   * options contain "unknown-brand-safety-categories" -> ["all"]
--   * otherwise, category string contains a comma       -> the string split on ","
--   * anything else                                     -> NULL
-- NOTE(review): a non-empty category string WITHOUT a comma also yields NULL;
-- confirm that this is the intended treatment of single-value inputs.
$parseBSCategories = ($raw_categories, $hit_options) -> (
    CASE
        WHEN $hit_options LIKE "%unknown-brand-safety-categories%" THEN ["all"]
        WHEN $raw_categories LIKE "%,%" THEN String::SplitToList($raw_categories, ",")
        ELSE NULL
    END
);



-- Returns the domain of the given URL as produced by Url::GetDomain with level 2.
$getHost = ($address) -> (Url::GetDomain($address, 2));


-- Path of the table that is left-joined by (pageid, impid) below; its `category` and
-- `video_placement_name` columns are the ones read by this script.
$inapp = "//home/bs/users/yabs-analytics/video/pageimp_ids_video_with_inapp";
-- (pageid, impid) pairs taken from the PageImp dictionary, restricted to rows where
-- at least one of AdTypeVideo / AdTypeMedia / AdTypeVideoNonSkippable holds.
$good_page_imp_ = (
    SELECT
        PageID AS pageid,
        ImpID AS impid
    FROM `home/yabs/dict/PageImp`
    WHERE AdTypeVideo
        OR AdTypeMedia
        OR AdTypeVideoNonSkippable
);
-- Labels every selected (pageid, impid) pair with three attributes:
--   category    - the $inapp category when it contains "VH", otherwise the Page dictionary Name;
--   stream_type - "instream" / "outstream" from $inapp.video_placement_name, "media" when it is absent;
--   page_type   - "vh" for VH categories, "yandex" when either Page option flag holds, else "external".
-- Both lookups are LEFT JOIN ANY, so pairs missing from $inapp or from the Page dictionary are kept.
$good_page_imp = (
    SELECT
        IF(i.category LIKE "%VH%", i.category, p.Name) AS category,
        CASE
            WHEN i.video_placement_name = "instream" THEN "instream"
            WHEN i.video_placement_name IS NOT NULL THEN "outstream"
            ELSE "media"
        END AS stream_type,
        CASE
            WHEN i.category LIKE "%VH%" THEN "vh"
            WHEN p.OptionsYandexPage OR p.OptionsBusinessUnit THEN "yandex"
            ELSE "external"
        END AS page_type,
        g.*
    FROM $good_page_imp_ AS g
    LEFT JOIN ANY $inapp AS i
        ON (g.pageid = i.pageid AND g.impid = i.impid)
    LEFT JOIN ANY `home/yabs/dict/Page` AS p
        ON (g.pageid = p.PageID)
);

-- Log rows for the requested table range, limited to the "video-creative-reach" and
-- "media-creative-reach" product types with CounterType = 1 and FraudBits = 0.
-- producttype is collapsed to "video" / "media"; bs_tags_parsed is the parsed tag list.
-- NOTE(review): QueryArgs, q931, BrandSafetyCategories and host are not read by any
-- later query in the visible part of this file -- confirm whether they are still needed.
$tmp = (
    SELECT
        QueryArgs,
        Dsv::Parse("?" || QueryArgs, "&")["931"] AS q931,
        BrandSafetyCategories,
        $parseBSCategories(BrandSafetyCategories, HitOptions) AS bs_tags_parsed,
        PageID AS pageid,
        ImpID AS impid,
        $getHost(Referer) AS host,
        IF(ProductType LIKE "%video%", "video", "media") AS producttype
    FROM RANGE($root, $range_start, $range_end)
    WHERE CounterType = 1
        AND FraudBits = 0
        AND (
            ProductType LIKE "%video-creative-reach%"
            OR ProductType LIKE "%media-creative-reach%"
        )
);

-- Attaches the page/impression labels to the log rows. The join is INNER, so rows whose
-- (pageid, impid) pair is absent from $good_page_imp are dropped.
-- For "media" product types stream_type is forced to "media" regardless of the label.
$join = (
    SELECT
        t.*,
        IF(producttype = "media", "media", stream_type) AS stream_type,
        page_type,
        category
    FROM $tmp AS t
    INNER JOIN ANY $good_page_imp AS g
        USING (pageid, impid)
);

-- Buckets a parsed tag list into a string label:
--   NULL list -> "null", exactly ["all"] -> "all", otherwise the list length as a string.
$getCat = ($tags) -> {
    RETURN CASE
        WHEN $tags IS NULL THEN "null"
        WHEN $tags = ["all"] THEN "all"
        ELSE CAST(ListLength($tags) AS String)
    END;
};

-- For every joined row builds the three nested "paths" it contributes to
-- (stream_type; stream_type + page_type; stream_type + page_type + category)
-- together with the brand-safety bucket label of the row.
$map1 = (
    SELECT
        producttype,
        [
            [stream_type],
            [stream_type, page_type],
            [stream_type, page_type, category]
        ] AS identificator,
        $getCat(bs_tags_parsed) AS bs_cat
    FROM $join
);

-- Explodes the list of paths into one row per path and aggregates per (path, producttype):
-- total rows plus counts of the "ok" ("null"/"0"), "all" and "some" brand-safety buckets.
-- NOTE(review): Unwrap aborts the query at run time if a path element is NULL; category
-- originates from left joins upstream, so presumably it is expected to be always filled -- confirm.
$gr = (
    SELECT
        producttype,
        ListMap(identificator, ($part) -> (Unwrap($part))) AS identificator,
        COUNT(*) AS `count`,
        COUNT_IF(bs_cat IN ("null", "0")) AS bs_tags_ok,
        COUNT_IF(bs_cat = "all") AS bs_tags_all,
        COUNT_IF(bs_cat NOT IN ("null", "0", "all")) AS bs_tags_some
    FROM $map1
    FLATTEN LIST BY identificator
    GROUP BY identificator, producttype
);

-- Aggregates for paths shorter than three elements (levels one and two).
$gr12 = (
    SELECT *
    FROM $gr
    WHERE ListLength(identificator) < 3
);

-- Aggregates for three-element paths, with the first two elements exposed as ident2
-- so that each row can be matched to its two-element parent path.
$gr3 = (
    SELECT
        ListTake(identificator, 2) AS ident2,
        s.*
    FROM $gr AS s
    WHERE ListLength(identificator) = 3
);

-- Three-element rows extended with parent_count: the `count` of the row in $gr12 that has
-- the same producttype and whose path equals ident2. The helper column ident2 is dropped.
$gr3a = (
    SELECT
        p.`count` AS parent_count,
        g.* WITHOUT g.ident2
    FROM $gr3 AS g
    LEFT JOIN ANY $gr12 AS p
        ON (g.ident2 = p.identificator AND g.producttype = p.producttype)
);

-- Puts all levels back into one result set: three-element rows (which carry parent_count)
-- followed by the one- and two-element rows (which do not select that column).
$reunite = (
    SELECT * FROM $gr3a
    UNION ALL
    SELECT * FROM $gr12
);

-- Final rows to publish: each row is stamped with $date as fielddate; a row is kept when
-- its `count` is at least 10000 or when its path contains the element "vh".
$for_push = (
    SELECT
        $date AS fielddate,
        s.*
    FROM $reunite AS s
    WHERE `count` >= 10000
        OR "vh" IN identificator
);

-- Writes the prepared rows into the `Video/Others/Strm/MMA-5298-tree-shows/daily` target on `stat`.
-- NOTE(review): ERASE BY (fielddate) presumably replaces previously stored rows for the same
-- fielddate before inserting -- confirm against the sink's documentation.
upsert into stat.`Video/Others/Strm/MMA-5298-tree-shows/daily` ERASE BY (fielddate)
select * from $for_push;
