use hahn;
pragma AnsiInForEmptyOrNullableItemsCollections;

-- Daily bs-hit-log table for the processed day.
-- NOTE(review): $date is not defined in the visible part of this file;
-- presumably it is supplied by the caller — confirm.
$input_table = "//logs/bs-hit-log/1d/" || $date;
-- Tag tables left-joined further below: the internal one by UUID = contentid,
-- the external one by urlmd5.
$tags_data_internal = "//home/video-hosting/adv_tags/tags_data";
$tags_data_external = "//home/video-hosting/adv_tags/tags_data_external";
-- Source of (pageid, impid) pairs together with video_placement_name and category.
$inapp = "//home/bs/users/yabs-analytics/video/pageimp_ids_video_with_inapp";
-- One row per (pageid, impid) pair from $inapp, with both keys cast to String.
-- Derived labels (some() picks an arbitrary value of the group):
--   stream_type: "instream" when the picked video_placement_name equals "instream",
--                otherwise "outstream";
--   page_type:   "vh" when the picked category contains "VH", otherwise "external".
$good_page_imp = (
    select
        pageid,
        impid,
        case
            when some(video_placement_name) = "instream" then "instream"
            else "outstream"
        end as stream_type,
        case
            when some(category) like "%VH%" then "vh"
            else "external"
        end as page_type
    from (
        select
            cast(pageid as String) as pageid,
            cast(impid as String) as impid,
            video_placement_name,
            category
        from $inapp
    )
    group by
        pageid,
        impid
);
-- $page_imp_dict = select ToDict(AGGREGATE_LIST(AsTuple(AsTuple(pageid, impid), stream_type))) from $good_page_imp;
-- Host-level table that is left-joined on `host` in $join_categs_internal and
-- $join_categs_external.
$hosts_table = "home/videoquality/vh_analytics/MMA-5358/bad_hosts";

-- Normalizes an identifier: the placeholder values "" and "0" become null,
-- any other value is returned unchanged.
$wrap = ($id) -> {
    $is_placeholder = $id in ("", "0");
    return IF($is_placeholder, null, $id)
};

-- Returns the second-level domain of $url (Url::GetDomain with level 2).
$getHost = ($url) -> (Url::GetDomain($url, 2));

-- Extracts the value of key "931" from an "&"-separated query-args string and
-- returns it as a list of UInt32 casts.
-- The value is split on the literal "%0A" (a URL-encoded newline); items that
-- fail the cast stay in the list as nulls.
-- Returns null when the key "931" is absent.
$parseBSTags = ($queryargs) -> {
    $args = Dsv::Parse($queryargs, "&");
    $raw_items = String::SplitToList($args["931"], "%0A");
    $to_uint32 = ($item) -> (CAST($item as UInt32));
    return IF(
        DictContains($args, "931"),
        ListMap($raw_items, $to_uint32),
        null
    )
};

-- Splits a comma-separated string, casts every piece to UInt64 and drops the
-- pieces that could not be cast.
$parseCCInner = ($s) -> {
    $pieces = String::SplitToList($s, ",");
    $numbers = ListMap($pieces, ($piece) -> (CAST($piece as UInt64)));
    return ListNotNull($numbers)
};
-- Parses a comma-separated list of numeric ids via $parseCCInner.
-- Returns the parsed list only when the input is not null and at least one
-- id was parsed; otherwise returns null.
$parseCC = ($s) -> {
    $ids = $parseCCInner($s);
    $has_ids = $s is not null and ListLength($ids) > 0;
    return IF($has_ids, $ids)
};

-- Hit-log rows of $input_table restricted to (pageid, impid) pairs present in
-- $good_page_imp, which also supplies stream_type and page_type.
-- Kept rows must have gotproducttypes containing "video-creative-reach" and at
-- least one usable identifier: contentid or urlmd5 (neither "" nor "0" after $wrap).
-- host is derived from referer; brand-safety tags and content categories are
-- parsed from queryargs and contentcategories respectively.
$hits = (
    select
        contentid,
        urlmd5,
        $getHost(referer) as host,
        referer,
        $parseBSTags(queryargs) as hitlog_bs_tags,
        $parseCC(contentcategories) as hitlog_content_categories,
        stream_type,
        page_type
    from $input_table as hit
    inner join any $good_page_imp as page
        using (pageid, impid)
    where
        gotproducttypes like "%video-creative-reach%"
        and (
            $wrap(contentid) is not null
            or $wrap(cast(urlmd5 as String)) is not null
        )
);

-- True when the tag list is null or holds fewer than 19 items.
-- NOTE(review): a null list passes this check, so aggregates built on it
-- (e.g. hits_with_hitlog_bs_tags) also count hits without parsed tags —
-- confirm this is intended. The meaning of the threshold 19 is not documented here.
$bsTagsOk = ($x) -> {
    $tag_count = ListLength($x);
    return ($x is null) or ($tag_count < 19)
};

-- Per (host, contentid, stream_type, page_type) statistics over hits that carry
-- a usable contentid (not null, "" or "0"):
--   hits                                  - number of hits;
--   hits_with_hitlog_bs_tags              - hits whose tag list passes $bsTagsOk;
--   hits_with_hitlog_content_categories   - hits with a non-empty category list;
--   unique_urls*                          - approximate distinct referer counts
--                                           for the same three conditions (0 when empty).
$with_contentid = (
    select
        host,
        contentid,
        stream_type,
        page_type,
        count(*) as hits,
        count_if($bsTagsOk(hitlog_bs_tags)) as hits_with_hitlog_bs_tags,
        count_if(ListLength(hitlog_content_categories) > 0) as hits_with_hitlog_content_categories,
        coalesce(CountDistinctEstimate(referer), 0) as unique_urls,
        coalesce(
            CountDistinctEstimate(IF($bsTagsOk(hitlog_bs_tags), referer)),
            0
        ) as unique_urls_with_bs_tags,
        coalesce(
            CountDistinctEstimate(IF(ListLength(hitlog_content_categories) > 0, referer)),
            0
        ) as unique_urls_with_content_categories
    from $hits
    where $wrap(contentid) is not null
    group by
        host,
        contentid,
        stream_type,
        page_type
);

-- Enriches the contentid-level statistics with BrandSafetyTags / ContentCategories
-- (matched by contentid = UUID in $tags_data_internal) and with host_category
-- (matched by host in $hosts_table; "normal" when category is null).
-- Both joins are left joins, so rows without a match are kept.
$join_categs_internal = (
    select
        agg.*,
        BrandSafetyTags,
        ContentCategories,
        coalesce(category, "normal") as host_category
    from $with_contentid as agg
    left join any $tags_data_internal as tags
        on (agg.contentid = tags.`UUID`)
    left join any $hosts_table as hosts
        on (agg.host = hosts.host)
);

-- Per (host, urlmd5, stream_type, page_type) statistics over hits that have no
-- usable contentid but do have a usable urlmd5 (not null, "" or "0" after the
-- cast to String):
--   hits                                  - number of hits;
--   hits_with_hitlog_bs_tags              - hits whose tag list passes $bsTagsOk;
--   hits_with_hitlog_content_categories   - hits with a non-empty category list;
--   unique_urls*                          - approximate distinct referer counts
--                                           for the same three conditions (0 when empty).
$without_contentid = (
    select
        host,
        urlmd5,
        stream_type,
        page_type,
        count(*) as hits,
        count_if($bsTagsOk(hitlog_bs_tags)) as hits_with_hitlog_bs_tags,
        count_if(ListLength(hitlog_content_categories) > 0) as hits_with_hitlog_content_categories,
        coalesce(CountDistinctEstimate(referer), 0) as unique_urls,
        coalesce(
            CountDistinctEstimate(IF($bsTagsOk(hitlog_bs_tags), referer)),
            0
        ) as unique_urls_with_bs_tags,
        coalesce(
            CountDistinctEstimate(IF(ListLength(hitlog_content_categories) > 0, referer)),
            0
        ) as unique_urls_with_content_categories
    from $hits
    where
        $wrap(contentid) is null
        and $wrap(cast(urlmd5 as String)) is not null
    group by
        host,
        urlmd5,
        stream_type,
        page_type
);

-- Enriches the urlmd5-level statistics with BrandSafetyTags / ContentCategories
-- (matched by urlmd5 in $tags_data_external) and with host_category
-- (matched by host in $hosts_table; "normal" when category is null).
-- Both joins are left joins, so rows without a match are kept.
$join_categs_external = (
    select
        agg.*,
        BrandSafetyTags,
        ContentCategories,
        coalesce(category, "normal") as host_category
    from $without_contentid as agg
    left join any $tags_data_external as tags
        on (agg.urlmd5 = tags.urlmd5)
    left join any $hosts_table as hosts
        on (agg.host = hosts.host)
);

-- Debug dump: overwrite (WITH TRUNCATE) the table with the urlmd5-level joined rows.
insert into `home/videoquality/vh_analytics/mma-5298-debug/external` WITH TRUNCATE 
select * from $join_categs_external;

-- Debug dump: overwrite (WITH TRUNCATE) the table with the contentid-level joined rows.
insert into `home/videoquality/vh_analytics/mma-5298-debug/internal` WITH TRUNCATE 
select * from $join_categs_internal;

-- Expands one row into 8 rows: the original plus every combination in which
-- host_category, stream_type and/or page_type is replaced by "_total_".
-- Each step doubles the list by appending copies with one more field set to "_total_".
$totalize = ($row) -> {
    $base = AsList($row);

    $host_totals = ListMap(
        $base,
        ($r) -> (AddMember(RemoveMember($r, "host_category"), "host_category", "_total_"))
    );
    $with_host = ListUnionAll($base, $host_totals);

    $stream_totals = ListMap(
        $with_host,
        ($r) -> (AddMember(RemoveMember($r, "stream_type"), "stream_type", "_total_"))
    );
    $with_stream = ListUnionAll($with_host, $stream_totals);

    $page_totals = ListMap(
        $with_stream,
        ($r) -> (AddMember(RemoveMember($r, "page_type"), "page_type", "_total_"))
    );
    return ListUnionAll($with_stream, $page_totals)
};

-- Stacks the two enriched sets and labels each row with data_type:
--   urlmd5-level rows    -> "unparsed" when page_type is "vh", otherwise "external";
--   contentid-level rows -> "internal".
$union = (
    select
        external_rows.*,
        case
            when page_type = "vh" then "unparsed"
            else "external"
        end as data_type
    from $join_categs_external as external_rows
    union all
    select
        internal_rows.*,
        "internal" as data_type
    from $join_categs_internal as internal_rows
);

-- Applies $totalize to every row of $union, so each input row also appears with
-- "_total_" substituted into host_category / stream_type / page_type.
$totalized = process $union using $totalize(TableRow());

-- Total hits per (data_type, stream_type, page_type), computed over the rows
-- whose host_category is "_total_" (i.e. across all host categories).
$totals = (
    select
        data_type,
        stream_type,
        page_type,
        sum(hits) as hits_total
    from $totalized
    where host_category = "_total_"
    group by
        data_type,
        stream_type,
        page_type
);

-- Report rows for $date: sums of the hit / unique-url counters per
-- (data_type, host_category, stream_type, page_type) slice of $totalized.
-- hits_good sums hits only for rows where BrandSafetyTags is not null.
-- Dimension columns are unwrapped (made non-optional); empty sums become 0.
$grouped = (
    select
        $date as fielddate,
        unwrap(data_type) as data_type,
        unwrap(host_category) as host_category,
        unwrap(stream_type) as stream_type,
        unwrap(page_type) as page_type,
        coalesce(sum(hits), 0) as hits,
        coalesce(sum(hits_with_hitlog_bs_tags), 0) as hits_with_hitlog_bs_tags,
        coalesce(sum(hits_with_hitlog_content_categories), 0) as hits_with_hitlog_content_categories,
        coalesce(sum_if(hits, BrandSafetyTags is not null), 0) as hits_good,
        coalesce(sum(unique_urls), 0) as unique_urls,
        coalesce(sum(unique_urls_with_bs_tags), 0) as unique_urls_with_bs_tags,
        coalesce(sum(unique_urls_with_content_categories), 0) as unique_urls_with_content_categories
    from $totalized
    group by
        data_type,
        host_category,
        stream_type,
        page_type
);

-- Attaches hits_total (from $totals, 0 when there is no match) to every grouped
-- row by (data_type, stream_type, page_type) and adds the constant
-- source_type = "_total_".
$add_totals = (
    select
        grp.*,
        coalesce(hits_total, 0) as hits_total,
        "_total_" as source_type
    from $grouped as grp
    left join any $totals as tot
        using (data_type, stream_type, page_type)
);

-- Debug dump: overwrite (WITH TRUNCATE) the table with the final report rows.
insert into `home/videoquality/vh_analytics/mma-5298-debug/add_totals` WITH TRUNCATE 
select * from $add_totals;

-- Publish the same rows to the daily report path on `stat` via upsert.
upsert into stat.`Video/Others/Strm/MMA-5298-tags-data-join-v3/daily`
select * from $add_totals;
