-- Cluster on which every table path below is resolved.
USE hahn;
-- Per its name, switches IN / NOT IN to ANSI behaviour for empty or
-- NULL-containing right-hand sides (see the YQL pragma reference).
PRAGMA AnsiInForEmptyOrNullableItemsCollections;
-- YT pool for the operations started by this script.
-- NOTE(review): "@[...]" values look like template placeholders that the
-- launcher substitutes before execution - confirm.
pragma yt.Pool = "@[pool]";

-- Bounds of the daily log tables handed to RANGE() when reading the hit log.
$date_from = "@[date_from]";
$date_to = "@[date_to]";
-- Accumulated base tables keyed by contentid / urlmd5. The script reads them,
-- merges in the new period and overwrites them (INSERT ... WITH TRUNCATE).
$current_base_table_contentid = "//home/videoquality/vh_analytics/mma-5254/by_contentid";
$current_base_table_urlmd5 = "//home/videoquality/vh_analytics/mma-5254/by_urlmd5";
-- Single-row table overwritten at the end of the script with $date_to.
$last_date_table = "//home/videoquality/vh_analytics/mma-5254/base_last_date";

-- Extracts the tag list carried in query argument "931".
--
-- $queryargs is parsed as "&"-separated key=value pairs; the value stored
-- under key "931" is split on the literal delimiter "%0A" and every token is
-- cast to UInt32.
--
-- Returns:
--   NULL               when key "931" is absent;
--   an empty list      when the key is present but carries no tokens
--                      (for example "931=" or "931=%0A");
--   a list of UInt32?  otherwise. A token that is not a valid UInt32 becomes
--                      a NULL element, because CAST yields NULL on failure.
--
-- SkipEmpty is passed to the splitter on purpose: without it an empty value
-- is split into a single empty token, the result is a one-element [NULL]
-- list, and the "ListLength(bs_tags) > 0" checks further down in this script
-- cannot tell "key present but empty" from "key present with real tags".
$parseBSTags = ($queryargs) -> {
    $parsed = Dsv::Parse($queryargs, "&");
    $tags = $parsed["931"];
    $tokens = String::SplitToList($tags, "%0A", true AS SkipEmpty);
    return if(
        DictContains($parsed, "931"),
        ListMap($tokens, ($x)->(CAST($x as UInt32))),
        null
    )
};

-- Maps the placeholder values "" and "0" to NULL and passes every other value
-- through unchanged. A NULL input stays NULL, because the NOT IN test is then
-- not true and the NULL branch is taken.
$nwrap = ($x) -> {
    $is_meaningful = $x NOT IN ("", "0");
    RETURN IF($is_meaningful, $x, NULL);
};

-- Merges several comma-separated strings into a single one.
--
-- $list is a list of strings, each holding comma-separated items. All items
-- are pooled together, de-duplicated, sorted in ascending order and joined
-- back with ",".
$mergeLists = ($list) -> {
    $items = ListFlatMap($list, ($csv) -> (String::SplitToList($csv, ",")));
    $ordered = ListSort(ListUniq($items));
    RETURN String::JoinFromList($ordered, ",");
};

-- Resolves the content id of a hit: the contentid column when it is not NULL,
-- otherwise query argument "354"; placeholder values ("" and "0") are turned
-- into NULL by $nwrap.
-- NOTE(review): "??" only replaces NULL, so an empty-string contentid column
-- does NOT fall back to the query argument - confirm this is intended.
$pick_contentid = ($column_value, $queryargs) -> (
    $nwrap($column_value ?? Dsv::Parse($queryargs, "&")["354"])
);

-- Filtered hits for the requested period, one row per hit-log record.
--   contentid   resolved content id, or NULL;
--   urlmd5      urlmd5 as Uint64, filled ONLY when no content id was resolved,
--               so each row carries at most one of the two keys;
--   timestamp   eventtime cast to Uint64;
--   bs_tags     tags parsed from query argument "931" (see $parseBSTags).
-- WITH COLUMNS declares contentid as String? for the tables read by RANGE().
$tmp = (
    SELECT
        $pick_contentid(contentid, queryargs) AS contentid,
        IF(
            $pick_contentid(contentid, queryargs) IS NOT NULL,
            NULL,
            CAST($nwrap(urlmd5) AS Uint64)
        ) AS urlmd5,
        CAST(eventtime AS Uint64) AS `timestamp`,
        wonproducttype,
        pageid,
        $parseBSTags(queryargs) AS bs_tags
    FROM RANGE(`logs/bs-hit-log/1d`, $date_from, $date_to)
        WITH COLUMNS Struct<contentid:String?>
    WHERE
        wonproducttype IN ("video-creative-reach", "media-creative-reach")
        AND CAST(hitlogid AS Uint64) > 1
        -- these event flags select the good hits in the Yandex Advertising Network (RSYa)
        AND eventflags IN ("0", "2", "16384", "16386")
        -- media hits are always kept; other hits must mention "roll" in impressionoptions
        AND (
            wonproducttype = "media-creative-reach"
            OR FIND(impressionoptions, "roll") IS NOT NULL
        )
);

-- Hits for which a content id was resolved.
$contentid = (
    SELECT *
    FROM $tmp
    WHERE contentid IS NOT NULL
);

-- Hits keyed by urlmd5. In $tmp the urlmd5 column is filled only when no
-- content id was resolved, so these rows never overlap with $contentid.
$urlmd5 = (
    SELECT *
    FROM $tmp
    WHERE urlmd5 IS NOT NULL
);

-- Per-contentid summary of the requested period:
--   min_timestamp                     earliest hit;
--   min_timestamp_non_null_bs_tags    earliest hit whose bs_tags is not NULL;
--   min_timestamp_non_empty_bs_tags   earliest hit whose bs_tags has at least one element;
--   bs_tags                           one non-empty bs_tags value (SOME does not
--                                     promise which row it comes from);
--   pageids / wonproducttypes         distinct values, sorted and joined with ",".
-- A NULL bs_tags gives a NULL list length, so the "> 0" test is not true and
-- IF falls through to NULL, which the aggregates skip.
$grouped_contentid = (
    SELECT
        contentid,
        MIN(`timestamp`) AS min_timestamp,
        MIN(IF(bs_tags IS NOT NULL, `timestamp`)) AS min_timestamp_non_null_bs_tags,
        MIN(IF(ListLength(bs_tags) > 0, `timestamp`)) AS min_timestamp_non_empty_bs_tags,
        SOME(IF(ListLength(bs_tags) > 0, bs_tags)) AS bs_tags,
        String::JoinFromList(
            ListSort(AGGREGATE_LIST_DISTINCT(pageid)),
            ","
        ) AS pageids,
        String::JoinFromList(
            ListSort(AGGREGATE_LIST_DISTINCT(wonproducttype)),
            ","
        ) AS wonproducttypes,
    FROM $contentid
    GROUP BY contentid
);

-- Per-urlmd5 summary of the requested period:
--   min_timestamp                     earliest hit;
--   min_timestamp_non_null_bs_tags    earliest hit whose bs_tags is not NULL;
--   min_timestamp_non_empty_bs_tags   earliest hit whose bs_tags has at least one element;
--   bs_tags                           one non-empty bs_tags value (SOME does not
--                                     promise which row it comes from);
--   pageids / wonproducttypes         distinct values, sorted and joined with ",".
-- A NULL bs_tags gives a NULL list length, so the "> 0" test is not true and
-- IF falls through to NULL, which the aggregates skip.
$grouped_urlmd5 = (
    SELECT
        urlmd5,
        MIN(`timestamp`) AS min_timestamp,
        MIN(IF(bs_tags IS NOT NULL, `timestamp`)) AS min_timestamp_non_null_bs_tags,
        MIN(IF(ListLength(bs_tags) > 0, `timestamp`)) AS min_timestamp_non_empty_bs_tags,
        SOME(IF(ListLength(bs_tags) > 0, bs_tags)) AS bs_tags,
        String::JoinFromList(
            ListSort(AGGREGATE_LIST_DISTINCT(pageid)),
            ","
        ) AS pageids,
        String::JoinFromList(
            ListSort(AGGREGATE_LIST_DISTINCT(wonproducttype)),
            ","
        ) AS wonproducttypes,
    FROM $urlmd5
    GROUP BY urlmd5
);

-- Rows already stored in the contentid base table together with the rows
-- computed for the requested period.
$contentid_rows_to_merge = (
    SELECT * FROM $current_base_table_contentid
    UNION ALL
    SELECT * FROM $grouped_contentid
);

-- New content of the contentid base table: one row per contentid with the
-- earliest timestamps over old and new data, one of the available bs_tags
-- values (SOME does not promise which), and the merged, sorted,
-- de-duplicated pageids / wonproducttypes lists (see $mergeLists).
$update_contentid = (
    SELECT
        contentid,
        MIN(min_timestamp) AS min_timestamp,
        MIN(min_timestamp_non_empty_bs_tags) AS min_timestamp_non_empty_bs_tags,
        MIN(min_timestamp_non_null_bs_tags) AS min_timestamp_non_null_bs_tags,
        SOME(bs_tags) AS bs_tags,
        $mergeLists(AGGREGATE_LIST_DISTINCT(pageids)) AS pageids,
        $mergeLists(AGGREGATE_LIST_DISTINCT(wonproducttypes)) AS wonproducttypes,
    FROM $contentid_rows_to_merge
    GROUP BY contentid
);

-- Rows already stored in the urlmd5 base table together with the rows
-- computed for the requested period.
$urlmd5_rows_to_merge = (
    SELECT * FROM $current_base_table_urlmd5
    UNION ALL
    SELECT * FROM $grouped_urlmd5
);

-- New content of the urlmd5 base table: one row per urlmd5 with the earliest
-- timestamps over old and new data, one of the available bs_tags values
-- (SOME does not promise which), and the merged, sorted, de-duplicated
-- pageids / wonproducttypes lists (see $mergeLists).
$update_urlmd5 = (
    SELECT
        urlmd5,
        MIN(min_timestamp) AS min_timestamp,
        MIN(min_timestamp_non_empty_bs_tags) AS min_timestamp_non_empty_bs_tags,
        MIN(min_timestamp_non_null_bs_tags) AS min_timestamp_non_null_bs_tags,
        SOME(bs_tags) AS bs_tags,
        $mergeLists(AGGREGATE_LIST_DISTINCT(pageids)) AS pageids,
        $mergeLists(AGGREGATE_LIST_DISTINCT(wonproducttypes)) AS wonproducttypes,
    FROM $urlmd5_rows_to_merge
    GROUP BY urlmd5
);

-- Replace the contentid base table with the merged result.
INSERT INTO $current_base_table_contentid WITH TRUNCATE
SELECT *
FROM $update_contentid;

-- Replace the urlmd5 base table with the merged result.
INSERT INTO $current_base_table_urlmd5 WITH TRUNCATE
SELECT *
FROM $update_urlmd5;

-- Record the upper bound of the processed period as a single-row table.
INSERT INTO $last_date_table WITH TRUNCATE
SELECT
    $date_to AS last_date;
