use hahn;
pragma yt.Pool = "@[pool]";
pragma AnsiInForEmptyOrNullableItemsCollections;

-- Report date. "@[date]" looks like a template placeholder that the launching tool
-- substitutes before the query runs (NOTE(review): confirm the substituted format).
$date = "@[date]";

-- Per-date input tables: each path is a fixed directory concatenated with $date.
-- Campaign-level statistics (read by $join_cid_stat and $treelike).
$campaign_stat_table = "//home/videoquality/vh_analytics/underimpressions_tmp/" || $date;
-- Phrase-level statistics (Cost, Shows, GroupExportID are read from it below).
$phrase_stat_table = "//home/videoquality/vh_analytics/phrases_stat/" || $date;
-- Phrase-level targeting settings (cid, pid, content_category_names, age, gender, ...).
$phrase_targeting_table = "//home/videoquality/vh_analytics/targeting/" || $date;
-- Campaign-level targeting settings (bs_categories_names, allowed_page_ids).
$campaigns_targeting_table = "//home/videoquality/vh_analytics/targeting_campaigns/" || $date;

-- Normalises a tree label: both "" and NULL become the literal "empty",
-- any other value is returned unchanged.
$w = ($value) -> (coalesce(IF($value == "", null, $value), "empty"));

-- Builds the list of tree paths for one campaign row, from the coarsest level
-- (media_type only) down to the single campaign (media_type, campaign_type,
-- client_name, campaign_id). Every label is normalised with $w.
$getTree = ($row) -> {
    $media = $w($row.media_type);
    $kind = $w($row.campaign_type);
    $client = $w($row.client_name);
    $campaign = $w($row.campaign_id);
    return [
        [$media],
        [$media, $kind],
        [$media, $kind, $client],
        [$media, $kind, $client, $campaign]
    ]
};

-- Campaign ids that have at least one phrase row with content categories set.
$cids_whitelist = (
    select cid
    from $phrase_targeting_table
    where content_category_names is not null
    group by cid
);

-- All phrase targeting rows (with or without content categories) that belong
-- to a whitelisted campaign.
$phrase_targeting_whitelisted = (
    select *
    from $phrase_targeting_table as pt
    where pt.cid in $cids_whitelist
);

-- Whitelisted phrase targeting rows enriched with the amount realised by each phrase.
-- A phrase with no rows in the phrase stat table keeps amount_realised = NULL (left join).
$join_phrase_stat = (
    select
        pt.*,
        ps.amount_realised as amount_realised
    from $phrase_targeting_whitelisted as pt
    left join any (
        -- Cost summed per GroupExportID, exposed as pid to match the targeting rows.
        -- NOTE(review): 1000000.0, 1.18 and 30.0 are unexplained constants here —
        -- presumably a unit scale, VAT removal and a conversion rate; confirm.
        select
            GroupExportID as pid,
            sum(Cost / 1000000.0 / 1.18 * 30.0) as amount_realised
        from $phrase_stat_table
        group by GroupExportID
    ) as ps using (pid)
);

-- Per campaign: number of phrases without content categories (non_cat_pids)
-- and with content categories (cat_pids).
$cid_to_phrases_count = (
    select
        jps.cid as cid,
        coalesce(count_if(jps.content_category_names is null), 0) as non_cat_pids,
        coalesce(count_if(jps.content_category_names is not null), 0) as cat_pids
    from $join_phrase_stat as jps
    group by jps.cid
);

-- Per campaign: total amount realised by phrases that have no content categories.
-- Campaigns without such phrases produce no row at all.
$cid_to_non_cat_cost = (
    select
        jps.cid as cid,
        sum(jps.amount_realised) as non_cat_amount_realised
    from $join_phrase_stat as jps
    where jps.content_category_names is null
    group by jps.cid
);

-- Replaces NaN, infinite and negative values with 0; everything else
-- (including NULL) is passed through unchanged.
$nz = ($v) -> (IF(Math::IsNaN($v) or Math::IsInf($v) or $v < 0, 0, $v));

-- Campaign-level figures for every whitelisted campaign, plus the expected amount that
-- falls on each content-category phrase of the campaign:
--     (campaign amount_expected - amount realised by category-less phrases) / cat_pids
--
-- The join is driven by $cid_to_phrases_count, which has one row per campaign present in
-- $join_phrase_stat. $cid_to_non_cat_cost only contains campaigns with at least one
-- category-less phrase, so driving the join from it drops campaigns whose phrases all
-- have content categories and leaves their expected amounts NULL downstream.
-- Output columns are unchanged: cid, non_cat_amount_realised, client_name,
-- amount_accepted, amount_expected, amount_realised, underimpression,
-- underimpression_nz, amount_expected_from_cat_pid.
$join_cid_stat = (
    select
        cp.cid as cid,
        c.non_cat_amount_realised as non_cat_amount_realised,
        client_name ?? "-" as client_name,
        amount_accepted ?? 0 as amount_accepted,
        amount_expected ?? 0 as amount_expected,
        amount_realised ?? 0 as amount_realised,
        underimpression ?? 0 as underimpression,
        underimpression_nz ?? 0 as underimpression_nz,
        -- A missing or NULL category-less cost counts as zero spend outside categories.
        -- $nz turns negative, infinite and NaN results into 0.
        $nz((amount_expected - (c.non_cat_amount_realised ?? 0.0)) / cast(cat_pids as Double)) as amount_expected_from_cat_pid
    from $cid_to_phrases_count as cp
    left join any $cid_to_non_cat_cost as c on (cp.cid = c.cid)
    left join any $campaign_stat_table as cs on (cp.cid = cast(cs.campaign_id as Int64))
);

-- One row per phrase that has content categories, carrying the phrase's own figures,
-- the expected amount per category phrase of its campaign, and the campaign-level
-- figures under *_campaign names. "_total_" is appended to every category list.
$join_expected_amount = (
    select
        -- string ids for the report, original ids kept under *_for_uniq
        cast(ph.cid as String) as cid,
        cast(ph.pid as String) as pid,
        ph.cid as cid_for_uniq,
        ph.pid as pid_for_uniq,
        camp.client_name as client_name,
        ListExtend(
            coalesce(ph.content_category_names, ListCreate(String)),
            ["_total_"]
        ) as content_category_names,
        -- phrase-level expectation and its shortfall
        camp.amount_expected_from_cat_pid as amount_expected_from_cat_pid,
        camp.amount_expected_from_cat_pid - ph.amount_realised as underimpression,
        -- campaign-level figures
        camp.amount_accepted as amount_accepted_campaign,
        camp.amount_expected as amount_expected_campaign,
        camp.amount_realised as amount_realised_campaign,
        camp.underimpression as underimpression_campaign,
        camp.underimpression_nz as underimpression_nz_campaign,
        -- remaining phrase columns; the three redefined above are excluded
        ph.* without ph.pid, ph.cid, ph.content_category_names
    from $join_phrase_stat as ph
    left join any $join_cid_stat as camp using (cid)
    where ph.content_category_names is not null
);

-- Per campaign (string cid): expected and realised amounts summed over its
-- content-category phrases, and the difference between the two sums.
$cc_pids = (
    select
        jea.cid as cid,
        sum(jea.amount_expected_from_cat_pid) as amount_expected_from_cat_pids,
        sum(jea.amount_realised) as amount_realised_from_cat_pids,
        sum(jea.amount_expected_from_cat_pid) - sum(jea.amount_realised) as underimpression_from_cat_pids
    from $join_expected_amount as jea
    group by jea.cid
);

-- Campaign stat rows of whitelisted "direct" campaigns, each extended with the
-- list of tree paths produced by $getTree.
$treelike = (
    select
        cs.*,
        $getTree(TableRow()) as identificator
    from $campaign_stat_table as cs
    where cast(cs.campaign_id as Int64) in $cids_whitelist
        and cs.dsp_type = "direct"
);

-- Adds the per-campaign content-category sums from $cc_pids to every tree row;
-- campaigns without a match keep NULL in the three added columns.
$treelike_join = (
    select
        tree.*,
        cat.amount_expected_from_cat_pids as amount_expected_from_cat_pids,
        cat.amount_realised_from_cat_pids as amount_realised_from_cat_pids,
        cat.underimpression_from_cat_pids as underimpression_from_cat_pids
    from $treelike as tree
    left join any $cc_pids as cat on (tree.campaign_id = cat.cid)
);

-- Campaign-level report rows. "flatten list by identificator" repeats every campaign row
-- once per tree path built by $getTree, and the measures are then summed per path, so
-- each tree level (media type, campaign type, client, campaign) gets its own totals.
$campaigns_stat_grouped = (
    select
        $date as fielddate,
        unwrap(ListNotNull(identificator)) as identificator,
        -- Only the deepest path built by $getTree has 4 elements (it ends with the
        -- campaign id), so the dates are filled for single-campaign rows and left NULL
        -- on the aggregated levels.
        IF(ListLength(identificator) == 4, SOME(start_date)) as start_date,
        IF(ListLength(identificator) == 4, SOME(end_date)) as end_date,
        -- campaign-level amounts
        sum(amount_accepted) as amount_accepted,
        sum(amount_expected) as amount_expected,
        sum(amount_realised) as amount_realised,
        -- content-category sums joined in $treelike_join
        sum(amount_expected_from_cat_pids) as amount_expected_from_cat_pids,
        sum(amount_realised_from_cat_pids) as amount_realised_from_cat_pids,
        sum(underimpression_from_cat_pids) as underimpression_from_cat_pids,
        sum(shows) as shows,
        sum(underimpression) as underimpression,
        sum(underimpression_nz) as underimpression_nz,
        sum(underimpression_current) as underimpression_current,
        sum(underimpression_current_nz) as underimpression_current_nz,
    from $treelike_join
    flatten list by identificator
    group by identificator
);

-- Maps a numeric campaign id to the last (deepest) tree path of that campaign,
-- which is used as the prefix of phrase-level identificators.
$for_phrase_join = (
    select
        cast(tree.campaign_id as Int64) as cid,
        ListLast(tree.identificator) as tree_part
    from $treelike as tree
);

-- Replaces a NULL list with an empty list of strings; other lists pass through.
$lw = ($items) -> (coalesce($items, ListCreate(String)));

-- Builds a comma-separated description of a phrase's audience targeting.
-- An age / gender / income list is dropped when it has 6 / 2 / 5 entries
-- (NOTE(review): presumably "every option selected" means no restriction — confirm).
-- The tags "custom_auditory" and "custom_metrika" are added when the matching
-- counter is positive. Returns "-" when the joined string is NULL.
$getTargeting = ($age, $gender, $income, $interests, $use_auditory, $use_metrika) -> {
    $none = ListCreate(String);
    $age_part = IF(ListLength($age) == 6, $none, $age);
    $gender_part = IF(ListLength($gender) == 2, $none, $gender);
    $income_part = IF(ListLength($income) == 5, $none, $income);
    $auditory_part = IF($use_auditory > 0, ["custom_auditory"], $none);
    $metrika_part = IF($use_metrika > 0, ["custom_metrika"], $none);
    $all_parts = ListExtend(
        $lw($age_part),
        $lw($gender_part),
        $lw($income_part),
        $lw($interests),
        $lw($auditory_part),
        $lw($metrika_part)
    );
    return String::JoinFromList($all_parts, ",") ?? "-"
};

-- Per phrase (pid): its content categories and the audience targeting summary
-- produced by $getTargeting.
$interests = (
    select
        pt.pid as pid,
        pt.content_category_names as content_category_names,
        $getTargeting(
            pt.age,
            pt.gender,
            pt.income,
            pt.interests,
            pt.use_auditory,
            pt.use_metrika
        ) as targeting
    from $phrase_targeting_table as pt
);

-- Converts a comma-separated list of region ids into a comma-separated list of
-- region names. An id written with a leading "-" keeps the "-" in front of its name
-- (NOTE(review): presumably marks an excluded region — confirm).
-- Entries whose label comes out as NULL are dropped by ListNotNull.
$transformGeo = ($x) -> {
    $region_label = ($id) -> (
        IF($id like "-%", "-", "") || (Geo::RegionById(ABS(CAST($id as Int32))).name)
    );
    $ids = String::SplitToList($x, ",");
    $labels = ListNotNull(ListMap($ids, $region_label));
    return String::JoinFromList($labels, ",")
};

-- Assembles the human-readable targeting string of a phrase.
--   $targeting - audience targeting summary (string, may be NULL)
--   $geo       - comma-separated region ids, converted to names by $transformGeo
--   $bsc       - list of bs category names (may be NULL)
--   $ccn       - list of content category names (may be NULL)
--   $pi        - list of allowed page ids (may be NULL), rendered as strings
-- The non-empty fragments are joined with "," and ";geo=<names>" is appended when geo
-- is available. Returns "-" when nothing is set.
-- Gluing the fragments together without a separator runs the last item of one list
-- into the first item of the next (e.g. "a,b" + "123" + "male" gives "a,b123male").
$assembleTargeting = ($targeting, $geo, $bsc, $ccn, $pi) -> {
    $fragments = [
        String::JoinFromList($bsc ?? ListCreate(String), ",") ?? "",
        String::JoinFromList($ccn ?? ListCreate(String), ",") ?? "",
        String::JoinFromList(ListMap($pi, ($x)->(cast($x as String))) ?? ListCreate(String), ",") ?? "",
        $targeting ?? ""
    ];
    -- skip unset fragments so that no leading, trailing or doubled commas appear
    $filled = ListFilter($fragments, ($f) -> ($f != ""));
    $geo_suffix = (";geo=" || $transformGeo($geo)) ?? "";
    $result = (String::JoinFromList($filled, ",") ?? "") || $geo_suffix;
    return IF($result is null or $result == "", "-", $result)
};

-- Campaign-level targeting: bs category names and allowed page ids per campaign.
$campaigns_targeting = (
    select
        ctt.cid as cid,
        ctt.bs_categories_names as bs_categories_names,
        ctt.allowed_page_ids as allowed_page_ids
    from $campaigns_targeting_table as ctt
);

-- Phrase-level report rows: one row per phrase stat row of a campaign present in
-- $for_phrase_join. The identificator is the campaign's deepest tree path extended
-- with the phrase's GroupExportID.
$phrases_stat = (
    select
        $date as fielddate,
        ps.cid as cid,
        unwrap(ListExtend(tree.tree_part, [cast(ps.GroupExportID as String)])) as identificator,
        ps.group_name as group_name,
        ps.Shows as shows,
        -- same cost conversion as in $join_phrase_stat
        (ps.Cost / 1000000.0 / 1.18 * 30.0) as amount_realised,
        -- raw targeting inputs, exported next to the assembled string
        tg.targeting as targeting__,
        ps.geo as geo,
        camp.bs_categories_names as bs_categories_names,
        tg.content_category_names as content_category_names,
        camp.allowed_page_ids as allowed_page_ids,
        $assembleTargeting(
            tg.targeting,
            ps.geo,
            camp.bs_categories_names,
            tg.content_category_names,
            camp.allowed_page_ids
        ) as targeting
    from $phrase_stat_table as ps
    inner join any $for_phrase_join as tree on (ps.cid = tree.cid)
    left join any $interests as tg on (ps.GroupExportID = tg.pid)
    left join any $campaigns_targeting as camp on (ps.cid = camp.cid)
);


-- Final data set: campaign-level tree rows followed by phrase-level rows.
-- The two inputs expose different column sets (e.g. group_name and targeting exist only
-- in $phrases_stat, start_date and end_date only in $campaigns_stat_grouped), so this
-- relies on UNION ALL aligning columns by name
-- (NOTE(review): confirm missing columns are filled with NULL as intended).
$to_push = (
    select * from $campaigns_stat_grouped
    union all
    select * from $phrases_stat
);

-- Publishes the result to the stat report. ListNotNull strips NULL elements from the
-- identificator path before upload.
-- NOTE(review): ERASE BY (fielddate) presumably replaces previously uploaded rows for
-- the same fielddate, which would make re-runs for a date idempotent — confirm.
upsert into stat.`Video/Others/CPMADV-77-underimpressions-cc/daily` ERASE BY (fielddate)
select ListNotNull(identificator) as identificator, s.* without identificator from $to_push as s
