-- Default cluster for every table path below, plus the Bigb UDF library
-- that provides Bigb::ParseOrderProfile (used in $orders).
use hahn;
PRAGMA File('bigb.so', 'https://proxy.sandbox.yandex-team.ru/last/BIGB_UDF?attrs={"released":"stable"}&cache=1235435');
PRAGMA udf('bigb.so');

-- NOTE(review): the "@[...]" values look like template placeholders that are
-- substituted before the query is submitted — confirm against the launcher.
pragma yt.Pool = "@[pool]";

-- $date names the per-day input/output tables and is written out as fielddate;
-- $mon1 is the lower bound of the campaigns_cpv table range read in $cpv_stats_real.
$date = "@[date]";
$mon1 = "@[mon1]";

-- Input tables: three per-day tables keyed by $date and the latest Caesar orders dump.
$stats = "//home/videoquality/vh_analytics/underimpressions_tmp/" || $date;
$pids = "//home/videoquality/vh_analytics/targeting/" || $date;
$camp = "//home/videoquality/vh_analytics/targeting_campaigns/" || $date;
$caesar = '//home/bs/logs/AdsCaesarOrdersFullDump/latest';

-- One row per row of the Caesar dump: the order id together with the
-- Resources.AutoBudget.AutoBudgetAvgCPV field of the parsed order profile.
$orders = (
    select
        Bigb::ParseOrderProfile(TableRow()).Resources.AutoBudget.AutoBudgetAvgCPV as AvgCPV,
        OrderID
    from $caesar
);

-- Realised cost per completed view for each campaign, accumulated over the
-- campaigns_cpv tables named from $mon1 through $date.
-- cpv_real stays NULL unless the summed completes are positive
-- (two-argument IF has no else branch).
$cpv_stats_real = (
    select
        cid,
        some(orderid) as orderid,
        IF(
            sum(completes) > 0,
            sum(cost_rub) / cast(sum(completes) as Double)
        ) as cpv_real
    from range(
        `//home/videoquality/vh_analytics/campaigns_cpv`,
        $mon1,
        $date
    )
    group by
        cid
);

-- Adds the planned CPV from the order profile to the realised per-campaign CPV.
-- Campaigns without a matching order keep their row with cpv_plan = NULL (left join).
-- NOTE(review): the 1e6 / 30.0 / 1.2 factors are undocumented — presumably
-- micro-units, a currency rate and VAT; confirm with the data owner.
$cpv_stats = (
    select
        c.*,
        (AvgCPV / 1e6 * 30.0 / 1.2) as cpv_plan
    from $cpv_stats_real as c
    left join any $orders as abo
        on c.orderid = abo.OrderID
);

-- Null-safe list wrapper: returns $l, or an empty List<String> when $l is NULL.
$lw = ($l) -> {
    return COALESCE($l, ListCreate(String))
};

-- Builds the targeting label of a campaign: a ";;;"-joined string, or "-" when
-- nothing remains to report.
--   $age / $gender / $income: contribute no items when NULL or when they hold
--       exactly 6 / 2 / 5 items; otherwise their non-NULL items, sorted.
--       (presumably those sizes mean "every option selected" — TODO confirm)
--   $interests: non-NULL items other than the "-" placeholder, in input order.
--   $ret_cond_ids: when not NULL, the literal item "retargeting" is appended.
$getTargeting = ($age, $gender, $income, $interests, $ret_cond_ids) -> {
    $age_part = IF(
        $age is null or ListLength($age) == 6,
        ListCreate(String),
        ListSort(ListNotNull($age))
    );
    $gender_part = IF(
        $gender is null or ListLength($gender) == 2,
        ListCreate(String),
        ListSort(ListNotNull($gender))
    );
    $income_part = IF(
        $income is null or ListLength($income) == 5,
        ListCreate(String),
        ListSort(ListNotNull($income))
    );
    $interest_part = ListNotNull(ListFilter($interests, ($item) -> ($item != "-")));

    $base = ListExtend($age_part, $gender_part, $income_part, $interest_part);
    $all_parts = IF(
        $ret_cond_ids is null,
        $base,
        ListExtend($base, ["retargeting"])
    );

    $label = String::JoinFromList($all_parts, ";;;");
    return IF($label is null or $label == "", "-", $label)
};

-- Collapse helpers: a list whose length equals the given constant is replaced
-- (by an empty list, or by ["all_bs"] for brand-safety categories); any other
-- list is returned unchanged.
-- NOTE(review): 6 / 2 / 5 / 18 presumably are the total number of options of
-- each kind — confirm. Only $w_bs is referenced elsewhere in the visible script.
$w_age = ($age) -> {
    return IF(ListLength($age) == 6, ListCreate(String), $age)
};
$w_gender = ($gender) -> {
    return IF(ListLength($gender) == 2, ListCreate(String), $gender)
};
$w_income = ($income) -> {
    return IF(ListLength($income) == 5, ListCreate(String), $income)
};
$w_bs = ($bs) -> {
    return IF(ListLength($bs) == 18, ["all_bs"], $bs)
};
-- Renders a list of strings as one ";;;"-joined, sorted string.
-- "-" placeholder items are dropped first; a NULL or empty result yields "-".
$commaize = ($x) -> {
    $kept = ListFilter($x, ($item) -> ($item != "-"));
    $has_items = $kept is not null and ListLength($kept) > 0;
    return IF(
        $has_items,
        String::JoinFromList(ListSort($kept), ";;;"),
        "-"
    )
};

-- Maps a duration to its bucket label:
--   "<10", "10", "11-15", "16-20", "21-30", "31-60", "61+"; NULL stays NULL.
-- The branches are cumulative upper bounds evaluated top-down, so every value
-- lands in exactly one bucket. Non-integer values fall into the bucket whose
-- upper bound they do not exceed (e.g. 20.5 -> "21-30") instead of slipping
-- through the gaps between integer ranges into "61+".
$processDuration = ($x) -> {
    return case
        when $x is null then null
        when $x < 10 then "<10"
        when $x = 10 then "10"
        when $x <= 15 then "11-15"
        when $x <= 20 then "16-20"
        when $x <= 30 then "21-30"
        when $x <= 60 then "31-60"
        else "61+"
    end
};

-- Rows of the per-day targeting table with every list column made non-NULL
-- (NULL becomes an empty list) and durations replaced by their bucket labels
-- (items that map to NULL are dropped). All other columns pass through as is.
$pids_ = (
    select
        COALESCE(age, ListCreate(String)) as age,
        COALESCE(gender, ListCreate(String)) as gender,
        COALESCE(income, ListCreate(String)) as income,
        COALESCE(content_category_names, ListCreate(String)) as content_category_names,
        COALESCE(interests, ListCreate(String)) as interests,
        COALESCE(
            ListNotNull(ListMap(durations, $processDuration)),
            ListCreate(String)
        ) as durations,
        p.* without
            age,
            gender,
            income,
            content_category_names,
            interests,
            durations
    from $pids as p
);

-- insert into `//home/videoquality/vh_analytics/cpmadv-45/pids_` WITH TRUNCATE 
-- select * from (
--         select * from $pids_ flatten list by (
--             age, gender, income, interests,
--             content_category_names, durations
--         )
--     );

-- Distinct retargeting condition ids per campaign, taken from bids_retargeting.
-- A campaign appears here only if it has at least one row in that table.
$retargeting = (
    select
        cid,
        AGGREGATE_LIST_DISTINCT(ret_cond_id) as ret_cond_ids
    from `home/direct/db/bids_retargeting`
    group by
        cid
);

-- Aggregation factory for AggregateBy: AGGREGATE_LIST_DISTINCT wrapped in
-- AggregateFlatten, so it is applied to list-typed columns item by item.
$AG_FL = AggregateFlatten(
    AggregationFactory("AGGREGATE_LIST_DISTINCT")
);

-- Campaign-level targeting: for every cid, the distinct union of each list
-- column across its $pids_ rows. content_category_names is kept both as the raw
-- list (…_raw) and as a ";;;"-joined string; is_non_skippable is the max over rows.
$pids_to_camp_ = (
    select
        cid,
        AggregateBy(age, $AG_FL) as age,
        AggregateBy(gender, $AG_FL) as gender,
        AggregateBy(income, $AG_FL) as income,
        AggregateBy(interests, $AG_FL) as interests,
        AggregateBy(content_category_names, $AG_FL) as content_category_names_raw,
        AggregateBy(durations, $AG_FL) as durations,
        $commaize(
            AggregateBy(content_category_names, $AG_FL)
        ) as content_category_names,
        max(is_non_skippable) as is_non_skippable
    from $pids_
    group by
        cid
);
-- Campaign-level targeting plus the final targeting label.
-- ret_cond_ids comes from the left join and is NULL for campaigns without
-- retargeting rows, which is what $getTargeting checks for.
$pids_to_camp = (
    select
        p.*,
        $getTargeting(age, gender, income, interests, ret_cond_ids) as targeting
    from $pids_to_camp_ as p
    left join any $retargeting as r
        using (cid)
);


-- Per-campaign delivery statistics from the per-day stats table, restricted to
-- rows with dsp_type "direct"; campaign_id is cast to Int64 and exposed as cid.
$camp_stat = (
    select
        cast(campaign_id as Int64) as cid,
        media_type,
        client_name,
        shows,
        amount_realised,
        amount_expected,
        underimpression_nz,
        underimpression_current_nz
    from $stats
    where
        dsp_type == "direct"
);

-- Campaign attributes from the campaigns table. frequency, places_filter and
-- instream_outstream_filter are constant "-" placeholders here, not read from data.
$camp_meta = (
    select
        cid,
        ClientID,
        strategy_name as strategy,
        "-" as frequency,
        "-" as places_filter,
        "-" as instream_outstream_filter
    from `home/direct/db/campaigns`
);

-- Brand-safety categories (collapsed by $w_bs) and the brand-lift flag
-- per campaign, read from the per-day targeting_campaigns table.
$bs_camp_meta = (
    select
        cid,
        $w_bs(bs_categories_names) as bs_categories_names,
        has_brandlift
    from $camp
);

-- One row per "direct" campaign from $camp_stat, enriched with campaign
-- attributes, targeting, brand-safety data and CPV figures.
-- Every enrichment is a left join, so a campaign missing on the right side keeps
-- its row; the string dimensions below then fall back to "-" / "no".
-- List-valued dimensions are rendered as ";;;"-joined strings so that
-- $make_lists can split them again.
$camp_joined = (
    select
        cs.*,
        cpvr.cpv_real as cpv_real,
        cpvr.cpv_plan as cpv_plan,
        ClientID,
        strategy,
        frequency,
        places_filter,
        instream_outstream_filter,
        targeting ?? "-" as targeting,
        content_category_names ?? "-" as content_category_names,
        -- a NULL flag (no match in the join) is reported as "no"
        IF(is_non_skippable, "yes", "no") as non_skippable,
        $commaize($lw(bs_categories_names)) as brand_safety,
        IF(has_brandlift, "yes", "no") as brandlift,
        -- An empty (but non-NULL) duration list must be reported as "-" like
        -- every other dimension; joining an empty list would yield "" instead.
        IF(
            durations is not null and ListLength(durations) > 0,
            String::JoinFromList(ListMap(ListSort(durations), ($x) -> (CAST($x as String))), ";;;"),
            "-"
        ) as durations
    from $camp_stat as cs
    left join any $camp_meta as cm on (cs.cid = cm.cid)
    left join any $pids_to_camp as p on (cs.cid = p.cid)
    left join any $bs_camp_meta as bs on (cs.cid = bs.cid)
    left join any $cpv_stats as cpvr on (cs.cid = cpvr.cid)
);

-- Splits a ";;;"-joined dimension string back into a list of its values.
$sp = ($x) -> {
    return String::SplitToList($x, ";;;")
};

-- Splits a ";;;"-joined dimension string and appends the "_total_" item.
$with_total = ($joined) -> (ListExtend($sp($joined), ["_total_"]));

-- $camp_joined with every dimension column turned into a list of its values
-- plus "_total_"; flattening these lists later yields one row per combination
-- of concrete values and totals. All other columns pass through unchanged.
$make_lists = (
    select
        $with_total(brand_safety) as brand_safety,
        $with_total(brandlift) as brandlift,
        $with_total(content_category_names) as content_category_names,
        $with_total(durations) as durations,
        $with_total(frequency) as frequency,
        $with_total(instream_outstream_filter) as instream_outstream_filter,
        $with_total(media_type) as media_type,
        $with_total(non_skippable) as non_skippable,
        $with_total(places_filter) as places_filter,
        $with_total(strategy) as strategy,
        $with_total(targeting) as targeting,
        s.* without
            brand_safety,
            brandlift,
            content_category_names,
            durations,
            frequency,
            instream_outstream_filter,
            media_type,
            non_skippable,
            places_filter,
            strategy,
            targeting
    from $camp_joined as s
);

-- Report rows for $date: $make_lists is flattened over all eleven dimension
-- lists and aggregated per combination of dimension values.
-- Sums are taken over campaigns, campaign/client counts are distinct-count
-- estimates, and the CPV columns are plain averages over the grouped rows.
-- NOTE(review): rows are grouped by the full targeting string but only its
-- first 800 characters are emitted — two targetings sharing that prefix would
-- produce two rows with the same key; confirm this cannot happen in practice.
$gr = (
    select
        $date as fielddate,
        unwrap(media_type) as media_type,
        unwrap(strategy) as strategy,
        unwrap(brand_safety) as brand_safety,
        unwrap(brandlift) as brandlift,
        unwrap(content_category_names) as content_category_names,
        unwrap(durations) as durations,
        unwrap(frequency) as frequency,
        unwrap(instream_outstream_filter) as instream_outstream_filter,
        unwrap(non_skippable) as non_skippable,
        unwrap(places_filter) as places_filter,
        unwrap(substring(targeting, 0, 800)) as targeting,
        sum(shows) as shows,
        sum(amount_realised) as amount_realised,
        sum(amount_expected) as amount_expected,
        sum(underimpression_current_nz) as underimpression_current_nz,
        sum(underimpression_nz) as underimpression_nz,
        CountDistinctEstimate(cid) as campaigns,
        CountDistinctEstimate(client_name) as clients,
        avg(cpv_plan) as cpv_plan,
        avg(cpv_real) as cpv_real
    from (
        select *
        from $make_lists
        flatten list by (
            brand_safety,
            brandlift,
            content_category_names,
            durations,
            frequency,
            instream_outstream_filter,
            media_type,
            non_skippable,
            places_filter,
            strategy,
            targeting
        )
    )
    group by
        media_type,
        strategy,
        brand_safety,
        brandlift,
        content_category_names,
        durations,
        frequency,
        instream_outstream_filter,
        non_skippable,
        places_filter,
        targeting
);

-- Output 1: the per-campaign joined table for $date, fully rewritten on each run.
$camp_joined_table = "//home/videoquality/vh_analytics/cpmadv-45/" || $date;

insert into $camp_joined_table with truncate
select *
from $camp_joined;

-- Output 2: the aggregated report rows; existing rows with the same fielddate
-- are erased before the new ones are written.
upsert into stat.`Video/Others/CPMADV-45-running-campaigns/daily` erase by (fielddate)
select *
from $gr;
