use hahn;
PRAGMA AnsiInForEmptyOrNullableItemsCollections;
PRAGMA yt.ParallelOperationsLimit = "4";
pragma yt.Pool = "@[pool]";
pragma library("stability_common.sql");
import stability_common symbols $dateRange;

-- Run parameters. Each value is a "@[name]" placeholder; presumably the launching
-- tool substitutes the real value before the query is executed (TODO confirm).
-- $date_from / $date_to bound both the log table range and the list of per-day outputs.
$date_from = "@[date_from]";
$date_to = "@[date_to]";
-- Output directory; the trailing "/" lets the per-day table name be appended directly.
$output_root = "@[output_root]/";

-- Reference table describing the video page/imp placements (and their DSPs)
-- that this report covers.
$pageimps_table = "//home/bs/users/yabs-analytics/video/pageimp_ids_video_with_inapp";

-- Allow-list of page ids: every distinct pageid present in the reference table.
$good_page_ids = (
    select pageid
    from $pageimps_table
    group by pageid
);

-- Allow-list of DSP ids: every distinct dspid present in the reference table.
$good_dsp_ids = (
    select dspid
    from $pageimps_table
    group by dspid
);

-- One row per (pageid, impid) with its descriptive attributes.
-- some() picks an arbitrary value within the group, so the attributes are
-- presumably constant per (pageid, impid) in the reference table — TODO confirm.
$page_imp = (
    select
        pageid,
        impid,
        some(category) as category,
        some(video_placement_name) as ad_type,
        some(video_type) as video_type,
        some(name) as partner_name
    from $pageimps_table
    group by
        pageid,
        impid
);

-- One (dspid, dsp_name) row per DSP from the reference table.
-- unwrap() aborts the query if a dspid is NULL or if a DSP has no non-NULL name.
$dsp_name_rows = (
    select
        unwrap(dspid) as dspid,
        unwrap(max(dsp_name)) as dsp_name
    from $pageimps_table
    group by dspid
);

-- Dict dspid -> dsp_name, used for scalar lookups inside $getProductType.
$dspid_to_name = (
    select
        ToDict(AGGREGATE_LIST(AsTuple(dspid, dsp_name)))
    from $dsp_name_rows
);

-- Normalizes the product type of a log row.
--   $di - dspid of the row
--   $pt - producttype as written in the log
-- For DSPs other than 1, 5 and 10 whose reference name is "auction" / "price",
-- the product type is overridden with "awaps_auction" / "awaps_fix_cpm";
-- in every other case the logged product type is returned unchanged.
$getProductType = ($di, $pt) -> {
    $dsp_name = $dspid_to_name[$di];
    $overridable_dsp = $di not in (1, 5, 10);
    return case
        when $overridable_dsp and $dsp_name == "auction" then "awaps_auction"
        when $overridable_dsp and $dsp_name == "price" then "awaps_fix_cpm"
        else $pt
    end
};

-- True when the (normalized) product type mentions "awaps" or "reach";
-- only such rows are kept by the report.
$okProductType = ($pt) -> (
    $pt like "%reach%"
    or $pt like "%awaps%"
);

-- Maps the placeholder spellings of "no value" ("-", "undefined", "null", "", "0")
-- to a real NULL; any other value is passed through as is.
$undefWrapper = ($x) -> {
    RETURN case
        when $x is null then null
        when $x in ("-", "undefined", "null", "", "0") then null
        else $x
    end
};

-- "Null wrapper": yields 0 in place of NULL, otherwise the value itself.
$nw = ($x) -> (
    IF($x is not null, $x, 0)
);

-- Effective minimal CPM of a row.
--   $dspmincpm, $selfmincpm - the two min-CPM values from the log (NULL counts as 0)
--   $ps                     - partner share
-- Takes the larger of the two min-CPM values as Double and, when the partner
-- share is present and positive, divides by it; otherwise returns the maximum as is.
$getEffectiveMinCPM = ($dspmincpm, $selfmincpm, $ps) -> {
    $floor_cpm = CAST(MAX_OF($nw($dspmincpm), $nw($selfmincpm)) as Double);
    return case
        when $ps is not null and $ps > 0 then $floor_cpm / $ps
        else $floor_cpm
    end
};

-- Builds the video inventory rows for the log tables in [$date_from, $date_to]:
--   1. reads the cooked DSP log and extracts the fields of interest;
--   2. keeps clean video events on allow-listed pages / DSPs whose normalized
--      product type passes $okProductType;
--   3. attaches page/imp attributes and aggregates hit / show / price counters;
--   4. enriches VH rows with stream metadata and rows with a usable yandexuid
--      with Crypta segments.
-- Every result row carries `fielddate` (the name of the source log table), which
-- the caller uses to split the result into per-day tables.
define subquery $pre_map($date_from, $date_to) as

-- Raw projection of the cooked DSP log. The product type is normalized here once
-- so that the downstream select and filter share a single computed value.
$map_ = (
    select
        TableName() as fielddate,
        pageid,
        adtype,
        uniqid as yandexuid,
        `impressionoptions_yandex-page` or `impressionoptions_business-unit` as is_ya,
        impid,
        dspid,
        countertype,
        win,
        price,
        partnerprice,
        -- CGI parameters "354" and "769" of queryargs carry the content id and vsid.
        $undefWrapper(Url::GetCGIParam("?" || queryargs, "354")) as video_content_id,
        Url::GetCGIParam("?" || queryargs, "769") as vsid,
        position,
        $getProductType(dspid, producttype) as producttype,
        bidreqid,
        dspfraudbits,
        dspeventflags,
        maxadscount,
        referer,
        videoduration,
        selfmincpm,
        dspmincpm,
        $getEffectiveMinCPM(dspmincpm, selfmincpm, partnershare) as effective_mincpm
    from range(`cooked_logs/bs-dsp-cooked-log/1d`, $date_from, $date_to) with unordered
);

-- Filtered events: valid bid request, no fraud bits / event flags, video ad type,
-- accepted product type, allow-listed page, and either an allow-listed DSP or a
-- winning hit (countertype 0, win 1) of DSP 5 / 10.
-- NOTE(review): $getProductType special-cases DSPs (1, 5, 10) while the filters
-- here and in $reduce use (5, 10) — confirm the difference is intended.
$map = (
    select
        fielddate,
        pageid,
        is_ya,
        yandexuid,
        impid,
        dspid,
        countertype,
        win,
        price,
        partnerprice,
        vsid,
        position,
        producttype,
        bidreqid,
        -- Only a winning hit at position 0 contributes its maxadscount to `hits`.
        IF(
            countertype == 0 and win == 1 and position == 0,
            maxadscount,
            0
        ) as hits,
        referer,
        video_content_id,
        videoduration,
        dspmincpm,
        selfmincpm,
        effective_mincpm
    from $map_
    where
        bidreqid is not null and bidreqid != 0
        and dspfraudbits == 0
        and dspeventflags == 0
        and adtype == "video"
        and $okProductType(producttype)
        and pageid in $good_page_ids
        and (
            dspid in $good_dsp_ids
            or (
                countertype == 0
                and win == 1
                and dspid in (5, 10)
            )
        )
);

-- Attach page/imp attributes; rows without a matching (pageid, impid) are dropped.
-- Category "other" is relabelled "yandex" when the is_ya flag is set.
$joined = (
    select
        m.*,
        IF(
            category == "other" and is_ya,
            "yandex",
            category
        ) as category,
        video_type,
        partner_name
    from $map as m
    inner join $page_imp as p using (pageid, impid)
);

-- Counters per (day, user, placement, referer, content, product type, duration).
-- Shows / prices / winhits exclude DSPs 5 and 10; `hits` is the sum computed in $map.
$reduce = (
    select
        fielddate,
        yandexuid,
        pageid,
        impid,
        video_type,
        partner_name,
        category,
        referer,
        producttype,
        videoduration,
        video_content_id,
        MAX(vsid) as vsid,
        SUM(hits) as hits,
        -- Approximate number of distinct bid requests.
        CountDistinctEstimate(bidreqid) as hits_block,
        COUNT_IF(countertype == 1 and win == 1 and dspid not in (5, 10)) as shows,
        COUNT_IF(countertype == 1 and win == 1 and dspid not in (5, 10) and position == 0) as shows_block,
        sum_if(price, countertype == 1 and win == 1 and dspid not in (5, 10)) as price,
        sum_if(partnerprice, countertype == 1 and win == 1 and dspid not in (5, 10)) as partnerprice,
        COUNT_IF(countertype == 0 and win == 1 and dspid not in (5, 10)) as winhits,
        COUNT_IF(countertype == 0 and win == 1 and dspid not in (5, 10) and position == 0) as winhits_block,
        MEDIAN(selfmincpm) as selfmincpm,
        MEDIAN(dspmincpm) as dspmincpm,
        MEDIAN(effective_mincpm) as effective_mincpm
    from $joined
    group by
        fielddate,
        yandexuid,
        pageid,
        impid,
        video_type,
        partner_name,
        category,
        referer,
        video_content_id,
        producttype,
        videoduration
);

-- Stream metadata used to enrich VH rows.
$ib = (
    select
        JoinKey,
        `UUID`,
        computed_channel,
        duration
    from `//home/videolog/strm_meta/iron_branch/concat`
);

-- True when the category mentions VH (either case spelling used in the data).
-- The result is coalesced to false so that a NULL category is a definite "not VH":
-- without it both split filters below evaluate to NULL for such rows and the
-- rows silently disappear from the output.
$isVh = ($category) -> (
    COALESCE($category like "%VH%" or $category like "%vh%", false)
);

-- The two selects below partition $reduce: every row lands in exactly one of them.
$ib_non_joinable = (
    select * from $reduce where not $isVh(category) or video_content_id is null
);

$ib_joinable = (
    select * from $reduce where $isVh(category) and video_content_id is not null
);

-- NOTE(review): assumes JoinKey is unique in $ib; duplicate keys would multiply
-- the aggregated rows — TODO confirm.
$ib_joined = (
    select r.*, `UUID`, computed_channel, duration
    from $ib_joinable as r
    left join $ib as i on (r.video_content_id == i.JoinKey)
);

-- Columns missing on the non-joinable side are filled with NULL by union all.
$after_ib_join = (
    select * from $ib_non_joinable union all select * from $ib_joined
);

-- Same partition-then-join pattern for Crypta: only rows with a usable
-- (non-NULL, non-zero) yandexuid are looked up.
$crypta_non_joinable = (
    select * from $after_ib_join where yandexuid is null or yandexuid == 0
);

$crypta_joinable = (
    select * from $after_ib_join where yandexuid is not null and yandexuid != 0
);

-- Crypta profile log for the same date range; fielddate is the source table name.
$crypta_map_ = (
    select
        TableName() as fielddate,
        yandexuid,
        crypta_id,
        gender,
        user_age_6s,
        income_5_segments
    from range(
        `//statbox/crypta-yandexuid-profiles-log`, $date_from, $date_to
    )
);

-- Given a Yson dict of label -> probability, returns the label with the highest
-- probability (presumably NULL when the dict is empty or absent — TODO confirm).
$getMaxProbValue = ($yson) -> {
    $dict = DictItems(Yson::ConvertToDoubleDict($yson));
    return ListSortDesc($dict, ($x)->($x.1))[0].0
};

$crypta = (
    select
        fielddate,
        yandexuid,
        crypta_id,
        $getMaxProbValue(user_age_6s) as age_segment,
        $getMaxProbValue(gender) as gender,
        $getMaxProbValue(income_5_segments) as income_segment
    from $crypta_map_
);

-- NOTE(review): assumes at most one Crypta profile row per (yandexuid, fielddate);
-- duplicates would multiply the aggregated rows — TODO confirm.
$crypta_joined = (
    select r.*,
        crypta_id,
        gender,
        income_segment,
        age_segment
    from $crypta_joinable as r
    left join $crypta as c using (yandexuid, fielddate)
);

$after_crypta_join = (
    select * from $crypta_non_joinable union all select * from $crypta_joined
);

select * from $after_crypta_join;
end define;

-- All rows for the requested date range; sliced into per-day tables below.
$pre_mapped = (
    select *
    from $pre_map($date_from, $date_to)
);

-- Writes the rows of a single day into "<output_root>/<day>", replacing any
-- previous contents of that table.
define action $split_into_days($day) as
    $day_table = $output_root || $day;

    insert into $day_table with truncate
    select *
    from $pre_mapped
    where fielddate == $day;
end define;

-- One output table per date returned by $dateRange for the requested range.
evaluate for $date in $dateRange($date_from, $date_to) do $split_into_days($date);
