pragma AnsiInForEmptyOrNullableItemsCollections;
pragma yt.Pool = "@[pool]";
pragma yt.MaxInputTablesForSortedMerge="30";
pragma yson.DisableStrict;
use hahn;

-- Run parameters. The "@[...]" placeholders are presumably substituted by the
-- launcher before the query is executed (TODO confirm against the scheduler).
$date_from = "@[date_from]";
$date_to = "@[date_to]";

-- Dictionary table read below for its name, category, login, dspid, pageid and impid columns.
$inapp_table = "//home/bs/users/yabs-analytics/video/pageimp_ids_video_with_inapp";

-- Destination table; its name is "<date_from>_<date_to>" under the ticket directory.
$output_table = "//home/videoquality/vh_analytics/operanalytics-1472/" || $date_from || "_" || $date_to;

-- Hosts excluded from the report: every listed host carries block = true,
-- so a successful join against this list marks the row as blocked.
$categories_blocklist = (
    SELECT
        block,
        host
    FROM AS_TABLE(AsList(
        AsStruct(true AS block, "wi-fi.ru" AS host),
        AsStruct(true AS block, "player.mediavitrina.ru" AS host),
        AsStruct(true AS block, "auth.wi-fi.ru" AS host),
        AsStruct(true AS block, "echo.msk.ru" AS host)
    ))
);

-- All columns of the daily BS hit log tables in [$date_from, $date_to],
-- plus the name of the source table exposed as fielddate.
$hitlog = (
    SELECT
        h.*,
        TableName() AS fielddate
    FROM RANGE(`//logs/bs-hit-log/1d`, $date_from, $date_to) AS h
);

-- Hit-log rows whose auction included the video-creative-reach product type,
-- aggregated to one row per (day, hit, user, page, imp, url, referer,
-- content id, product-type list) with the number of distinct auctions.
$step1 = (
    SELECT
        fielddate,
        CAST(uniqid AS Uint64) AS uniqid,
        CAST(hitlogid AS Uint64) AS hitlogid,
        CAST(pageid AS Uint64) AS pageid,
        CAST(impid AS Uint64) AS impid,
        -- Counted by unique auction id (bidreqid): the number of auctions,
        -- i.e. requests for ads (hits or aucs).
        COUNT(DISTINCT rtbbidreqid) AS hits,
        urlmd5,
        referer,
        contentid AS content_id_column,
        String::SplitToList(gotproducttypes, ",") AS gotproducttypes
    FROM $hitlog
    WHERE
        -- gotproducttypes is the set of ProductTypes taking part in the auction.
        gotproducttypes LIKE '%video-creative-reach%'
        AND CAST(hitlogid AS Uint64) > 1
        -- These event flag values give good hits in YAN (the Yandex ad network).
        AND eventflags IN ('0', '2', '16384', '16386')
    GROUP BY
        fielddate,
        uniqid,
        hitlogid,
        pageid,
        impid,
        urlmd5,
        referer,
        contentid,
        gotproducttypes
);

-- Rows of the daily cooked chevent log in [$date_from, $date_to] with
-- fraudbits = 0 and eventflags = 0, tagged with the source table name.
$chevent_ = (
    SELECT
        TableName() AS fielddate,
        hitlogid,
        eventcost,
        countertype
    FROM RANGE(`//statbox/cooked_logs/bs-chevent-cooked-log/v2/1d`, $date_from, $date_to)
    WHERE eventflags = 0
        AND fraudbits = 0
);

-- Per-day, per-hit totals from the filtered chevent rows:
-- summed event cost and the number of events with countertype = 1 (shows).
$chevent = (
    SELECT
        fielddate,
        hitlogid,
        SUM(eventcost) AS eventcost,
        COUNT_IF(countertype = 1) AS shows
    FROM $chevent_
    GROUP BY
        hitlogid,
        fielddate
);

-- Enriches every $step1 row with:
--   b:  InAppType / VideoKind of the (page, imp) pair from the PageImpArtmon dictionary;
--   c:  name / category / login of the (page, imp) pair from $inapp_table (dspid == 1 only);
--   ch: event cost and shows of the same hit on the same day.
-- All joins are LEFT JOIN ANY, so each $step1 row appears exactly once.
$step2 = (
    SELECT
        a.fielddate AS fielddate,
        a.hitlogid AS hitlogid,
        a.uniqid AS uniqid,
        a.pageid AS pageid,
        a.impid AS impid,
        a.urlmd5 AS UrlMD5,
        c.name AS name,
        c.login AS login,
        a.referer AS Referer,
        a.content_id_column AS ContentID,
        a.gotproducttypes AS producttype,
        ch.eventcost AS eventcost,
        ch.shows AS shows,
        -- Placement kind: in-app placements keep their InAppType, web placements
        -- are split into instream / outstream, anything unmatched becomes "-".
        CASE
            WHEN b.InAppType != "" THEN "inapp " || b.InAppType
            WHEN b.InAppType == "" AND b.VideoKind == "instream" THEN "web instream"
            WHEN b.InAppType == "" AND b.VideoKind != "instream" THEN "web outstream"
            ELSE "-"
        END AS VideoKind,
        CASE
            WHEN String::Contains(c.category, "VH") == true THEN "VH"
            ELSE "not VH"
        END AS is_VH,
        a.hits AS hits
    FROM $step1 AS a
    LEFT JOIN ANY (
        SELECT DISTINCT
            PageID,
            ImpID,
            InAppType,
            VideoKind
        FROM `home/yabs/dict/PageImpArtmon`
    ) AS b
    ON (a.pageid == b.PageID AND a.impid == b.ImpID)
    LEFT JOIN ANY (
        SELECT DISTINCT
            name,
            category,
            impid,
            pageid,
            login
        FROM $inapp_table
        WHERE dspid == 1
    ) AS c
    ON (a.pageid == c.pageid AND a.impid == c.impid)
    LEFT JOIN ANY $chevent AS ch
    ON (a.fielddate == ch.fielddate AND a.hitlogid == ch.hitlogid)
);


-- Total hits per (name, login, UrlMD5); the sum is exposed as enough_hits.
$hits_stats = (
    SELECT
        name,
        login,
        UrlMD5,
        SUM(hits) AS enough_hits
    FROM $step2
    GROUP BY
        UrlMD5,
        login,
        name
);

-- Adds to every $step2 row:
--   * ContentCategories / ContentGenres of its URL (matched by UrlMD5) and a
--     CC_type classification: "null" (no tags row), "bad" (empty list "[]"), "good";
--   * block      - true when the partner name is on $categories_blocklist, else NULL;
--   * enough_hits - total hits of the row's own (name, login, UrlMD5) from $hits_stats;
--   * good_urlmd5 / good_referer - the value is present and is neither "0" nor "".
--
-- $hits_stats holds one row per (name, login, UrlMD5), so UrlMD5 must be part of
-- the join key: joining on (name, login) alone made LEFT JOIN ANY pick the
-- hit total of an arbitrary URL of that partner, i.e. a non-deterministic value.
-- NOTE: rows with a NULL UrlMD5 never match and get enough_hits = NULL.
$joined_hits = (
    SELECT
        a.*,
        c.ContentCategories AS ContentCategories,
        c.ContentGenres AS ContentGenres,
        b.block AS block,
        d.enough_hits AS enough_hits,
        CASE
            WHEN c.ContentCategories IS NULL THEN "null"
            WHEN c.ContentCategories == "[]" THEN "bad"
            ELSE "good"
        END AS CC_type,
        a.UrlMD5 IS NOT NULL AND a.UrlMD5 NOT IN ("0", "") AS good_urlmd5,
        a.Referer NOT IN ("0", "") AND a.Referer IS NOT NULL AS good_referer
    FROM $step2 AS a
    LEFT JOIN ANY (
        SELECT
            urlmd5,
            ContentCategories,
            ContentGenres
        FROM `//home/video-hosting/adv_tags/tags_data_external`
        GROUP BY
            urlmd5,
            ContentCategories,
            ContentGenres
    ) AS c
    ON a.UrlMD5 == c.urlmd5
    LEFT JOIN ANY $categories_blocklist AS b
    ON a.name = b.host
    LEFT JOIN ANY $hits_stats AS d
    ON a.name = d.name
        AND a.login = d.login
        AND a.UrlMD5 = d.UrlMD5
);

-- Per (day, name, login) totals for non-blocked, non-VH "web instream" rows
-- that have both a name and a login. The *_with_threshold metrics only count
-- rows whose enough_hits is at least 5.
$filtered_hits = (
    SELECT
        fielddate,
        name,
        login,
        SUM(eventcost) AS eventcost,
        SUM(shows) AS shows,
        COALESCE(SUM_IF(hits, CC_type == "good" AND enough_hits >= 5), 0) AS good_referer_hits_with_threshold,
        COALESCE(SUM_IF(hits, CC_type != "null" AND enough_hits >= 5), 0) AS urlmd5_joined_hits_with_threshold,
        COALESCE(SUM_IF(hits, good_referer AND enough_hits >= 5), 0) AS not_null_referer_hits_with_threshold,
        COALESCE(SUM(hits), 0) AS hits,
        COALESCE(SUM_IF(hits, enough_hits >= 5), 0) AS hits_with_threshold,
        COALESCE(SUM_IF(hits, good_urlmd5), 0) AS not_null_urlmd5_hits,
        COALESCE(SUM_IF(hits, good_referer), 0) AS not_null_referer_hits,
        COALESCE(SUM_IF(hits, CC_type == "good"), 0) AS good_referer_hits,
        COALESCE(SUM_IF(hits, CC_type != "null"), 0) AS nonzero_joined_hits,
        -- One sample referer among the rows whose CC_type is not "good".
        SOME(IF(CC_type != "good", Referer)) AS bad_referer_example2
    FROM $joined_hits
    WHERE name IS NOT NULL
        AND login IS NOT NULL
        AND VideoKind = "web instream"
        AND is_VH = "not VH"
        AND block IS NULL
    GROUP BY
        fielddate,
        name,
        login
);

-- Grand total of hits over all rows of $filtered_hits (used as a scalar).
$totalhits = (
    SELECT SUM(hits)
    FROM $filtered_hits
);

-- numerator / denominator as Double; NANVL replaces a NaN ratio with NULL.
$ratio_or_null = ($numerator, $denominator) -> (
    NANVL($numerator / CAST($denominator AS Double), NULL)
);

-- $filtered_hits plus the share of each thresholded metric in
-- hits_with_threshold, and the row's share in the grand total of hits.
$filtered_hits_with_shares = (
    SELECT
        f.*,
        $ratio_or_null(good_referer_hits_with_threshold, hits_with_threshold)
            AS good_referer_hits_with_threshold_share,
        $ratio_or_null(urlmd5_joined_hits_with_threshold, hits_with_threshold)
            AS urlmd5_joined_hits_with_threshold_share,
        $ratio_or_null(not_null_referer_hits_with_threshold, hits_with_threshold)
            AS not_null_referer_hits_with_threshold_share,
        hits / CAST($totalhits AS Double) AS totalhits_share
    FROM $filtered_hits AS f
);


-- Extracts URL-derived fields from one request-log record.
--   $fd   - value copied verbatim into the fielddate member;
--   $data - request string; its second space-separated token is appended to
--           "https://an.yandex.ru" (presumably the path of an HTTP request line - verify).
-- Returns a struct with:
--   fielddate  - $fd;
--   target_ref - URL-decoded "target-ref" CGI parameter;
--   impid      - "imp-id" CGI parameter cast to Int64;
--   pageid     - text between "/meta/" and the first "?" cast to Int64,
--                NULL when the URL has no "/meta/" part;
--   url        - the reconstructed URL.
$getUrlData = ($fd, $data) -> {
    $request_parts = String::SplitToList($data, " ");
    $full_url = "https://an.yandex.ru" || ($request_parts[1] ?? "");
    $after_meta = String::SplitToList($full_url, "/meta/")[1];
    $page_id_text = IF(
        $full_url LIKE "%/meta/%",
        Unwrap(String::SplitToList($after_meta, "?")[0]),
        NULL
    );
    $imp_id_text = Url::GetCGIParam($full_url, "imp-id");
    $decoded_target_ref = Url::Decode(Url::GetCGIParam($full_url, "target-ref"));
    RETURN AsStruct(
        $fd AS fielddate,
        $decoded_target_ref AS target_ref,
        CAST($imp_id_text AS Int64) AS impid,
        CAST($page_id_text AS Int64) AS pageid,
        $full_url AS url
    );
};

-- The request column of the daily BS request log tables in
-- [$date_from, $date_to], tagged with the source table name.
$bsrequest = (
    SELECT
        request,
        TableName() AS fielddate
    FROM RANGE(`//logs/bs-request-log/1d`, $date_from, $date_to)
);

-- Parsed request-log records; only those where both the page id and the
-- imp id could be extracted are kept.
$url_data = (
    SELECT *
    FROM (
        PROCESS $bsrequest
        USING $getUrlData(fielddate, request)
    )
    WHERE impid IS NOT NULL
        AND pageid IS NOT NULL
);

-- True when the target-ref value is present and is neither "" nor "-".
$isGood = ($target_ref) -> {
    $is_present = $target_ref IS NOT NULL;
    $is_meaningful = $target_ref NOT IN ("", "-");
    RETURN $is_present AND $is_meaningful;
};

-- Per (day, page, imp): number of parsed requests and how many of them
-- carry a usable target-ref (as decided by $isGood).
$url_data_grouped = (
    SELECT
        fielddate,
        pageid,
        impid,
        COUNT(*) AS `count`,
        COUNT_IF($isGood(target_ref)) AS target_ref_not_null
    FROM $url_data
    GROUP BY
        fielddate,
        pageid,
        impid
);

-- Request counters with the login of their (page, imp) pair taken from
-- $inapp_table; rows for which no login was found are dropped.
$url_data_join_logins = (
    SELECT
        u.*,
        i.login AS login
    FROM $url_data_grouped AS u
    LEFT JOIN ANY $inapp_table AS i
    USING (pageid, impid)
    WHERE i.login IS NOT NULL
);

-- Per (day, login): total requests, requests with a usable target-ref,
-- and the share of the latter among all requests.
$url_data_join_logins_grouped = (
    SELECT
        fielddate,
        login,
        SUM(`count`) AS `count`,
        SUM(target_ref_not_null) AS target_ref_not_null,
        SUM(target_ref_not_null) / CAST(SUM(`count`) AS Double) AS good_target_ref_share
    FROM $url_data_join_logins
    GROUP BY
        login,
        fielddate
);

-- Final report rows: the per-(day, name, login) hit metrics extended with the
-- request-log statistics of the same login on the same day
-- (bsrequest_count and good_target_ref_share; NULL when there is no match).
--
-- The request statistics are grouped by (fielddate, login), so both columns
-- must be join keys. Joining on login alone made LEFT JOIN ANY attach the
-- statistics of an arbitrary day to every row of a multi-day range.
$join_target_refs_share = (
    SELECT
        f.*,
        u.`count` AS bsrequest_count,
        u.good_target_ref_share AS good_target_ref_share
    FROM $filtered_hits_with_shares AS f
    LEFT JOIN ANY $url_data_join_logins_grouped AS u
    ON f.fielddate = u.fielddate
        AND f.login = u.login
);

-- insert into `home/videoquality/vh_analytics/operanalytics-1472/step1` WITH TRUNCATE 
-- select * from $step1;

-- insert into `home/videoquality/vh_analytics/operanalytics-1472/step2` WITH TRUNCATE 
-- select * from $step2;

-- insert into `home/videoquality/vh_analytics/operanalytics-1472/hits_stats` WITH TRUNCATE 
-- select * from $hits_stats;

-- insert into `home/videoquality/vh_analytics/operanalytics-1472/joined_hits` WITH TRUNCATE 
-- select * from $joined_hits;

-- Overwrite the result table with the final report.
INSERT INTO $output_table WITH TRUNCATE
SELECT *
FROM $join_target_refs_share;
