-- run in https://sandbox.yandex-team.ru/scheduler/22846/view
-- Make `hahn` the default cluster for every table path used below.
USE hahn;
-- YT access settings for the operations started by this query.
-- NOTE(review): "yt_plato" is presumably the name of a stored token and
-- "bannerland-data" a team compute pool — confirm before reusing elsewhere.
PRAGMA yt.Auth = "yt_plato";
PRAGMA yt.Pool = "bannerland-data";


-- Turn off strict mode of the Yson UDF for the whole query.
-- NOTE(review): presumably needed so that lookups of missing keys / type
-- mismatches in the JSON fields below yield NULL instead of failing — confirm.
PRAGMA yson.DisableStrict;

-- Day to process: the day before the current UTC date.
$date = CurrentUtcDate() - DateTime::IntervalFromDays(1);
-- For a manual re-run, pin the day instead:
-- $date = Date('2020-06-15');

-- Both bounds of the log ranges are the same day, so a single daily table
-- is read from each log.
$date_to = CAST($date AS String);
$date_from = CAST($date AS String);

-- Lower bound (7 days before $date) for the archive folders read in $bl_data.
$bl_date_start = CAST($date - DateTime::IntervalFromDays(7) AS String);

-- Rows of bs-chevent-log for the [$date_from, $date_to] daily tables, linked
-- through bs-hit-log and bs-rtb-log to the response body stored in
-- bs-proto-request-log, and enriched with the DSPCreative and Page dictionaries.
-- Only "direct" and "media-smart" product types pass the filter.
$bs_data = (
    SELECT
        producttype,
        h.hitlogid AS hitlogid,
        CAST(bannerid AS Int64) AS BannerID,
        ResponseBody AS responsebody,
        ch.eventtime AS eventtime,
        CreativeTemplateID,
        SmartThemeID,
        SmartLayoutID,
        SmartSizeID,
        -- 1 when devicetype <= 4, 0 when it is greater, NULL when the value
        -- is absent or cannot be cast to a number.
        CASE
            WHEN CAST(ch.devicetype AS UInt64?) <= 4 THEN 1
            WHEN CAST(ch.devicetype AS UInt64?) > 4 THEN 0
            ELSE NULL
        END AS is_mobile,
        CASE WHEN OptionsSsp THEN 1 ELSE 0 END AS ssp,
        CASE WHEN NOT OptionsSsp AND OptionsMobile AND OptionsApp THEN 1 ELSE 0 END AS sdk

    FROM RANGE(`logs/bs-proto-request-log/1d`, $date_from, $date_to) AS r
    INNER JOIN RANGE(`logs/bs-rtb-log/1d`, $date_from, $date_to) AS rtb
        ON r.RequestID = rtb.requestid
    INNER JOIN RANGE(`logs/bs-hit-log/1d`, $date_from, $date_to) AS h
        ON rtb.bidreqid = h.rtbbidreqid
    INNER JOIN RANGE(`logs/bs-chevent-log/1d`, $date_from, $date_to) AS ch
        ON ch.hitlogid = h.hitlogid
    -- Dictionary lookups are optional: rows without a match are kept.
    LEFT JOIN `home/yabs/dict/DSPCreative` AS cr
        ON CAST(ch.creativeid AS UInt64) = cr.CreativeID
    LEFT JOIN `home/yabs/dict/Page` AS p
        ON CAST(ch.pageid AS UInt64) = p.PageID

    WHERE r.ResponseBody != '' AND r.ResponseBody IS NOT NULL
        AND h.rtbbidreqid != '0'
        AND ch.fraudbits = '0'
        AND ch.countertype = '1'
        AND ch.contexttype = '8'
        AND ch.producttype IN ("direct", "media-smart")
);

-- Banner ids present in the log sample; serves as the key set of the semi join
-- in $bid_pocket_row_info.
$bids = (SELECT BannerID FROM $bs_data);

-- Orig-sized avatars are not stored in the banner, so the URL of the "huge"
-- variant is taken and its trailing size component is swapped.
$replace_ava_size = Re2::Replace('/huge$');

-- $ava_json: JSON text holding a list of avatars; only the first element is used.
-- Returns its ['huge']['url'] with a trailing "/huge" replaced by "/orig-optimized".
$parse_ava = ($ava_json) -> {
    $avatars_node = Yson::ParseJson($ava_json);
    RETURN $replace_ava_size(
        Yson::ConvertToString($avatars_node[0]['huge']['url']),
        "/orig-optimized"
    );
};

-- Find the logged banner ids in the latest full state in order to determine
-- the pockets (and rows) in which they were generated; adv_type and the
-- orig-optimized avatar URL are extracted along the way.
-- NOTE(review): every SOME() picks its value independently, so if a BannerID
-- occurs in several rows of this table, pocket and row_id are not guaranteed
-- to come from the same row — confirm that BannerID is unique here.
$bid_pocket_row_info = (
    SELECT
        BannerID,
        SOME(pocket) AS pocket,
        SOME(row_id) AS row_id,
        SOME(Yson::ConvertToString(Yson::ParseJson(ModelCard)['attributes']['market']['AdvType'])) AS adv_type,
        SOME($parse_ava(Avatars)) AS avatars_orig_optimized_picture
    FROM (
        -- Keep only the full-state rows whose BannerID appears in the logs.
        SELECT *
        FROM `//home/bannerland/perf/full_state/export/v3.1/banners` AS perf_fs
        LEFT SEMI JOIN $bids AS bs_logs_bids
        USING (BannerID)
    )
    GROUP BY BannerID
);

-- List of keys that are removed from product_inf.
-- NOTE(review): despite the "good" in its name, this is the drop-list —
-- $filter_feed_keys keeps only the keys that are NOT listed here.
$good_bl_feed_keys = AsList(
    -- subj
    'OfferID', 'OfferId', 'id', 'unique_id',
    -- our internal fields
    '_cached_clean_md5', '_cached_get_shopname', '_from_feed', 'additional_data',
    'custom_phrases', 'bad_flags', 'merge_key', 'feed_lang',
    'offer_line_md5', 'offerfilters', 'proj_current_lang', 'source_letter',
    'dynamic_homonymy_words', 'product_type', 'turbo_url',
    -- only categspath remains — the path in the client's category tree;
    -- possibly it is not needed either
    'category', 'categoryId', 'categoryName', 'market_category', 'minicategs',
    -- only orig_domain is kept
    'main_mirror', 'main_mirror_id', 'orig_domain_id',
    -- pictures are handled separately
    'picture', 'images',
    -- bool, needed only for filters
    'available',
    -- bool, purpose unknown and most likely not needed
    'manufacturer_warranty',
    -- params is kept; the only difference is that params_specformat has size
    -- and material removed (they possibly duplicate some same-named fields)
    'params_specformat',
    -- offer landing page
    'url'
);

-- $y_dict: a Yson node convertible to a dict.
-- Returns a Yson node built from the same dict without the entries whose key
-- is listed in $good_bl_feed_keys.
$filter_feed_keys = ($y_dict) -> {
    $kept_items = ListFilter(
        DictItems(Yson::ConvertToDict($y_dict)),
        -- .0 is the key of a (key, value) item
        ($item) -> { RETURN $item.0 NOT IN $good_bl_feed_keys }
    );
    RETURN Yson::From(ToDict($kept_items));
};


-- Offer data for the logged banners. Every `tasks_and_offers.final` table in
-- archive folders named from $bl_date_start up to '2100-01-01_00:00:00' is
-- read, and its rows are matched to banners by (pocket, row_id).
$bl_data = (
    SELECT
        $filter_feed_keys(Yson::ParseJson(tao.product_inf)) AS feed_inf,
        Yson::ParseJson(tao.product_inf)['images'] AS images,
        bid_inf.BannerID AS BannerID,
        bid_inf.avatars_orig_optimized_picture AS avatars_orig_optimized_picture,
        bid_inf.adv_type AS adv_type,
        tao.product_class AS pt_class
    FROM (
        SELECT
            -- second-to-last component of the table path, i.e. the folder
            -- that directly contains the table
            ListReverse(String::SplitToList(TablePath(), '/'))[1] AS pocket,
            product_inf,
            row_id,
            product_class
        FROM RANGE('//home/bannerland/perf/make_banners/archive', $bl_date_start, '2100-01-01_00:00:00', 'tasks_and_offers.final')
    ) AS tao
    INNER JOIN $bid_pocket_row_info AS bid_inf
    USING (pocket, row_id)
);

-- Log rows combined with the offer data of their banner; log rows whose
-- BannerID has no match in $bl_data are dropped by the inner join.
$full_data = (
    SELECT *
    FROM $bs_data AS bs_data
    INNER JOIN $bl_data AS tao_with_bid
    USING (BannerID)
);

-- Remove duplicates by (producttype, BannerID): the ANY modifier on the left
-- side of the self-join leaves one row of $full_data per join key.
$data = (
    SELECT t.*
    FROM ANY $full_data AS t
    LEFT SEMI JOIN $full_data AS tt
    USING (producttype, BannerID)
);

-- Number the rows inside every adv_type group in random order. Sorting by the
-- resulting score therefore interleaves the groups: all rows numbered 1 come
-- first, then all rows numbered 2, and so on.
$data_with_score = (
    SELECT
        t.*,
        ROW_NUMBER() OVER shuffled_within_adv_type AS score
    FROM $data AS t
    WINDOW shuffled_within_adv_type AS (
        PARTITION BY adv_type
        ORDER BY RANDOM(producttype, BannerID, eventtime)
    )
);

-- Output table is named after the processed day; WITH TRUNCATE replaces any
-- previous contents, and only the 10000 rows with the lowest score are stored.
$res_table = '//home/bannerland/data/perf_quality_stand/' || $date_to;

INSERT INTO $res_table WITH TRUNCATE
    SELECT *
    FROM $data_with_score
    ORDER BY score
    LIMIT 10000;
