use hahn;
pragma yson.DisableStrict;
pragma yt.Pool = "@[pool]";
pragma library("get_ugc_meta.sql");
import get_ugc_meta symbols $get_ugc_meta;
pragma library("stability_common.sql");
import stability_common symbols $remappedWrapRefFrom;
pragma yt.ParallelOperationsLimit = "4";
PRAGMA AnsiInForEmptyOrNullableItemsCollections;
PRAGMA File(
    "content_ids_mapping.tsv",
    'https://proxy.sandbox.yandex-team.ru/last/VH_TO_UGC_BLOGGERS_MAPPING?attrs={"released":"stable"}&salt=1593700687.52'
);

-- Run parameters. The "@[...]" tokens look like template placeholders; presumably they are
-- substituted by the launcher before the query is submitted (TODO confirm).
-- $check_type: compared with "zen" when $check is built.
$check_type = "@[check_type]";
-- $output_root: path prefix; the processed date is appended to form each output table path.
$output_root = "@[root]/";
-- $date_from / $date_to: bounds of the processed date range in "%Y-%m-%d" form; both ends are included.
$date_from = "@[date_from]";
$date_to = "@[date_to]";

-- Parses a "%Y-%m-%d" string and converts the parsed value to a date via DateTime::MakeDate.
$parseDate = ($text) -> {
    $parsed = DateTime::Parse("%Y-%m-%d")($text);
    RETURN DateTime::MakeDate($parsed)
};
-- Formats a date/time value back into "%Y-%m-%d" text.
$formatDate = DateTime::Format("%Y-%m-%d");

-- Turns TSV text ("<key>\t<numeric id>" per line) into a Dict<String, UInt64>.
-- Lines without a tab are skipped; a line whose second column is not a valid UInt64
-- aborts the query because of Unwrap.
$parseContentIdsMapping = ($mapping) -> {
    $lines = String::SplitToList($mapping, "\n");
    $tabbed = ListFilter($lines, ($line) -> (FIND($line, "\t") IS NOT NULL));
    -- Builds one (key, id) pair from a single tab-separated line.
    $toPair = ($line) -> {
        $cols = String::SplitToList($line, "\t");
        RETURN AsTuple(
            Unwrap($cols[0]),
            Unwrap(CAST($cols[1] AS UInt64))
        )
    };
    RETURN ToDict(ListMap($tabbed, $toPair))
};

-- Lookup dict built from the "content_ids_mapping.tsv" file attached via PRAGMA File above:
-- first TSV column -> second TSV column parsed as UInt64.
$content_ids_mapping = $parseContentIdsMapping(FileContent("content_ids_mapping.tsv"));


-- Maps a service name to its numeric id, cast to UInt8.
-- Returns NULL when no branch matches (unknown name, or NULL input), so callers can
-- detect unknown services.
-- Branch order matters: CASE yields the first matching branch, therefore every exact name
-- that starts with "test" has to be listed BEFORE the catch-all `like "test%"` branch.
$serviceEnum = ($x) -> (cast(case
when $x == "ugc" then 1
when $x == "zen_cinema" then 2
when $x == "districts" then 3
when $x == "ether-prod" then 4
when $x == "q-prestable" then 5
when $x == "districts-testing" then 6
when $x == "morda" then 7
when $x == "q" then 8
when $x == "market_front_white-production-production" then 9
when $x == "maps" then 10
when $x == "games" then 11
when $x == "testservice" then 12
when $x == "ugc-internal" then 13
when $x == "districts-unstable" then 14
when $x == "selfservice" then 15
when $x == "ug" then 16
-- Must precede the "test%" catch-all below; when it followed it, this branch was
-- unreachable and the service was always reported as 17.
when $x == "testserviceaaaaaa" then 18
-- Catch-all for every remaining test-prefixed name and for the empty string.
when $x == "test" or $x like "test%" or $x == "" then 17
when $x == "vh_replica" then 19
when $x == "youtube-low" then 20
when $x == "uslugi" then 21
when $x == "youtube-high" then 22
when $x == "zen_publishers" then 23
when $x == "ugc-test" then 24
when $x == "live-test" then 25
when $x == "canvas" then 26
when $x == "zen_stories" then 27
when $x == "music" then 28
when $x == "games-prestable" then 29
when $x == "news" then 30
when $x == "muz" then 31
when $x == "zen_cinema-testing" then 32
when $x == "zen_publishers-testing" then 33
when $x == "zen_stories-testing" then 34
when $x == "pythia" then 35
when $x == "ott-content-api" then 36
when $x == "ott-dataset-sport-cg-generator" then 37
when $x == "geoadv" then 38
when $x == "geoadv-prestable" then 39
when $x == "vh-ugc-lkpo-music" then 40
when $x == "vh-ugc-partners" then 41
when $x == "vh-ugc-vconf-internal" then 42
when $x == "vh-ugc-ya-external" then 43
when $x == "vh-ugc-ya-internal" then 44
when $x == "superadmin" then 45
when $x == "telemost" then 46
when $x == "telemost-prestable" then 47
when $x == "market-socialecom" then 48
when $x == "market-socialecom-testing" then 49
else null
end as UInt8));

-- Builds the list of "%Y-%m-%d" strings for every day from $f to $t, both ends included.
-- The day count is unwrapped, so the query fails when it is NULL.
$dateRange = ($f, $t) -> {
    $first = $parseDate($f);
    $last = $parseDate($t);
    -- +1 makes the end date part of the range.
    $dayCount = Unwrap(DateTime::ToDays($last - $first) + 1);
    $offsets = ListFromRange(0, $dayCount);
    -- Formats the date that lies $offset days after the first one.
    $shift = ($offset) -> (
        $formatDate($first + DateTime::IntervalFromDays(CAST($offset AS Int16)))
    );
    RETURN ListMap($offsets, $shift)
};

-- Every date between $date_from and $date_to (inclusive); one $process_date run is issued per entry.
$input_dates = $dateRange($date_from, $date_to);

-- Service names that $check treats as the "zen" group (selected when $check_type is "zen",
-- excluded otherwise).
$ZEN_SERVICES = (
    "zen_cinema-testing",
    "zen_cinema",
    "zen_publishers-testing",
    "zen_publishers",
    "zen_stories-testing",
    "zen_stories",
    "news"
);

-- Service filter predicate chosen once from $check_type:
--   "zen"           -> keeps services listed in $ZEN_SERVICES;
--   any other value -> keeps services NOT listed there.
-- A NULL service is treated as the empty string.
$check = EvaluateCode(
    IF(
        $check_type == "zen",
        QuoteCode(($service) -> (COALESCE($service, "") IN $ZEN_SERVICES)),
        QuoteCode(($service) -> (COALESCE($service, "") NOT IN $ZEN_SERVICES))
    )
);

-- UGC metadata rows that pass $check, with the textual service replaced by its numeric id.
-- Ensure aborts the query with a descriptive message when a service has no id in $serviceEnum.
$ugc_meta_ = (
    SELECT
        Ensure(
            $serviceEnum(service),
            $serviceEnum(service) IS NOT NULL,
            "unknown service found in ugc base: " || (service ?? "")
        ) AS service,
        t.* WITHOUT service
    FROM $get_ugc_meta() AS t
    WHERE $check(service)
);

-- Metadata addressable by two keys: each row of $ugc_meta_ appears once with its own
-- video_content_id and once more with video_content_id replaced by the stringified video_id.
$ugc_meta = (
    SELECT *
    FROM $ugc_meta_
    UNION ALL
    SELECT
        CAST(video_id AS String) AS video_content_id,
        t.* WITHOUT t.video_content_id
    FROM $ugc_meta_ AS t
);

-- Gender code: "f" -> 1, "m" -> 2, anything else (including NULL) -> 0.
$genderEnum = ($x) -> (
    CAST(
        CASE $x
            WHEN "f" THEN 1
            WHEN "m" THEN 2
            ELSE 0
        END AS Uint8
    )
);

-- Age segment code: the six known "<from>_<to>" segments map to 1..6, anything else to 0.
$ageEnum = ($x) -> (
    CAST(
        CASE $x
            WHEN "0_17" THEN 1
            WHEN "18_24" THEN 2
            WHEN "25_34" THEN 3
            WHEN "35_44" THEN 4
            WHEN "45_54" THEN 5
            WHEN "55_99" THEN 6
            ELSE 0
        END AS Uint8
    )
);

-- Device code: desktop -> 1, phone or tablet -> 2, tv -> 3, anything else -> 0.
$deviceEnum = ($x) -> (
    CAST(
        CASE
            WHEN $x == "desktop" THEN 1
            WHEN $x IN ("phone", "tablet") THEN 2
            WHEN $x == "tv" THEN 3
            ELSE 0
        END AS Uint8
    )
);

-- OS family code: Android -> 1, iOS -> 2, MacOS -> 3, Windows -> 4, anything else -> 0.
-- Matching is case-sensitive.
$osEnum = ($x) -> (
    CAST(
        CASE $x
            WHEN "Android" THEN 1
            WHEN "iOS" THEN 2
            WHEN "MacOS" THEN 3
            WHEN "Windows" THEN 4
            ELSE 0
        END AS Uint8
    )
);

-- Browser family code; several browser names share one code:
--   1 = ChromeMobile / Chrome / GoogleSearch, 2 = YandexBrowser / YandexSearch,
--   3 = MobileSafari / Safari, 4 = MobileFirefox / Firefox, 0 = anything else.
$browserEnum = ($x) -> (
    CAST(
        CASE
            WHEN $x IN ("ChromeMobile", "Chrome", "GoogleSearch") THEN 1
            WHEN $x IN ("YandexBrowser", "YandexSearch") THEN 2
            WHEN $x IN ("MobileSafari", "Safari") THEN 3
            WHEN $x IN ("MobileFirefox", "Firefox") THEN 4
            ELSE 0
        END AS Uint8
    )
);

-- View source code derived from ref_from.
-- The value is first normalised by $remappedWrapRefFrom (imported from stability_common);
-- the raw value is consulted only for "streamhandler_tv", and only when none of the
-- normalised names matched. Anything else maps to 0.
$viewSourceEnum = ($x) -> {
    $source = $remappedWrapRefFrom($x);
    RETURN CAST(
        CASE
            WHEN $source == "VIDEO" THEN 1
            WHEN $source == "EFIR" THEN 2
            WHEN $source == "ZEN" THEN 3
            WHEN $source == "ya-q" THEN 4
            WHEN $source == "SERP" THEN 5
            WHEN $source IN ("TVAPP", "STATION") THEN 6
            WHEN $x == "streamhandler_tv" THEN 7
            ELSE 0
        END AS Uint8
    )
};

-- Formatter producing "%Y-%m-%d %H:%M:%S" text.
$fielddateFormat = DateTime::Format("%Y-%m-%d %H:%M:%S");
-- Converts a timestamp in seconds (cast to Uint32) into "%Y-%m-%d %H:%M:%S" text,
-- rendered in the UTC timezone.
$getFielddate = ($ts) -> (
    $fielddateFormat(
        AddTimezone(DateTime::FromSeconds(CAST($ts AS Uint32)), "UTC")
    )
);

-- Processes one day of video sessions.
--   $fielddate: date string used both in the input path
--               ("//cubes/video-strm/<date>/sessions") and as the output table name under $output_root.
-- Stages: filter and encode raw sessions -> normalise content ids -> attach UGC metadata ->
-- aggregate per (create_time, vsid, video_id) -> overwrite the output table.
define action $process_date($fielddate) as
$input_table = "//cubes/video-strm/" || $fielddate || "/sessions";
$output_table = $output_root || $fielddate;
-- Raw sessions with categorical columns converted to their numeric codes.
$sessions_raw = (
    select
        fielddate,
        $getFielddate(`timestamp`) as create_time,
        view_time,
        shows_good,
        price,
        partner_price,
        vsid,
        yandexuid,
        $viewSourceEnum(ref_from) as view_source,
        $deviceEnum(device_type) as device_type,
        $browserEnum(browser_name) as browser_name,
        $osEnum(os_family) as os_family,
        $ageEnum(Yson::LookupString(exact_socdem, "age_segment")) as age_segment,
        $genderEnum(Yson::LookupString(exact_socdem, "gender")) as gender,
        region,
        `UUID`,
        video_content_id
    from $input_table
    -- Keep only rows that identify a user (yandexuid or puid) ...
    where (yandexuid is not null or puid is not null)
    -- ... are not flagged as fraud ...
    and (fraud is null or fraud == 0)
    -- ... and show some activity: a "start"/"heartbeat" source, a good ad show, or revenue.
    and (
        sources_aggr like "%start%"
        or sources_aggr like "%heartbeat%"
        or shows_good > 0
        or price > 0
    )
);

-- Picks the content id used for the metadata join. Priority:
--   1. mapped value of `UUID` from $content_ids_mapping,
--   2. mapped value of video_content_id from $content_ids_mapping,
--   3. `UUID` itself when present,
--   4. the original video_content_id.
$sessions_content_ids_replacement = (
    select
        case
        when `UUID` in $content_ids_mapping then cast(unwrap($content_ids_mapping[`UUID`]) as String)
        when video_content_id in $content_ids_mapping then cast(unwrap($content_ids_mapping[video_content_id]) as String)
        when `UUID` is not null then `UUID`
        else video_content_id
        end as video_content_id,
        s.* without `UUID`, video_content_id
    from $sessions_raw as s
);

-- Inner join: sessions whose content id has no row in $ugc_meta are dropped.
$sessions = (
    select
        s.*,
        -- columns taken from the $ugc_meta side of the join
        video_id,
        u.channel_id as channel_id,
        owner_id,
        service
    from $sessions_content_ids_replacement as s
    inner join $ugc_meta as u using (video_content_id)
);

-- One row per (create_time, vsid, video_id): attributes are collapsed with max(),
-- metrics are summed, with a NULL sum replaced by 0.
$grouped = (
    select
        create_time,
        vsid,
        video_id,
        max(view_source) as view_source,
        max(fielddate) as fielddate,
        max(channel_id) as channel_id,
        max(owner_id) as owner_id,
        max(yandexuid) as yandexuid,
        max(service) as service,
        max(region) as region,
        max(gender) as gender,
        max(age_segment) as age_segment,
        max(os_family) as os_family,
        max(browser_name) as browser_name,
        max(device_type) as device_type,
        sum(price) ?? 0 as price,
        sum(partner_price) ?? 0 as partner_price,
        sum(view_time) ?? 0 as view_time,
        sum(shows_good) ?? 0 as shows_good
    from $sessions
    group by create_time, vsid, video_id
);

-- WITH TRUNCATE replaces any existing content of the output table for this date.
-- The trailing comments name the intended type of each output column.
insert into $output_table WITH TRUNCATE
select
    create_time as CreateTime,-- "%Y-%m-%d %H:%M:%S" text produced by $getFielddate,
    owner_id as OwnerUID, -- UInt64,
    channel_id as ChannelID, -- UInt64,
    view_source as ViewSource, -- UInt8,
    video_id as VideoID, -- UInt64,
    cast(0 as Uint8) as EventType, -- UInt8,
    cast(view_time as Uint64) as ViewTime, -- UInt64,
    cast(shows_good as UInt16) as AdShows, -- UInt16,
    cast(price as UInt64) as Price, -- UInt64,
    cast(partner_price as UInt64) as PartnerPrice, -- UInt64,
    vsid as VideoSessionID, -- String,
    yandexuid as YandexUID, -- String,
    device_type as Device, -- UInt8,
    browser_name as Browser, -- UInt8,
    os_family as OSFamily, -- UInt8,
    gender as UserGender, -- UInt8,
    age_segment as UserAge, -- UInt8,
    service as Service, -- UInt8,
    cast(region as UInt16) as Region -- UInt16
from $grouped;
END DEFINE;

-- Runs $process_date once for every date in $input_dates; each run writes its own output table.
EVALUATE FOR $fielddate IN $input_dates
    DO $process_date($fielddate);
