use hahn;
pragma yson.DisableStrict;
pragma yt.Pool = "@[pool]";
pragma library("stability_common.sql");
import stability_common symbols $dateRange;
pragma yt.ParallelOperationsLimit = "4";
PRAGMA AnsiInForEmptyOrNullableItemsCollections;


-- Output directory prefix; each per-day table path is built as $output_root || <date>.
-- NOTE(review): the "@[...]" values look like placeholders substituted by an external
-- templating step before the query is run -- confirm with the launcher.
$output_root = "@[root]/";
-- Bounds of the processed date range; passed to $dateRange and to $preprocess_data.
$date_from = "@[date_from]";
$date_to = "@[date_to]";

-- Dates to process, produced by $dateRange imported from stability_common.sql.
-- NOTE(review): presumably a list of "YYYY-MM-DD" strings covering both bounds -- confirm.
$dates = $dateRange($date_from, $date_to);

-- Content metadata table; matched to sessions by JoinKey inside $preprocess_data.
$iron_branch_table = "//home/videolog/strm_meta/iron_branch/concat";

-- Converts a "YYYY-MM-DD" date string into the DateTime::ToSeconds value of
-- 00:00:00 on that day in the Europe/Moscow time zone.
$wrapFielddate = ($x) -> {
    $parse_with_zone = DateTime::Parse("%Y-%m-%d %H:%M:%S %Z");
    -- The time-of-day and zone are appended so the date alone matches the full format.
    $midnight = $parse_with_zone($x || " 00:00:00 Europe/Moscow");
    return DateTime::ToSeconds(DateTime::MakeDatetime($midnight));
};

-- True when the string contains "start" or "heartbeat" anywhere in it.
$saTest = ($sa) -> {
    $mentions_start = $sa like "%start%";
    $mentions_heartbeat = $sa like "%heartbeat%";
    return $mentions_start or $mentions_heartbeat;
};

-- Replaces NULL and the literal "novcid" with an empty string; any other value is returned as is.
$nullToEmpty = ($x) -> (
    CASE
        WHEN $x is null THEN ""
        WHEN $x == "novcid" THEN ""
        ELSE $x
    END
);

-- Maps a device type name to its numeric code:
-- "phone" -> 3, "tablet" -> 4, "desktop" -> 5, anything else (including NULL) -> 0.
$deviceType = ($x) -> (
    CASE $x
        WHEN "phone" THEN 3
        WHEN "tablet" THEN 4
        WHEN "desktop" THEN 5
        ELSE 0
    END
);

-- Python UDF: parses a hexadecimal string and returns its value modulo 2^32.
-- Returns 0 (not NULL) when the input is NULL or is not a valid hexadecimal number.
$hashToInt = Python::hash_to_int(
    Callable<(String?)->Int64?>, @@
def hash_to_int(s):
    try:
        return int(s, 16) % 2 ** 32
    except (TypeError, ValueError):
        # TypeError: s is None (NULL input); ValueError: s is not a hex number.
        # Anything else is unexpected and is allowed to surface instead of being hidden.
        return 0
@@
);

-- Builds the numeric user identifier for a row.
--   $yu      : identifier string; NULL or "-" is treated as absent.
--   $yu_hash : hexadecimal string, converted with $hashToInt when $yu is absent.
-- Returns an Int64 that is never NULL:
--   * $yu present      -> CAST($yu as Int64), or 0 if the cast fails (no fallback to the hash);
--   * $yu absent       -> $hashToInt($yu_hash), or 0 if that is NULL.
$getUniqid = ($yu, $yu_hash) -> {
    -- A former guard "$result > 18446744073709551615" was dropped here: the value is
    -- an Int64 and no 64-bit integer exceeds the Uint64 maximum, so that branch could
    -- never be taken. Values that do not fit Int64 already become 0 via the failed CAST.
    return IF(
        $yu is not null and $yu != "-",
        CAST($yu as Int64) ?? 0,
        $hashToInt($yu_hash) ?? 0
    );
};

-- Rounds a geo region id to a coarser level depending on the country code:
--   country != "RU" -> rounded to "country" level, 10000 if rounding yields NULL;
--   otherwise       -> rounded to "region" level, 225 if rounding yields NULL.
-- NOTE(review): a NULL $country makes the comparison NULL, which presumably selects
-- the "otherwise" branch -- confirm this is the intended handling.
$getRegion = ($region, $country) -> {
    $country_level_id = Geo::RoundRegionById($region, "country").id ?? 10000;
    $region_level_id = Geo::RoundRegionById($region, "region").id ?? 225;
    return IF($country != "RU", $country_level_id, $region_level_id);
};

-- A row counts as a view when $pa is positive or $sa passes $saTest.
$isView = ($pa, $sa) -> {
    $positive_pa = $pa > 0;
    return $positive_pa or $saTest($sa);
};

-- Reads one field from an element of a Yson list, counting elements from the end.
--   $chain : Yson list node.
--   $field : key passed to $func.
--   $func  : accessor called as $func(element, $field), e.g. Yson::LookupString.
--   $index : position in the reversed list (0 = last element of the original list).
$getFromChain = ($chain, $field, $func, $index) -> {
    $reversed_items = ListReverse(Yson::ConvertToList($chain));
    $picked = $reversed_items[$index];
    RETURN $func($picked, $field);
};

-- True when at least one element of the Yson list $chain has
-- content_type_id_name == "vod-broadcast" and Name == "highlights".
$isHighlight = ($chain) -> {
    $is_highlight_node = ($node) -> (
        Yson::LookupString($node, "content_type_id_name") == "vod-broadcast"
        and Yson::LookupString($node, "Name") == "highlights"
    );
    $matching_nodes = ListFilter(Yson::ConvertToList($chain), $is_highlight_node);
    return ListLength($matching_nodes) > 0;
};

-- Subquery: builds aggregated rows from the video-strm session cubes for the
-- tables between $date_from and $date_to, enriched with content metadata from
-- $iron_branch_table.
-- Stages: $iron_branch (content metadata) -> $sessions (filtered, normalised rows)
-- -> $joined -> $grouped1 (one row per session/content) -> $grouped2 (final grain).
-- The parameters shadow the top-level $date_from / $date_to inside this body.
define subquery $preprocess_data($date_from, $date_to) as
-- Content metadata keyed by JoinKey, assembled from two sources.
$iron_branch = (
    -- Part 1: rows with a chain that is not a highlight. Index 0 of $getFromChain is
    -- the last chain element (the content itself), index 1 the one before it (parent).
    select
        JoinKey,
        `UUID` as ContentUUID,
        $getFromChain(chain, "ContentTypeID", Yson::LookupInt64, 0) ?? 0 as ContentTypeID,
        $getFromChain(chain, "ContentTypeID", Yson::LookupInt64, 1) ?? 0 as ParentTypeID,
        $getFromChain(chain, "UUID", Yson::LookupString, 1) ?? "" as ParentUUID,
    from $iron_branch_table
    where chain is not null and not $isHighlight(chain)
    union all
    -- Part 2: rows of the "news" UGC service, with fixed type ids (48 / 2) and the
    -- channel id as the parent.
    -- NOTE(review): a row matching both predicates is emitted twice; the ANY join
    -- below presumably keeps only one of them per JoinKey -- confirm which is wanted.
    select
        JoinKey,
        `UUID` as ContentUUID,
        48 as ContentTypeID,
        2 as ParentTypeID,
        cast(ugc_channel_id as String) as ParentUUID,
    from $iron_branch_table
    where ugc_channel_service = "news"
);

-- Session rows with normalised columns; only rows that are views or carry revenue.
$sessions = (
    select
        -- fielddate is converted to seconds so it can be compared per day by the caller.
        $wrapFielddate(fielddate) as UpdateTime,
        $nullToEmpty(video_content_id) as VideoContentID,
        $nullToEmpty(vsid) as VideoSessionID,
        ref_from as Service,
        $nullToEmpty(ip) as IP,
        $nullToEmpty(os_family) as DetailedDeviceType,
        $nullToEmpty(browser_name) as BrowserName,
        $nullToEmpty(user_agent) as UserAgent,
        CAST(page_id as Int64) ?? 0 as PageID,
        $deviceType(device_type) as DeviceType,
        CAST(category_id as Int64) ?? 0 as VideoCategoryID,
        `timestamp` as EventTime,
        -- device_id is used as the hex fallback when yandexuid is absent.
        $getUniqid(yandexuid, device_id) as UniqID,
        price ?? 0 as Price,
        partner_price ?? 0 as PartnerPrice,
        $getRegion(CAST(region as Int32), country) as RegionID,
        IF($isView(view_time_player_alive, sources_aggr), 1, 0) as IsView,
        view_time as Duration,
        hits_good ?? 0 as Hits,
        shows_good ?? 0 as ShownHits,
        Yson::LookupString(exact_socdem, "age_segment") ?? "" as UserAgeSegment,
        Yson::LookupString(exact_socdem, "gender") ?? "" as UserGender,
    from range(
        `cubes/video-strm`, $date_from, $date_to, `sessions`
    )
    where
    (
        $isView(view_time_player_alive, sources_aggr)
        or price > 0
        or partner_price > 0
    )
    -- NOTE(review): both branches of $getUniqid coalesce to 0, so as written this
    -- condition is always true and filters nothing.
    and $getUniqid(yandexuid, device_id) is not null
);

-- Attach content metadata; sessions without a matching JoinKey are dropped.
$joined = (
    select s.*,
        ContentUUID,
        ContentTypeID,
        ParentTypeID,
        ParentUUID
    from $sessions as s
    inner join any $iron_branch as i on (s.VideoContentID == i.JoinKey)
);

-- First aggregation: one row per (day, content, page, video session).
-- Descriptive columns are collapsed with max(); money, duration and hit counters are summed.
-- IsView becomes 0/1 per session because of max().
$grouped1 = (
    select
        UpdateTime,
        ParentUUID,
        ContentUUID,
        PageID,
        ParentTypeID,
        ContentTypeID,
        VideoSessionID,
        max(VideoContentID) as VideoContentID,
        max(IsView) as IsView,
        max(UniqID) as UniqID,
        max(Service) as Service,
        max(RegionID) as RegionID,
        max(UserGender) as UserGender,
        max(UserAgeSegment) as UserAgeSegment,
        max(IP) as IP,
        max(UserAgent) as UserAgent,
        max(DeviceType) as DeviceType,
        max(DetailedDeviceType) as DetailedDeviceType,
        max(BrowserName) as BrowserName,
        max(VideoCategoryID) as VideoCategoryID,
        sum(Price) as Price,
        sum(PartnerPrice) as PartnerPrice,
        sum(Duration) as Duration,
        sum(Hits) as Hits,
        sum(ShownHits) as ShownHits,
    from $joined
    group by
        UpdateTime,
        ParentUUID,
        ContentUUID,
        PageID,
        ParentTypeID,
        ContentTypeID,
        VideoSessionID
);

-- Second aggregation: sessions are folded into one row per
-- (day, page, user, content, category, device/browser, socdem, region, service).
-- sum(IsView) over the per-session 0/1 flags gives the number of sessions with a view;
-- VideoSessionID / VideoContentID / IP / UserAgent keep a single value chosen by max().
$grouped2 = (
    select
        UpdateTime,
        PageID,
        UniqID,
        ParentUUID,
        ContentUUID,
        VideoCategoryID,
        ParentTypeID,
        ContentTypeID,
        BrowserName,
        DeviceType,
        DetailedDeviceType,
        UserGender,
        UserAgeSegment,
        RegionID,
        Service,
        max(VideoSessionID) as VideoSessionID,
        max(VideoContentID) as VideoContentID,
        sum(IsView) as IsView,
        max(IP) as IP,
        max(UserAgent) as UserAgent,
        sum(Price) as Price,
        sum(PartnerPrice) as PartnerPrice,
        sum(Duration) as Duration,
        sum(Hits) as Hits,
        sum(ShownHits) as ShownHits,
    from $grouped1
    group by
        UpdateTime,
        PageID,
        UniqID,
        ParentUUID,
        ContentUUID,
        VideoCategoryID,
        ParentTypeID,
        ContentTypeID,
        BrowserName,
        DeviceType,
        DetailedDeviceType,
        UserGender,
        UserAgeSegment,
        RegionID,
        Service
);
-- Result of the subquery.
select * from $grouped2;
end define;

-- Aggregated rows for the whole [$date_from, $date_to] range; $insert_by_day
-- selects one day's rows from this expression by UpdateTime.
$preprocessed = select * from $preprocess_data($date_from, $date_to);

-- Action: (re)writes the output table for a single date.
--   $date : date string; appended to $output_root to form the table path and
--           converted with $wrapFielddate to select that day's rows.
-- The target table is overwritten (WITH TRUNCATE), not appended to.
DEFINE ACTION $insert_by_day($date) AS
    $day_update_time = $wrapFielddate($date);
    $output_table = $output_root || $date;
    INSERT INTO $output_table WITH TRUNCATE
    SELECT * FROM $preprocessed WHERE UpdateTime == $day_update_time;
END DEFINE;

-- Run $insert_by_day once for every element of $dates, i.e. one output table per date.
evaluate for $date in $dates
    do $insert_by_day($date);