use hahn;
pragma yt.PoolTrees = "physical";
pragma yt.DefaultOperationWeight = "@[weight]";
pragma library("common.sql");
-- pragma yt.UseDefaultTentativePoolTrees;
pragma yt.Pool = "@[pool]";
pragma yson.DisableStrict;
pragma AnsiInForEmptyOrNullableItemsCollections;
pragma DqEngine = "disable";
PRAGMA yt.PublishedCompressionCodec = 'zstd_8';
PRAGMA yt.MinPublishedAvgChunkSize = '8G';
PRAGMA yt.PublishedErasureCodec = 'lrc_12_2_2';
PRAGMA yt.PublishedAutoMerge = "economy";
PRAGMA File(
    "content_ids_mapping.tsv",
    'https://proxy.sandbox.yandex-team.ru/last/VH_TO_UGC_BLOGGERS_MAPPING?attrs={"released":"stable"}&salt=1593700687.52'
);

-- pragma yt.DefaultMemoryLimit = "3G";
pragma yt.InferSchema;
pragma yt.IgnoreWeakSchema;
IMPORT common SYMBOLS $get_raw_icookie_buckets, $aggregate_test_buckets, $re_yandexuid, $undefWrapper, $processZenGroupIds;

-- Date boundaries of the run. Every "@[...]" token in this file is a template
-- placeholder that is substituted before the query is executed.
$week_ago = "@[week_ago]";
$date = "@[date]";

-- Per-date tables located under the job's own root directory.
$strm_map_table = "@[root]/@[date]/strm_map";
$redir_map_table = "@[root]/@[date]/redir_map";
$jstracer_map_table = "@[root]/@[date]/jstracer_map";
$androidvsids_table = "@[root]/@[date]/androidvsids";
$rtbdsp_map_table = "@[root]/@[date]/money_map";

-- Fixed-path reference tables (not parameterised by date).
$iron_branch_table = "//home/videolog/strm_meta/iron_branch/concat";
$crypta_table = "//home/crypta/production/profiles/export/profiles_for_14days";
-- $efir_history_table = "//home/videolog/24julia/mma-2177/1.efir_history";

-- Per-date subscriptions table; read below keeping only rows with state "active".
$subscriptions_table = "//home/msdata/user-profiles/v1/@[date]";

-- $antifraud_table = "//home/antifraud/export/videohosting/views/@[date]";

-- Daily logs. Those passed to $get_raw_icookie_buckets and
-- $raw_yandexuid_buckets below are the sources of experiment buckets.
$access_log_table = "//logs/yandex-access-log/1d/@[date]";
$morda_access_log_table = "//logs/morda-access-log/1d/@[date]";
$news_access_log_table = "//logs/news-scarab-access-log/1d/@[date]";
-- $answers_test_buckets_table = "//home/answers/yuid_testids/@[date]";
$zen_events_table = "//logs/zen-events-log/1d/@[date]";
$gogol_table = "//logs/strm-gogol-log/1d/@[date]";
$hit_log_table = "//logs/bs-hit-log/1d/@[date]";
/*alice
$alice_buckets_table = "//home/alice/dialog/prepared_logs_expboxes/@[date]";
alice*/
$strm_access_log_table = "//logs/strm-access-log/1d/@[date]";
$apphost_table = "//logs/vh-apphost-logs/1d/@[date]";

-- Value written to the `fielddate` column of the output (same placeholder as $date).
$fielddate = "@[date]";

-- Destination of the final INSERT at the end of the script.
$output_table = "@[root]/@[date]/sessions";

-- Optional struct type of the `parsed_player_state` member in the reducer's
-- input rows (see the $microsessions_reducer signature). All members are
-- optional except `is_ad`, which is a non-optional Bool.
-- NOTE(review): the member names and types presumably have to match what the
-- upstream preprocessing produces and what microsessions_reducer.py reads -
-- confirm before changing any of them.
$player_alive_state_type = Struct<
    'capHeight':Int64?,
    'capWidth':Int64?,
    'height':Int64?,
    'is_ad':Bool,
    'isVisible':Bool?,
    'isMuted':Bool?,
    'maxHeight':Int64?,
    'maxWidth':Int64?,
    'stalledCount':Int64?,
    'stalledTime':Double?,
    'state':String?,
    'timestamp':Uint64?,
    'watchedTime':Double?,
    'width':Int64?
>?;

-- Python UDF `microsessions_reducer`, loaded from the attached file
-- "microsessions_reducer.py".
--
-- Declared signature:
--   (String? key, Stream<input row struct>) -> Stream<output row struct>
-- It is applied below in `REDUCE ... ON vsid USING $microsessions_reducer(TableRow())`,
-- so the first argument is the vsid key and the stream carries that key's rows.
--
-- The only non-optional members are `source` (input) and `sources_aggr` (output).
-- NOTE(review): `is_dhd` is declared Bool? in the input struct but String? in the
-- output struct - confirm this is what the Python code really returns.
-- NOTE(review): the input struct lists members (e.g. `bidreqid`, `browser`,
-- `fielddate`, `imp_id`, `request_ts`, `view_type`) that $sessions_reduce_input
-- does not select - presumably they arrive as NULL; verify.
$microsessions_reducer = Python::microsessions_reducer(
Callable<(String?,
 Stream<Struct<'a_station':String?,
'add_info':Yson?,
'bidreqid':String?,
'browser':String?,
'browser_name':String?,
'browser_version':String?,
'bytes_sent':UInt64?,
'category_id':String?,
'channel_id':String?,
'channel_old':String?,
'country':String?,
'device_type':String?,
'device_id':String?,
'device_uuid':String?,
'error_id':String?,
'error_id_raw':String?,
'extension':String?,
'event':String?,
'fielddate':String?,
'gogol_service':String?,
'icookie':String?,
'imp_id':String?,
'is_kal':String?,
'is_dhd':Bool?,
'ip':String?,
'non_muted': Bool?,
'os_family':String?,
'page_id':String?,
'parsed_player_state':$player_alive_state_type,
'player_version':String?,
'puid':String?,
'provider':String?,
'redir_licence':String?,
'ref_from':String?,
'ref_from_block':String?,
'region':Int64?,
'reqid':String?,
'request_ts':Int64?,
'resolution':String?,
'source':String,
'stream_block':String?,
'testIds':String?,
'timestamp':Int64?,
'time_on_seekbar':Double?,
'tvandroid_data':Json?,
'user_agent':String?,
'user_id':String?,
'utm_data':Yson?,
'video_content_id':String?,
'view_type':String?,
'view_type_player':String?,
'vsid':String?,
'yandexuid':String?,
'yu_hash':String?,
'strongest_id': String?,
'item_id': Int64?,
'rid': String?,
'ppi': String?,
>>) ->
Stream<Struct<'ad_events':Yson?,
'ad_tracking_events':Yson?,
'add_info':Yson?,
'avglogs':Double?,
'browser_name':String?,
'browser_version':String?,
'bytes_sent':UInt64?,
'category_id':String?,
'channel_id':String?,
'channel_old':String?,
'chunks_types':String?,
'country':String?,
'device_type':String?,
'device_id':String?,
'device_uuid':String?,
'errors':Yson?,
'gogol_service':String?,
'gogol_test_buckets':String?,
'heartbeats':Yson?,
'hits_block_good':Int64?,
'hits_good':Int64?,
'icookie':String?,
'ip':String?,
'is_kal':String?,
'is_dhd':String?,
'os_family':String?,
'page_id':String?,
'partner_price':Int64?,
'player_alive_data':Yson?,
'player_events':Yson?,
'player_version':String?,
'playlists_types':String?,
'price':Int64?,
'provider':String?,
'puid':String?,
'ref_from':String?,
'ref_from_block':String?,
'region':Int64?,
'reqid':String?,
'shows_block_good':Int64?,
'shows_good':Int64?,
'sources_aggr':String,
'stream_block':String?,
'tcpinfo_total_retrans':Int64?,
'timestamp':Int64?,
'times_on_seekbar':List<Int64>?,
'user_agent':String?,
'user_id':String?,
'video_content_id':String?,
'view_time':Int64?,
'view_time_non_muted':Int64?,
'view_time_player_alive':Double?,
'view_time_player_alive_non_muted':Double?,
'view_time_player_alive_visible_non_muted':Double?,
'vsid':String?,
'winhits_block_good':Int64?,
'winhits_good':Int64?,
'yandexuid':String?,
'yu_hash':String?,
'zen_data':Yson?,
>>>,
FileContent("microsessions_reducer.py")
);

--@[preprocessed]

pragma yt.DataSizePerJob = "2G";

-- Rows handed to the microsessions reducer: a projection of $preprocessed
-- restricted to rows whose vsid is not the empty string.
-- Two columns are derived rather than copied:
--   * view_type_player is the source column video_type under a new name;
--   * icookie falls back to the source icookie when xYandexICookie is NULL.
$sessions_reduce_input = (
    SELECT
        -- reduce key and PRESORT columns
        vsid,
        `timestamp`,
        video_content_id,
        parsed_player_state,

        -- identifiers
        COALESCE(xYandexICookie, icookie) AS icookie,
        yandexuid,
        yu_hash,
        puid,
        user_id,
        device_id,
        device_uuid,
        strongest_id,

        -- client description
        browser_name,
        browser_version,
        os_family,
        device_type,
        user_agent,
        ip,
        country,
        region,

        -- event payload
        event,
        error_id,
        error_id_raw,
        non_muted,
        time_on_seekbar,
        player_version,
        resolution,
        extension,
        bytes_sent,
        video_type AS view_type_player,
        gogol_service,
        source,

        -- content and placement
        category_id,
        channel_id,
        channel_old,
        provider,
        redir_licence,
        is_kal,
        is_dhd,
        a_station,
        stream_block,
        page_id,
        ref_from,
        ref_from_block,
        reqid,

        -- experiments and auxiliary payloads
        testIds,
        add_info,
        utm_data,
        tvandroid_data,
        item_id,
        rid,
        ppi
    FROM $preprocessed
    WHERE vsid != ""
);

pragma yt.DataSizePerJob = "512M";

-- Runs the Python reducer once per vsid. Within each key the rows are fed
-- sorted by event timestamp, then content id, then the player-state timestamp.
$sessions_pre = (
    REDUCE $sessions_reduce_input
    PRESORT
        `timestamp`,
        video_content_id,
        parsed_player_state.`timestamp`
    ON vsid
    USING $microsessions_reducer(TableRow())
);

pragma yt.DataSizePerJob = "1G";

-- Extracts type/parent information from a Yson `chain` list.
--   $chain        - Yson value convertible to a list of nodes
--   $ugc_owner_id - when not NULL, overrides the parent UUID (cast to String)
-- Returns a struct with:
--   ContentTypeID - "ContentTypeID" of the last chain node
--   ParentTypeID  - "ContentTypeID" of the node before the last one
--   ParentUUID    - $ugc_owner_id as String, else "UUID" of the node before the last one
$getChainData = ($chain, $ugc_owner_id) -> {
    $nodes_from_tail = ListReverse(Yson::ConvertToList($chain));
    $last_node = $nodes_from_tail[0];
    $prev_node = $nodes_from_tail[1];
    $parent_uuid = IF(
        $ugc_owner_id IS NULL,
        Yson::LookupString($prev_node, "UUID"),
        CAST($ugc_owner_id AS String)
    );
    RETURN AsStruct(
        Yson::LookupUint64($last_node, "ContentTypeID") AS ContentTypeID,
        Yson::LookupUint64($prev_node, "ContentTypeID") AS ParentTypeID,
        $parent_uuid AS ParentUUID
    )
};

-- Content metadata keyed by JoinKey, read from $iron_branch_table.
-- The inner select renames the source columns and evaluates $getChainData once
-- per row; the outer select spreads that struct into three flat columns.
-- `timetuple` is (start_time, finish_time) when both are present, NULL otherwise.
$iron_branch = (
    SELECT
        heur_category,
        license,
        content_duration,
        `UUID`,
        channel,
        program,
        timetuple,
        JoinKey,
        chain_data.ContentTypeID AS ContentTypeID,
        chain_data.ParentUUID AS ParentUUID,
        chain_data.ParentTypeID AS ParentTypeID
    FROM (
        SELECT
            heur_category,
            license,
            duration AS content_duration,
            `UUID`,
            computed_channel AS channel,
            computed_program AS program,
            IF(
                start_time IS NOT NULL AND finish_time IS NOT NULL,
                AsTuple(unwrap(start_time), unwrap(finish_time)),
                NULL
            ) AS timetuple,
            JoinKey,
            $getChainData(chain, ugc_owner_id) AS chain_data
        FROM $iron_branch_table
    )
);

-- Python UDF that turns a numeric video meta id into its string content id:
-- "v" followed by the URL-safe base64 of the id's 8 little-endian bytes with
-- the "=" padding stripped. A NULL id yields NULL.
$generate_vcid = Python::generate_vcid(@@#py
from base64 import urlsafe_b64encode
from yql.typing import *


def generate_vcid(video_meta_id: Optional[Uint64]) -> Optional[String]:
    # The argument is declared Optional, and the caller feeds it the result of
    # CAST(... AS UInt64), which is NULL for a non-numeric field. Without this
    # guard such a value fails with AttributeError on None.to_bytes.
    if video_meta_id is None:
        return None
    encoded_meta_id = urlsafe_b64encode(video_meta_id.to_bytes(8, "little"))
    video_url_id = encoded_meta_id.decode().strip("=")
    return f"v{video_url_id}"
@@);

-- Parses the attached TSV text into a dict.
-- Lines without a tab are skipped. For every remaining line the first field is
-- the key and the second field, cast to UInt64 and passed through
-- $generate_vcid, is the value. Both are unwrapped, so a line whose second
-- field does not produce a value fails the query.
$parseContentIdsMapping = ($mapping) -> {
    $lines = String::SplitToList($mapping, "\n");
    $tabbed_lines = ListFilter($lines, ($line) -> (FIND($line, "\t") IS NOT NULL));
    $pairs = ListMap($tabbed_lines, ($line) -> {
        $fields = String::SplitToList($line, "\t");
        RETURN AsTuple(
            unwrap($fields[0]),
            unwrap($generate_vcid(CAST($fields[1] AS UInt64)))
        )
    });
    RETURN ToDict($pairs)
};

-- Mapping built from the file attached at the top of the script.
$content_ids_mapping = $parseContentIdsMapping(FileContent("content_ids_mapping.tsv"));

-- Sessions without a content id skip the metadata join and are re-attached later.
$video_content_id_null = SELECT * FROM $sessions_pre WHERE video_content_id IS NULL;

-- Sessions with a content id of at most 1000 characters. `video_content_id_`
-- is the join key: the mapped id when $content_ids_mapping has an entry for the
-- original id, the original id otherwise.
$video_content_id_non_null = (
    SELECT
        COALESCE($content_ids_mapping[video_content_id], video_content_id) AS video_content_id_,
        s.*
    FROM $sessions_pre AS s
    WHERE video_content_id IS NOT NULL
        AND length(video_content_id) <= 1000
);

-- Materialise the keyed sessions in a temporary table sorted by the join key.
INSERT INTO @video_content_id_non_null
SELECT *
FROM $video_content_id_non_null
ORDER BY video_content_id_;

COMMIT;

-- Attaches content metadata to the keyed sessions (at most one metadata row per
-- session because of LEFT JOIN ANY) and drops the helper join key.
$joined_with_iron_branch = (
    select
        `UUID`,
        heur_category,
        license,
        content_duration,
        channel,
        timetuple,
        program,
        ContentTypeID,
        ParentUUID,
        ParentTypeID,
        s.*
    without
        s.video_content_id_
    from @video_content_id_non_null as s
    left join any $iron_branch as i
        on s.video_content_id_ == i.JoinKey
);

-- Sessions with metadata plus the sessions that had no content id at all.
$after_iron_branch = (
    select * from $joined_with_iron_branch
    union all
    select * from $video_content_id_null
);

-- Raw per-user experiment buckets gathered by the imported helper
-- $get_raw_icookie_buckets from the listed daily log tables.
-- The block between the "alice" markers is currently commented out; when
-- enabled it unions in (icookie, expboxes) rows from $alice_buckets_table.
define subquery $raw_icookie_buckets() as 
select * from $get_raw_icookie_buckets(
    $access_log_table,
    $news_access_log_table,
    $morda_access_log_table,
    $zen_events_table,
    $hit_log_table,
    $apphost_table,
    $gogol_table
)
/*alice
union all
select
    unwrap(icookie) as user_id,
    expboxes as test_buckets
from $alice_buckets_table
where expboxes is not null and $undefWrapper(icookie) is not null
alice*/
;end define;

-- Result of the imported $aggregate_test_buckets helper applied to the raw
-- icookie buckets; joined to sessions on `user_id` further below.
define subquery $test_buckets_icookie() as
select * from $aggregate_test_buckets($raw_icookie_buckets);
end define;

-- Raw (user_id, test_buckets) pairs from the strm access log: the yandexuid
-- cookie and the `exp` column. A row is kept only when the cookie is present,
-- passes $re_yandexuid, and $undefWrapper(exp) is not NULL.
DEFINE SUBQUERY $raw_yandexuid_buckets() AS
    SELECT
        cookie_yandexuid AS user_id,
        exp AS test_buckets
    FROM $strm_access_log_table
    WHERE cookie_yandexuid IS NOT NULL
        AND $re_yandexuid(cookie_yandexuid)
        AND $undefWrapper(exp) IS NOT NULL;
END DEFINE;

-- Result of the imported $aggregate_test_buckets helper applied to the raw
-- yandexuid buckets.
DEFINE SUBQUERY $test_buckets_yandexuid() AS
    SELECT * FROM $aggregate_test_buckets($raw_yandexuid_buckets);
END DEFINE;

-- $antifraud = (
--     select * from $antifraud_table where `uid` is not null and `uid` != ""
-- );

-- Split by presence of puid: only sessions with a puid can be matched to a
-- subscription.
$with_puid = (
    SELECT *
    FROM $after_iron_branch
    WHERE puid IS NOT NULL
);

$without_puid = (
    SELECT *
    FROM $after_iron_branch
    WHERE puid IS NULL
);

-- (puid, bundle) pairs of subscriptions whose state is "active".
$subscriptions_by_puid = (
    SELECT
        puid,
        bundle
    FROM $subscriptions_table
    WHERE `state` == "active"
);

-- Adds `user_license` (the subscription bundle) to sessions that have a puid;
-- LEFT JOIN ANY keeps every session and takes at most one subscription row.
$add_subscriptions_puid = (
    SELECT
        s.*,
        bundle AS user_license
    FROM $with_puid AS s
    LEFT JOIN ANY $subscriptions_by_puid AS p
        USING (puid)
);

-- Re-attach the sessions that had no puid.
$after_subscriptions = (
    SELECT * FROM $add_subscriptions_puid
    UNION ALL
    SELECT * FROM $without_puid
);

-- Classifies a session as "live", "dvr" or "vod".
--   $is_kal    - comma-separated flags; "kal", "live" or "both" mark a live-capable stream
--   $timetuple - optional (start, finish) pair
--   $timestamp - session timestamp compared against that pair
-- A live-capable session is "live" when there is no time pair or the timestamp
-- lies within it (bounds inclusive), "dvr" otherwise. Everything else is "vod".
$getViewType = ($is_kal, $timetuple, $timestamp) -> {
    $flags = String::SplitToList($is_kal, ",");
    $is_live_capable = "kal" IN $flags OR "live" IN $flags OR "both" IN $flags;
    $inside_window = ($timestamp >= $timetuple.0) AND ($timestamp <= $timetuple.1);
    RETURN CASE
        WHEN $is_live_capable AND ($timetuple IS NULL OR $inside_window) THEN "live"
        WHEN $is_live_capable THEN "dvr"
        ELSE "vod"
    END
};

-- One prism segment per yandexuid: across the tables of `home/prism/user_weights`
-- in the range [$week_ago, $date], take the segment from the table with the
-- greatest table name.
$prism_segments = (
    select
        yandexuid,
        MAX_BY(prism_segment, source_table) as prism_segment
    from (
        select
            yandexuid,
            prism_segment,
            TableName() as source_table
        from range(`home/prism/user_weights`, $week_ago, $date)
    )
    group by yandexuid
);


-- Crypta profile columns keyed by yandexuid; the key is cast to String so that
-- it can be compared with the sessions' yandexuid in the join below.
$crypta = (
    SELECT
        CAST(yandexuid AS String) AS yandexuid,
        gender,
        age_segments,
        user_age_6s,
        income_segments,
        exact_socdem
    FROM $crypta_table
);

-- Split by presence of yandexuid; only the non-NULL part takes part in the
-- yandexuid-keyed joins.
$yandexuid_null = SELECT * FROM $after_subscriptions WHERE yandexuid IS NULL;
$yandexuid_non_null = SELECT * FROM $after_subscriptions WHERE yandexuid IS NOT NULL;

-- Materialise every input of the upcoming joins as a temporary table sorted by
-- its join key.
INSERT INTO @crypta WITH TRUNCATE
SELECT *
FROM $crypta
ORDER BY yandexuid;

INSERT INTO @yandexuid_non_null WITH TRUNCATE
SELECT *
FROM $yandexuid_non_null
ORDER BY yandexuid;

INSERT INTO @prism_segments
SELECT *
FROM $prism_segments
ORDER BY yandexuid;

INSERT INTO @test_buckets_yandexuid
SELECT *
FROM $test_buckets_yandexuid()
ORDER BY user_id;

INSERT INTO @test_buckets_icookie
SELECT *
FROM $test_buckets_icookie()
ORDER BY user_id;

COMMIT;

-- Enriches sessions that have a yandexuid with yandexuid-level experiment
-- buckets, the prism segment and crypta profile columns. Every join is
-- LEFT JOIN ANY, so no session is dropped or duplicated.
-- `fraud` is emitted as a constant NULL (the antifraud join is disabled).
$joined_on_yandexuid = (
    SELECT
        s.*,
        test_buckets AS test_buckets_yandexuid,
        prism_segment,
        age_segments,
        user_age_6s,
        gender,
        income_segments,
        exact_socdem,
        NULL AS fraud
    FROM @yandexuid_non_null AS s
    LEFT JOIN ANY @test_buckets_yandexuid AS t
        ON s.yandexuid == t.user_id
    LEFT JOIN ANY @prism_segments AS p
        ON s.yandexuid == p.yandexuid
    LEFT JOIN ANY @crypta AS c
        ON s.yandexuid == c.yandexuid
    -- left join any $antifraud as af on (s.yandexuid == af.uid)
);


-- Re-attach the sessions that had no yandexuid.
$after_yandexuid = (
    SELECT * FROM $joined_on_yandexuid
    UNION ALL
    SELECT * FROM $yandexuid_null
);

-- True when zen_data carries all three parts of the zen join key:
-- a string "strongest_id", an integer "item_id" and a string "rid".
$checkZenData = ($zen_data) -> {
    $has_strongest_id = Yson::LookupString($zen_data, "strongest_id") IS NOT NULL;
    $has_item_id = Yson::LookupInt64($zen_data, "item_id") IS NOT NULL;
    $has_rid = Yson::LookupString($zen_data, "rid") IS NOT NULL;
    RETURN $has_strongest_id AND $has_item_id AND $has_rid
};

-- Sessions without a complete zen key skip the zen join.
$zen_null = SELECT * FROM $after_yandexuid WHERE NOT $checkZenData(zen_data);

-- Sessions with a complete zen key, with the key parts exposed as columns.
$zen_non_null = (
    SELECT
        s.*,
        Yson::LookupString(zen_data, "strongest_id") AS strongest_id,
        Yson::LookupInt64(zen_data, "item_id") AS item_id,
        Yson::LookupString(zen_data, "rid") AS rid
    FROM $after_yandexuid AS s
    WHERE $checkZenData(zen_data)
);

-- Drops empty strings from a list, then removes NULL items.
$filter_empty = ($lst) -> {
    $without_blanks = ListFilter($lst, ($item) -> ($item != ""));
    RETURN ListNotNull($without_blanks)
};

-- Collapses a list of bucket strings into one ";"-joined string.
-- Buckets are keyed by the text before their first comma, so only one bucket
-- per key survives. The survivors are de-duplicated, sorted and joined.
-- Returns NULL when nothing is left.
$merge_single_test_buckets = ($tb1) -> {
    $keyed_buckets = ListMap($tb1, ($bucket) -> {
        $bucket_key = unwrap(String::SplitToList($bucket, ",")[0]);
        RETURN AsTuple($bucket_key, $bucket)
    });
    $bucket_by_key = ToDict($keyed_buckets);
    $payloads = DictPayloads($bucket_by_key) ?? ListCreate(String);
    $merged_list = ListSort(ListUniq($filter_empty($payloads)));
    RETURN IF(
        ListLength($merged_list) > 0,
        String::JoinFromList($merged_list, ";"),
        NULL
    )
};

-- Aggregation factory: collects the distinct items of a list-typed column,
-- flattened across all rows of a group.
$AG_FL = AggregateFlatten(AggregationFactory("AGGREGATE_LIST_DISTINCT"));

-- Builds the list of zen experiment buckets for one event row.
--   $yandexuid            - user id, cast to String before use
--   $group_ids            - passed to the imported $processZenGroupIds helper
--   $external_client_exps - ";"-separated bucket string
-- The two sources are concatenated (a missing source counts as an empty list),
-- then items for which $undefWrapper returns NULL are removed.
$get_zen_test_buckets = ($yandexuid, $group_ids, $external_client_exps) -> {
    $uid = CAST($yandexuid AS String);
    $from_group_ids = $processZenGroupIds($uid, $group_ids) ?? ListCreate(String);
    $from_client_exps = String::SplitToList($external_client_exps, ";") ?? ListCreate(String);
    $combined = ListExtend($from_group_ids, $from_client_exps);
    RETURN ListFilter($combined, ($bucket) -> ($undefWrapper($bucket) IS NOT NULL))
};

-- Per-event zen experiment buckets keyed by (strongest_id, item_id, rid).
--
-- $get_zen_test_buckets is evaluated once per row in the inner select instead
-- of once for the projection and once for the filter.
-- Its result is a list built from coalesced inputs, so the previous
-- `... is not null` filter appears never to reject a row. Rows are now kept
-- only when the list has items. Empty lists contribute nothing to the later
-- AggregateFlatten, and a key that only ever had empty lists ends up with NULL
-- test buckets after the left join either way, so the final output should not
-- change - only less data is shuffled into the GROUP BY.
$zen_test_buckets_map = (
    SELECT
        strongest_id,
        item_id,
        rid,
        test_bucket
    FROM (
        SELECT
            strongest_id,
            item_id,
            rid,
            $get_zen_test_buckets(strongest_id, group_ids, external_client_exps) AS test_bucket
        FROM $zen_events_table
    )
    WHERE ListHasItems(test_bucket)
);

-- One row per (strongest_id, item_id, rid): all bucket lists of the group are
-- flattened into a distinct list and merged into a single ";"-joined string.
$zen_test_buckets = (
    SELECT
        strongest_id,
        item_id,
        rid,
        $merge_single_test_buckets(AggregateBy(test_bucket, $AG_FL)) AS test_buckets
    FROM $zen_test_buckets_map
    GROUP BY
        strongest_id,
        item_id,
        rid
);

-- Adds `test_buckets_zen` to sessions that carry a complete zen key and drops
-- the three helper key columns afterwards.
$joined_on_zen = (
    SELECT
        t.test_buckets AS test_buckets_zen,
        z.*
    WITHOUT
        z.strongest_id,
        z.item_id,
        z.rid
    FROM $zen_non_null AS z
    LEFT JOIN ANY $zen_test_buckets AS t
        USING (strongest_id, item_id, rid)
);

-- Re-attach the sessions that had no complete zen key.
$after_zen = (
    SELECT * FROM $joined_on_zen
    UNION ALL
    SELECT * FROM $zen_null
);

-- Split by presence of icookie; only the non-NULL part is joined with the
-- icookie-level experiment buckets.
$icookie_null = SELECT * FROM $after_zen WHERE icookie IS NULL;
$icookie_non_null = SELECT * FROM $after_zen WHERE icookie IS NOT NULL;

-- Materialise the join input as a temporary table sorted by the join key.
INSERT INTO @icookie_non_null WITH TRUNCATE
SELECT *
FROM $icookie_non_null
ORDER BY icookie;

COMMIT;

-- Turns a ";"-separated bucket string into a dict whose key is the text before
-- the first comma of each bucket and whose value is the whole bucket.
$tb_to_dict = ($tb) -> {
    $buckets = String::SplitToList($tb, ";");
    $keyed_buckets = ListMap($buckets, ($bucket) -> {
        $bucket_key = unwrap(String::SplitToList($bucket, ",")[0]);
        RETURN AsTuple($bucket_key, $bucket)
    });
    RETURN ToDict($keyed_buckets)
};

-- An empty String -> String dict, built from an empty list of pairs.
$empty_dict = ToDict(ListMap(ListCreate(String), ($item) -> (AsTuple($item, $item))));

-- Union of two dicts by key; when a key is in both, the value from the first
-- dict is kept.
$dict_union = ($dct1, $dct2) -> {
    $prefer_first = ($_, $left, $right) -> ($left ?? $right);
    RETURN SetUnion($dct1, $dct2, $prefer_first)
};

-- Merges four ";"-separated bucket strings into one.
-- Buckets are keyed by the text before their first comma; on a key clash the
-- earlier argument wins ($tb1 over $tb2 over $tb3 over $tb4). The surviving
-- buckets are de-duplicated, sorted and joined with ";". Returns NULL when
-- nothing is left.
--
-- A NULL input string makes $tb_to_dict return a NULL dict. Each dict is
-- coalesced to $empty_dict so that one missing source cannot turn the whole
-- union into NULL.
-- NOTE(review): if SetUnion already treats a NULL dict as empty the coalesce is
-- a no-op; it was added because that behaviour is not visible from this file.
$merge_test_buckets = ($tb1, $tb2, $tb3, $tb4) -> {
    $dict1 = $tb_to_dict($tb1) ?? $empty_dict;
    $dict2 = $tb_to_dict($tb2) ?? $empty_dict;
    $dict3 = $tb_to_dict($tb3) ?? $empty_dict;
    $dict4 = $tb_to_dict($tb4) ?? $empty_dict;
    $tb_dict = $dict_union($dict1, $dict2);
    $tb_dict = $dict_union($tb_dict, $dict3);
    $tb_dict = $dict_union($tb_dict, $dict4);
    $merged_list = ListSort(ListUniq(
        $filter_empty(DictPayloads($tb_dict) ?? ListCreate(String)),
    ));
    return IF(
        ListLength($merged_list) > 0,
        String::JoinFromList($merged_list, ";"),
        null
    )
};

-- Adds icookie-level experiment buckets to sessions that have an icookie;
-- LEFT JOIN ANY keeps every session and takes at most one bucket row.
$joined_on_icookie = (
    SELECT
        s.*,
        test_buckets AS test_buckets_icookie
    FROM @icookie_non_null AS s
    LEFT JOIN ANY @test_buckets_icookie AS t
        ON s.icookie == t.user_id
);

-- Re-attach the sessions that had no icookie.
$after_icookie = (
    SELECT * FROM $joined_on_icookie
    UNION ALL
    SELECT * FROM $icookie_null
);

-- Final session rows: adds the report date, the derived view type and a single
-- merged `test_buckets` string, and removes the four per-source bucket columns
-- that the merged string replaces.
$sessions = (
    select
        $fielddate as fielddate,
        $getViewType(is_kal, timetuple, `timestamp`) as view_type,
        $merge_test_buckets(
            gogol_test_buckets,
            test_buckets_icookie,
            test_buckets_yandexuid,
            test_buckets_zen
        ) as test_buckets,
        s.*
    without
        s.test_buckets_yandexuid,
        s.test_buckets_icookie,
        s.gogol_test_buckets,
        s.test_buckets_zen
    from $after_icookie as s
);

-- Overwrite the output table for this date.
insert into $output_table with truncate
select * from $sessions;
