pragma yt.ParallelOperationsLimit = "4";
pragma yson.DisableStrict;
pragma yt.TmpFolder;
pragma AnsiInForEmptyOrNullableItemsCollections;
pragma library("stability_common.sql");
import stability_common symbols $dateRange;
pragma library("helpers.sql");
pragma yt.MaxJobCount = "99999";
import helpers symbols $parseCellsTypes, $parseIntList,
    $re_yandexuid, $parseIntList, $getOperator, $getQuality,
    $isRussia, $geoOk, $parseCellsTypes, $radioOk,
    $getWifiSsid, $get_true_first_buffer_durations,
    $sessionsExtractStalledInfo, $getHeartbeatsTimestamps,
    $sessionsProcessor, $getRadioData, $get_mm_data, $get_mm_data_new;

-- Directory that receives one output table per processed date.
$output_root = "//home/videoquality/vh_analytics/mma-3568-tele2/reduced";

-- Dates strictly after this value use the appmetrica-based source and the
-- $get_mm_data_new extractor; earlier dates use metrika-mobile-log and
-- $get_mm_data.
$APPMETRICA_LOCATION_THRESHOLD = "2020-07-26";

-- Processing window. Both bounds come from $date.
-- NOTE(review): $date is not declared in this file; confirm how it is bound.
$date_from = $date;
$date_to = $date;
$input_dates = $dateRange($date_from, $date_to);

-- Builds the reduced table for a single day.
-- $date: day to process; it is used as the table name in every input range
--        below and as the name of the output table under $output_root.
define action $process_date($date) as
-- Full path of the table that this run overwrites for $date.
$output_table = $output_root || "/" || $date;

-- Input used for dates not after $APPMETRICA_LOCATION_THRESHOLD:
-- the metrika-mobile-log daily table for $date.
DEFINE SUBQUERY $mm_source_old() AS
    SELECT *
    FROM RANGE(`logs/metrika-mobile-log/1d`, $date, $date);
END DEFINE;

-- Reads the table $root/$date and projects it to six columns.
-- EventTimestamp and OperatorID are cast to String; ConnectionType is mapped
-- to "CONN_WIFI" when it equals 1 and to "CONN_CELL" otherwise.
DEFINE SUBQUERY $appmetrica_log_subquery($root, $date) AS
    $events_table = $root || "/" || $date;

    SELECT
        DeviceID,
        CAST(EventTimestamp AS String) AS EventTimestamp,
        ClientIP,
        CAST(OperatorID AS String) AS OperatorID,
        NetworkType,
        IF(ConnectionType = 1, "CONN_WIFI", "CONN_CELL") AS ConnectionType
    FROM $events_table;
END DEFINE;


-- Input used for dates after $APPMETRICA_LOCATION_THRESHOLD:
-- the appmetrica-location-log daily table for $date, concatenated with the
-- same day's table from six appmetrica-events-log streams, each of them read
-- through $appmetrica_log_subquery.
DEFINE SUBQUERY $mm_source_new() AS
    SELECT *
    FROM RANGE(`logs/appmetrica-location-log/1d`, $date, $date)

    UNION ALL
    SELECT *
    FROM $appmetrica_log_subquery("logs/appmetrica-events-log/appmetrica-yandex-events/1d", $date)

    UNION ALL
    SELECT *
    FROM $appmetrica_log_subquery("logs/appmetrica-events-log/appmetrica-external-events/1d", $date)

    UNION ALL
    SELECT *
    FROM $appmetrica_log_subquery("logs/appmetrica-events-log/browser-metrika-mobile-log/1d", $date)

    UNION ALL
    SELECT *
    FROM $appmetrica_log_subquery("logs/appmetrica-events-log/navi-metrika-mobile-log/1d", $date)

    UNION ALL
    SELECT *
    FROM $appmetrica_log_subquery("logs/appmetrica-events-log/taxi-metrika-mobile-log/1d", $date)

    UNION ALL
    SELECT *
    FROM $appmetrica_log_subquery("logs/appmetrica-events-log/superapp-metrika-mobile-log/1d", $date)
END DEFINE;

-- Picks the raw mobile-log source for $date: $mm_source_new for dates strictly
-- after $APPMETRICA_LOCATION_THRESHOLD, $mm_source_old otherwise. The dates are
-- compared as strings.
--
-- The earlier plain-IF form is kept below for reference.
-- NOTE(review): it was presumably replaced because IF cannot choose between
-- subquery templates directly, hence the QuoteCode/EvaluateCode wrapping in
-- the live statement - confirm.
-- $source_subquery = IF(
--     $date > $APPMETRICA_LOCATION_THRESHOLD,
--     $mm_source_new,
--     $mm_source_old
-- );

$source_subquery = EvaluateCode(IF(
    $date > $APPMETRICA_LOCATION_THRESHOLD,
    QuoteCode($mm_source_new),
    QuoteCode($mm_source_old)
));

-- Picks the mobile-log extractor that matches the source chosen for $date:
-- $get_mm_data_new for dates strictly after $APPMETRICA_LOCATION_THRESHOLD,
-- $get_mm_data otherwise. Both extractors are imported from helpers.sql.
$get_mm_data_subquery = EvaluateCode(IF(
    $date > $APPMETRICA_LOCATION_THRESHOLD,
    QuoteCode($get_mm_data_new),
    QuoteCode($get_mm_data)
));


-- One row per (yandexuid, vsid, fielddate) from the video-strm sessions cube.
-- Kept rows: os_family iOS or Android, country RU, non-null
-- $getOperator(provider), vsid of length 62 or 64, and a yandexuid accepted
-- by $re_yandexuid.
$yandexuids = (
    SELECT
        fielddate,
        yandexuid,
        vsid,
        -- provider is not a grouping key, so one value per group is taken via SOME.
        $getOperator(SOME(provider)) AS operator
    FROM RANGE(`cubes/video-strm`, $date, $date, `sessions`)
    WHERE os_family IN ('iOS', 'Android')
        AND country == 'RU'
        AND $getOperator(provider) IS NOT NULL
        AND LENGTH(vsid) IN (62, 64)
        AND $re_yandexuid(yandexuid)
    GROUP BY
        yandexuid,
        vsid,
        fielddate
);

-- Per-session rows from the video-strm sessions cube for $date.
-- Kept rows: os_family iOS or Android, country RU, non-null
-- $getOperator(provider), vsid of length 62 or 64, and a yandexuid accepted
-- by $re_yandexuid.
-- Stall information is extracted from `errors`, heartbeat timestamps from
-- `heartbeats`. `refuse` is true when FIND locates "start" in sources_aggr
-- but does not locate "heartbeat".
$sessions_map = (
    SELECT
        fielddate,
        yandexuid,
        vsid,
        ip,
        `timestamp`,
        $getOperator(provider) AS operator,
        $sessionsExtractStalledInfo(errors).all_stalleds AS all_stalleds,
        $sessionsExtractStalledInfo(errors).first_buffer_duration AS first_buffer_duration_sessions,
        -- relative first-buffer time shifted by the session timestamp
        $sessionsExtractStalledInfo(errors).first_buffer_rel_time + `timestamp` AS first_buffer_timestamp_sessions,
        $getHeartbeatsTimestamps(heartbeats, `timestamp`) AS heartbeats_timestamps,
        (FIND(sources_aggr, "start") IS NOT NULL AND FIND(sources_aggr, "heartbeat") IS NULL) AS refuse
    FROM RANGE(`cubes/video-strm`, $date, $date, `sessions`)
    WHERE os_family IN ('iOS', 'Android')
        AND country == 'RU'
        AND $getOperator(provider) IS NOT NULL
        AND LENGTH(vsid) IN (62, 64)
        AND $re_yandexuid(yandexuid)
);

-- Every $sessions_map row, extended with stalledInitDuration and `timestamp`
-- taken from $get_true_first_buffer_durations for the same (fielddate, vsid).
-- The join is LEFT JOIN ANY, so sessions without a match are kept and get
-- NULL in the two added columns.
$sessions_map_joined = (
    SELECT
        s.*,
        g.stalledInitDuration AS first_buffer_duration_gogol,
        g.`timestamp` AS first_buffer_timestamp_gogol
    FROM $sessions_map AS s
    LEFT JOIN ANY $get_true_first_buffer_durations($date, $date) AS g
        USING (fielddate, vsid)
);

-- Per-request rows from strm-perf-log for $date, restricted to requests whose
-- `vsid` CGI parameter has length 62 or 64.
--   fielddate  = name of the table the row was read from
--   duration   = responseEnd - requestStart, both cast to Double
--   throughput = transferSize / duration * 8.0
-- NOTE(review): units are not visible here; the factor 8.0 presumably
-- converts bytes to bits - confirm.
-- NOTE(review): a zero duration is not guarded against; confirm how the
-- reducer treats the resulting throughput value.
$perf_map = (
    SELECT
        TableName() AS fielddate,
        "perf" AS source_log,
        _logfeller_timestamp AS `timestamp`,
        remote_ip AS ip,
        CAST(responseEnd AS Double) - CAST(requestStart AS Double) AS duration,
        CAST(transferSize AS Double) AS transferSize,
        Url::GetCGIParam(request, "vsid") AS vsid,
        $getQuality(request) AS quality,
        (
            CAST(transferSize AS Double)
            / (CAST(responseEnd AS Double) - CAST(requestStart AS Double))
        ) * 8.0 AS throughput
    FROM RANGE(`logs/strm-perf-log/1d`, $date, $date)
    WHERE LENGTH(Url::GetCGIParam(request, "vsid")) IN (62, 64)
);

-- Perf rows that belong to a known session: each $perf_map row is matched to
-- $yandexuids by vsid and gains that session's yandexuid and operator.
-- The join is INNER, so perf rows with an unknown vsid are dropped.
$perf_joined = (
    SELECT
        p.*,
        yandexuid,
        operator
    FROM $perf_map AS p
    INNER JOIN $yandexuids AS y
        USING (vsid)
);

-- Distinct yandexuids present in $yandexuids.
$yandexuid_whitelist =
    SELECT DISTINCT yandexuid
    FROM $yandexuids;

-- Rows of the crypta yandexuid -> mm_device_id matching table whose `id`
-- appears in the whitelist above.
$yandexuid_to_device_id = (
    SELECT *
    FROM `//home/crypta/production/state/graph/v2/matching/by_id/yandexuid/direct/mm_device_id` AS crypta
    LEFT SEMI JOIN $yandexuid_whitelist AS wh
        ON (crypta.id == wh.yandexuid)
);

-- Mobile-log rows tagged with a yandexuid. The rows come from the extractor
-- selected for $date, applied to the source selected for $date. Each row is
-- matched to the crypta mapping by DeviceID == target_id; rows without a
-- mapping are dropped by the INNER JOIN.
$mm_joined = (
    SELECT
        m.*,
        y.id AS yandexuid
    FROM $get_mm_data_subquery($source_subquery) AS m
    INNER JOIN $yandexuid_to_device_id AS y
        ON (y.target_id == m.DeviceID)
);

-- Session rows passed through $sessionsProcessor, which receives each joined
-- session row via TableRow().
$processed = PROCESS $sessions_map_joined USING $sessionsProcessor(TableRow());

-- Perf rows, mobile-log rows and processed session rows stacked into one
-- stream for the reduce step.
-- Note carried over from the original comments: this union was apparently
-- once written straight to the output
-- (`insert into $output_table with truncate ... order by yandexuid, timestamp`).
-- The table is now written only after the reduce step.
$mapped = (
    SELECT *
    FROM $perf_joined

    UNION ALL
    SELECT *
    FROM $mm_joined

    UNION ALL
    SELECT *
    FROM $processed
);

-- Row type of the stream the Python reducer consumes (it is the element type
-- of the Stream argument in the Callable signature of $reducer).
-- Every field is optional except source_log.
-- NOTE(review): the field list has to agree with the columns produced by the
-- union of perf, mobile-log and session rows - verify when any of those
-- sources changes.
$input_type = Struct<
    'CellID':Int64?,
    'CellType':String?,
    'ConnectionType':String?,
    'DeviceID':String?,
    'Lac':Int64?,
    'Latitude':Double?,
    'LocationPrecision':Uint64?,
    'LocationSource':String?,
    'Longitude':Double?,
    'NetworkType':String?,
    'OperatorID':String?,
    'OperatorName':String?,
    'SignalStrength':Int64?,
    'WifiSsid':String?,
    'Wifi_Ssids':List<String>?,
    'duration':Double?,
    'fielddate':String?,
    'location_timestamp':Uint64?,
    'operator':String?,
    'ip':String?,
    's2_p11':Uint64?,
    's2_p12':Uint64?,
    's2_p13':Uint64?,
    's2_p14':Uint64?,
    's2_p15':Uint64?,
    's2_p16':Uint64?,
    's2_p17':Uint64?,
    'quality':Uint64?,
    'source_log':String,
    'throughput':Double?,
    'timestamp':Int64?,
    'transferSize':Double?,
    'vsid':String?,
    'yandexuid':String?,
    'buffer_duration':Double?,
    'event':String?,
>;

-- Row type of the stream the Python reducer emits (the element type of the
-- Stream returned by the Callable signature of $reducer). The reducer result
-- is inserted into $output_table with `select *`, so these fields are the
-- columns of the output table. All fields are optional.
$output_type = Struct<
    's2_p11':Uint64?,
    's2_p12':Uint64?,
    's2_p13':Uint64?,
    's2_p14':Uint64?,
    's2_p15':Uint64?,
    's2_p16':Uint64?,
    's2_p17':Uint64?,
    'LocationSource':String?,
    'throughput':Double?,
    'operator':String?,
    'NetworkType':String?,
    'CellID':Int64?,
    'OperatorID':String?,
    'CellType':String?,
    'Lac':Int64?,
    'SignalStrength':Int64?,
    'ip':String?,
    'yandexuid':String?,
    'quality':Uint64?,
    'transferSize':Double?,
    'DeviceID':String?,
    'DeviceIDVideo':String?,
    'start':Int64?,
    'refuse':Int64?,
    'first_buffer_throughput':Double?,
    'first_buffer_duration':Double?,
    'buffer_duration':Double?,
    'view_time':Double?,
    'timestamp':Int64?
>;

-- Python UDF `reducer`, with its source read from the attached file
-- new_algo_reducer.py.
-- Signature: (reduce key as String?, stream of $input_type rows)
--            -> stream of $output_type rows.
-- The reduce statement below keys on yandexuid, so the first argument is the
-- yandexuid of the group.
$reducer = Python::reducer(
    Callable<(String?, Stream<$input_type>)->Stream<$output_type>>,
    FileContent("new_algo_reducer.py")
);


-- Reducer input: $mapped rows that have a yandexuid (the reduce key).
-- The seven s2_p11..s2_p17 columns are rebuilt as "NULL if NULL, otherwise the
-- unwrapped value"; every other column passes through unchanged.
-- NOTE(review): the rebuild presumably normalises the optional type of these
-- columns so they match Uint64? in $input_type - confirm.
$source = (
    SELECT
        IF(s2_p11 IS NULL, NULL, Unwrap(s2_p11)) AS s2_p11,
        IF(s2_p12 IS NULL, NULL, Unwrap(s2_p12)) AS s2_p12,
        IF(s2_p13 IS NULL, NULL, Unwrap(s2_p13)) AS s2_p13,
        IF(s2_p14 IS NULL, NULL, Unwrap(s2_p14)) AS s2_p14,
        IF(s2_p15 IS NULL, NULL, Unwrap(s2_p15)) AS s2_p15,
        IF(s2_p16 IS NULL, NULL, Unwrap(s2_p16)) AS s2_p16,
        IF(s2_p17 IS NULL, NULL, Unwrap(s2_p17)) AS s2_p17,
        t.* WITHOUT
            t.s2_p11,
            t.s2_p12,
            t.s2_p13,
            t.s2_p14,
            t.s2_p15,
            t.s2_p16,
            t.s2_p17
    FROM $mapped AS t
    WHERE yandexuid IS NOT NULL
);

-- Runs the Python reducer once per yandexuid; within each group the rows are
-- presorted by `timestamp`.
$reduced = (
    REDUCE $source
    PRESORT `timestamp`
    ON yandexuid
    USING $reducer(TableRow())
);

-- Replace the contents of the day's output table with the reducer result.
INSERT INTO $output_table WITH TRUNCATE
SELECT *
FROM $reduced;
END DEFINE;

-- Run the per-day action once for every date in $input_dates.
EVALUATE FOR $date IN $input_dates
    DO $process_date($date);
