use hahn;
pragma yson.DisableStrict;
pragma AnsiInForEmptyOrNullableItemsCollections;

-- Splits a delimited string into a list of its comma-separated items.
-- The first and the last character of $string are dropped (presumably the
-- enclosing brackets -- TODO confirm the input format) and the remainder is
-- split on ",". Items are returned as-is, without trimming or unquoting.
$parseCellsTypes = ($string) -> (
    String::SplitToList(
        SUBSTRING($string, 1, CAST(Length($string) - 2 AS UInt32)),
        ","
    )
);

-- Parses $string as JSON and converts the resulting node to a list of Int64.
$parseIntList = ($string) -> {
    $node = Yson::ParseJson($string);
    RETURN Yson::ConvertToInt64List($node);
};

-- Whole-string matcher for yandexuid values: one or more digits followed by a
-- 10-digit tail whose first digit is 1 and whose second digit is 2..9.
-- Presumably the tail is a unix timestamp (TODO confirm); the second digit is
-- allowed up to 9 so that tails from 1700000000 (2023-11-14) onward are not
-- rejected as malformed.
$re_yandexuid = Re2::Match("[0-9]+1[2-9][0-9]{8}");

-- Maps a provider name to a short operator label.
-- Several provider spellings collapse to one label; any provider that is not
-- listed (including NULL) yields NULL. Comparison is exact and case-sensitive.
$getOperator = ($provider) -> {
    RETURN CASE
        WHEN $provider IN ("beeline", "vimpelcom") THEN "beeline"
        WHEN $provider IN ("tele2 russia", "t2 mobile llc") THEN "tele2"
        WHEN $provider == "pjsc megafon" THEN "megafon"
        WHEN $provider == "mts pjsc" THEN "mts"
        WHEN $provider == "yota" THEN "yota"
        ELSE NULL
    END
};

-- Extracts "key=value" path segments of a URL into a dict.
-- Everything from the first "?" on is cut off, the rest is split on "/", and
-- every segment containing "=" is split on "=": the first piece becomes the
-- key and the second piece the value.
$parseSignedUrl = ($url) -> {
    $path = SUBSTRING($url, 0, FIND($url, "?"));
    $segments = String::SplitToList($path, "/");
    $assignments = ListFilter($segments, ($segment) -> (FIND($segment, "=") IS NOT NULL));
    $toKeyValue = ($segment) -> {
        $pieces = String::SplitToList($segment, "=");
        RETURN AsTuple(Unwrap($pieces[0]), Unwrap($pieces[1]));
    };
    RETURN ToDict(ListMap($assignments, $toKeyValue));
};

-- Substring matchers (Re2::Grep) for chunk request names.
-- Audio: "-a" followed by a digit, then at least one arbitrary character,
-- then "ts" or "m4s". Video: the same with "-v". The patterns are not
-- anchored, so the "ts"/"m4s" part may occur anywhere after the marker.
$re_audio_chunk = Re2::Grep("-a[0-9].+(ts|m4s)");
$re_video_chunk = Re2::Grep("-v[0-9].+(ts|m4s)");

-- Derives the video quality of a request as UInt64.
-- The "res" value parsed out of the signed URL wins when present; otherwise
-- the request is scanned for "<height>p" markers in the fixed order below and
-- the first marker found decides. NULL when nothing matches or when the
-- chosen text cannot be cast to UInt64.
$getQuality = ($request) -> {
    $signedParams = $parseSignedUrl($request);
    $signedRes = $signedParams["res"];
    -- (marker searched for, quality reported); order is significant.
    $markers = AsList(
        AsTuple("720p", "720"),
        AsTuple("1080p", "1080"),
        AsTuple("360p", "360"),
        AsTuple("540p", "540"),
        AsTuple("480p", "480"),
        AsTuple("240p", "240"),
        AsTuple("576p", "576"),
        AsTuple("432p", "432"),
        AsTuple("408p", "408"),
        AsTuple("504p", "504"),
        AsTuple("270p", "270"),
        AsTuple("600p", "600"),
        AsTuple("144p", "144")
    );
    $firstMarker = ListFilter($markers, ($marker) -> (String::Contains($request, $marker.0)))[0];
    RETURN CAST(($signedRes ?? $firstMarker.1) AS UInt64)
};

-- Coarse bounding-box test on a coordinate pair.
-- True when the latitude lies in [40, 82] and the longitude lies either in
-- [19, 180] or in [-180, -168].
$isRussia = ($lat, $lon) -> {
    $latInBand = ($lat BETWEEN 40.0 AND 82.0);
    $lonEastern = ($lon BETWEEN 19.0 AND 180.0);
    $lonBeyondAntimeridian = ($lon BETWEEN -180.0 AND -168.0);
    RETURN $latInBand AND ($lonEastern OR $lonBeyondAntimeridian)
};

-- Tells whether a row carries a usable location: LocationPrecision (cast to
-- Uint64) must not exceed 750 and the coordinates (cast to Double) must pass
-- $isRussia.
$geoOk = ($row) -> {
    $precision = CAST($row.LocationPrecision AS Uint64);
    $lat = CAST($row.Latitude AS Double);
    $lon = CAST($row.Longitude AS Double);
    RETURN $precision <= 750 AND $isRussia($lat, $lon)
};

-- Re-declaration of $parseCellsTypes with the same logic as the definition
-- near the top of the file: drops the first and the last character of
-- $string and splits the remainder on ",".
$parseCellsTypes = ($string) -> (
    String::SplitToList(
        SUBSTRING($string, 1, CAST(Length($string) - 2 AS UInt32)),
        ","
    )
);

-- Re-declaration of $parseIntList with the same logic as the definition near
-- the top of the file: JSON text -> list of Int64.
$parseIntList = ($string) -> {
    $node = Yson::ParseJson($string);
    RETURN Yson::ConvertToInt64List($node);
};

-- Extracts the single connected cell of a row as a struct, or NULL.
-- Reads the parallel per-cell columns Cells_AreConnected, Cells_CellsIDs,
-- Cells_Lacs, Cells_SignalsStrengths, Cells_Types, Cells_OperatorsIDs and
-- Cells_CountriesCodes, zips them and keeps the entries flagged as connected.
-- A struct is returned only when exactly one entry is connected, its cell id
-- and LAC are non-zero, its signal strength is within [-110, 0] and its
-- country code equals 250. No check against $row.OperatorID is performed.
$getRadioData = ($row) -> {
    -- Tuple layout: 0 connected flag, 1 cell id, 2 LAC, 3 signal strength,
    -- 4 cell type, 5 operator id, 6 country code.
    $cells = ListZip(
        $parseIntList($row.Cells_AreConnected),
        $parseIntList($row.Cells_CellsIDs),
        $parseIntList($row.Cells_Lacs),
        $parseIntList($row.Cells_SignalsStrengths),
        $parseCellsTypes($row.Cells_Types),
        $parseIntList($row.Cells_OperatorsIDs),
        $parseIntList($row.Cells_CountriesCodes)
    );
    $active = ListFilter($cells, ($entry) -> ($entry.0 == 1));
    $candidate = $active[0];
    $isUsable =
        ListLength($active) == 1
        AND $candidate.1 != 0
        AND $candidate.2 != 0
        AND -110.0 <= $candidate.3
        AND $candidate.3 <= 0.0
        AND $candidate.6 == 250;
    RETURN IF(
        $isUsable,
        AsStruct(
            $candidate.1 AS CellID,
            $candidate.2 AS Lac,
            $candidate.3 AS SignalStrength,
            $candidate.4 AS CellType,
            $candidate.5 AS OperatorID
        ),
        NULL
    )
};

-- Tells whether a row has trustworthy radio data: $getRadioData must return
-- a struct, and the row's signal strengths must not collapse to exactly one
-- distinct value.
$radioOk = ($row) -> {
    $radio = $getRadioData($row);
    $distinctStrengths = ListLength(ListUniq($parseIntList($row.Cells_SignalsStrengths)));
    RETURN IF(
        $radio IS NULL,
        false,
        IF($distinctStrengths == 1, false, true)
    )
};

-- Returns the SSID of the connected Wi-Fi network of a row.
-- Wifi_Ssids and Wifi_AreConnected are parsed as JSON lists and zipped; the
-- SSID is returned only when exactly one entry has the connected flag equal
-- to 1, otherwise NULL.
$getWifiSsid = ($row) -> {
    $names = Yson::ConvertToStringList(Yson::ParseJson($row.Wifi_Ssids));
    $flags = Yson::ConvertToInt64List(Yson::ParseJson($row.Wifi_AreConnected));
    $connected = ListFilter(ListZip($names, $flags), ($pair) -> ($pair.1 == 1));
    RETURN IF(
        ListLength($connected) == 1,
        Unwrap($connected[0].0),
        NULL
    )
};

-- Binds the Python UDF get_coord_pair from the attached file sphere_v2.py.
-- Declared signature: optional Uint64 in, optional list of optional Double
-- out. $getRR below reads element 0 and element 1 of the result and passes
-- them on as latitude and longitude.
$get_coord_pair = Python::get_coord_pair(
    Callable<(Uint64?)->List<Double?>?>,
    FileContent("sphere_v2.py")
);

-- Maps a coordinate pair to one of three named regions.
-- The region found by Geo::RegionByLocation is classified by which of the
-- ids 20358, 11119, 10174 appears among its parents (checked in that order);
-- NULL when none of them does.
$getRRInner = ($lat, $lon) -> {
    $regionId = Geo::RegionByLocation($lat, $lon).id;
    $parents = Geo::GetParents($regionId);
    RETURN CASE
        WHEN ListHas($parents, 20358) THEN "Москва, ВАО"
        WHEN ListHas($parents, 11119) THEN "Татарстан"
        WHEN ListHas($parents, 10174) THEN "Санкт-Петербург и Ленинградская область"
        ELSE NULL
    END
};

-- Resolves the named region for a value accepted by $get_coord_pair.
-- Returns NULL when $get_coord_pair yields NULL; otherwise the first two
-- elements of its result are unwrapped and handed to $getRRInner.
$getRR = ($s2_value) -> {
    $pair = $get_coord_pair($s2_value);
    RETURN IF(
        $pair IS NOT NULL,
        $getRRInner(Unwrap($pair[0]), Unwrap($pair[1])),
        NULL
    )
};

-- Summarises buffering events found in an errors Yson list.
-- Entries with id == "Stalled_Init" provide the first-buffer figures:
-- first_buffer_rel_time is "rel_time" of the first such entry, while
-- first_buffer_duration is "/details/stalledDuration" of the last entry among
-- the first three (0 when absent). Entries with id_raw == "Stalled" are
-- counted and their durations summed.
$stalled = ($errors) -> {
    $events = Yson::ConvertToList($errors);
    $initStalls = ListFilter($events, ($event) -> (Yson::LookupString($event, "id") == "Stalled_Init"));
    $firstRelTime = IF(
        ListLength($initStalls) > 0,
        Yson::LookupUint64(Unwrap($initStalls[0]), "rel_time"),
        NULL
    );
    $pickedInit = ListReverse(ListTake($initStalls, 3))[0];
    $firstDuration = Yson::ConvertToInt64(Yson::YPath($pickedInit, "/details/stalledDuration")) ?? 0;
    -- NULL for non-"Stalled" entries; ListFlatMap drops those.
    $durationOf = ($event) -> (
        IF(
            Yson::LookupString($event, "id_raw") == "Stalled",
            Yson::ConvertToInt64(Yson::YPath($event, "/details/stalledDuration")) ?? 0,
            NULL
        )
    );
    $durations = ListFlatMap($events, $durationOf);
    RETURN AsStruct(
        $firstDuration AS first_buffer_duration,
        ListLength($durations) AS total_buffer_count,
        ListSum($durations) AS total_buffer_duration,
        $firstRelTime AS first_buffer_rel_time
    )
};

-- Extracts buffering details from an errors Yson list.
-- first_buffer_rel_time is "rel_time" of the first entry with
-- id == "Stalled_Init"; first_buffer_duration is "/details/stalledDuration"
-- of the last entry among the first three such entries (0 when absent).
-- all_stalleds lists, for every entry with id_raw == "Stalled", the tuple
-- (stalledDuration or 0, rel_time).
$sessionsExtractStalledInfo = ($errors) -> {
    $events = Yson::ConvertToList($errors);
    $initStalls = ListFilter($events, ($event) -> (Yson::LookupString($event, "id") == "Stalled_Init"));
    $firstRelTime = IF(
        ListLength($initStalls) > 0,
        Yson::LookupUint64(Unwrap($initStalls[0]), "rel_time"),
        NULL
    );
    $pickedInit = ListReverse(ListTake($initStalls, 3))[0];
    $firstDuration = Yson::ConvertToInt64(Yson::YPath($pickedInit, "/details/stalledDuration")) ?? 0;
    -- NULL for non-"Stalled" entries; ListFlatMap drops those.
    $stallOf = ($event) -> (
        IF(
            Yson::LookupString($event, "id_raw") == "Stalled",
            AsTuple(
                unwrap(Yson::ConvertToInt64(Yson::YPath($event, "/details/stalledDuration")) ?? 0),
                unwrap(Yson::LookupUint64($event, "rel_time"))
            ),
            NULL
        )
    );
    RETURN AsStruct(
        $firstDuration AS first_buffer_duration,
        $firstRelTime AS first_buffer_rel_time,
        ListFlatMap($events, $stallOf) AS all_stalleds
    )
};

-- Subquery: StalledEnd events with stalledId == 1 from the daily
-- strm-gogol-log tables between $date_from and $date_to, restricted to the
-- services StreamPlayer, AndroidPlayer and ott-smart.
-- Output columns: fielddate (source table name), vsid, stalledInitDuration
-- ("/data/stalledDuration" of the JSON payload as Double) and `timestamp`
-- (clientTimestamp divided by 1000, cast to Int64).
DEFINE SUBQUERY $get_true_first_buffer_durations($date_from, $date_to) AS
    $payloadStalledDuration = ($raw) -> (
        Yson::ConvertToDouble(Yson::YPath(Yson::ParseJson($raw), "/data/stalledDuration"))
    );
    $payloadStalledId = ($raw) -> (
        Yson::ConvertToInt64(Yson::YPath(Yson::ParseJson($raw), "/data/stalledId"))
    );

    SELECT
        TableName() AS fielddate,
        vsid,
        $payloadStalledDuration(data) AS stalledInitDuration,
        CAST(clientTimestamp / 1000 AS Int64) AS `timestamp`
    FROM RANGE(`logs/strm-gogol-log/1d`, $date_from, $date_to)
    WHERE service IN ('StreamPlayer', 'AndroidPlayer', 'ott-smart')
        AND eventName == 'StalledEnd'
        AND $payloadStalledId(data) == 1;
END DEFINE;

-- Builds $num heartbeat tuples (30, timestamp) spaced 30 apart, the first one
-- at $ts + 30 and the last one at $ts + $num * 30.
$getHeartbeatsTimestampsInner = ($num, $ts) -> {
    $beatAt = ($index) -> (AsTuple(30, $ts + $index * 30));
    RETURN ListMap(ListFromRange(1, $num + 1), $beatAt)
};

-- Turns heartbeat counters into a list of (duration, timestamp) tuples.
-- Sources are tried in order: js_tracer "30SecHeartbeat" (one tuple per
-- heartbeat, 30 apart), js_tracer "20SecWatched" (single tuple at +20),
-- js_tracer "10SecWatched" (single tuple at +10), redir "redir_heartbeat"
-- (one tuple per heartbeat, 30 apart). NULL when none is present.
$getHeartbeatsTimestamps = ($heartbeats, $timestamp) -> {
    $tracer = Yson::ConvertToInt64Dict(Yson::YPath($heartbeats, "/js_tracer"));
    $redirect = Yson::ConvertToInt64Dict(Yson::YPath($heartbeats, "/redir"));
    $tracerBeats = $tracer["30SecHeartbeat"];
    $watched20 = $tracer["20SecWatched"];
    $watched10 = $tracer["10SecWatched"];
    $redirectBeats = $redirect["redir_heartbeat"];
    RETURN CASE
        WHEN $tracer IS NOT NULL AND $tracerBeats IS NOT NULL
            THEN $getHeartbeatsTimestampsInner(Unwrap($tracerBeats), $timestamp)
        WHEN $tracer IS NOT NULL AND $watched20 IS NOT NULL
            THEN AsList(AsTuple(20, $timestamp + 20))
        WHEN $tracer IS NOT NULL AND $watched10 IS NOT NULL
            THEN AsList(AsTuple(10, $timestamp + 10))
        WHEN $redirect IS NOT NULL AND $redirectBeats IS NOT NULL
            THEN $getHeartbeatsTimestampsInner(Unwrap($redirectBeats), $timestamp)
        ELSE NULL
    END
};

-- Expands one session row into a list of event structs.
-- Every event carries vsid, ip, yandexuid, operator and
-- source_log = "sessions", plus an event name and a timestamp. Emitted, in
-- this order: one "start", one "heartbeat" per entry of
-- heartbeats_timestamps, one "first_buffer", one "buffer" per entry of
-- all_stalleds, and one "refuse" when $row.refuse is true.
$sessionsProcessor = ($row) -> {
    $common = AsStruct(
        $row.vsid AS vsid,
        $row.ip AS ip,
        $row.yandexuid AS yandexuid,
        $row.operator AS operator,
        "sessions" AS source_log
    );

    $startEvents = AsList(ExpandStruct(
        $common,
        "start" AS event,
        $row.`timestamp` AS `timestamp`
    ));

    -- For heartbeats the `duration` field holds the heartbeat length (tuple
    -- element 0); the column is shared with other event kinds to save a column.
    $toHeartbeat = ($beat) -> (ExpandStruct(
        $common,
        "heartbeat" AS event,
        $beat.0 AS duration,
        $beat.1 AS `timestamp`
    ));
    $heartbeatEvents = IF(
        $row.heartbeats_timestamps IS NOT NULL,
        ListMap(Unwrap($row.heartbeats_timestamps), $toHeartbeat),
        AsList()
    );

    -- gogol values take precedence over sessions values; the row timestamp
    -- and a zero duration are the last resort.
    $firstBufferEvents = AsList(ExpandStruct(
        $common,
        "first_buffer" AS event,
        $row.first_buffer_duration_gogol ?? $row.first_buffer_duration_sessions ?? 0.0 AS buffer_duration,
        $row.first_buffer_timestamp_gogol ?? $row.first_buffer_timestamp_sessions ?? $row.`timestamp` AS `timestamp`
    ));

    -- Stall tuples are (duration, offset); the offset is added to the row timestamp.
    $toBuffer = ($stall) -> (ExpandStruct(
        $common,
        "buffer" AS event,
        $stall.0 AS buffer_duration,
        $stall.1 + $row.`timestamp` AS `timestamp`
    ));
    $bufferEvents = IF(
        $row.all_stalleds IS NOT NULL,
        ListMap(Unwrap($row.all_stalleds), $toBuffer),
        AsList()
    );

    $refuseEvents = IF(
        $row.refuse,
        AsList(ExpandStruct(
            $common,
            "refuse" AS event,
            $row.`timestamp` AS `timestamp`
        )),
        AsList()
    );

    RETURN ListUnionAll(
        ListUnionAll(
            ListUnionAll(
                ListUnionAll($startEvents, $heartbeatEvents),
                $firstBufferEvents
            ),
            $bufferEvents
        ),
        $refuseEvents
    )
};

-- Subquery: normalises rows of $mm_source into "metrika" records carrying
-- radio, Wi-Fi and location attributes plus seven cell ids (s2_p11..s2_p17)
-- returned by the Python UDF get_cell_ids.
-- $mm_source is a subquery called without arguments.
define subquery $get_mm_data($mm_source) as
-- Step 1: per-row projection. Radio columns are kept only when $radioOk
-- holds, location columns only when $geoOk holds; otherwise they are NULL.
$mm_map = (
    select
        "metrika" as source_log,
        DeviceID,
        ClientIP as ip,
        -- First non-NULL of the three timestamps, cast to UInt64.
        cast(CollectTimestamp ?? EventTimestamp ?? LocationTimestamp as UInt64) as `timestamp`,
        IF($radioOk(TableRow()), $getRadioData(TableRow()).CellID) as CellID,
        IF($radioOk(TableRow()), $getRadioData(TableRow()).Lac) as Lac,
        IF($radioOk(TableRow()), $getRadioData(TableRow()).SignalStrength) as SignalStrength,
        IF($radioOk(TableRow()), $getRadioData(TableRow()).CellType) as CellType,
        -- IF($radioOk(TableRow()), NetworkType) as NetworkType,
        -- IF($radioOk(TableRow()), OperatorID) as OperatorID,
        -- IF($radioOk(TableRow()), OperatorName) as OperatorName,
        -- IF($radioOk(TableRow()), ConnectionType) as ConnectionType,
        NetworkType,
        OperatorID,
        OperatorName,
        ConnectionType,
        IF($radioOk(TableRow()), Yson::ConvertToStringList(Yson::ParseJson(Wifi_Ssids))) as Wifi_Ssids,
        $getWifiSsid(TableRow()) as WifiSsid,
        IF($geoOk(TableRow()), CAST(LocationPrecision as Uint64)) as LocationPrecision,
        IF($geoOk(TableRow()), LocationSource) as LocationSource,
        IF($geoOk(TableRow()), CAST(Latitude as Double)) as Latitude,
        IF($geoOk(TableRow()), CAST(Longitude as Double)) as Longitude,
        IF($geoOk(TableRow()), CAST(LocationTimestamp as Uint64)) as location_timestamp,
    from $mm_source()
    -- Keep rows with usable geo or radio data, a timestamp and a device id.
    where (
        $geoOk(TableRow())
        or $radioOk(TableRow())
    ) and cast(CollectTimestamp ?? EventTimestamp ?? LocationTimestamp as UInt64) is not null and DeviceID is not null
    -- NOTE(review): when $getRadioData returns NULL this comparison is NULL,
    -- so rows that pass only $geoOk appear to be filtered out as well --
    -- confirm that this is intended.
    and $getRadioData(TableRow()).CellType != "DEFAULT"
);

-- Python UDF from sphere_v2.py: (latitude, longitude, precision) -> list of
-- cell ids; elements 0..6 are used below.
$get_cell_ids = Python::get_cell_ids(
    Callable<(Double?, Double?, Uint64?)->List<UInt64?>?>,
    FileContent("sphere_v2.py")
);

-- Returns NULL for a NULL argument and unwrap($x) otherwise; presumably used
-- to strip one level of optionality from an indexed list element -- confirm.
$unwrap = ($x) -> {
    RETURN IF(
        $x is null,
        null,
        unwrap($x)
    )
};

-- Step 2: add the cell-id columns; they stay NULL when either coordinate is
-- NULL.
$mm_add_s2 = (
    select
        m.*,
        IF(Latitude is not null and Longitude is not null, $unwrap($get_cell_ids(Latitude, Longitude, LocationPrecision)[0])) as s2_p11,
        IF(Latitude is not null and Longitude is not null, $unwrap($get_cell_ids(Latitude, Longitude, LocationPrecision)[1])) as s2_p12,
        IF(Latitude is not null and Longitude is not null, $unwrap($get_cell_ids(Latitude, Longitude, LocationPrecision)[2])) as s2_p13,
        IF(Latitude is not null and Longitude is not null, $unwrap($get_cell_ids(Latitude, Longitude, LocationPrecision)[3])) as s2_p14,
        IF(Latitude is not null and Longitude is not null, $unwrap($get_cell_ids(Latitude, Longitude, LocationPrecision)[4])) as s2_p15,
        IF(Latitude is not null and Longitude is not null, $unwrap($get_cell_ids(Latitude, Longitude, LocationPrecision)[5])) as s2_p16,
        IF(Latitude is not null and Longitude is not null, $unwrap($get_cell_ids(Latitude, Longitude, LocationPrecision)[6])) as s2_p17,
    from $mm_map as m
);
select * from $mm_add_s2;
end define;

-- Subquery: combines two views of $mm_source into "metrika" records.
--   * $old_mm      -- network-level rows keyed by EventTimestamp;
--   * $appmetrica  -- radio / Wi-Fi / location rows keyed by
--                     CollectTimestamp (falling back to LocationTimestamp),
--                     extended with seven cell ids (s2_p11..s2_p17) returned
--                     by the Python UDF get_cell_ids.
-- The two parts expose different column sets and are merged with UNION ALL.
-- $mm_source is a subquery called without arguments.
define subquery $get_mm_data_new($mm_source) as
-- Rows that have both NetworkType and ConnectionType and a usable
-- EventTimestamp.
$old_mm = (
    select
        "metrika" as source_log,
        DeviceID,
        ClientIP as ip,
        OperatorID,
        CAST(EventTimestamp as UInt64) as `timestamp`,
        NetworkType,
        ConnectionType
    from $mm_source()
    where
        NetworkType is not null
        and ConnectionType is not null
        and CAST(EventTimestamp as UInt64) is not null
);

-- Per-row projection. Radio columns are kept only when $radioOk holds,
-- location columns only when $geoOk holds; otherwise they are NULL.
$appmetrica = (
    select
        "metrika" as source_log,
        DeviceID,
        cast(CollectTimestamp ?? LocationTimestamp as UInt64) as `timestamp`,
        IF($radioOk(TableRow()), $getRadioData(TableRow()).CellID) as CellID,
        IF($radioOk(TableRow()), $getRadioData(TableRow()).Lac) as Lac,
        IF($radioOk(TableRow()), $getRadioData(TableRow()).SignalStrength) as SignalStrength,
        IF($radioOk(TableRow()), $getRadioData(TableRow()).CellType) as CellType,
        IF($radioOk(TableRow()), CAST($getRadioData(TableRow()).OperatorID as String)) as OperatorID,
        IF($radioOk(TableRow()), Yson::ConvertToStringList(Yson::ParseJson(Wifi_Ssids))) as Wifi_Ssids,
        $getWifiSsid(TableRow()) as WifiSsid,
        IF($geoOk(TableRow()), CAST(LocationPrecision as Uint64)) as LocationPrecision,
        IF($geoOk(TableRow()), LocationSource) as LocationSource,
        IF($geoOk(TableRow()), CAST(Latitude as Double)) as Latitude,
        IF($geoOk(TableRow()), CAST(Longitude as Double)) as Longitude,
        IF($geoOk(TableRow()), CAST(LocationTimestamp as Uint64)) as location_timestamp,
    from $mm_source()
    -- The NOT NULL guard tests exactly the expression selected as `timestamp`
    -- above, so no row can be emitted with a NULL timestamp. Rows that only
    -- carry EventTimestamp are covered by $old_mm.
    where (
        $geoOk(TableRow())
        or $radioOk(TableRow())
    ) and cast(CollectTimestamp ?? LocationTimestamp as UInt64) is not null
    and DeviceID is not null
    -- NOTE(review): when $getRadioData returns NULL this comparison is NULL,
    -- so rows that pass only $geoOk appear to be filtered out as well --
    -- confirm that this is intended.
    and $getRadioData(TableRow()).CellType != "DEFAULT"
);

-- Python UDF from sphere_v2.py: (latitude, longitude, precision) -> list of
-- cell ids; elements 0..6 are used below.
$get_cell_ids = Python::get_cell_ids(
    Callable<(Double?, Double?, Uint64?)->List<UInt64?>?>,
    FileContent("sphere_v2.py")
);

-- Returns NULL for a NULL argument and unwrap($x) otherwise; presumably used
-- to strip one level of optionality from an indexed list element -- confirm.
$unwrap = ($x) -> {
    RETURN IF(
        $x is null,
        null,
        unwrap($x)
    )
};

-- Add the cell-id columns; they stay NULL when either coordinate is NULL.
$mm_add_s2 = (
    select
        m.*,
        IF(Latitude is not null and Longitude is not null, $unwrap($get_cell_ids(Latitude, Longitude, LocationPrecision)[0])) as s2_p11,
        IF(Latitude is not null and Longitude is not null, $unwrap($get_cell_ids(Latitude, Longitude, LocationPrecision)[1])) as s2_p12,
        IF(Latitude is not null and Longitude is not null, $unwrap($get_cell_ids(Latitude, Longitude, LocationPrecision)[2])) as s2_p13,
        IF(Latitude is not null and Longitude is not null, $unwrap($get_cell_ids(Latitude, Longitude, LocationPrecision)[3])) as s2_p14,
        IF(Latitude is not null and Longitude is not null, $unwrap($get_cell_ids(Latitude, Longitude, LocationPrecision)[4])) as s2_p15,
        IF(Latitude is not null and Longitude is not null, $unwrap($get_cell_ids(Latitude, Longitude, LocationPrecision)[5])) as s2_p16,
        IF(Latitude is not null and Longitude is not null, $unwrap($get_cell_ids(Latitude, Longitude, LocationPrecision)[6])) as s2_p17,
    from $appmetrica as m
);
select * from $mm_add_s2 union all select * from $old_mm;
end define;

export $parseIntList, $re_yandexuid, $getOperator, $getQuality, $isRussia,
    $geoOk, $parseCellsTypes, $radioOk, $getWifiSsid, $getRR,
    $get_true_first_buffer_durations, $sessionsExtractStalledInfo,
    $getHeartbeatsTimestamps, $sessionsProcessor, $getRadioData,
    $get_coord_pair, $get_mm_data, $get_mm_data_new;