PRAGMA yson.DisableStrict;
PRAGMA AnsiInForEmptyOrNullableItemsCollections;

-- === Helpful UD(A)Fs ===================================================== --
-- Trim a string with String::Strip.
-- A NULL input (or a NULL result) is replaced by the empty string, so the
-- result is never NULL.
$strip = ($str) -> {
    $non_null = $str ?? '';
    RETURN String::Strip($non_null) ?? '';
};

-- Normalise a string: trim it with $strip, then lower-case it.
-- Falls back to the empty string if the lower-casing yields NULL.
$lower_strip = ($str) -> {
    $trimmed = $strip($str);
    RETURN String::ToLower($trimmed) ?? '';
};

-- Normalise a string: trim it with $strip, then upper-case it.
-- Falls back to the empty string if the upper-casing yields NULL.
$upper_strip = ($str) -> {
    $trimmed = $strip($str);
    RETURN String::ToUpper($trimmed) ?? '';
};

-- Merge two lists and return every distinct element once.
$uniq_array = ($a, $b) -> {
    -- Going through sets might be better for long arrays than the plain
    -- alternative ListUniq(ListExtend($a, $b)).
    $merged = SetUnion(ToSet($a), ToSet($b));
    RETURN DictKeys($merged);
};

-- Serialise a dict into a single string.
--   $d         - dict to serialise; NULL yields NULL
--   $sep_items - separator placed between rendered entries
--   $sep_pair  - separator placed between a key and its value
-- Keys and values are converted with CAST(... AS String).
$dict_join = ($d, $sep_items, $sep_pair) -> {
    -- Render one (key, value) entry as "<key><sep_pair><value>"
    $render_entry = ($entry) -> {
        $parts = AsList(CAST($entry.0 AS String), CAST($entry.1 AS String));
        RETURN String::JoinFromList($parts, $sep_pair);
    };
    $rendered = ListMap(DictItems($d), $render_entry);
    RETURN IF($d IS NULL, NULL, String::JoinFromList($rendered, $sep_items));
};

-- Serialise a dict of dicts into a single string.
--   $dd                        - outer dict; NULL yields NULL
--   $sep_items, $sep_pair      - separators for the outer level
--   $sep_items_2, $sep_pair_2  - separators handed to $dict_join for each inner dict
$dict_dict_join = ($dd, $sep_items, $sep_pair, $sep_items_2, $sep_pair_2) -> {
    -- Render one outer entry as "<key><sep_pair><joined inner dict>"
    $render_entry = ($entry) -> {
        $inner = $dict_join($entry.1, $sep_items_2, $sep_pair_2);
        RETURN String::JoinFromList(
            AsList(CAST($entry.0 AS String), $inner),
            $sep_pair
        );
    };
    $rendered = ListMap(DictItems($dd), $render_entry);
    RETURN IF($dd IS NULL, NULL, String::JoinFromList($rendered, $sep_items));
};

-- Parse a string of "<key><sep_pairs><int>" entries separated by $sep_items
-- into a dict from key to Int64.
-- A value that is missing or cannot be cast to Int64 is stored as 0.
$dict_from_string_int = ($d, $sep_items, $sep_pairs) -> {
    $to_entry = ($chunk) -> {
        $fields = String::SplitToList($chunk, $sep_pairs);
        $value = CAST($fields[1] AS Int64) ?? 0;
        RETURN AsTuple($fields[0], $value);
    };
    $chunks = String::SplitToList($d, $sep_items);
    RETURN ToDict(ListMap($chunks, $to_entry));
};

-- Parse a string into a dict of dicts.
-- The outer level is split with $sep_items / $sep_pairs; each outer value is
-- then parsed by $dict_from_string_int using $sep_items2 / $sep_pairs2.
$dict_dict_from_string_int = ($dd, $sep_items, $sep_pairs, $sep_items2, $sep_pairs2) -> {
    $to_entry = ($chunk) -> {
        $fields = String::SplitToList($chunk, $sep_pairs);
        $inner = $dict_from_string_int($fields[1], $sep_items2, $sep_pairs2);
        RETURN AsTuple($fields[0], $inner);
    };
    $chunks = String::SplitToList($dd, $sep_items);
    RETURN ToDict(ListMap($chunks, $to_entry));
};

-- Equivalent of the MODE aggregation over a list of (key, value) tuples:
-- returns the key that carries the largest value, ignoring NULL keys.
$key_with_max_value = ($dict_items) -> {
    $with_key = ListFilter(
        $dict_items,
        ($entry) -> { RETURN $entry.0 IS NOT NULL; }
    );
    -- ListMax doesn't support a custom comparator, so sort by value
    -- descending and take the key of the first entry instead.
    $by_value_desc = ListSortDesc(
        $with_key,
        ($entry) -> { RETURN $entry.1; }
    );
    RETURN $by_value_desc[0].0;
};

-- Find the dominant geo entity for a dict of region id -> hits.
--   $geo_fun - callable applied to every region id (cast to Int32)
--   $regions - optional dict of region id -> hits; NULL yields NULL
-- Hits of regions that map to the same $geo_fun result are summed, and the
-- result with the largest total is returned via $key_with_max_value.
$get_main_geo = ($geo_fun, $regions) -> {
    -- Replace every region id by its mapped geo id, keeping the hits
    $remap = ($entry) -> {
        RETURN ($geo_fun(CAST($entry.0 AS Int32)), $entry.1);
    };
    -- NOTE(review): Unwrap would fail on NULL; this relies on the IF below
    -- not evaluating its untaken branch - confirm.
    $hits_by_geo = ToMultiDict(ListMap(DictItems(Unwrap($regions)), $remap));
    -- Collapse the list of hits collected per geo id into a single total
    $mapped_region_hits = ListMap(
        DictItems($hits_by_geo),
        ($entry) -> { RETURN ($entry.0, ListSum($entry.1)); }
    );
    RETURN IF(
        $regions IS NULL,
        NULL,
        $key_with_max_value($mapped_region_hits)
    );
};

-- Compute the dominant geo entities for a dict of region id -> hits.
-- Returns a struct with the members main_region_country, main_region_obl,
-- main_region_city and main_region, each obtained through $get_main_geo.
-- NOTE(review): presumably geo type 5 is a region (oblast) and type 6 is a
-- city - confirm against the geobase type list.
$get_main_regions = ($region_ids) -> {
    -- Id rounded to region level; NULL unless the rounded entity has type 5
    $to_obl = ($id) -> {
        $rounded = Geo::RoundRegionById($id, 'region');
        RETURN IF($rounded.type == 5, $rounded.id);
    };
    -- Id rounded to city level; NULL unless the rounded entity has type 6
    $to_city = ($id) -> {
        $rounded = Geo::RoundRegionById($id, 'city');
        RETURN IF($rounded.type == 6, $rounded.id);
    };
    -- Region-level id when it has type 5, otherwise the city-level id when
    -- it has type 6, otherwise NULL
    $to_obl_or_city = ($id) -> {
        $as_region = Geo::RoundRegionById($id, 'region');
        $as_city = Geo::RoundRegionById($id, 'city');
        RETURN CASE
            WHEN $as_region.type == 5 THEN $as_region.id
            WHEN $as_city.type == 6 THEN $as_city.id
            ELSE Null
        END;
    };

    RETURN AsStruct(
        $get_main_geo(Geo::FindCountry, $region_ids) AS main_region_country,
        $get_main_geo($to_obl, $region_ids) AS main_region_obl,
        $get_main_geo($to_city, $region_ids) AS main_region_city,
        $get_main_geo($to_obl_or_city, $region_ids) AS main_region
    );
};
-- ========================================================================= --

-- === UDFs to parse app metrica =========================================== --

-- Build a key from an application id: trim it with $strip and replace
-- every '-' and ';' by '_'. An empty result is reported as NULL.
$clean_app_id = ($app_id) -> {
    $trimmed = $strip($app_id);
    $without_dashes = String::ReplaceAll($trimmed, '-', '_');
    $key = String::ReplaceAll($without_dashes, ';', '_');
    RETURN CASE
        WHEN $key == '' THEN Null
        ELSE $key
    END;
};

-- Translate a numeric device type code into its name.
-- Codes outside 1..7 (including NULL) are reported as "unknown".
$get_device_type = ($device_type) -> {
    RETURN CASE
        WHEN $device_type == 1 THEN "phone"
        WHEN $device_type == 2 THEN "tablet"
        WHEN $device_type == 3 THEN "phablet"
        WHEN $device_type == 4 THEN "tv"
        WHEN $device_type == 5 THEN "desktop"
        WHEN $device_type == 6 THEN "car"
        WHEN $device_type == 7 THEN "watch"
        ELSE "unknown"
    END;
};

-- Translate a numeric operating system code into its name.
-- Codes outside 1..5 (including NULL) are reported as "unknown".
$get_os = ($os) -> {
    RETURN CASE
        WHEN $os == 1 THEN "android"
        WHEN $os == 2 THEN "ios"
        WHEN $os == 3 THEN "windows"
        WHEN $os == 4 THEN "macos"
        WHEN $os == 5 THEN "linux"
        ELSE "unknown"
    END;
};

-- Build the dev ML features for one event: {app_key: {bucket: 1}}.
--   $ts     - event timestamp; presumably integer seconds - confirm
--   $app_id - raw application id, normalised through $clean_app_id
-- The bucket is the position of $ts inside its day divided by the 5-minute
-- histogram interval. Returns NULL when the cleaned application id is
-- NULL or empty.
$dev_features = ($ts, $app_id) -> {
    $seconds_per_day = 24 * 3600;  -- 1 day
    $bucket_seconds = 5 * 60;  -- 5 min
    $app_key = $clean_app_id($app_id);
    $bucket = (($ts % $seconds_per_day) / $bucket_seconds) ?? 0;
    $no_app = $app_key IS Null OR $app_key == '';
    RETURN IF(
        $no_app,
        Null,
        AsDict(($app_key, AsDict(($bucket, 1))))
    );
};

-- Map a Metrica connection type code to the Crypta name:
-- 1 -> 'wifi' (CONN_WIFI), 0 -> 'cell' (CONN_CELL), anything else -> NULL.
$get_connection_type = ($connection_metrica) -> {
    RETURN IF(
        $connection_metrica == 1,
        'wifi',
        IF($connection_metrica == 0, 'cell', Null)
    );
};

-- Normalise an IP address string.
-- The value is trimmed and lower-cased through $lower_strip, and a leading
-- '::ffff:' (fake ipv6 prefix) is removed. Returns NULL when the result is
-- rejected by Ip::FromString or is one of 127.0.0.1, 0.0.0.0 and
-- 255.255.255.255; otherwise returns the cleaned string.
$clean_ip = ($ip) -> {
    $fff = '::ffff:';
    $normalized = $lower_strip($ip);
    -- Only the leading occurrence is removed: stripping every occurrence
    -- could turn a malformed value such as '::ffff:1.2.3.4::ffff:' into a
    -- valid-looking address.
    $unprefixed = IF(
        String::StartsWith($normalized, $fff),
        String::ReplaceFirst($normalized, $fff, ''),
        $normalized
    );
    RETURN CASE
        WHEN Ip::FromString($unprefixed) IS Null
            OR $unprefixed IN ('127.0.0.1', '0.0.0.0', '255.255.255.255')
            THEN Null
        ELSE $unprefixed
    END;
};

-- Extract the usable wlan0 MAC addresses from parallel lists of interface
-- MACs and interface names.
-- Each MAC has its ':' separators removed and goes through $lower_strip;
-- empty and all-zero ('000000000000') addresses are dropped.
$parse_macs = ($interfaces_macs, $interfaces_names) -> {
    -- $entry is (mac, name); wlan0 is the primary wi-fi interface
    $wlan0_mac = ($entry) -> {
        $mac = IF(
            $entry.1 != 'wlan0',
            Null,
            $lower_strip(String::ReplaceAll($entry.0, ':', ''))
        );
        RETURN IF(($mac ?? '') IN ('', '000000000000'), Null, $mac);
    };
    -- ListFlatMap skips the NULL results, so no separate Map(Filter()) is needed
    RETURN ListFlatMap(
        ListZip($interfaces_macs, $interfaces_names),
        $wlan0_mac
    );
};

-- Same value as Python's sys.maxint: 9223372036854775807
$max_int = 9223372036854775807;

-- Build a subkey from a timestamp as the string form of $max_int - $ts.
-- A NULL or zero timestamp yields NULL.
$get_subkey = ($ts) -> {
    $no_ts = $ts IS Null OR $ts == 0;
    RETURN IF($no_ts, Null, CAST($max_int - $ts AS String));
};

-- === UDFs to parse FingerPrints log sources ============================== --

-- Capture the login from a "login=<value>" fragment: the named group `g`
-- holds the run of characters up to the next ';' or whitespace.
$fp_capture_login = Re2::Capture('login=(?P<g>[^;\\s]+)');

-- Replace placeholder values by NULL.
-- NULL, '', '-', '0' and '""' all yield NULL; any other value is returned
-- unchanged.
$force_null = ($val) -> {
    $is_placeholder = $val IN (Null, '', '-', '0', '""');
    RETURN IF($is_placeholder, Null, $val);
};

-- Matcher for a run of eight or more digits.
-- NOTE(review): presumably Re2::Match requires the whole string to match,
-- so extra characters are rejected - confirm.
$_clean_yuid_fuid_match = Re2::Match('\\d{8,}');

-- Return the yuid/fuid value when it passes the matcher above, else NULL.
$clean_yuid_fuid = ($val) -> {
    RETURN IF($_clean_yuid_fuid_match($val), $val, Null);
};

-- ========================================================================= --

-- Capture a YYYY-MM-DD fragment of a string into the group `dt`.
-- The leading '.*' is greedy, so with several date-like fragments the last
-- one is the one captured.
$get_date = Re2::Capture(@@.*(?P<dt>\d{4}\-\d{2}\-\d{2}).*@@);

-- Convert the date found in a table path into seconds since the epoch,
-- taking 12:00:00 at UTC+03:00 of that day as the moment.
$seconds_from_table = ($path) -> {
    $iso_moment = $get_date($path).dt || 'T12:00:00.0+0300';
    $moment = DateTime::MakeTimestamp(DateTime::ParseIso8601($iso_moment));
    RETURN DateTime::ToSeconds($moment);
};

-- ========================================================================= --

-- Extract __ymu.UserID from a JSON params string.
-- The JSON is only parsed when the raw text contains 'UserID'; otherwise
-- the result is NULL.
$ClientUserID = ($params) -> {
    $mentions_user_id = $params LIKE '%UserID%';
    $as_json = Yson::SerializeJson(Yson::ParseJson($params));
    RETURN IF(
        $mentions_user_id,
        JSON_VALUE($as_json, '$.__ymu.UserID'),
        Null
    );
};

-- Capture the digits following '_ym_uid=' into the group `ym`.
$ym_uid_regexp = Re2::Capture(@@.*_ym_uid\=(?P<ym>\d+).*@@);

-- Resolve the domain user id as UInt64: take $domainuserid when it casts,
-- otherwise the _ym_uid value found in $headerargs, otherwise 0.
$get_duid = ($domainuserid, $headerargs) -> {
    $from_field = CAST($domainuserid AS UInt64);
    $from_header = CAST($ym_uid_regexp($headerargs).ym AS UInt64);
    RETURN $from_field ?? $from_header ?? 0;
};

-- Matches strings that contain a "cy:2" field delimited by ':' or by the
-- start/end of the string (presumably the Metrica browserinfo value - confirm).
-- cy:1 - got the same yandexuid as in the cookie (headerargs); cy:2 - otherwise.
-- More details: https://wiki.yandex-team.ru/jandexmetrika/doc/watchlog/#opisaniebrowserinfo
$uniqid_was_changed = Hyperscan::Grep(@@(^|\:)cy\:2($|\:)@@);

-- Return the significant domain of a URL, decoded from punycode when the
-- decoding succeeds; the raw significant domain otherwise, or "" when
-- neither is available.
$get_domain = ($url) -> {
    $significant = Url::GetSignificantDomain($url);
    RETURN Url::PunycodeToHostName($significant) ?? $significant ?? "";
};

-- True when the client user id is accepted by Identifiers::IsValidPhone and
-- starts with '+', '7' or '8'.
$check_phone = ($client_user_id) -> {
    $is_valid = Identifiers::IsValidPhone($client_user_id);
    $has_known_start = $client_user_id REGEXP @@^[\+78]@@;
    RETURN $is_valid AND $has_known_start;
};

-- ========================================================================= --

-- Public interface of this module. Definitions that are not listed here
-- (e.g. $uniq_array, $key_with_max_value, $get_date) are not exported.
EXPORT
    $strip, $lower_strip, $upper_strip,

    $get_subkey,
    $max_int,
    $force_null,

    $dict_join,
    $dict_dict_join,
    $dict_from_string_int,
    $dict_dict_from_string_int,

    $clean_app_id,
    $get_device_type,
    $dev_features,
    $get_connection_type,
    $clean_ip,
    $fp_capture_login,
    $clean_yuid_fuid,

    $parse_macs,

    $get_main_geo,
    $get_main_regions,
    $get_os,

    $seconds_from_table,

    $ClientUserID,
    $ym_uid_regexp,
    $get_duid,
    $uniqid_was_changed,
    $get_domain,
    $check_phone
;
