PRAGMA AnsiInForEmptyOrNullableItemsCollections;

-- Extracts the host-like segment from the path of a turbopages.org URL.
-- The path is split on '/': the first segment is returned when it contains
-- a dot, otherwise the second segment is returned. When the chosen segment
-- is missing (NULL), the input URL is returned unchanged.
$turbo_url_to_host = ($url) -> {
    $segments = String::SplitToList(Url::GetPath($url), '/');
    -- Index 0 is the part before the leading '/', so real segments start at 1.
    $first_segment = $segments[1];
    $host_segment =
        CASE
            WHEN String::Contains($first_segment, '.') THEN $first_segment
            ELSE $segments[2]
        END;
    RETURN $host_segment ?? $url
};

-- Extracts the original address from an old-style turbo URL.
-- If the 'text' CGI parameter is present and contains a dot, its value is
-- returned as is. Otherwise the path segment at index 2 (after splitting the
-- path on '/') is returned, falling back to the input URL when it is missing.
$old_turbo_url_to_host = ($url) -> {
    $text_param = Url::GetCGIParam($url, 'text');
    $path_parts = String::SplitToList(Url::GetPath($url), '/');
    RETURN
        CASE
            WHEN $text_param IS NOT NULL AND String::Contains($text_param, '.')
                THEN $text_param
            ELSE $path_parts[2] ?? $url
        END
};

-- Three-level domains that are kept as-is by $norm_host instead of being
-- reduced to their owner domain. Entries are sorted alphabetically; order
-- has no effect on set membership.
$third_level_domain_exclusions = AsSet(
    'avia.sakh.com',
    'booking.sakh.com',
    'charodeyjka.mediasole.ru',
    'chemodan.penzainform.ru',
    'dol.tatar.ru',
    'kurort.rosminzdrav.ru',
    'museum-rzd-spb.aif.ru',
    'museum.tatar.ru',
    'powerplace.mts.ru',
    'rsport.ria.ru',
    'sevkavkaz.tass.ru',
    'tchaikovsky.lenta.ru',
    'tourism.interfax.ru',
    'tourism.reso.ru',
    'travel-vrn.aif.ru',
    'travel.bspb.ru',
    'travel.drom.ru',
    'travel.kaspi.kz',
    'travel.open.ru',
    'travel.ozon.ru',
    'travel.rsb.ru',
    'travel.sakh.com',
    'travel.wildberries.ru',
    'turizm.e1.ru',
    'turizm.ngs.ru',
    'turizm.ngs55.ru',
    'vdnh80.tass.ru',
    'yuzhny.minfin.ru',
    'дт.мвд.рф'
);

-- Two-level domains under which $norm_host keeps the three-level domain
-- (when one exists) instead of the owner domain.
-- NOTE(review): these look like shared registry/hosting suffixes - confirm.
$second_level_domain_exclusions = AsSet(
    'com.az', 'com.jo', 'com.kg', 'com.mv', 'com.tm',
    'net.kg',
    'org.cy', 'org.qa',
    'okis.ru', 'tam.by', 'tb.ru', 'uds.app'
);

-- Normalizes a URL to a canonical host name, or NULL when no usable host
-- can be derived.
--
-- Steps:
--   1. Turbo URLs (turbopages.org, yandex.*/turbo/...) are replaced by the
--      address extracted with the turbo helpers.
--   2. Spaces and '%20' are removed; '%2F' and '%3A' are decoded.
--   3. The host is the three-level domain for the exclusion sets, otherwise
--      the owner domain without the 'www' prefix.
--   4. Yandex hosts are rewritten to '<service>.yandex.ru' and Google hosts
--      to '<service>.google.com' (or the bare domain when no service is
--      found); other hosts lose a leading 'm.' or are punycode-decoded.
--   5. Blacklisted or dot-less results become NULL; the rest is lowercased
--      and a few known aliases are mapped to their current domain.
--
-- Parameter: $url - the URL (or bare host) to normalize.
-- Returns:   the normalized host as an optional string.
$norm_host = ($url) -> {
    -- Final values that are rejected outright.
    $blacklist = AsSet('0.1', '0.0', '0.10', '0.100', '0.101', '0.110', '0.111', '0.200');

    -- Unwrap turbo pages so that the wrapped site is normalized, not the wrapper.
    $url = IF(Url::GetDomain($url, 2) = 'turbopages.org', $turbo_url_to_host($url), $url);
    $url = IF(
        Url::GetDomain($url, 2) like 'yandex.%' AND String::SplitToList(Url::GetPath($url), '/')[1] = 'turbo',
        $old_turbo_url_to_host($url), $url
        );

    -- Minimal manual clean-up instead of a full URL decode.
    $url_c = String::ReplaceAll(String::RemoveAll($url, ' '), '%20', '');
    $url_c = String::ReplaceAll($url_c, '%2F', '/');
    $url_c = String::ReplaceAll($url_c, '%3A', ':');
    -- $url_c = Url::Decode($url_c); -- fail on raw spylog on ForcePunycodeToHostName

    $host =
    CASE
        WHEN Url::GetDomain($url_c, 3) IN $third_level_domain_exclusions THEN Url::GetDomain($url_c, 3)
        WHEN Url::GetDomain($url_c, 2) IN $second_level_domain_exclusions AND Url::GetDomain($url_c, 3) IS NOT NULL THEN Url::GetDomain($url_c, 3)
        ELSE Url::CutWWW2(Url::GetOwner($url_c))
    END;

    $significant_domain = Url::GetSignificantDomain($url_c);

    $is_ya = (
        IF($significant_domain LIKE 'yandex.%' OR $significant_domain LIKE '%.yandex',
            1, 0)
    );

    $is_g = IF($significant_domain LIKE 'google.%', 1, 0);

    -- Service name taken from the first path segment; used when the host has
    -- no third-level part. The parse-error check and the path extraction both
    -- look at the cleaned $url_c so that they always refer to the same string.
    -- When parsing fails, the cleaned string itself is split on '/'.
    $path_service = IF(
        Url::Parse($url_c).ParseError IS NOT NULL,
        String::SplitToList($url_c, '/')[1],
        String::SplitToList(Url::Parse($url_c).Path, '/')[1]
    );

    $yaservice = IF(
        Url::GetDomain($host, 2) LIKE '%.yandex',
        -- '<service>.yandex' host: the service is the leftmost label.
        String::SplitToList(Url::GetDomain($host, 2), '.')[0],
        IF(
            Url::GetDomain($host, 3) IS NOT NULL,
            IF(
                String::SplitToList(Url::GetDomain($host, 3), '.')[0] == 'yandex',
                String::SplitToList(Url::GetDomain($host, 4), '.')[0],
                String::SplitToList(Url::GetDomain($host, 3), '.')[0]
            ),
            $path_service
        )
    );

    $gservice = IF(
        Url::GetDomain($host, 3) IS NOT NULL,
        IF(
            String::SplitToList(Url::GetDomain($host, 3), '.')[0] == 'google',
            String::SplitToList(Url::GetDomain($host, 4), '.')[0],
            String::SplitToList(Url::GetDomain($host, 3), '.')[0]
        ),
        $path_service
    );

    -- Non-Yandex, non-Google host: drop a leading 'm.' or decode punycode,
    -- then cap the length at 1000.
    $otherhost = substring(IF($host LIKE 'm.%',
        Substring($host,  2),
        Url::ForcePunycodeToHostName($host)
    ),0,1000);

    $yahost = IF(
            $is_ya == 1,
            if($yaservice is not Null and $yaservice != '', Substring($yaservice,0,1000) || '.yandex.ru', 'yandex.ru'),
            if($is_g == 1,
                if($gservice is not Null and $gservice != '', Substring($gservice,0,1000) || '.google.com', 'google.com'),
                $otherhost
            )
        );

    -- Reject blacklisted values, URLs without a two-level owner domain, and
    -- results that do not have a non-empty second dot-separated label.
    -- NOTE(review): the owner check uses $url rather than the cleaned $url_c - confirm this is intended.
    $finalhost = IF(
        $yahost in $blacklist
            or Url::GetDomain(Url::GetOwner($url), 2) IS NUll,
        NULL,
        if(
            ListLength(String::SplitToList($yahost, '.')) >1
            and String::SplitToList($yahost, '.')[1] != ''
            and $yahost!='.',
            Cast(String::AsciiToLower($yahost) as string),
            Null
        )
    );

    -- Map known aliases to the domain used for reporting.
    RETURN
        CASE $finalhost
                WHEN 'aliexpress.ru' THEN 'aliexpress.com'
                WHEN 'praktikum.yandex.ru' THEN 'practicum.yandex.ru'
                WHEN 'docdoc.ru' THEN 'sberhealth.ru'
                WHEN 'geekbrains.ru' THEN 'gb.ru'
                WHEN 'domru.ru' THEN 'dom.ru'
                WHEN 'goods.ru' THEN 'sbermegamarket.ru'
                WHEN 'tripadvisor.ru' THEN 'tripadvisor.com'
                ELSE $finalhost
        END
};

export $norm_host;
