package ru.yandex.url.processor;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Set;
import java.util.function.Function;

import ru.yandex.detect.locale.LocaleDetector;
import ru.yandex.function.GenericFunction;

/**
 * Reduces a domain name to its longest known-TLD suffix plus one more label
 * to the left of it.
 *
 * <p>The set of known TLD strings is read once, at class initialization,
 * from the {@code tlds.txt} classpath resource located relative to this
 * class. Each line of that resource is stored verbatim as one entry.
 */
public enum CommonZoneExtractor
    implements Function<String, String>,
        GenericFunction<String, String, RuntimeException>
{
    INSTANCE;

    /** Known TLD strings; filled once by the static initializer below. */
    private static final Set<String> KNOWN_TLDS = new HashSet<>();

    static {
        loadTldsFrom("tlds.txt");
    }

    /**
     * Adds every line of the given classpath resource to
     * {@link #KNOWN_TLDS}. Lines are stored as-is (no trimming or
     * filtering).
     *
     * @param resourceName resource name, resolved relative to this class
     * @throws IllegalStateException if the resource cannot be found
     * @throws RuntimeException if reading or closing the resource fails
     */
    private static void loadTldsFrom(final String resourceName) {
        // A null resource is permitted by try-with-resources (close is
        // skipped), so the missing-resource check can live inside the block.
        try (InputStream in =
                 CommonZoneExtractor.class.getResourceAsStream(resourceName))
        {
            if (in == null) {
                throw new IllegalStateException(
                    "TLD list resource not found: " + resourceName);
            }
            BufferedReader reader =
                new BufferedReader(
                    new InputStreamReader(in, StandardCharsets.UTF_8));
            // readLine() is used instead of lines() so that read failures
            // surface as IOException and go through the handler below.
            for (String line = reader.readLine();
                 line != null;
                 line = reader.readLine())
            {
                KNOWN_TLDS.add(line);
            }
        } catch (IOException e) {
            throw new RuntimeException(
                "Failed to read TLD list resource: " + resourceName,
                e);
        }
    }

    /**
     * Extracts the zone of the given domain.
     *
     * <p>The domain is first lower-cased through
     * {@code LocaleDetector.INSTANCE.toLowerCase} and a single trailing
     * dot, if present, is ignored. Every suffix that starts right after a
     * dot is then looked up with {@link #isTld(String)}, from the shortest
     * to the longest. For each suffix that is a known TLD, the result
     * becomes that suffix extended by one label to the left. The longest
     * matching suffix therefore wins. The complete domain itself is never
     * looked up as a TLD.
     *
     * <p>For example, if both {@code uk} and {@code co.uk} are known TLDs,
     * {@code www.example.co.uk} yields {@code example.co.uk}.
     *
     * @param domain domain name, may be {@code null}
     * @return {@code null} for {@code null} input; the empty string for
     *     empty input; otherwise the extracted zone, or the whole
     *     lower-cased domain (without the trailing dot) when no suffix is
     *     a known TLD
     */
    @Override
    public String apply(String domain) {
        if (domain == null) {
            return null;
        }
        domain = LocaleDetector.INSTANCE.toLowerCase(domain);
        int end = domain.length();
        if (end == 0) {
            // Nothing to inspect; also avoids charAt(-1) below.
            return domain;
        }
        if (domain.charAt(end - 1) == '.') {
            // Ignore one trailing dot ("example.com." -> "example.com").
            --end;
        }
        // prev: index of the dot found on the previous iteration
        //       (== end before any dot has been found);
        // cur:  index of the next dot to the left of prev, or -1.
        int cur, prev = end;
        // Fallback when no suffix turns out to be a known TLD.
        String res = domain.substring(0, end);
        do  {
            // lastIndexOf returns -1 for a negative start index, so
            // prev == 0 is safe here.
            cur = domain.lastIndexOf('.', prev - 1);
            // The suffix after prev is a TLD candidate; on a hit, keep it
            // together with the label between cur and prev.
            if (prev != end && isTld(domain.substring(prev + 1, end))) {
                res = domain.substring(cur + 1, end);
            }
            prev = cur;
        } while (cur != -1);
        return res;
    }

    /**
     * Tells whether the given string is one of the loaded TLD entries.
     * The lookup is an exact match against the lines of the TLD resource.
     *
     * @param domainPart candidate suffix, without a leading dot
     * @return {@code true} if the string is in the known TLD set
     */
    public static boolean isTld(String domainPart) {
        return KNOWN_TLDS.contains(domainPart);
    }
}
