package ru.yandex.detect.locale;

import java.util.Arrays;
import java.util.Locale;

import ru.yandex.function.CharArrayProcessor;

/**
 * Detects which of the registered alphabets a piece of text is written in
 * and provides a best effort, locale-aware lower-casing helper.
 *
 * <p>Every alphabet constant below is registered for one locale in the
 * constructor; detection works by checking which registered alphabets
 * contain all alphabetic characters of the input.
 */
public enum LocaleDetector
    implements CharArrayProcessor<Locale, RuntimeException>
{
    INSTANCE;

    public static final String ENGLISH_ALPHABET =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    public static final String GERMAN_ALPHABET =
        "AaÄäBbCcDdEeFfGgHhIiJjKkLlMmNnOoÖöPpQqRrSsẞßTtUuÜüVvWwXxYyZz";
    public static final String FRENCH_ALPHABET =
        "AaÂâÀàBbCcÇçDdEeÉéÊêÈèËëFfGgHhIiÎîÏïJjKkLlMmNnOoÔôPpQqRrSsTtUuÛûÙùÜü"
        + "VvWwXxYyŸÿZz";
    public static final String POLISH_ALPHABET =
        "AĄBCĆDEĘFGHIJKLŁMNŃOÓPQRSŚTUVWXYZŹŻaąbcćdeęfghijklłmnńoópqrsśtuvwxyz"
        + "źż";
    public static final String SWEDISH_ALPHABET =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZÅÄÖabcdefghijklmnopqrstuvwxyzåäö";
    public static final String NORWEGIAN_ALPHABET =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÆæØøÅå";
    public static final String ITALIAN_ALPHABET =
        "ABCDEFGHILMNOPQRSTUVZabcdefghilmnopqrstuvzÀàÉéÈèÌìÍíÎîÒòÓóÙùÚú";
    public static final String LITHUANIAN_ALPHABET =
        "AaĄąBbCcČčDdEeĘęĖėFfGgHhIiĮįYyJjKkLlMmNnOoPpRrSsŠšTtUuŲųŪūVvZzŽž";
    public static final String TURKISH_ALPHABET =
        "AaBbCcÇçDdEeFfGgĞğHhIıİiJjKkLlMmNnOoÖöPpRrSsŞşTtUuÜüVvYyZz";
    public static final String KAZAKH_LATIN_ALPHABET =
        "AaÁáBbDdEeFfGgǴǵHhIiIıJjKkLlMmNnŃńOoÓóPpQqRrSsTtUuÚúVvYyÝýZz";
    public static final String AZERBAIJANI_LATIN_ALPHABET =
        "AaBbCcÇçDdEeƏəFfGgĞğHhXxIıİiJjKkQqLlMmNnOoÖöPpRrSsŞşTtUuÜüVvYyZz";
    // Deviation from Wikipedia:
    // Zamanälif-2 used "Әә" instead of "Ää", so some dictionaries contain
    // words spelled with it, for example:
    // https://classes.ru/all-tatar/dictionary-tatar-russian-k-term-2035.htm
    // Both characters are therefore allowed in the Latin alphabet.
    public static final String TATAR_LATIN_ALPHABET =
        "AaÄäBbCcÇçDdEeFfGgĞğHhIıİiJjKkQqLlMmNnÑñOoÖöPpRrSsŞşTtUuÜüVvWwXxYyZz"
        + "Әә";

    public static final String RUSSIAN_ALPHABET =
        "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя";
    public static final String UKRAINIAN_ALPHABET =
        "АаБбВвГгҐґДдЕеЄєЖжЗзИиІіЇїЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЬьЮюЯя";
    public static final String BELARUSIAN_ALPHABET =
        "АаБбВвГгДдЕеЁёЖжЗзІіЙйКкЛлМмНнОоПпРрСсТтУуЎўФфХхЦцЧчШшЫыЬьЭэЮюЯя";
    public static final String KAZAKH_CYRILLIC_ALPHABET =
        "АаӘәБбВвГгҒғДдЕеЁёЖжЗзИиЙйКкҚқЛлМмНнҢңОоӨөПпРрСсТтУуҰұҮүФфХхҺһЦцЧчШш"
        + "ЩщЪъЫыІіЬьЭэЮюЯя";
    public static final String AZERBAIJANI_CYRILLIC_ALPHABET =
        "АаБбВвГгҒғДдЕеӘәЖжЗзИиЈјКкҜҝЛлМмНнОоӨөПпРрСсТтУуҮүФфХхҺһЧчҸҹШшЫы";
    public static final String TATAR_CYRILLIC_ALPHABET =
        "АаӘәБбВвГгДдЕеЁёЖжҖҗЗзИиЙйКкЛлМмНнҢңОоӨөПпРрСсТтУуҮүФфХхҺһЦцЧчШшЩщЪъ"
        + "ЫыЬьЭэЮюЯя";

    public static final String GREEK_ALPHABET =
        "ΑαΒβΓγΔδΕεΖζΗηΘθΙιΚκΛλΜμΝνΞξΟοΠπΡρΣσςϹϲΤτΥυΦφΧχΨψΩω"
        + "\u03A2ΪϊΫϋάέήίΰόύώ";

    // Filled in by the constructor below; the reference never changes.
    @SuppressWarnings("ImmutableEnumChecker")
    private final LocaleTables tables = new LocaleTables();

    LocaleDetector() {
        // Latin based alphabets
        tables.addLocale(Locale.ENGLISH, ENGLISH_ALPHABET);
        tables.addLocale(Locale.GERMAN, GERMAN_ALPHABET);
        tables.addLocale(Locale.FRENCH, FRENCH_ALPHABET);
        tables.addLocale(Locale.forLanguageTag("pl-PL"), POLISH_ALPHABET);
        tables.addLocale(Locale.forLanguageTag("sv-SE"), SWEDISH_ALPHABET);
        tables.addLocale(Locale.forLanguageTag("no-NO"), NORWEGIAN_ALPHABET);
        tables.addLocale(Locale.forLanguageTag("it-IT"), ITALIAN_ALPHABET);
        tables.addLocale(Locale.forLanguageTag("lt-LT"), LITHUANIAN_ALPHABET);
        tables.addLocale(Locale.forLanguageTag("tr-TR"), TURKISH_ALPHABET);
        tables.addLocale(
            Locale.forLanguageTag("kk-Latn-KZ"),
            KAZAKH_LATIN_ALPHABET);
        tables.addLocale(
            Locale.forLanguageTag("az-Latn-AZ"),
            AZERBAIJANI_LATIN_ALPHABET);
        tables.addLocale(
            Locale.forLanguageTag("tt-Latn-RU"),
            TATAR_LATIN_ALPHABET);

        // Cyrillic based alphabets
        tables.addLocale(Locale.forLanguageTag("ru-RU"), RUSSIAN_ALPHABET);
        tables.addLocale(Locale.forLanguageTag("uk-UA"), UKRAINIAN_ALPHABET);
        tables.addLocale(Locale.forLanguageTag("be-BY"), BELARUSIAN_ALPHABET);
        tables.addLocale(
            Locale.forLanguageTag("kk-Cyrl-KZ"),
            KAZAKH_CYRILLIC_ALPHABET);
        tables.addLocale(
            Locale.forLanguageTag("az-Cyrl-AZ"),
            AZERBAIJANI_CYRILLIC_ALPHABET);
        tables.addLocale(
            Locale.forLanguageTag("tt-Cyrl-RU"),
            TATAR_CYRILLIC_ALPHABET);

        // National alphabets
        tables.addLocale(Locale.forLanguageTag("el-GR"), GREEK_ALPHABET);
    }

    /**
     * Detects the locale of {@code len} chars of {@code buf} starting at
     * {@code off}, using a freshly allocated counter array.
     *
     * @return {@code null} on a mixture of locales, {@link Locale#ROOT} on
     *     an unknown language, the detected locale otherwise
     * @see #detectLocale(int[], char[], int, int)
     */
    @Override
    public Locale process(final char[] buf, final int off, final int len) {
        return detectLocale(prepareLocaleChars(), buf, off, len);
    }

    /**
     * Allocates a zeroed counter array with one slot per registered locale,
     * suitable for passing to {@link #detectLocale(int[], char[], int, int)}.
     */
    public int[] prepareLocaleChars() {
        return new int[tables.localesCount()];
    }

    /**
     * Detects the locale whose alphabet contains every alphabetic code
     * point of the given range.
     *
     * <p>Non-alphabetic code points (digits, punctuation, unpaired
     * surrogates, ...) are ignored.
     *
     * @param localeChars per-locale counters, indexed like the locales in
     *     the tables; must be zeroed by the caller and is incremented here
     * @param buf chars to examine
     * @param off index of the first char to examine
     * @param len number of chars to examine
     * @return {@link Locale#ROOT} if no alphabetic code point belongs to any
     *     registered alphabet (including input without alphabetic code
     *     points at all); {@code null} if only some of them do, or if no
     *     single locale covers all of them; otherwise the covering locale
     *     with the lowest table index
     */
    public Locale detectLocale(
        final int[] localeChars,
        final char[] buf,
        int off,
        int len)
    {
        int end = off + len;
        int alphas = 0;
        int unmatchedAlphas = 0;
        int pos = off;
        while (pos < end) {
            char c = buf[pos];
            // Decodes a surrogate pair into one code point; an unpaired
            // surrogate is returned as is and is not alphabetic.
            int codePoint = Character.codePointAt(buf, pos, end);
            pos += Character.charCount(codePoint);
            if (!Character.isAlphabetic(codePoint)) {
                continue;
            }
            ++alphas;
            // The table is queried with the first UTF-16 unit, i.e. with
            // the high surrogate for supplementary code points.
            int localeMask = tables.charToLocaleMask(c);
            if (localeMask == 0) {
                ++unmatchedAlphas;
                continue;
            }
            // Visit every set bit directly; a shifting probe mask would
            // overflow and spin forever once bit 30 is in use.
            for (int bits = localeMask; bits != 0; bits &= bits - 1) {
                ++localeChars[Integer.numberOfTrailingZeros(bits)];
            }
        }
        if (unmatchedAlphas == alphas) {
            // Unknown language, let it be
            return Locale.ROOT;
        }
        if (unmatchedAlphas > 0) {
            return null;
        }
        for (int i = 0; i < localeChars.length; ++i) {
            if (localeChars[i] == alphas) {
                return tables.locale(i);
            }
        }
        return null;
    }

    /**
     * Best effort heuristic lower case for domains and emails.
     *
     * <p>The string is split into tokens made of alphabetic code points,
     * digits, {@code '-'} and {@code '_'}. Each token is lower-cased with
     * the locale detected for that token ({@link Locale#ROOT} when the
     * detection yields {@code null}); everything between tokens is copied
     * verbatim.
     *
     * @param str string to convert
     * @return {@code str} itself if no code point in it changes under
     *     {@link Character#toLowerCase(int)}, the converted string otherwise
     */
    public String toLowerCase(final String str) {
        if (!hasUpperCase(str)) {
            return str;
        }
        int len = str.length();
        int[] localeChars = prepareLocaleChars();
        char[] chars = str.toCharArray();
        StringBuilder sb = new StringBuilder(len);
        int tokenStart = 0;
        int pos = 0;
        while (pos < len) {
            int codePoint = Character.codePointAt(chars, pos, len);
            int next = pos + Character.charCount(codePoint);
            if (!isTokenCodePoint(codePoint)) {
                // The pending token ends where this separator starts, so a
                // separator encoded as a surrogate pair never leaks its
                // high surrogate into the token.
                if (tokenStart < pos) {
                    sb.append(
                        lowerCaseToken(
                            localeChars,
                            chars,
                            tokenStart,
                            pos - tokenStart));
                }
                sb.append(chars, pos, next - pos);
                tokenStart = next;
            }
            pos = next;
        }
        if (tokenStart == 0) {
            // No separator was seen, the whole string is a single token
            return lowerCaseToken(localeChars, chars, 0, len);
        }
        if (tokenStart < len) {
            sb.append(
                lowerCaseToken(
                    localeChars,
                    chars,
                    tokenStart,
                    len - tokenStart));
        }
        return sb.toString();
    }

    // Tells whether any code point of str differs from its lower case form.
    private static boolean hasUpperCase(final String str) {
        int len = str.length();
        int pos = 0;
        while (pos < len) {
            int codePoint = str.codePointAt(pos);
            if (codePoint != Character.toLowerCase(codePoint)) {
                return true;
            }
            pos += Character.charCount(codePoint);
        }
        return false;
    }

    // Tells whether the code point is part of a token rather than a
    // separator.
    private static boolean isTokenCodePoint(final int codePoint) {
        return Character.isAlphabetic(codePoint)
            || Character.isDigit(codePoint)
            || codePoint == '-'
            || codePoint == '_';
    }

    // Lower-cases one token using the locale detected for it, falling back
    // to Locale.ROOT on a locale mixture. Resets localeChars before use.
    private String lowerCaseToken(
        final int[] localeChars,
        final char[] chars,
        final int off,
        final int len)
    {
        Arrays.fill(localeChars, 0);
        Locale locale = detectLocale(localeChars, chars, off, len);
        if (locale == null) {
            locale = Locale.ROOT;
        }
        return new String(chars, off, len).toLowerCase(locale);
    }
}

