package ru.yandex.url.processor;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.IDN;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import ru.yandex.detect.locale.LocaleDetector;
import ru.yandex.net.uri.fast.FastUri;
import ru.yandex.net.uri.fast.FastUriParser;
import ru.yandex.util.string.StringUtils;

/**
 * Collects the characters of a URL candidate one by one and, on request,
 * turns the collected text into a normalized {@link FastUri}.
 *
 * <p>The caller appends characters with {@link #append(char)} and reports
 * what it has recognized so far via {@link #scheme()}, {@link #wwwPrefix()},
 * {@link #idnPrefix()} and {@link #userinfo()}. {@link #build(boolean)} then
 * strips trailing punctuation, adds a scheme prefix when none was seen,
 * parses the text and validates/normalizes the host.
 *
 * <p>Instances hold a mutable buffer and flags with no synchronization.
 */
public class UrlAccumulator {
    private static final String IDN_PREFIX = "xn--";
    // Decimal octet 0..255 without leading zeros.
    private static final String DEC_OCTET =
        "(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2(?:[0-4][0-9]|5[0-5]))";
    // Hexadecimal octet: "0x"/"0X" followed by one or two hex digits.
    private static final String HEX_OCTET = "(?:0[xX][0-9a-fA-F]{1,2})";
    // Octal octet: one or more leading zeros, then an octal value 1..0377.
    private static final String OCT_OCTET =
        "(?:0+(?:[1-7][0-7]?|[1-3][0-7][0-7]))";
    private static final String OCTET =
        "(?:" + DEC_OCTET + '|' + HEX_OCTET + '|' + OCT_OCTET + ')';
    // Canonical form: exactly four decimal octets.
    private static final Pattern DEC_IPV4_PATTERN =
        Pattern.compile("(?:" + DEC_OCTET + "[.]){3}" + DEC_OCTET);
    // Two to four octets, each in decimal, hexadecimal or octal notation.
    private static final Pattern IPV4_PATTERN =
        Pattern.compile("(?:" + OCTET + "[.]){1,3}" + OCTET);
    // Host made of a single number: "0x"-prefixed hex or a run of digits.
    private static final Pattern NUMERIC_HOST_PATTERN =
        Pattern.compile("(?:0[xX][0-9A-Fa-f]+|[0-9]+)$");
    private static final Map<String, Integer> DEFAULT_PORTS = new HashMap<>();
    private static final Set<String> KNOWN_TLDS = new HashSet<>();
    private static final Set<String> NUMERIC_HOST_SCHEMAS =
        new HashSet<>(Arrays.asList("http", "https", "ftp", "ssh"));
    private static final int INITIAL_CAPACITY = 64;
    // Bit set indexed by Character.getType() value; one bit per punctuation
    // category.
    private static final int PUNCT_MASK =
        (1 << Character.CONNECTOR_PUNCTUATION)
        | (1 << Character.DASH_PUNCTUATION)
        | (1 << Character.END_PUNCTUATION)
        | (1 << Character.FINAL_QUOTE_PUNCTUATION)
        | (1 << Character.INITIAL_QUOTE_PUNCTUATION)
        | (1 << Character.OTHER_PUNCTUATION)
        | (1 << Character.START_PUNCTUATION);

    static {
        DEFAULT_PORTS.put("http", 80);
        DEFAULT_PORTS.put("https", 443);
        DEFAULT_PORTS.put("ftp", 21);
        DEFAULT_PORTS.put("ssh", 22);

        try {
            loadTldsFrom("tlds-alpha-by-domain.txt");
            loadTldsFrom("pseudo-tlds.txt");
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Adds every TLD listed in the given class-relative resource to
     * {@link #KNOWN_TLDS}.
     *
     * <p>Empty lines and lines starting with {@code '#'} are skipped. Each
     * remaining line is lower-cased and stored; a line starting with
     * {@code "xn--"} is additionally stored in its Unicode form, lower-cased
     * with the locale reported by {@link LocaleDetector} (or
     * {@link Locale#ROOT} when none is reported).
     *
     * @param resourceName resource name resolved relative to this class
     * @throws IOException if the resource is missing or cannot be read
     */
    private static void loadTldsFrom(final String resourceName)
        throws IOException
    {
        InputStream in =
            UrlAccumulator.class.getResourceAsStream(resourceName);
        if (in == null) {
            // getResourceAsStream signals a missing resource with null;
            // report it by name instead of failing later with a bare NPE.
            throw new IOException(
                "TLD list resource not found: " + resourceName);
        }
        try (BufferedReader reader =
                new BufferedReader(
                    new InputStreamReader(in, StandardCharsets.UTF_8)))
        {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.isEmpty() || line.charAt(0) == '#') {
                    continue;
                }
                line = line.toLowerCase(Locale.ROOT);
                KNOWN_TLDS.add(line);
                if (line.startsWith(IDN_PREFIX)) {
                    String str = IDN.toUnicode(line);
                    Locale locale =
                        LocaleDetector.INSTANCE.process(str.toCharArray());
                    if (locale == null) {
                        locale = Locale.ROOT;
                    }
                    KNOWN_TLDS.add(str.toLowerCase(locale));
                }
            }
        }
    }

    private final ImmutableUrlProcessorConfig config;
    private char[] buf = new char[INITIAL_CAPACITY];
    private int len = 0;
    private boolean hasScheme;
    private boolean hasWwwPrefix;
    private boolean hasIdnPrefix;
    private boolean hasUserinfo;

    /**
     * Creates an empty accumulator.
     *
     * @param config configuration consulted by {@link #build(boolean)} for
     *     the scheme prefixes and for whether a scheme is required
     */
    public UrlAccumulator(final ImmutableUrlProcessorConfig config) {
        this.config = config;
    }

    /**
     * Resets the accumulated length and all flags; the buffer is kept for
     * reuse.
     */
    public void clear() {
        len = 0;
        hasScheme = false;
        hasWwwPrefix = false;
        hasIdnPrefix = false;
        hasUserinfo = false;
    }

    /**
     * Appends one character, growing the buffer when it is full.
     *
     * @param c character to append
     */
    public void append(final char c) {
        ensureCapacity(len + 1);
        buf[len++] = c;
    }

    /** Marks the accumulated text as having a scheme. */
    public void scheme() {
        hasScheme = true;
    }

    /** Marks the accumulated text as having a www prefix. */
    public void wwwPrefix() {
        hasWwwPrefix = true;
    }

    /** Marks the accumulated text as having an IDN prefix. */
    public void idnPrefix() {
        hasIdnPrefix = true;
    }

    /** Marks the accumulated text as having a userinfo part. */
    public void userinfo() {
        hasUserinfo = true;
    }

    /**
     * @return {@code true} if at least one of the scheme, www prefix, IDN
     *     prefix or userinfo flags has been set since the last
     *     {@link #clear()}
     */
    public boolean canBeUrl() {
        return hasScheme || hasWwwPrefix || hasIdnPrefix || hasUserinfo;
    }

    /**
     * @return number of accumulated characters
     */
    public int length() {
        return len;
    }

    /**
     * Grows the buffer so it can hold at least {@code len} characters; the
     * capacity is at least doubled on every growth.
     */
    private void ensureCapacity(final int len) {
        if (len > buf.length) {
            buf = Arrays.copyOf(buf, Math.max(len, buf.length << 1));
        }
    }

    /**
     * Shifts the accumulated characters right and writes {@code prefix} in
     * front of them. The {@code len} field is NOT updated.
     *
     * @return combined length of the prefix and the accumulated text
     */
    private int prepend(final String prefix) {
        int prefixLen = prefix.length();
        int totalLen = len + prefixLen;
        ensureCapacity(totalLen);
        System.arraycopy(buf, 0, buf, prefixLen, len);
        prefix.getChars(0, prefixLen, buf, 0);
        return totalLen;
    }

    /**
     * @return {@code true} if {@code c} belongs to one of the punctuation
     *     categories in {@link #PUNCT_MASK} and is not {@code '&'}
     */
    private static boolean isPunct(final char c) {
        return ((PUNCT_MASK >> Character.getType(c)) & 1) != 0 && c != '&';
    }

    /**
     * Parses one IPv4 octet: {@code 0x}/{@code 0X} prefix means hexadecimal,
     * any other leading {@code 0} followed by more characters means octal,
     * a lone {@code "0"} is zero, anything else is decimal.
     */
    private static int parseOctet(final String str) {
        if (str.charAt(0) != '0') {
            return Integer.parseInt(str);
        }
        if (str.length() == 1) {
            return 0;
        }
        char c = str.charAt(1);
        if (c == 'x' || c == 'X') {
            return Integer.parseInt(str.substring(2), 16);
        }
        return Integer.parseInt(str.substring(1), 8);
    }

    /**
     * Rewrites a dotted address of up to four octets as four decimal octets.
     * When fewer than four parts are given, zero octets are inserted before
     * the last part (e.g. {@code "1.2"} becomes {@code "1.0.0.2"}).
     */
    @SuppressWarnings("StringSplitter")
    private static String normalizeIPv4(final String addr) {
        String[] parts = addr.split("\\.");
        int last = parts.length - 1;
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < last; ++i) {
            sb.append(parseOctet(parts[i]));
            sb.append('.');
        }
        for (int i = last; i < 3; ++i) {
            sb.append('0');
            sb.append('.');
        }
        sb.append(parseOctet(parts[last]));
        return new String(sb);
    }

    /**
     * Converts a host consisting of a single number into dotted-decimal
     * form, treating the number as an unsigned 32-bit value. A
     * {@code 0x}/{@code 0X} prefix selects hexadecimal, any other leading
     * {@code 0} followed by more characters selects octal.
     *
     * @param host non-empty host
     * @return the dotted-decimal address, or {@code host} unchanged if the
     *     number cannot be parsed as an unsigned 32-bit integer
     */
    private static String numericHostToDottedQuad(final String host) {
        try {
            int value;
            if (host.charAt(0) != '0') {
                value = Integer.parseUnsignedInt(host);
            } else if (host.length() == 1) {
                value = 0;
            } else {
                char c = host.charAt(1);
                if (c == 'x' || c == 'X') {
                    value =
                        Integer.parseUnsignedInt(host.substring(2), 16);
                } else {
                    value =
                        Integer.parseUnsignedInt(host.substring(1), 8);
                }
            }
            StringBuilder sb = new StringBuilder();
            sb.append((value >>> 24) & 0xff);
            sb.append('.');
            sb.append((value >>> 16) & 0xff);
            sb.append('.');
            sb.append((value >>> 8) & 0xff);
            sb.append('.');
            sb.append(value & 0xff);
            return new String(sb);
        } catch (NumberFormatException ignored) {
            // Deliberate best effort: values that overflow 32 bits or that
            // contain digits invalid for the radix (e.g. "09") are left
            // as they are.
            return host;
        }
    }

    /**
     * @return {@code true} if {@code buf} contains {@code '('} at any index
     *     below {@code end}
     */
    private boolean hasOpeningParenBefore(final int end) {
        for (int i = end - 1; i >= 0; --i) {
            if (buf[i] == '(') {
                return true;
            }
        }
        return false;
    }

    /**
     * Drops trailing punctuation from the accumulated text by decreasing
     * {@code len}. Stripping stops at a non-punctuation character, at
     * {@code '/'}, and at {@code ')'} when an opening {@code '('} occurs
     * earlier in the text.
     */
    private void stripTrailingPunctuation() {
        while (len > 0) {
            char c = buf[len - 1];
            if (!isPunct(c) || c == '/') {
                break;
            }
            if (c == ')' && hasOpeningParenBefore(len - 1)) {
                break;
            }
            --len;
        }
    }

    /**
     * Builds a normalized URI from the accumulated characters.
     *
     * <p>Steps, in order: trailing punctuation is stripped; if no scheme
     * was reported, {@code "mailto:"} (when userinfo was reported) or one of
     * the configured scheme prefixes is prepended; the text is parsed; the
     * scheme is lower-cased; a port equal to the scheme's default is
     * replaced with {@code -1}; a bare {@code "/"} path with no query and
     * fragment is replaced with an empty path; the host is validated and
     * IPv4/numeric hosts are rewritten in dotted-decimal form.
     *
     * <p>NOTE: this method mutates the accumulator. Stripping shortens the
     * stored length, and a prepended prefix is written into the buffer
     * without the stored length being updated, so callers presumably
     * {@link #clear()} the accumulator before reusing it.
     *
     * @param strictHostValidation if {@code true}, a non-empty host must
     *     always be an IPv4 address or end with a known TLD; if
     *     {@code false}, that check is skipped when a scheme or IDN prefix
     *     was reported or the host starts with {@code "www."}
     * @return the normalized URI, or {@code null} if nothing is left after
     *     stripping, a required scheme is missing, the host is rejected,
     *     the host is absent for a scheme in {@link #NUMERIC_HOST_SCHEMAS}
     *     that was reported via {@link #scheme()}, or parsing fails
     */
    public FastUri build(final boolean strictHostValidation) {
        try {
            stripTrailingPunctuation();
            if (len == 0) {
                return null;
            }

            // Length of the text handed to the parser, including any
            // prefix added below; the field itself is left untouched.
            int uriLen = len;
            if (!hasScheme) {
                if (hasUserinfo) {
                    uriLen = prepend("mailto:");
                } else if (hasWwwPrefix
                    || hasIdnPrefix
                    || !config.requireSchemeForNonMailto())
                {
                    if (uriLen >= 2 && buf[0] == '/' && buf[1] == '/') {
                        uriLen = prepend(config.shortSchemePrefix());
                    } else {
                        uriLen = prepend(config.fullSchemePrefix());
                    }
                } else {
                    return null;
                }
            }
            FastUri uri = new FastUriParser(buf, 0, uriLen).parse();

            String scheme = uri.scheme().toLowerCase(Locale.ROOT);

            int port = uri.port();
            Integer defaultPort = DEFAULT_PORTS.get(scheme);
            if (defaultPort != null && port == defaultPort.intValue()) {
                port = -1;
            }

            String path = uri.path();
            String query = uri.query();
            String fragment = uri.fragment();
            if ("/".equals(path)
                && (query == null || query.isEmpty())
                && (fragment == null || fragment.isEmpty()))
            {
                path = "";
                query = null;
                fragment = null;
            }

            String host = StringUtils.nullifyEmpty(uri.host());
            if (host != null) {
                boolean goodHost = !strictHostValidation
                    && (hasScheme
                        || hasIdnPrefix
                        || host.startsWith("www."));
                if (!goodHost && IPV4_PATTERN.matcher(host).matches()) {
                    goodHost = true;
                    if (!DEC_IPV4_PATTERN.matcher(host).matches()) {
                        host = normalizeIPv4(host);
                    }
                }
                if (!goodHost) {
                    String tld = host.substring(host.lastIndexOf('.') + 1);
                    goodHost = KNOWN_TLDS.contains(tld);
                }
                if (!goodHost) {
                    return null;
                }
            }

            if (hasScheme && NUMERIC_HOST_SCHEMAS.contains(scheme)) {
                if (host == null) {
                    // Explicit form of what previously happened through a
                    // caught NullPointerException: such URIs are rejected.
                    return null;
                }
                if (NUMERIC_HOST_PATTERN.matcher(host).matches()) {
                    host = numericHostToDottedQuad(host);
                }
            }

            return new FastUri(
                scheme,
                StringUtils.nullifyEmpty(uri.userInfo()),
                host,
                port,
                path,
                query,
                fragment,
                uri.schemeSpecificPart(),
                uri.authority());
        } catch (RuntimeException | URISyntaxException e) {
            return null;
        }
    }
}

