package ru.yandex.crypta.lab.formatters;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Set;
import java.util.regex.Pattern;

import com.google.common.net.InternetDomainName;

/**
 * Formats a URL rule condition into a normalized {@code host[/path]} line.
 *
 * <p>Inputs starting with {@code regexp:} are only checked for being a compilable regular
 * expression and are passed through unchanged. Every other input is parsed as an HTTP(S) URL
 * ({@code https://} is assumed when no scheme is given), its host is validated, and tags are
 * collected describing which URL components were present.
 */
public class UrlFormatter implements RuleConditionFormatter {
    /** Tag added when the parsed URL has a query component. */
    public static final String HAS_QUERY_TAG = "url.hasQuery";
    /** Tag added when the parsed URL has a fragment component. */
    public static final String HAS_FRAGMENT_TAG = "url.hasFragment";
    /** Tag added when the path is not included in the line but the URL has a non-empty path. */
    public static final String HAS_PATH_TAG = "url.hasPath";

    private static final String WWW = "www";
    private static final String REGEXP_PREFIX = "regexp:";
    private static final String DEFAULT_SCHEME_PREFIX = "https://";

    private static final Set<String> PROTOCOLS = Set.of("http", "https");

    /** RFC 3986 scheme followed by {@code ://}; applied with {@code lookingAt()}, i.e. anchored at the start. */
    private static final Pattern LEADING_SCHEME = Pattern.compile("[a-zA-Z][a-zA-Z0-9+.-]*://");

    /** Trailing slashes stripped from the formatted line; compiled once instead of on every call. */
    private static final Pattern TRAILING_SLASHES = Pattern.compile("/+$");

    /** Formatter that appends the URL path to the host. */
    public static final UrlFormatter withPath = new UrlFormatter(true);

    /** Formatter that emits the host only and reports a non-empty path via {@link #HAS_PATH_TAG}. */
    public static final UrlFormatter withoutPath = new UrlFormatter(false);

    private final boolean addPath;

    /**
     * @param addPath {@code true} to append the URL path to the host in the formatted line;
     *                {@code false} to emit the host only and tag URLs that have a non-empty path
     */
    public UrlFormatter(boolean addPath) {
        this.addPath = addPath;
    }

    /**
     * Formats a single condition.
     *
     * @param url the raw condition: either {@code regexp:<pattern>} or a URL with an optional
     *            {@code http}/{@code https} scheme; must not be {@code null}
     * @return a line result for valid input, or an error result carrying the failure message when
     *         the regular expression does not compile, the input contains spaces, the URL is
     *         malformed, the protocol is not HTTP(S), or the host is not a valid domain under a
     *         registry suffix
     */
    @Override
    public FormattedLine format(final String url) {
        try {
            if (url.startsWith(REGEXP_PREFIX)) {
                // Compile only to validate; PatternSyntaxException is an IllegalArgumentException
                // and is therefore reported through the catch block below.
                Pattern.compile(url.substring(REGEXP_PREFIX.length()));
                return FormattedLine.line(url);
            }

            if (url.contains(" ")) {
                throw new IllegalArgumentException(String.format("Url contains spaces: %s", url));
            }

            var tags = new ArrayList<String>();

            // A scheme is recognized only at the very start of the input. Looking for "://"
            // anywhere would mistake a scheme-less URL that embeds another URL in its path or
            // query (e.g. example.com/go?to=http://other.org) for one that already has a scheme,
            // and java.net.URL would then reject it with "no protocol".
            var urlWithSchema = url;
            if (!LEADING_SCHEME.matcher(url).lookingAt()) {
                urlWithSchema = DEFAULT_SCHEME_PREFIX + url;
            }

            var parsed = new URL(urlWithSchema);

            if (!PROTOCOLS.contains(parsed.getProtocol())) {
                throw new IllegalArgumentException(String.format("unsupported protocol: %s", parsed.getProtocol()));
            }
            if (parsed.getQuery() != null) {
                tags.add(HAS_QUERY_TAG);
            }
            if (parsed.getRef() != null) {
                tags.add(HAS_FRAGMENT_TAG);
            }

            // InternetDomainName.from rejects syntactically invalid hosts (including an empty one)
            // with IllegalArgumentException.
            var domain = InternetDomainName.from(parsed.getHost());
            if (!domain.isUnderRegistrySuffix()) {
                throw new IllegalArgumentException(String.format("Hostname is invalid: %s", parsed.getHost()));
            }
            if (domain.parts().get(0).equals(WWW)) {
                // NOTE(review): topPrivateDomain() keeps only the label directly below the public
                // suffix, so for a host like www.a.example.com the intermediate label "a" is
                // dropped together with "www" - confirm this is intended.
                domain = domain.topPrivateDomain();
            }

            var line = domain.toString();
            if (addPath) {
                line += parsed.getPath();
            } else if (parsed.getPath() != null && !parsed.getPath().isEmpty()) {
                tags.add(HAS_PATH_TAG);
            }

            return FormattedLine.line(url, TRAILING_SLASHES.matcher(line).replaceAll(""), tags);
        } catch (MalformedURLException | IllegalArgumentException e) {
            return FormattedLine.error(url, e.getMessage());
        }
    }
}
