package ru.yandex.calendar.util.xml;

import java.net.IDN;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Pattern;

import org.jdom.Content;
import org.jdom.Element;
import org.jdom.Text;

import ru.yandex.bolts.collection.Cf;
import ru.yandex.bolts.collection.ListF;
import ru.yandex.bolts.collection.Option;
import ru.yandex.bolts.collection.Try;
import ru.yandex.misc.io.http.UrlUtils;
import ru.yandex.misc.regex.Matcher2;
import ru.yandex.misc.regex.Pattern2;

/**
 * Finds e-mail addresses and URLs in plain text and turns them into {@code <a href="...">} JDOM
 * elements, leaving the rest of the text as {@link Text} nodes.
 *
 * @author dbrylev
 */
public class LinkReplacement {

    // http://data.iana.org/TLD/tlds-alpha-by-domain.txt version 2014092400 IDNA reduced
    private static final String ROOT_DOMAIN_NAMES_PATTERN_STR = "ac|academy|accountants|active|actor|ad|ae|aero|af|ag" +
            "|agency|ai|airforce|al|am|an|ao|aq|ar|archi|army|arpa|as|asia|associates|at|attorney|au|auction|audio" +
            "|autos|aw|ax|axa|az|ba|bar|bargains|bayern|bb|bd|be|beer|berlin|best|bf|bg|bh|bi|bid|bike|bio|biz|bj" +
            "|black|blackfriday|blue|bm|bmw|bn|bnpparibas|bo|boo|boutique|br|brussels|bs|bt|budapest|build|builders" +
            "|business|buzz|bv|bw|by|bz|bzh|ca|cab|cal|camera|camp|cancerresearch|capetown|capital|caravan|cards|care" +
            "|career|careers|casa|cash|cat|catering|cc|cd|center|ceo|cern|cf|cg|ch|channel|cheap|christmas|chrome" +
            "|church|ci|citic|city|ck|cl|claims|cleaning|click|clinic|clothing|club|cm|cn|co|codes|coffee|college" +
            "|cologne|com|community|company|computer|condos|construction|consulting|contractors|cooking|cool|coop" +
            "|country|cr|credit|creditcard|cruises|cu|cuisinella|cv|cw|cx|cy|cymru|cz|dad|dance|dating|day|de|deals" +
            "|degree|democrat|dental|dentist|desi|diamonds|diet|digital|direct|directory|discount|dj|dk|dm|dnp|do" +
            "|domains|durban|dz|eat|ec|edu|education|ee|eg|email|engineer|engineering|enterprises|equipment|er|es|esq" +
            "|estate|et|eu|eus|events|exchange|expert|exposed|fail|farm|feedback|fi|finance|financial|fish|fishing" +
            "|fitness|fj|fk|flights|florist|fly|fm|fo|foo|foundation|fr|frl|frogans|fund|furniture|futbol|ga|gal" +
            "|gallery|gb|gbiz|gd|ge|gent|gf|gg|gh|gi|gift|gifts|gives|gl|glass|gle|global|globo|gm|gmail|gmo|gmx|gn" +
            "|google|gop|gov|gp|gq|gr|graphics|gratis|green|gripe|gs|gt|gu|guide|guitars|guru|gw|gy|hamburg|haus" +
            "|healthcare|help|here|hiphop|hiv|hk|hm|hn|holdings|holiday|homes|horse|host|hosting|house|how|hr|ht|hu" +
            "|id|ie|il|im|immo|immobilien|in|industries|info|ing|ink|institute|insure|int|international|investments" +
            "|io|iq|ir|is|it|je|jetzt|jm|jo|jobs|joburg|jp|juegos|kaufen|ke|kg|kh|ki|kim|kitchen|kiwi|km|kn|koeln|kp" +
            "|kr|krd|kred|kw|ky|kz|la|lacaixa|land|lawyer|lb|lc|lease|lgbt|li|life|lighting|limited|limo|link|lk" +
            "|loans|london|lotto|lr|ls|lt|ltda|lu|luxe|luxury|lv|ly|ma|maison|management|mango|market|marketing|mc" +
            "|md|me|media|meet|melbourne|meme|menu|mg|mh|miami|mil|mini|mk|ml|mm|mn|mo|mobi|moda|moe|monash|mortgage" +
            "|moscow|motorcycles|mov|mp|mq|mr|ms|mt|mu|museum|mv|mw|mx|my|mz|na|nagoya|name|navy|nc|ne|net|network" +
            "|neustar|new|nexus|nf|ng|ngo|nhk|ni|ninja|nl|no|np|nr|nra|nrw|nu|nyc|nz|okinawa|om|ong|onl|ooo|org" +
            "|organic|otsuka|ovh|pa|paris|partners|parts|pe|pf|pg|ph|pharmacy|photo|photography|photos|physio|pics" +
            "|pictures|pink|pizza|pk|pl|place|plumbing|pm|pn|post|pr|praxi|press|pro|prod|productions|prof|properties" +
            "|property|ps|pt|pub|pw|py|qa|qpon|quebec|re|realtor|recipes|red|rehab|reise|reisen|ren|rentals|repair" +
            "|report|republican|rest|restaurant|reviews|rich|rio|ro|rocks|rodeo|rs|rsvp|ru|ruhr|rw|ryukyu|sa|saarland" +
            "|sarl|sb|sc|sca|scb|schmidt|schule|scot|sd|se|services|sexy|sg|sh|shiksha|shoes|si|singles|sj|sk|sl" +
            "|sm|sn|so|social|software|sohu|solar|solutions|soy|space|spiegel|sr|st|su|supplies|supply|support|surf" +
            "|surgery|suzuki|sv|sx|sy|systems|sz|tatar|tattoo|tax|tc|td|technology|tel|tf|tg|th|tienda|tips|tirol" +
            "|tj|tk|tl|tm|tn|to|today|tokyo|tools|top|town|toys|tp|tr|trade|training|travel|tt|tv|tw|tz|ua|ug|uk" +
            "|university|uno|uol|us|uy|uz|va|vacations|vc|ve|vegas|ventures|versicherung|vet|vg|vi|viajes|villas" +
            "|vision|vlaanderen|vn|vodka|vote|voting|voto|voyage|vu|wales|wang|watch|webcam|website|wed|wf|whoswho" +
            "|wien|wiki|williamhill|wme|work|works|world|ws|wtc|wtf|xxx|xyz|yachts|yandex|ye|yokohama|youtube|yt" +
            "|za|zip|zm|zone|zw|москва|қаз|онлайн|сайт|срб|орг|дети|мон|рф";

    // A single dot-separated host label: starts with a Latin/Cyrillic (Ѐ-џ) letter or a digit,
    // continues with the same characters or hyphens.
    private static final String HOST_WORD_PATTERN_STR = "[Ѐ-џA-Za-z\\d][-Ѐ-џA-Za-z\\d]*";

    // A URL is recognized in one of three forms:
    //   1. explicit http/https/ftp scheme followed by a host of at least two labels;
    //   2. a host starting with "www" or "ftp" followed by at least one more label;
    //   3. any host whose last label is a known top-level domain.
    // Forms 2 and 3 must not be directly preceded by "<letter>://", so that the host of a URL
    // with some other scheme is not picked up on its own.
    // The host may be followed by a port and by a path/query/fragment tail. The tail has to end
    // with a letter, a digit or '=' (optionally followed by "()" or "/"), which keeps trailing
    // punctuation of the surrounding sentence out of the link.
    private static final String URL_PATTERN_STR =
            "(?:(?:http|https|ftp)://" + HOST_WORD_PATTERN_STR + "(?:\\." + HOST_WORD_PATTERN_STR + ")+" +
            "|(?<![A-Za-z]://)(?:www|ftp)(?:\\." + HOST_WORD_PATTERN_STR + ")+" +
            "|(?<![A-Za-z]://)(?:" + HOST_WORD_PATTERN_STR + "\\.)+(?:" + ROOT_DOMAIN_NAMES_PATTERN_STR + ")" +
            ")(?::\\d+)?" +
            "(?:(?:/|\\?|#)[Ѐ-џ-\\w\\$\\.+!\\*\\(\\)\\[\\],;:@&=\\\\?/~#%]*[Ѐ-џA-Za-z=\\d](?:\\(\\)|/)?)?";

    // Local part of an e-mail address: like a host label, but '_' and '.' are allowed after the first character.
    private static final String EMAIL_LOGIN_PATTERN_STR = "[Ѐ-џA-Za-z\\d][-_\\.Ѐ-џA-Za-z\\d]*";

    // login@host, where host is one or more dot-separated host labels (no top-level domain check).
    private static final Pattern2 EMAIL_PATTERN = new Pattern2(Pattern.compile(
            EMAIL_LOGIN_PATTERN_STR + "@(?:" + HOST_WORD_PATTERN_STR + "\\.)*(?:" + HOST_WORD_PATTERN_STR + ")"));

    // Group 1: non-alphanumeric character before the URL (or empty at the start of input),
    // group 2: the URL itself,
    // group 3: non-alphanumeric character after the URL (or empty at the end of input).
    private static final Pattern2 URL_SURROUND_PATTERN = new Pattern2(Pattern.compile(
            "([^Ѐ-џA-Za-z\\d]|^)(" + URL_PATTERN_STR + ")([^Ѐ-џA-Za-z\\d]|$)"));

    // Same expression that appendSchemaIfMissing used to pass to String.matches, compiled once.
    private static final Pattern SCHEMA_PREFIX_PATTERN = Pattern.compile("^[a-zA-Z]+://.*");

    /**
     * Splits the text into plain {@link Text} nodes and {@code <a>} elements for every e-mail
     * address and URL found in it.
     *
     * <p>E-mails are extracted first; URLs are then searched only in the remaining text pieces,
     * so the host part of an e-mail address is never turned into a separate link.
     *
     * @param text plain text to process
     * @return text fragments and anchors in their original order
     */
    public static ListF<Content> replace(String text) {
        ListF<Content> result = Cf.arrayList();

        for (Content content : replaceEmails(text)) {
            if (content instanceof Text) {
                result.addAll(replaceLinks(((Text) content).getText()));
            } else {
                // already an e-mail anchor, keep as is
                result.add(content);
            }
        }
        return result;
    }

    /**
     * Prepends {@code http://} to the link unless it already starts with {@code <letters>://}.
     *
     * @param link link with or without a scheme
     * @return the link itself if it has a scheme, otherwise the link prefixed with {@code http://}
     */
    public static String appendSchemaIfMissing(String link) {
        return SCHEMA_PREFIX_PATTERN.matcher(link).matches() ? link : "http://" + link;
    }

    /**
     * Replaces every URL found in the text with an anchor whose {@code href} is the ASCII form of
     * the URL (with a scheme) and whose content is the URL exactly as written in the text.
     */
    private static ListF<Content> replaceLinks(String text) {
        ListF<Content> result = Cf.arrayList();
        Matcher2 m = URL_SURROUND_PATTERN.matcher2(text);

        int pos = 0;

        while (m.find(pos)) {
            String link = m.group(2).get();
            // the leading boundary character (group 1) belongs to the preceding text
            result.add(new Text(text.substring(pos, m.start() + m.group(1).get().length())));
            result.add(createAnchor(toAsciiUrlWithSchema(link), link));

            // leave the trailing boundary character (group 3) unconsumed,
            // so it can act as the leading boundary of the next URL
            pos = m.end() - m.group(3).get().length();
        }
        if (pos < text.length()) {
            result.add(new Text(text.substring(pos)));
        }
        return result;
    }

    /**
     * Replaces every e-mail address found in the text with a {@code mailto:} anchor.
     */
    private static ListF<Content> replaceEmails(String text) {
        ListF<Content> result = Cf.arrayList();
        Matcher2 m = EMAIL_PATTERN.matcher2(text);

        int pos = 0;

        while (m.find(pos)) {
            String email = m.group(0).get();
            result.add(new Text(text.substring(pos, m.start())));
            result.add(createAnchor("mailto:" + email, email));

            pos = m.end();
        }
        if (pos < text.length()) {
            result.add(new Text(text.substring(pos)));
        }
        return result;
    }

    /**
     * Builds an {@code <a href="href">text</a>} element.
     */
    private static Element createAnchor(String href, String text) {
        Element anchor = new Element("a");
        anchor.setAttribute("href", href);
        anchor.setContent(new Text(text));

        return anchor;
    }

    /**
     * Passes every character outside the 32..126 range through {@link UrlUtils#urlEncode} and
     * copies all other characters unchanged (so existing {@code %XX} escapes, spaces and URL
     * delimiters are preserved).
     *
     * <p>The input is walked by Unicode code point rather than by {@code char}: a supplementary
     * character is handed to the encoder as a whole surrogate pair instead of two lone
     * surrogates, which cannot be encoded correctly on their own.
     */
    private static String encodeUrlPart(String input) {
        StringBuilder encoded = new StringBuilder(input.length());
        int index = 0;
        while (index < input.length()) {
            int codePoint = input.codePointAt(index);
            int next = index + Character.charCount(codePoint);

            if (codePoint > 126 || codePoint < 32) {
                // NOTE(review): assumes UrlUtils.urlEncode percent-encodes the given string as a whole
                encoded.append(UrlUtils.urlEncode(input.substring(index, next)));
            } else {
                encoded.append((char) codePoint);
            }
            index = next;
        }
        return encoded.toString();
    }

    /**
     * Same as {@link #toAsciiUrl(String)}, but first adds {@code http://} to a link without a scheme.
     *
     * @param link link with or without a scheme
     * @return ASCII form of the link, or the link with a scheme as is if it cannot be parsed
     */
    public static String toAsciiUrlWithSchema(String link) {
        return toAsciiUrl(appendSchemaIfMissing(link));
    }

    /**
     * Converts a URL to a form containing only ASCII characters: the host is converted with
     * {@link IDN#toASCII(String)}, while non-ASCII and control characters of the path, query and
     * fragment are URL-encoded.
     *
     * <p>If the host cannot be converted it is kept unchanged. The result is rebuilt from the
     * protocol, host, port, file and fragment only.
     * NOTE(review): user-info ({@code user:password@}) of the original URL does not appear to be
     * carried over to the result; confirm whether callers can pass such URLs.
     *
     * @param link absolute URL
     * @return ASCII form of the URL, or {@code link} itself if it is not a well-formed URL
     */
    public static String toAsciiUrl(String link) {
        try {
            URL url = new URL(link);

            String punycodedHost = Try.tryCatchException(() -> IDN.toASCII(url.getHost())).getOrElse(url.getHost());
            String encodedFile = encodeUrlPart(url.getFile());

            // the fragment is not part of getFile(), so it has to be encoded separately
            String ref = url.getRef();
            String encodedRefSuffix = ref == null ? "" : "#" + encodeUrlPart(ref);

            return new URL(url.getProtocol(), punycodedHost, url.getPort(), encodedFile + encodedRefSuffix).toString();

        } catch (MalformedURLException e) {
            // best effort: a link that cannot be parsed is returned untouched
            return link;
        }
    }
}
