package ru.yandex.search.disk.proxy.suggest;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.text.StringEscapeUtils;

import ru.yandex.io.StringBuilderWriter;
import ru.yandex.search.request.util.SearchRequestText;

/**
 * {@link Highlighter} producing HTML: parts of the text that match the
 * search request are wrapped into
 * {@code <span class="disk-search-highlight">...</span>} and every piece of
 * the text is HTML-escaped.
 *
 * <p>Matching is done per word (maximal alphanumeric run) and ignores case.
 */
public enum HtmlHighlighter implements Highlighter {
    INSTANCE;

    // Maximal runs of letters/digits; everything between them is a separator
    private static final Pattern WORDS =
        Pattern.compile("[\\p{Alnum}]+", Pattern.UNICODE_CHARACTER_CLASS);
    private static final String START =
        "<span class=\"disk-search-highlight\">";
    private static final String END = "</span>";

    /**
     * Highlights occurrences of the request words in the text.
     *
     * <p>A text word equal (ignoring case) to any word of the request words
     * or phrases is highlighted completely. If the request is reported as
     * incomplete by {@code SearchRequestText.incompleteRequest}, the last
     * request word is treated as a prefix and only the matching beginning of
     * a text word is highlighted.
     *
     * @param request raw search request
     * @param text text to highlight
     * @return HTML-escaped text with highlighting spans inserted
     */
    @Override
    public String highlight(final String request, final String text) {
        SearchRequestText.SuggestCollector collector =
            new SearchRequestText.SuggestCollector();
        SearchRequestText.parse(
            SearchRequestText.normalizeSuggest(request),
            collector);
        // Must run before collecting the words: it removes the prefix word
        // from the collector's word list
        String prefix = extractPrefix(request, collector);
        Set<String> words = new HashSet<>();
        for (String word: collector.words()) {
            addWords(words, word);
        }
        // TODO: something better for phrases matching
        for (String phrase: collector.phrases()) {
            addWords(words, phrase);
        }
        List<String> textWords = split(WORDS, text);
        StringBuilderWriter sbw = new StringBuilderWriter();
        try {
            for (int i = 0; i < textWords.size(); ++i) {
                String part = textWords.get(i);
                if ((i & 1) == 0) {
                    // Even positions hold separators, never highlighted
                    StringEscapeUtils.ESCAPE_HTML4.translate(part, sbw);
                } else {
                    appendWord(sbw, part, words, prefix);
                }
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return sbw.toString();
    }

    /**
     * Detaches the word being typed from the collector and returns it
     * lower-cased, so it can be matched as a prefix.
     *
     * <p>The last collected token is split into alphanumeric subwords; all
     * of them but the last one are put back as complete words.
     *
     * @return lower-cased prefix, or {@code null} if the request is not
     *     incomplete, the last collected group is not the plain words list,
     *     or there is no word to take the prefix from
     */
    private static String extractPrefix(
        final String request,
        final SearchRequestText.SuggestCollector collector)
    {
        List<String> last = collector.last();
        // Identity check: proceed only if the last group is the words list
        // itself, so removals below also affect collector.words()
        if (!SearchRequestText.incompleteRequest(request)
            || last != collector.words()
            || last.isEmpty())
        {
            return null;
        }
        List<String> subwords = split(WORDS, last.remove(last.size() - 1));
        for (int i = 1; i < subwords.size(); i += 2) {
            last.add(subwords.get(i));
        }
        if (last.isEmpty()) {
            // The only token had no alphanumeric characters: removing once
            // more would throw IndexOutOfBoundsException
            return null;
        }
        return toLowerCase(last.remove(last.size() - 1));
    }

    /**
     * Adds all lower-cased alphanumeric subwords of the text to the set.
     */
    private static void addWords(final Set<String> words, final String text) {
        List<String> parts = split(WORDS, text);
        for (int i = 1; i < parts.size(); i += 2) {
            words.add(toLowerCase(parts.get(i)));
        }
    }

    /**
     * Writes a single escaped text word, highlighting it completely on exact
     * match or highlighting only its beginning on prefix match.
     */
    private static void appendWord(
        final StringBuilderWriter sbw,
        final String word,
        final Set<String> words,
        final String prefix)
        throws IOException
    {
        String lowerCased = toLowerCase(word);
        if (words.contains(lowerCased)) {
            sbw.append(START);
            StringEscapeUtils.ESCAPE_HTML4.translate(word, sbw);
            sbw.append(END);
        } else if (prefix != null && lowerCased.startsWith(prefix)) {
            // toLowerCase() keeps the length, so prefix.length() is a valid
            // split point in the original word as well
            int len = prefix.length();
            sbw.append(START);
            StringEscapeUtils.ESCAPE_HTML4.translate(
                word.substring(0, len),
                sbw);
            sbw.append(END);
            StringEscapeUtils.ESCAPE_HTML4.translate(
                word.substring(len),
                sbw);
        } else {
            StringEscapeUtils.ESCAPE_HTML4.translate(word, sbw);
        }
    }

    /**
     * Splits the text into alternating separators and pattern matches.
     *
     * <p>Even indices of the result hold the text between matches (possibly
     * empty), odd indices hold the matches. A trailing separator is added
     * only if it is non-empty. If nothing matches, the result is a single
     * element list containing the whole text.
     */
    private static List<String> split(
        final Pattern pattern,
        final String text)
    {
        Matcher matcher = pattern.matcher(text);
        if (!matcher.find()) {
            return Collections.singletonList(text);
        }
        List<String> parts = new ArrayList<>();
        int end = 0;
        do {
            int start = matcher.start();
            parts.add(text.substring(end, start));
            end = matcher.end();
            parts.add(text.substring(start, end));
        } while (matcher.find());
        if (end < text.length()) {
            parts.add(text.substring(end));
        }
        return parts;
    }

    /**
     * Lower-cases the text one {@code char} at a time, going through upper
     * case first.
     *
     * <p>Unlike {@link String#toLowerCase()}, the result always has the same
     * length as the input, so indices are interchangeable between them.
     */
    private static String toLowerCase(final String text) {
        int len = text.length();
        char[] buf = new char[len];
        for (int i = 0; i < len; ++i) {
            buf[i] =
                Character.toLowerCase(Character.toUpperCase(text.charAt(i)));
        }
        return new String(buf);
    }
}

