package ru.yandex.wmconsole.service;

import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.cyberneko.html.parsers.SAXParser;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.Text;
import org.springframework.beans.factory.annotation.Required;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import ru.yandex.common.util.concurrent.CommonThreadFactory;
import ru.yandex.wmconsole.data.info.SpellErrorInfo;
import ru.yandex.wmconsole.data.info.SpellResultInfo;
import ru.yandex.wmtools.common.error.InternalException;
import ru.yandex.wmtools.common.error.InternalProblem;
import ru.yandex.wmtools.common.service.IService;
import ru.yandex.wmtools.common.util.HttpConnector;
import ru.yandex.wmtools.common.util.HttpResponse;

/**
 * Sends text to the HTTP speller endpoint configured through {@link #setSpellerUrl(URL)}
 * and collects the reported spelling errors.
 *
 * <p>Long texts are cut into chunks that are checked in parallel on an internal
 * fixed-size thread pool created by {@link #init()}; {@code init()} must therefore be
 * called (after the setters) before any check method is used.
 *
 * <p>NOTE(review): nothing in this class shuts the executor down; presumably the threads
 * produced by {@code CommonThreadFactory(true, ...)} are daemon threads - confirm.
 */
public class SpellerService implements IService {
    /** Texts whose UTF-8 encoding is longer than this many bytes are split into chunks. */
    private static final int MAX_TEXT_LENGTH = 10 * 1024;

    /**
     * Maximum chunk size in {@code char}s. Also the character count below which a text is
     * never split (the UTF-8 size is not even computed for such texts).
     * NOTE(review): a chunk of this many chars can still exceed MAX_TEXT_LENGTH bytes when
     * most characters need three UTF-8 bytes - confirm whether the endpoint cares.
     */
    private static final int MAX_CHUNK_CHARS = MAX_TEXT_LENGTH / 2;

    private URL spellerUrl;
    private int downloadThreads;

    private ExecutorService executor;

    /**
     * Creates the worker pool with {@code downloadThreads} threads. Thread names are
     * prefixed with the simple class name.
     */
    public void init() {
        CommonThreadFactory threadFactory = new CommonThreadFactory(true, SpellerService.class.getSimpleName() + "-");
        executor = Executors.newFixedThreadPool(downloadThreads, threadFactory);
    }

    /**
     * Checks the spelling of a plain text.
     *
     * <p>The text is split into chunks, every chunk is posted to the speller in its own
     * task, and the per-chunk errors are appended to the result in chunk order.
     *
     * @param text text to check; must not be {@code null}
     * @return the errors reported for all chunks
     * @throws InternalException with {@link InternalProblem#YANDEX_SPELLER_ERROR} if any
     *         request fails or the calling thread is interrupted while waiting
     */
    public SpellResultInfo checkSpelling(String text) throws InternalException {
        List<String> textChunks = splitTextToChunks(text);

        Collection<Callable<Collection<SpellErrorInfo>>> callables =
                new ArrayList<Callable<Collection<SpellErrorInfo>>>(textChunks.size());
        for (final String textChunk : textChunks) {
            callables.add(new Callable<Collection<SpellErrorInfo>>() {
                @Override
                public Collection<SpellErrorInfo> call() throws Exception {
                    return requestErrors(textChunk);
                }
            });
        }

        SpellResultInfo spellResult = new SpellResultInfo();

        try {
            List<Future<Collection<SpellErrorInfo>>> futures = executor.invokeAll(callables);
            for (Future<Collection<SpellErrorInfo>> future : futures) {
                spellResult.append(future.get());
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so that callers further up can still observe it.
            Thread.currentThread().interrupt();
            throw new InternalException(InternalProblem.YANDEX_SPELLER_ERROR, "Can not get answer from speller.yandex.net", e);
        } catch (ExecutionException e) {
            throw new InternalException(InternalProblem.YANDEX_SPELLER_ERROR, "Can not get answer from speller.yandex.net", e.getCause());
        }

        return spellResult;
    }

    /**
     * Checks the spelling of the text content of a parsed HTML document.
     *
     * @param doc document whose root element is scanned with {@link #extractText(Element)}
     * @return the spelling result, or {@link SpellResultInfo#EMPTY_SPELL_RESULT} when the
     *         document contains no text to check
     * @throws InternalException if the speller cannot be queried
     */
    public SpellResultInfo checkHTMLSpelling(Document doc) throws InternalException {
        StringBuilder sumStr = extractText(doc.getRootElement());
        if (sumStr.length() > 0) {
            return checkSpelling(sumStr.toString());
        }
        return SpellResultInfo.EMPTY_SPELL_RESULT;
    }

    /**
     * Posts one chunk to the speller and parses the response into a list of errors.
     *
     * @param textChunk text to send as the {@code text} form parameter
     * @return the errors extracted from the response
     * @throws Exception on any HTTP or parsing failure
     */
    private Collection<SpellErrorInfo> requestErrors(String textChunk) throws Exception {
        HttpResponse httpResponse = new HttpConnector.RequestBuilder(spellerUrl)
                                .method(HttpConnector.HttpMethod.POST)
                                .header("Content-Type", "application/x-www-form-urlencoded")
                                .entity("srv=wmconsole&text=" + URLEncoder.encode(textChunk, "UTF-8"), "UTF-8")
                                .okStatusRequired(true).execute();

        SpellResultHandler handler = new SpellResultHandler();
        SAXParser saxParser = new SAXParser();
        saxParser.setContentHandler(handler);
        saxParser.parse(new InputSource(httpResponse.getContent()));
        return handler.getErrors();
    }

    /**
     * Splits a text into pieces small enough to be sent in separate requests.
     *
     * <p>A text of at most {@link #MAX_CHUNK_CHARS} characters, or whose UTF-8 encoding
     * fits into {@link #MAX_TEXT_LENGTH} bytes, is returned unchanged as a single element.
     * Otherwise the text is cut into trimmed chunks of at most {@code MAX_CHUNK_CHARS}
     * characters. Cuts are made at the last whitespace inside the window when there is
     * one; a window without whitespace is cut at the size limit so that no text is lost.
     * Chunks that are empty after trimming are omitted.
     *
     * @param text text to split; must not be {@code null}
     * @return the chunks in their original order
     */
    private List<String> splitTextToChunks(String text) {
        if (text.length() <= MAX_CHUNK_CHARS || utf8Length(text) <= MAX_TEXT_LENGTH) {
            return Collections.singletonList(text);
        }

        List<String> chunks = new ArrayList<String>();
        final int length = text.length();
        int start = 0;
        while (start < length) {
            int end = start + MAX_CHUNK_CHARS;
            if (end >= length) {
                // The remainder fits into one chunk: always emit the tail.
                end = length;
            } else {
                // Look backwards for a whitespace to cut at; it must lie strictly after
                // 'start' so that every iteration makes progress.
                int split = end;
                while (split > start && !Character.isWhitespace(text.charAt(split))) {
                    split--;
                }
                if (split > start) {
                    end = split;
                } else if (Character.isHighSurrogate(text.charAt(end - 1))) {
                    // Hard cut: do not separate the two halves of a surrogate pair.
                    end--;
                }
            }

            String chunk = text.substring(start, end).trim();
            if (chunk.length() > 0) {
                chunks.add(chunk);
            }
            start = end;
        }
        return chunks;
    }

    /** Returns the number of bytes in the UTF-8 encoding of {@code text}. */
    private static int utf8Length(String text) {
        try {
            return text.getBytes("UTF-8").length;
        } catch (UnsupportedEncodingException e) {
            // UTF-8 support is mandatory for every Java platform.
            throw new AssertionError(e);
        }
    }

    /**
     * Recursively collects the text below an element.
     *
     * <p>The content of {@code script} and {@code style} elements (names compared
     * case-insensitively) is skipped. Each text node is appended trimmed and followed by a
     * single space, unless the enclosing element is a {@code span} or nothing has been
     * collected for this element so far.
     *
     * @param node element to scan
     * @return the collected text; empty if there is none
     */
    static StringBuilder extractText(Element node) {
        StringBuilder builder = new StringBuilder();
        String name = node.getName();
        if ("script".equalsIgnoreCase(name) || "style".equalsIgnoreCase(name)) {
            return builder;
        }

        boolean separateWithSpace = !"span".equalsIgnoreCase(name);
        List<?> childNodes = node.getContent();
        for (Object childNode : childNodes) {
            if (childNode instanceof Text) {
                builder.append(((Text) childNode).getTextTrim());
                if (builder.length() > 0 && separateWithSpace) {
                    builder.append(" ");
                }
            } else if (childNode instanceof Element) {
                builder.append(extractText((Element) childNode));
            }
        }
        return builder;
    }

    /**
     * SAX handler that turns the speller response into {@link SpellErrorInfo} objects.
     *
     * <p>For every {@code error} element the text of its first {@code word} element and of
     * its first {@code s} element is remembered (element names are matched
     * case-insensitively); when the {@code error} element ends and both are present, one
     * {@code SpellErrorInfo(word, s)} is recorded.
     */
    private static class SpellResultHandler extends DefaultHandler {
        private final Collection<SpellErrorInfo> errors = new ArrayList<SpellErrorInfo>();
        private String currentError;
        private String currentCorrection;
        /**
         * Accumulates contiguous character data for the value being read; {@code null}
         * when no text is pending. A parser may deliver one text run in several
         * {@code characters()} calls, so the value is only finalized at the next element
         * boundary.
         */
        private StringBuilder textBuffer;
        private boolean error;
        private boolean correction;

        /** Returns the errors collected so far. */
        public Collection<SpellErrorInfo> getErrors() {
            return errors;
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            flushText();
            if ("error".equalsIgnoreCase(localName)) {
                currentError = null;
                currentCorrection = null;
            } else if ("word".equalsIgnoreCase(localName)) {
                error = true;
                correction = false;
            } else if ("s".equalsIgnoreCase(localName)) {
                error = false;
                correction = true;
            }
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            boolean wanted = (error && currentError == null) || (correction && currentCorrection == null);
            if (wanted) {
                if (textBuffer == null) {
                    textBuffer = new StringBuilder();
                }
                textBuffer.append(ch, start, length);
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            flushText();
            if ("error".equalsIgnoreCase(localName)) {
                if (currentError != null && currentCorrection != null) {
                    errors.add(new SpellErrorInfo(currentError, currentCorrection));
                    currentError = null;
                    currentCorrection = null;
                }
            }
            error = false;
            correction = false;
        }

        /** Stores the pending text, if any, as the current word or correction. */
        private void flushText() {
            if (textBuffer == null) {
                return;
            }
            if (error) {
                currentError = textBuffer.toString();
            } else if (correction) {
                currentCorrection = textBuffer.toString();
            }
            textBuffer = null;
        }
    }

    /**
     * Sets the URL of the speller endpoint that requests are posted to.
     *
     * @param spellerUrl endpoint URL
     */
    @Required
    public void setSpellerUrl(URL spellerUrl) {
        this.spellerUrl = spellerUrl;
    }

    /**
     * Sets the size of the thread pool created by {@link #init()}.
     *
     * @param downloadThreads number of worker threads
     */
    @Required
    public void setDownloadThreads(int downloadThreads) {
        this.downloadThreads = downloadThreads;
    }
}
