package ru.yandex.wmconsole.servantlet;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;

import org.apache.commons.io.IOUtils;
import org.apache.http.entity.ContentType;
import org.jdom.Document;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.output.DOMOutputter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Required;

import ru.yandex.common.framework.core.ServRequest;
import ru.yandex.common.framework.core.ServResponse;
import ru.yandex.common.util.XmlUtils;
import ru.yandex.common.util.xml.OutputMethod;
import ru.yandex.webmaster.viewer.normalizer.HtmlNormalizerService;
import ru.yandex.wmconsole.data.info.SpellErrorInfo;
import ru.yandex.wmconsole.data.info.SpellResultInfo;
import ru.yandex.wmconsole.data.wrappers.InTagWrapper;
import ru.yandex.wmconsole.data.wrappers.SpellResultInfoWrapper;
import ru.yandex.wmconsole.data.wrappers.StringWrapper;
import ru.yandex.wmconsole.service.HtmlHighlighterService;
import ru.yandex.wmconsole.service.SpellerService;
import ru.yandex.wmconsole.service.error.WMCUserProblem;
import ru.yandex.wmtools.common.error.InternalException;
import ru.yandex.wmtools.common.error.UserException;
import ru.yandex.wmtools.common.servantlet.AbstractServantlet;
import ru.yandex.wmtools.common.sita.DocumentFormatEnum;
import ru.yandex.wmtools.common.sita.SitaException;
import ru.yandex.wmtools.common.sita.SitaRedirectService;
import ru.yandex.wmtools.common.sita.SitaRequestTimeout;
import ru.yandex.wmtools.common.sita.SitaUrlFetchRequest;
import ru.yandex.wmtools.common.sita.SitaUrlFetchRequestBuilder;
import ru.yandex.wmtools.common.sita.SitaUrlFetchResponse;
import ru.yandex.wmtools.common.sita.UserAgentEnum;
import ru.yandex.wmtools.common.util.http.YandexHttpStatus;

/**
 * Servantlet that downloads the HTML page given in the {@code checkurl} request parameter,
 * normalizes it, runs it through the spell checker and adds to the response both the spelling
 * result and the page HTML with the misspelled words highlighted.
 *
 * <p>Any failure to download or parse the page is reported to the caller as a
 * {@link UserException} with {@link WMCUserProblem#CANT_GET_HTML}.
 */
public class SpellCheckServantlet extends AbstractServantlet {
    private static final Logger log = LoggerFactory.getLogger(SpellCheckServantlet.class);

    private static final String PARAM_URL = "checkurl";
    private static final String CONTENT_TYPE = "text/html";

    private SpellerService spellerService;
    private HtmlHighlighterService htmlHighlighterService;
    private HtmlNormalizerService htmlNormalizerService;

    private SitaRedirectService sitaRedirectService;

    // Passed through to HtmlHighlighterService.highlightHtml unchanged.
    private String webmasterSpellchecker;

    /**
     * Downloads the page from the {@code checkurl} parameter, spell-checks it and adds
     * a {@link SpellResultInfoWrapper} and an {@code html-text} element with the highlighted
     * HTML to the response. Nothing is added to the response when the downloaded body is empty.
     *
     * @param req request carrying the required {@code checkurl} parameter
     * @param res response that receives the spell-check result and the highlighted HTML
     * @throws UserException if the page cannot be downloaded or parsed
     * @throws InternalException propagated from the underlying services
     */
    @Override
    protected void doProcess(ServRequest req, ServResponse res) throws UserException, InternalException {
        checkService(spellerService, SpellerService.class);
        checkService(htmlHighlighterService, HtmlHighlighterService.class);
        checkService(htmlNormalizerService, HtmlNormalizerService.class);
        checkService(sitaRedirectService, SitaRedirectService.class);

        try {
            URL url = prepareUrl(getRequiredStringParam(req, PARAM_URL), true);

            byte[] bytes;
            // Close the content stream as soon as the body has been read.
            try (InputStream inputStream = downloadContent(url)) {
                bytes = IOUtils.toByteArray(inputStream);
            }
            log.debug("Document dowloaded");

            if (bytes.length == 0) {
                // Empty body: there is nothing to check, the response is left without data.
                return;
            }

            String html = htmlNormalizerService.normalizeHtml(bytes, url);
            log.debug("Document passed through normalizeHtml");

            Document doc = parseHtml(html);
            SpellResultInfo errorInfo = spellerService.checkHTMLSpelling(doc);
            log.debug("Document passed through checkHTMLSpelling");

            Document highlightedDoc = htmlHighlighterService.highlightHtml(
                    url, doc, collectErrorWords(errorInfo), webmasterSpellchecker);
            org.w3c.dom.Document highlightedDom = new DOMOutputter().output(highlightedDoc);
            log.debug("Document passed through highlightHtml");
            String highlightedHtml = XmlUtils.xmlToString(highlightedDom, "UTF-8", OutputMethod.HTML);

            res.addData(new SpellResultInfoWrapper(errorInfo));
            res.addData(new InTagWrapper<StringWrapper>(
                    "html-text", new StringWrapper(highlightedHtml, "content")));
        } catch (IOException e) {
            log.debug("IOException in spellCheck", e);
            throw new UserException(WMCUserProblem.CANT_GET_HTML, "Error downloading html", e);
        } catch (JDOMException e) {
            log.debug("JDOMException in spellCheck", e);
            throw new UserException(WMCUserProblem.CANT_GET_HTML, "Error parsing html", e);
        }
    }

    /**
     * Parses normalized HTML into a JDOM document.
     *
     * <p>The markup originates from an arbitrary remote page, so loading of external DTDs
     * and resolution of external entities are switched off on the parser.
     */
    private Document parseHtml(String html) throws JDOMException, IOException {
        SAXBuilder builder = new SAXBuilder();
        // Prevent DTD load
        builder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        // Prevent external entity resolution (XXE) for untrusted content
        builder.setFeature("http://xml.org/sax/features/external-general-entities", false);
        builder.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        return builder.build(new StringReader(cleanupHtml(html)));
    }

    /** Collects the error string of every spelling error in the given result. */
    private Collection<String> collectErrorWords(SpellResultInfo errorInfo) {
        Collection<String> errors = new ArrayList<>();
        for (SpellErrorInfo info : errorInfo.getErrors()) {
            errors.add(info.getError());
        }
        return errors;
    }

    /**
     * Fetches the page through Sita, following redirects, and returns its content stream.
     * The caller is responsible for closing the returned stream.
     *
     * @param url address of the page to download
     * @return stream with the content of the downloaded document
     * @throws UserException if the final HTTP status is not 200, the content type is missing
     *         or is not {@code text/html}, or Sita fails to fetch the page
     * @throws InternalException declared for callers; propagated from the underlying services
     */
    private InputStream downloadContent(URL url) throws InternalException, UserException {
        SitaUrlFetchRequest sitaUrlFetchRequest = new SitaUrlFetchRequestBuilder(url)
                .setUserAgent(UserAgentEnum.WEBMASTER)
                .setDocumentFormat(DocumentFormatEnum.DF_HTTP_RESPONSE)
                .setRequestTimeout(SitaRequestTimeout._15_SECONDS)
                .createSitaUrlFetchRequest();
        try {
            SitaUrlFetchResponse sitaUrlFetchResponse = sitaRedirectService.followRedirects(sitaUrlFetchRequest);
            if (sitaUrlFetchResponse.getSitaHttpStatus() != YandexHttpStatus.HTTP_200_OK) {
                log.error("Wrong http status: {}", sitaUrlFetchResponse.getSitaHttpStatus());
                throw new UserException(WMCUserProblem.CANT_GET_HTML, "Wrong http status");
            }
            org.apache.http.HttpResponse document = sitaUrlFetchResponse.getDocument();
            // ContentType.get returns null when the entity or its Content-Type header is absent.
            ContentType contentType = document == null ? null : ContentType.get(document.getEntity());
            // MIME type names are case-insensitive.
            if (contentType == null || !CONTENT_TYPE.equalsIgnoreCase(contentType.getMimeType())) {
                throw new UserException(WMCUserProblem.CANT_GET_HTML, "Invalid content type");
            }
            return sitaUrlFetchResponse.getDocumentContentStream();
        } catch (SitaException e) {
            throw new UserException(WMCUserProblem.CANT_GET_HTML, "Unable to download content", e);
        }
    }

    /**
     * Neutralizes the script statement that makes a Yandex page jump out of the speller frame
     * by replacing it with an empty statement.
     */
    private String cleanupHtml(String html) {
        return html.replace("self.parent.location=document.location", ";");
    }

    @Required
    public void setSpellerService(SpellerService spellerService) {
        this.spellerService = spellerService;
    }

    @Required
    public void setHtmlHighlighterService(HtmlHighlighterService htmlHighlighterService) {
        this.htmlHighlighterService = htmlHighlighterService;
    }

    @Required
    public void setHtmlNormalizerService(HtmlNormalizerService htmlNormalizerService) {
        this.htmlNormalizerService = htmlNormalizerService;
    }

    @Required
    public void setSitaRedirectService(SitaRedirectService sitaRedirectService) {
        this.sitaRedirectService = sitaRedirectService;
    }

    @Required
    public void setWebmasterSpellchecker(String webmasterSpellchecker) {
        this.webmasterSpellchecker = webmasterSpellchecker;
    }
}
