package ru.yandex.wmconsole.service;

import java.io.IOException;
import java.io.StringReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Text;
import org.jdom.filter.ContentFilter;
import org.jdom.filter.ElementFilter;
import org.jdom.input.SAXBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Required;

import ru.yandex.common.util.XmlUtils;
import ru.yandex.wmtools.common.error.InternalException;
import ru.yandex.wmtools.common.error.InternalProblem;
import ru.yandex.wmtools.common.service.IService;

/**
 * Post-processes an already parsed HTML document (a JDOM tree) before it is shown to the user:
 * <ul>
 *   <li>rewrites {@code <script src>} and {@code <link rel="stylesheet" href>} URLs so that they
 *       start with the configured proxy prefix;</li>
 *   <li>wraps every word contained in the highlight collection into a styled {@code <span>};</li>
 *   <li>optionally redirects {@code <a href>} links to the spell checker URL;</li>
 *   <li>inserts the configured style and header fragments at the top of the body.</li>
 * </ul>
 * The document passed in is modified in place.
 */
public class HtmlHighlighterService implements IService {
    private static final Logger log = LoggerFactory.getLogger(HtmlHighlighterService.class);

    /** Inline CSS put on the {@code <span>} that wraps each highlighted word. */
    public static final String SPAN_STYLE = "FONT-SIZE: 100%; COLOR: #000000; BACKGROUND-COLOR: #ffff00";
    private static final String WORD_PATTERN = "[\\p{L}\\d_\\-]+";
    /** Compiled once: {@link Pattern} instances are immutable and can be shared. */
    private static final Pattern WORD = Pattern.compile(WORD_PATTERN);
    /** Hosts in these domains get {@link #yaheader} instead of {@link #header}. */
    private static final String[] YANDEX_DOMAINS = {"yandex.ru", "yandex.net", "yandex.ua"};
    /** Hosts in these domains get neither header nor style inserted. */
    private static final String[] NO_HEADER_DOMAINS = {"hghltd.yandex.net"};
    /** Longer {@code <input value>} attributes are cut to this length and suffixed with "...". */
    private static final int MAX_INPUT_VALUE_LENGTH = 10000;

    private String style;
    private String header;
    private String yaheader;
    private String proxyPrefix;

    /**
     * Highlights words in the document, rewrites resource/anchor links and inserts the header.
     *
     * @param documentUrl           URL the document was loaded from; used to resolve relative
     *                              resource links and to decide which header (if any) to insert
     * @param doc                   parsed document; modified in place
     * @param highlight             words to highlight (matched with {@code Collection.contains},
     *                              i.e. exact, case-sensitive for string collections)
     * @param webmasterSpellchecker if not {@code null}, every {@code <a href>} is redirected to
     *                              this URL with the original href in the {@code checkurl} parameter
     * @return the same {@code doc} instance
     * @throws InternalException if the configured header or style fragment cannot be parsed
     */
    public Document highlightHtml(URL documentUrl, Document doc, Collection<String> highlight, String webmasterSpellchecker)
            throws InternalException
    {
        Element body = findBody(doc);
        URI documentUri = null;
        try {
            documentUri = documentUrl.toURI();
        } catch (URISyntaxException e) {
            // Best effort: without a base URI only absolute resource links can be proxied.
            log.warn("Unable to convert document URL to URI: {}", documentUrl);
        }

        convertLinksToProxy(doc, documentUri);
        htmlHighlight(body, highlight, webmasterSpellchecker);

        String docHost = documentUrl.getHost();
        if (!belongsToAnyDomain(docHost, NO_HEADER_DOMAINS)) {
            insertHeaderAndFooter(body, belongsToAnyDomain(docHost, YANDEX_DOMAINS));
        }
        return doc;
    }

    /**
     * Tells whether {@code host} equals one of {@code domains} or is a subdomain of one of them.
     * The comparison ignores case and respects label boundaries, so {@code notyandex.ru} is
     * not treated as belonging to {@code yandex.ru}.
     */
    private static boolean belongsToAnyDomain(String host, String[] domains) {
        if (host == null) {
            return false;
        }
        for (String domain : domains) {
            if (host.equalsIgnoreCase(domain)) {
                return true;
            }
            // Index of the '.' that must separate the subdomain part from the domain.
            int dotIndex = host.length() - domain.length() - 1;
            if (dotIndex > 0
                    && host.charAt(dotIndex) == '.'
                    && host.regionMatches(true, dotIndex + 1, domain, 0, domain.length()))
            {
                return true;
            }
        }
        return false;
    }

    /**
     * Rewrites the {@code src} of every {@code <script>} and the {@code href} of every
     * {@code <link rel="stylesheet">} in the document to its proxied form.
     */
    private void convertLinksToProxy(Document doc, URI documentUri) {
        Iterator descendants = doc.getDescendants(new ElementFilter("script").or(new ElementFilter("link")));
        while (descendants.hasNext()) {
            Element el = (Element) descendants.next();
            String name = el.getName();
            if ("script".equals(name)) {
                proxyAttribute(el, "src", documentUri);
            } else if ("link".equals(name) && "stylesheet".equalsIgnoreCase(el.getAttributeValue("rel"))) {
                proxyAttribute(el, "href", documentUri);
            }
        }
    }

    /** Replaces a non-empty URL attribute of {@code el} with its proxied form. */
    private void proxyAttribute(Element el, String attributeName, URI documentUri) {
        String value = el.getAttributeValue(attributeName);
        if (!StringUtils.isEmpty(value)) {
            el.setAttribute(attributeName, convertLinkToProxyUrl(value, documentUri));
        }
    }

    /**
     * Inserts the configured fragments at the beginning of {@code body}. The style fragment is
     * added after the header and therefore ends up in front of it.
     *
     * @param isYandexHost selects {@link #yaheader} instead of {@link #header}
     * @throws InternalException if a fragment is not well-formed XML or cannot be read
     */
    private void insertHeaderAndFooter(Element body, boolean isYandexHost) throws InternalException {
        try {
            List content = body.getContent();

            // Only the header that is actually going to be used has to be configured.
            String headerXml = isYandexHost ? yaheader : header;
            if (headerXml != null) {
                Document headerDoc = new SAXBuilder().build(new StringReader(headerXml));
                Element headerElement = headerDoc.detachRootElement();
                content.add(0, headerElement);
            }

            if (style != null) {
                Document styleDoc = new SAXBuilder().build(new StringReader(style));
                Element styleElement = styleDoc.detachRootElement();
                content.add(0, styleElement);
            }
        } catch (IOException e) {
            throw new InternalException(InternalProblem.YANDEX_SPELLER_ERROR, "Can't insert html header", e);
        } catch (JDOMException e) {
            throw new InternalException(InternalProblem.YANDEX_SPELLER_ERROR, "Can't insert html header", e);
        }
    }

    /**
     * Returns the {@code body} child of an {@code html} root element, or the root element itself
     * when there is no such child. Element names are compared by local name ignoring case, so
     * upper-case markup and namespaced XHTML are handled the same way as plain lower-case HTML.
     */
    private Element findBody(Document doc) {
        Element htmlNode = doc.getRootElement();
        if ("html".equalsIgnoreCase(htmlNode.getName())) {
            for (Iterator children = htmlNode.getChildren().iterator(); children.hasNext();) {
                Element child = (Element) children.next();
                if ("body".equalsIgnoreCase(child.getName())) {
                    return child;
                }
            }
        }
        return htmlNode;
    }

    /**
     * Recursively processes a node of the document tree.
     * Text nodes get their highlighted words wrapped into spans; elements get their attributes
     * adjusted and (except for {@code script} and {@code style}) their children processed.
     *
     * @return the number of sibling nodes inserted in front of {@code root}, so that the caller
     *         iterating over the parent's content can skip them
     */
    private int htmlHighlight(Object root, Collection<String> highlight, String spellerUrl) {
        if (root instanceof Text) {
            return highlightText((Text) root, highlight);
        }
        if (root instanceof Element) {
            Element node = (Element) root;
            adjustElementAttributes(node, spellerUrl);
            String nodeName = node.getName();
            if (!("script".equalsIgnoreCase(nodeName) || "style".equalsIgnoreCase(nodeName))) {
                List content = node.getContent();
                for (int i = 0; i < content.size(); ++i) {
                    // The call may insert siblings before content.get(i); jump over them.
                    i += htmlHighlight(content.get(i), highlight, spellerUrl);
                }
            }
        }
        return 0;
    }

    /**
     * Splits a text node around every highlighted word. For each hit the text before the word
     * and a {@code <span>} holding the word are inserted in front of {@code node}, and
     * {@code node} keeps the remaining text.
     *
     * @return the number of nodes inserted in front of {@code node} (two per highlighted word)
     */
    private int highlightText(Text node, Collection<String> highlight) {
        if (highlight == null || highlight.isEmpty()) {
            return 0;
        }
        int inserted = 0;
        String nodeValue = node.getText();
        Matcher matcher = WORD.matcher(nodeValue);
        while (matcher.find()) {
            String word = matcher.group();
            if (!highlight.contains(word)) {
                continue;
            }
            int pos = matcher.start();
            Element spanNode = new Element("span");
            spanNode.setAttribute("style", SPAN_STYLE);
            spanNode.setText(word);

            Text firstNode = (Text) node.clone();
            firstNode.setText(nodeValue.substring(0, pos));

            nodeValue = nodeValue.substring(pos + word.length());
            node.setText(nodeValue);

            insertBefore(node.getParentElement(), spanNode, node);
            insertBefore(node.getParentElement(), firstNode, spanNode);
            inserted += 2;
            // Continue scanning in the text that is left in the original node.
            matcher.reset(nodeValue);
        }
        return inserted;
    }

    /**
     * Truncates over-long {@code <input value>} attributes and, when a speller URL is given,
     * redirects {@code <a href>} links to it.
     */
    private void adjustElementAttributes(Element node, String spellerUrl) {
        String nodeName = node.getName();
        if ("input".equalsIgnoreCase(nodeName)) {
            String value = node.getAttributeValue("value");
            if (value != null && value.length() > MAX_INPUT_VALUE_LENGTH) {
                node.setAttribute("value", value.substring(0, MAX_INPUT_VALUE_LENGTH) + "...");
            }
        } else if (spellerUrl != null && "a".equalsIgnoreCase(nodeName)) {
            String url = node.getAttributeValue("href");
            if (url != null) {
                node.setAttribute("target", "_parent");
                // NOTE(review): the original href is appended verbatim - it is neither URL-encoded
                // nor resolved against the document URL. Confirm the speller endpoint expects that.
                node.setAttribute("href", spellerUrl + "?checkurl=" + url);
            }
        }
    }

    /**
     * Builds the proxied form of a resource link: {@code proxyPrefix}, then {@code "https/"} for
     * https resources, then the authority, path and query of the resolved link. The components
     * are copied in their raw (still percent-encoded) form so that escaped characters keep their
     * meaning in the resulting URL.
     *
     * @param link        resource URL as written in the document
     * @param documentUri base URI used to resolve relative links; may be {@code null}
     * @return the proxied URL, or {@code link} unchanged if it cannot be parsed or has no
     *         scheme or authority after resolution
     */
    String convertLinkToProxyUrl(String link, URI documentUri) {
        try {
            URI u;
            if (documentUri != null) {
                u = documentUri.resolve(link).normalize();
            } else {
                u = new URI(link).normalize();
            }

            String protocol = u.getScheme();
            // Relative link without a base URI, or an opaque URI: nothing to proxy.
            String authority = u.getRawAuthority();
            if (protocol == null || authority == null) {
                return link;
            }

            StringBuilder sb = new StringBuilder(proxyPrefix);
            if ("https".equalsIgnoreCase(protocol)) {
                sb.append("https/");
            }
            sb.append(authority);
            if (u.getRawPath() != null) {
                sb.append(u.getRawPath());
            }
            if (u.getRawQuery() != null) {
                sb.append('?').append(u.getRawQuery());
            }
            return sb.toString();
        } catch (Exception e) {
            // Deliberately broad: malformed links in third-party markup must never break rendering
            // (URI.resolve reports them with IllegalArgumentException, the constructor with
            // URISyntaxException).
            log.warn("Unable to parse resource url: {}", link);
            return link;
        }
    }

    /**
     * Inserts {@code newNode} into the content of {@code el} directly before {@code anchorNode}.
     * Does nothing if {@code anchorNode} is not a direct child of {@code el}.
     */
    private void insertBefore(Element el, Object newNode, Object anchorNode) {
        List content = el.getContent();
        int anchorIndex = content.indexOf(anchorNode);
        if (anchorIndex >= 0) {
            content.add(anchorIndex, newNode);
        }
    }

    /** Sets the XML-escaped style fragment; it is unescaped here and parsed as XML on use. */
    @Required
    public void setStyle(String style) {
        this.style = XmlUtils.unescapeXml(style);
    }

    /** Sets the XML-escaped header fragment used for non-Yandex hosts. */
    @Required
    public void setHeader(String header) {
        this.header = XmlUtils.unescapeXml(header);
    }

    /** Sets the XML-escaped header fragment used for hosts in the Yandex domains. */
    @Required
    public void setYaheader(String yaheader) {
        this.yaheader = XmlUtils.unescapeXml(yaheader);
    }

    /** Sets the prefix that proxied resource URLs start with. */
    @Required
    public void setProxyPrefix(String proxyPrefix) {
        this.proxyPrefix = proxyPrefix;
    }
}
