package ru.yandex.webmaster3.core.semantic.semantic_document_parser.microformats.transformer;

import org.apache.html.dom.HTMLDocumentImpl;
import org.apache.xerces.util.XMLAttributesImpl;
import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLDocumentFilter;
import org.cyberneko.html.filters.DefaultFilter;
import org.cyberneko.html.parsers.DOMParser;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import ru.yandex.common.util.URLUtils;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.microformats.spec.Microformat;

import java.io.IOException;
import java.io.StringReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Created by IntelliJ IDEA.
 * User: rasifiel
 * Date: 7/15/11
 * Time: 1:51 PM
 * To change this template use File | Settings | File Templates.
 */
class SAXNekoHtmlContext implements Context<Document> {


    final static Pattern SPLIT_SPACES = Pattern.compile("\\s+");

    private static String[] splitBySpace(final String s) {
        return SPLIT_SPACES.split(s);
    }

    private List<Document> result = null;
    private final String document;
    private final DocumentProperties documentProperties;

    public SAXNekoHtmlContext(final List<String> documents, final DocumentProperties documentProperties) {
        final String doc = documents.get(0);
        final int opening = doc.indexOf('<');
        if (opening > 0) {
            this.document = doc.substring(opening);
        } else if (opening < 0) {
            this.document = "";
        } else {
            this.document = doc;
        }
        this.documentProperties = documentProperties;
    }

    @Override
    public TransformationStep getStep() {
        return TransformationStep.AFTER_SAX;
    }

    private final LinkedList<Document> mfNodes = new LinkedList<Document>();

    private void visit(final Node htmlNode, String baseUrl) {
        if (htmlNode instanceof Element) {
            final Element node = (Element) htmlNode;
            boolean found = false;
            final String classAtt = node.getAttribute("class");
            if (classAtt != null) {
                final String[] classParts = splitBySpace(classAtt);
                for (final String singleClass : classParts) {
                    final Microformat mf = documentProperties.getManager().get(singleClass);
                    if (mf != null && mf.isRoot()) {
                        Document doc = new HTMLDocumentImpl();
                        Node newRoot = doc.adoptNode(node.getParentNode().cloneNode(true));
                        doc.appendChild(newRoot);
                        mfNodes.add(doc);
                        found = true;
                        break;
                    }
                }
            }
            if ("base".equalsIgnoreCase(node.getTagName()) && node.hasAttribute("href")) {
                baseUrl = node.getAttribute("href");
            }
            if (!found) {
                Node child = node.getFirstChild();
                while (child != null) {
                    visit(child, baseUrl);
                    child = child.getNextSibling();
                }
            } else {
                absolutizeTree(node, baseUrl);
            }
        }
    }

    private void absolutizeTree(final Element root, final String baseUrl) {
        if (!"a".equalsIgnoreCase(root.getTagName()) || !"include".equals(root.getAttribute("class"))) {
            absolutizeAtt(root, baseUrl, "src", "href");
        }
        Node child = root.getFirstChild();
        while (child != null) {
            if (child instanceof Element) {
                absolutizeTree((Element) child, baseUrl);
            }
            child = child.getNextSibling();
        }
    }

    private void absolutizeAtt(final Element root, final String baseUrl, final String... atts) {
        for (final String att : atts) {
            if (root.hasAttribute(att)) {
                final String oldUrl = root.getAttribute(att);
                if (!oldUrl.startsWith("mailto:")) {
                    final String newUrl = absolutize(oldUrl, baseUrl);
                    if (!newUrl.equals(oldUrl)) {
                        root.setAttribute(att, newUrl);
                    }
                }
            }
        }
    }

    private String absolutize(final String oldUrl, final String baseUrl) {
        final String fixedOldUrl = URLUtils.cleanUrl(URLUtils.fixUnderencodedURL(oldUrl, Charset.forName("ASCII")));
        final String fixedBaseUrl = URLUtils.fixUnderencodedURL(baseUrl, Charset.forName("ASCII"));
        final URI baseURI;
        try {
            baseURI = new URI(fixedBaseUrl);
        } catch (URISyntaxException e) {
            return fixedOldUrl;
        }
        final URI newURI;
        try {
            newURI = baseURI.resolve(fixedOldUrl);
        } catch (IllegalArgumentException e) {
            return fixedOldUrl;
        }
        if (newURI != null) {
            return newURI.toString();
        } else {
            return fixedOldUrl;
        }
    }

    private static XMLAttributes stripAttributes(XMLAttributes attrs) {
        XMLAttributes cleanAttrs = new XMLAttributesImpl();
        for (int i = 0; i < attrs.getLength(); ++i) {
            String name = attrs.getLocalName(i);
            cleanAttrs.addAttribute(new QName(null, name, name, null), attrs.getType(i), attrs.getValue(i));
        }
        return cleanAttrs;
    }


    @Override
    public List<Document> getInfo() {
        if (result == null) {
            Document root = null;
            DOMParser parser = new DOMParser();
            try {
                parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
                parser.setProperty("http://cyberneko.org/html/properties/filters",
                        new XMLDocumentFilter[]{new DefaultFilter() {
                            public void startElement(QName element, XMLAttributes attrs, Augmentations augs) throws XNIException {
                                element.uri = null;
                                element.prefix = null;
                                super.startElement(element, stripAttributes(attrs), augs);
                            }

                            public void emptyElement(QName element, XMLAttributes attributes, Augmentations augs) throws XNIException {
                                element.uri = null;
                                element.prefix = null;
                                super.emptyElement(element, stripAttributes(attributes), augs);
                            }

                            public void endElement(QName element, Augmentations augs) throws XNIException {
                                element.uri = null;
                                element.prefix = null;
                                super.endElement(element, augs);
                            }
                        }});
            } catch (SAXNotRecognizedException e) {
                e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
            } catch (SAXNotSupportedException e) {
                e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
            }

            try {
                parser.parse(new InputSource(new StringReader(document)));
                root = parser.getDocument();
            } catch (SAXException e) {
                return Collections.emptyList();
            } catch (IOException e) {
            }
            visit(root.getFirstChild(), documentProperties.getBaseUrl());
            result = new LinkedList<Document>();
            for (final Document nd : mfNodes) {
                result.add(nd);
            }
            result = Collections.unmodifiableList(result);
        }
        return result;
    }

    @Override
    public Context<?> nextStep() {
        return new LinkResolverContext(getInfo(), documentProperties);
    }
}
