package ru.yandex.webmaster3.core.semantic.semantic_document_parser.microformats.transformer;

import org.htmlcleaner.*;
import org.w3c.dom.Document;
import ru.yandex.common.util.URLUtils;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.microformats.spec.Microformat;

import javax.xml.parsers.ParserConfigurationException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Transformation context for the {@link TransformationStep#AFTER_SAX} step: parses raw HTML
 * with HtmlCleaner and extracts every top-most element that carries a root microformat class
 * as a standalone DOM {@link Document}.
 *
 * <p>While collecting those elements, the {@code src} and {@code href} attributes inside each
 * collected subtree are rewritten to absolute URLs. The base used for that is the first
 * {@code <base href>} of the page (resolved against the document URL) when one exists,
 * otherwise the document URL taken from {@link DocumentProperties}.
 *
 * <p>Elements nested inside an already collected microformat root are not collected again;
 * they stay part of the enclosing root's document.
 *
 * <p>Created by rasifiel on 7/15/11.
 */
class SAXContext implements Context<Document> {

    /** Separator pattern used to split a {@code class} attribute into single class names. */
    final static Pattern SPLIT_SPACES = Pattern.compile("\\s+");

    /** Charset handed to {@code URLUtils.fixUnderencodedURL}; looked up once instead of per URL. */
    private static final Charset ASCII = Charset.forName("ASCII");

    private static String[] splitBySpace(final String s) {
        return SPLIT_SPACES.split(s);
    }

    /** Lazily computed, unmodifiable result of {@link #getInfo()}; {@code null} until computed. */
    private List<Document> result = null;
    private final String document;
    private final DocumentProperties documentProperties;

    /** Microformat root elements found by {@link #visit}, in document order. */
    private final List<TagNode> mfNodes = new LinkedList<TagNode>();

    /**
     * Creates a context over the first entry of {@code documents}.
     *
     * <p>Everything before the first {@code '<'} is dropped; input without any {@code '<'}
     * is treated as an empty document.
     *
     * @param documents          raw documents; only the element at index 0 is used
     * @param documentProperties source of the document base URL and the microformat manager
     * @throws IndexOutOfBoundsException if {@code documents} is empty
     */
    public SAXContext(final List<String> documents, final DocumentProperties documentProperties) {
        final String doc = documents.get(0);
        final int opening = doc.indexOf('<');
        // substring(0) yields the string itself, so "starts with '<'" needs no special case.
        this.document = opening < 0 ? "" : doc.substring(opening);
        this.documentProperties = documentProperties;
    }

    /**
     * @return always {@link TransformationStep#AFTER_SAX}
     */
    @Override
    public TransformationStep getStep() {
        return TransformationStep.AFTER_SAX;
    }

    /**
     * Walks the tree depth-first and records every top-most microformat root in
     * {@link #mfNodes}, absolutizing the links of each recorded subtree.
     *
     * @param htmlNode node to inspect; anything that is not a {@link TagNode} is ignored
     * @param baseUrl  base URL used to absolutize links of recorded subtrees
     */
    private void visit(final HtmlNode htmlNode, final String baseUrl) {
        if (!(htmlNode instanceof TagNode)) {
            return;
        }
        final TagNode node = (TagNode) htmlNode;
        if (hasRootMicroformatClass(node)) {
            mfNodes.add(node);
            absolutizeTree(node, baseUrl);
            // Do not descend: nested roots belong to this root's document.
            return;
        }
        for (final TagNode child : node.getChildTags()) {
            visit(child, baseUrl);
        }
    }

    /**
     * Tells whether any class name of the node is registered as a root microformat.
     *
     * @param node element whose {@code class} attribute is examined
     * @return {@code true} if at least one class name maps to a microformat with {@code isRoot()}
     */
    private boolean hasRootMicroformatClass(final TagNode node) {
        final String classAtt = node.getAttributeByName("class");
        if (classAtt == null) {
            return false;
        }
        for (final String singleClass : splitBySpace(classAtt)) {
            final Microformat mf = documentProperties.getManager().get(singleClass);
            if (mf != null && mf.isRoot()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Determines the base URL against which links of the page are resolved.
     *
     * <p>The previous implementation updated the base URL only for the children of the
     * {@code <base>} element itself, so a {@code <base>} in {@code <head>} never influenced
     * links in {@code <body>}. The base is now determined once for the whole document.
     *
     * @param root root of the cleaned document
     * @return the first non-blank {@code <base href>} resolved against the document URL,
     *         or the document URL itself when there is no such element
     */
    private String effectiveBaseUrl(final TagNode root) {
        final String documentUrl = documentProperties.getBaseUrl();
        final String baseHref = findBaseHref(root);
        if (baseHref == null || baseHref.trim().length() == 0) {
            return documentUrl;
        }
        // The href of <base> may itself be relative, so resolve it like any other link.
        return absolutize(baseHref, documentUrl);
    }

    /**
     * Finds, in document order, the first {@code <base>} element that has an {@code href}.
     *
     * @param node subtree to search; may be {@code null}
     * @return the {@code href} value, or {@code null} if the subtree has no such element
     */
    private static String findBaseHref(final TagNode node) {
        if (node == null) {
            return null;
        }
        if ("base".equalsIgnoreCase(node.getName()) && node.hasAttribute("href")) {
            return node.getAttributeByName("href");
        }
        for (final TagNode child : node.getChildTags()) {
            final String href = findBaseHref(child);
            if (href != null) {
                return href;
            }
        }
        return null;
    }

    /**
     * Absolutizes {@code src} and {@code href} on {@code root} and all of its descendants.
     *
     * <p>An {@code <a>} whose class attribute is exactly {@code "include"} keeps its
     * attributes untouched; its descendants are still processed.
     * NOTE(review): an anchor with additional classes (e.g. {@code "include foo"}) is not
     * exempted by this check - confirm whether that is intended.
     *
     * @param root    top of the subtree to rewrite
     * @param baseUrl base URL to resolve against
     */
    private void absolutizeTree(final TagNode root, final String baseUrl) {
        final boolean includeAnchor = "a".equalsIgnoreCase(root.getName())
                && "include".equals(root.getAttributeByName("class"));
        if (!includeAnchor) {
            absolutizeAtt(root, baseUrl, "src", "href");
        }
        for (final TagNode child : root.getChildTags()) {
            absolutizeTree(child, baseUrl);
        }
    }

    /**
     * Rewrites the given attributes of a single element to absolute URLs.
     *
     * <p>Values starting with {@code "mailto:"} are left alone, and an attribute is only
     * replaced when the resolved value differs from the current one.
     *
     * @param root    element whose attributes are rewritten
     * @param baseUrl base URL to resolve against
     * @param atts    names of the attributes to process
     */
    private void absolutizeAtt(final TagNode root, final String baseUrl, final String... atts) {
        for (final String att : atts) {
            if (!root.hasAttribute(att)) {
                continue;
            }
            final String oldUrl = root.getAttributeByName(att);
            if (oldUrl == null || oldUrl.startsWith("mailto:")) {
                continue;
            }
            final String newUrl = absolutize(oldUrl, baseUrl);
            if (!newUrl.equals(oldUrl)) {
                root.removeAttribute(att);
                root.addAttribute(att, newUrl);
            }
        }
    }

    /**
     * Resolves {@code oldUrl} against {@code baseUrl}.
     *
     * @param oldUrl  URL as found in the document
     * @param baseUrl base URL; may be {@code null}
     * @return the resolved URL, or the cleaned-up {@code oldUrl} when there is no base,
     *         the base is not a valid URI, or resolution fails
     */
    private String absolutize(final String oldUrl, final String baseUrl) {
        final String fixedOldUrl = URLUtils.cleanUrl(URLUtils.fixUnderencodedURL(oldUrl, ASCII));
        if (baseUrl == null) {
            // Nothing to resolve against; java.net.URI rejects a null string with an NPE.
            return fixedOldUrl;
        }
        final String fixedBaseUrl = URLUtils.fixUnderencodedURL(baseUrl, ASCII);
        try {
            return new URI(fixedBaseUrl).resolve(fixedOldUrl).toString();
        } catch (URISyntaxException e) {
            // The base URL is not a valid URI.
            return fixedOldUrl;
        } catch (IllegalArgumentException e) {
            // URI.resolve(String) signals an unparsable reference this way.
            return fixedOldUrl;
        }
    }

    /**
     * Parses the document on first use and returns one DOM document per microformat root.
     *
     * <p>The result is computed once and cached. {@code script} and {@code style} elements
     * are pruned by the cleaner before the tree is searched.
     *
     * @return unmodifiable list of DOM documents, in the order the roots appear in the page
     */
    @Override
    public List<Document> getInfo() {
        if (result == null) {
            final HtmlCleaner cleaner = new HtmlCleaner();
            final CleanerProperties props = cleaner.getProperties();
            props.setOmitXmlDeclaration(true);
            props.setNamespacesAware(false);
            props.setPruneTags("script,style");
            props.setTransResCharsToNCR(true);
            props.setAdvancedXmlEscape(true);
            props.setTransSpecialEntitiesToNCR(true);

            final TagNode root = cleaner.clean(document);
            // Start from a clean slate in case an earlier attempt failed half-way.
            mfNodes.clear();
            visit(root, effectiveBaseUrl(root));

            final List<Document> documents = new LinkedList<Document>();
            final DomSerializer serializer = new DomSerializer(props);
            for (final TagNode nd : mfNodes) {
                try {
                    documents.add(serializer.createDOM(nd));
                } catch (ParserConfigurationException ignored) {
                    // Best effort: a root that cannot be serialized is skipped so that
                    // the remaining roots of the page are still returned.
                }
            }
            // Publish only the finished list, so a failure never leaves a partial result cached.
            result = Collections.unmodifiableList(documents);
        }
        return result;
    }

    /**
     * @return a {@link LinkResolverContext} over the documents from {@link #getInfo()} and
     *         the same document properties
     */
    @Override
    public Context<?> nextStep() {
        return new LinkResolverContext(getInfo(), documentProperties);
    }
}
