package ru.yandex.webmaster3.core.semantic.semantic_document_parser.microformats.transformer;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.w3c.dom.Document;
import ru.yandex.common.util.URLUtils;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.microformats.spec.Microformat;

import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Pattern;

import static ru.yandex.common.util.StringUtils.isEmpty;

/**
 * Transformation context for the {@link TransformationStep#AFTER_SAX} step.
 *
 * <p>Parses the raw HTML with jsoup, collects every topmost element whose {@code class}
 * attribute names a root microformat, rewrites the {@code src}/{@code href} attributes inside
 * those elements against the effective base URL, and exposes each collected element as a
 * standalone W3C DOM document.
 *
 * <p>Instances keep mutable traversal state and build their result lazily without any
 * synchronization, so they must not be shared between threads.
 *
 * User: rasifiel
 * Date: 7/15/11
 */
class SAXJsoupContext implements Context<Document> {

    /** Matches the whitespace runs that separate class names inside a {@code class} attribute. */
    static final Pattern SPLIT_SPACES = Pattern.compile("\\s+");

    /** Charset handed to {@code URLUtils.fixUnderencodedURL}; looked up once instead of per call. */
    private static final Charset ASCII = Charset.forName("ASCII");

    /** Class name that marks an {@code <a>} element as an include reference. */
    private static final String INCLUDE_CLASS = "include";

    private static final String MAILTO_PREFIX = "mailto:";

    private static String[] splitBySpace(final String s) {
        return SPLIT_SPACES.split(s);
    }

    /** Lazily built, unmodifiable result of {@link #getInfo()}; {@code null} until first requested. */
    private List<Document> result = null;
    /** Raw HTML, with everything before the first {@code '<'} stripped. */
    private final String document;
    private final DocumentProperties documentProperties;
    /** Root microformat elements found by {@link #visit}, in document order. */
    private final LinkedList<Element> mfNodes = new LinkedList<Element>();
    /** Set once the first {@code <base href>} has been applied; later ones are ignored. */
    private boolean baseTagSeen = false;

    /**
     * Creates a context over the first document of the list.
     *
     * @param documents          source documents; only the first element is used. A {@code null}
     *                           or empty list, or a {@code null} first element, is treated as an
     *                           empty document.
     * @param documentProperties provides the microformat manager and the fallback base URL
     */
    public SAXJsoupContext(final List<String> documents, final DocumentProperties documentProperties) {
        final String doc = (documents == null || documents.isEmpty()) ? null : documents.get(0);
        final int opening = (doc == null) ? -1 : doc.indexOf('<');
        if (opening < 0) {
            // No markup at all: there is nothing to parse.
            this.document = "";
        } else {
            // Drop any leading text before the first tag.
            this.document = doc.substring(opening);
        }
        this.documentProperties = documentProperties;
    }

    @Override
    public TransformationStep getStep() {
        return TransformationStep.AFTER_SAX;
    }

    /**
     * Walks the tree in document order, collecting root microformat elements into
     * {@link #mfNodes} and absolutizing the URLs inside each of them. The subtree of a
     * collected element is not searched for further roots.
     *
     * <p>The base URL is threaded through the traversal: once the first {@code <base href>}
     * is met, its value is used for every element that follows it in document order. Previously
     * the new value was only handed to the {@code <base>} element's own children and therefore
     * never reached the rest of the document.
     *
     * @param node    element to inspect; {@code null} is ignored
     * @param baseUrl base URL in effect when {@code node} is reached
     * @return the base URL in effect after {@code node} and its subtree have been processed
     */
    private String visit(final Element node, final String baseUrl) {
        if (node == null) {
            return baseUrl;
        }
        String currentBase = baseUrl;
        if (!baseTagSeen && "base".equalsIgnoreCase(node.nodeName()) && node.hasAttr("href")) {
            currentBase = node.attr("href");
            baseTagSeen = true;
        }
        if (isRootMicroformat(node)) {
            mfNodes.add(node);
            absolutizeTree(node, currentBase);
            return currentBase;
        }
        for (final Element child : node.children()) {
            currentBase = visit(child, currentBase);
        }
        return currentBase;
    }

    /** Tells whether any class name of the element is registered as a root microformat. */
    private boolean isRootMicroformat(final Element node) {
        final String classAtt = node.attr("class");
        if (isEmpty(classAtt)) {
            return false;
        }
        for (final String singleClass : splitBySpace(classAtt)) {
            final Microformat mf = documentProperties.getManager().get(singleClass);
            if (mf != null && mf.isRoot()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Absolutizes {@code src} and {@code href} on the element and all of its descendants,
     * leaving include links ({@code <a>} elements carrying the {@code include} class) untouched.
     */
    private void absolutizeTree(final Element root, final String baseUrl) {
        if (!isIncludeLink(root)) {
            absolutizeAtt(root, baseUrl, "src", "href");
        }
        for (final Element child : root.children()) {
            absolutizeTree(child, baseUrl);
        }
    }

    /**
     * Tells whether the element is an {@code <a>} whose class list contains {@code include}.
     * The class attribute is compared token by token, so {@code class="include foo"} is
     * recognised as well as the bare {@code class="include"}.
     */
    private static boolean isIncludeLink(final Element element) {
        if (!"a".equalsIgnoreCase(element.nodeName())) {
            return false;
        }
        for (final String singleClass : splitBySpace(element.attr("class"))) {
            if (INCLUDE_CLASS.equals(singleClass)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Replaces each of the named attributes present on the element with its absolutized value.
     * {@code mailto:} URLs (scheme compared case-insensitively) are skipped, and the attribute
     * is only rewritten when the value actually changes.
     */
    private void absolutizeAtt(final Element root, final String baseUrl, final String... atts) {
        for (final String att : atts) {
            if (!root.hasAttr(att)) {
                continue;
            }
            final String oldUrl = root.attr(att);
            if (oldUrl.regionMatches(true, 0, MAILTO_PREFIX, 0, MAILTO_PREFIX.length())) {
                continue;
            }
            final String newUrl = absolutize(oldUrl, baseUrl);
            if (!newUrl.equals(oldUrl)) {
                root.removeAttr(att);
                root.attr(att, newUrl);
            }
        }
    }

    /**
     * Resolves {@code oldUrl} against {@code baseUrl}.
     *
     * @return the resolved URL, or the cleaned-up {@code oldUrl} when there is no base URL or
     *         when either URL cannot be parsed as a {@link URI}
     */
    private String absolutize(final String oldUrl, final String baseUrl) {
        final String fixedOldUrl = URLUtils.cleanUrl(URLUtils.fixUnderencodedURL(oldUrl, ASCII));
        if (baseUrl == null) {
            return fixedOldUrl;
        }
        final String fixedBaseUrl = URLUtils.fixUnderencodedURL(baseUrl, ASCII);
        try {
            return new URI(fixedBaseUrl).resolve(fixedOldUrl).toString();
        } catch (URISyntaxException ignored) {
            // Unparseable base URL: resolution is best-effort, keep the cleaned original.
            return fixedOldUrl;
        } catch (IllegalArgumentException ignored) {
            // Unparseable target URL: same best-effort fallback.
            return fixedOldUrl;
        }
    }

    /**
     * Parses the document on first call and returns one DOM document per root microformat
     * element found, in document order. Subsequent calls return the same cached list.
     *
     * @return an unmodifiable list, empty when no root microformat was found
     */
    @Override
    public List<Document> getInfo() {
        if (result == null) {
            final String baseUrl = documentProperties.getBaseUrl();
            final org.jsoup.nodes.Document root = Jsoup.parse(document);
            visit(root, baseUrl);

            // Build into a local list so the field never holds a half-filled result.
            final List<Document> converted = new LinkedList<Document>();
            for (final Element nd : mfNodes) {
                final org.jsoup.nodes.Document doc = new org.jsoup.nodes.Document(baseUrl);
                doc.appendChild(nd);
                converted.add(DOMBuilder.jsoup2DOM(doc));
            }
            result = Collections.unmodifiableList(converted);
        }
        return result;
    }

    @Override
    public Context<?> nextStep() {
        return new LinkResolverContext(getInfo(), documentProperties);
    }
}
