package ru.yandex.webmaster3.core.semantic.semantic_document_parser.ogp.transformer;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.ogp.data.OGPData;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.ogp.exception.OGPException;

import java.util.LinkedList;
import java.util.List;
import java.util.regex.Pattern;

import static org.apache.commons.lang3.StringEscapeUtils.unescapeHtml4;

/**
 * Context for the {@code AFTER_SAX} transformation step: collects {@code <meta>}
 * properties whose name starts with {@code og:} or {@code ya:} from the head of an
 * HTML document and exposes them as a single {@link OGPData} record.
 *
 * <p>Only the head portion of the first supplied document is kept and parsed; see
 * {@link #extractHeadMarkup(String)} for how that portion is delimited.
 *
 * <p>Created by rasifiel on 7/15/11.
 */
class AllContext implements Context<OGPData> {

    /**
     * Matches runs of whitespace. Not referenced inside this class; retained because it
     * is package-visible and may be used by other classes of the package.
     */
    static final Pattern SPLIT_SPACES = Pattern.compile("\\s+");

    /** Matches property names that begin with the {@code og:} or {@code ya:} prefix. */
    static final Pattern PROP_START = Pattern.compile("^(og|ya):.*");

    /** Lazily computed result of {@link #getInfo()}; {@code null} until first requested. */
    private List<OGPData> result = null;
    /** Head portion of the source document, as cut out by the constructor. */
    private final String document;
    private final DocumentProperties documentProperties;
    /** Exceptions handed over to the next step; nothing in this class adds to the list. */
    private final List<OGPException> exceptions = new LinkedList<OGPException>();

    /**
     * Creates a context over the first document of the given list.
     *
     * @param documents          source documents; only the element at index 0 is read
     * @param documentProperties properties of the document, stored as is
     * @throws IndexOutOfBoundsException if {@code documents} is empty
     * @throws NullPointerException      if {@code documents} or its first element is {@code null}
     */
    public AllContext(final List<String> documents, final DocumentProperties documentProperties) {
        this.document = extractHeadMarkup(documents.get(0));
        this.documentProperties = documentProperties;
    }

    /**
     * Cuts the head portion out of raw HTML text.
     *
     * <p>The portion starts at the first {@code '<'} and ends right before the first
     * {@code "</head"}. When there is no {@code "</head"}, it ends right before the first
     * {@code "<body"}; when neither marker is present it runs to the end of the text.
     * The search is case-sensitive.
     *
     * @param doc raw document text
     * @return the head portion, or an empty string when {@code doc} contains no {@code '<'}
     */
    private static String extractHeadMarkup(final String doc) {
        final int opening = doc.indexOf('<');
        if (opening < 0) {
            return "";
        }
        int headEnd = doc.indexOf("</head");
        // Fall back to the start of the body only when no closing head tag exists.
        // (The condition used to be inverted, which discarded a found "</head" offset
        // and never tried "<body" when "</head" was missing.)
        if (headEnd == -1) {
            headEnd = doc.indexOf("<body");
        }
        if (headEnd == -1) {
            headEnd = doc.length();
        }
        // Both markers start with '<', so a found offset is never before `opening`.
        return doc.substring(opening, headEnd);
    }

    @Override
    public TransformationStep getStep() {
        return TransformationStep.AFTER_SAX;
    }

    /**
     * Parses the stored head markup and gathers the matching meta properties.
     *
     * <p>The result is computed on the first call and the same list instance is returned
     * afterwards. It holds one {@link OGPData} with every {@code <meta>} of the head that
     * has both a {@code property} and a {@code content} attribute and whose property name
     * matches {@link #PROP_START}; the list is empty only if the parsed document has no head.
     *
     * @return the cached list of collected data
     */
    @Override
    public List<OGPData> getInfo() {
        if (result != null) {
            return result;
        }
        result = new LinkedList<OGPData>();
        final Document root = Jsoup.parse(document);
        final Element head = root.head();
        if (head != null) {
            final OGPData data = new OGPData();
            final Elements metas = head.getElementsByTag("meta");
            for (final Element meta : metas) {
                if (meta.hasAttr("property") && meta.hasAttr("content")) {
                    final String prop = meta.attr("property");
                    // NOTE(review): Jsoup appears to decode entities in attribute values
                    // already, so this unescapes a second time - confirm it is intended.
                    final String content = unescapeHtml4(meta.attr("content"));
                    if (PROP_START.matcher(prop).matches()) {
                        data.addData(prop, content);
                    }
                }
            }
            result.add(data);
        }
        return result;
    }

    /**
     * Builds the following context from the collected data and the exception list.
     *
     * @return a new {@code FinalContext}
     */
    @Override
    public Context<?> nextStep() {
        return new FinalContext(getInfo(), exceptions);
    }
}
