package ru.yandex.webmaster3.core.semantic.semantic_document_parser.microdata.transformer;

import org.apache.commons.lang3.StringEscapeUtils;
import org.htmlcleaner.*;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import ru.yandex.common.util.URLUtils;
import ru.yandex.semantic.jsonld.JSONLDEntity;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.location.EntityLocation;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.microdata.MicrodataUtils;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.microdata.data.ComplexMicrodata;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.microdata.data.Microdata;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.microdata.data.MicrodataProperty;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.microdata.data.TextMicrodata;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.microdata.exceptions.NoItemscopeMicrodataValidatorException;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.*;
import java.util.regex.Pattern;

import static ru.yandex.common.util.StringUtils.isEmpty;

/**
 * Transformation context for the {@code AFTER_SAX} step: parses the raw document bytes with
 * HtmlCleaner and extracts microdata items from the resulting DOM.
 *
 * <p>Created by rasifiel on 7/15/11, 1:51 PM.
 */
public class AllByteContext implements Context<Microdata> {


    /** Matches a run of one or more whitespace characters; used by {@code splitBySpace}. */
    final static Pattern SPLIT_SPACES = Pattern.compile("\\s+");
    /** Depth limit checked by {@code traverseAndExtract}; nodes reached beyond it are skipped. */
    private static final int MAX_DEPTH = 1000;

    /**
     * Splits a string on runs of whitespace.
     *
     * @param s the string to split
     * @return the tokens produced by {@link #SPLIT_SPACES}
     */
    private static String[] splitBySpace(final String s) {
        // A limit of 0 is what Pattern.split(CharSequence) uses internally.
        final String[] tokens = SPLIT_SPACES.split(s, 0);
        return tokens;
    }

    /** Cached extraction result; stays {@code null} until {@code getInfo()} has computed it. */
    private List<Microdata> result = null;
    /** Raw bytes of the document to parse. */
    private final byte[] document;
    /** Settings read during extraction (charset, base URL, verifier/API/expand flags). */
    private final DocumentProperties documentProperties;

    /**
     * Creates a context over a raw document.
     *
     * @param document           document bytes; the array is stored as-is, not copied
     * @param documentProperties settings consulted while parsing and extracting
     */
    public AllByteContext(final byte[] document, final DocumentProperties documentProperties) {
        this.documentProperties = documentProperties;
        this.document = document;
    }

    /**
     * @return the transformation step this context represents, always {@code AFTER_SAX}
     */
    @Override
    public TransformationStep getStep() {
        final TransformationStep step = TransformationStep.AFTER_SAX;
        return step;
    }

    /** Top-level microdata item roots collected by {@code visit}, in visiting order. */
    private final List<TagNode> mfNodes = new ArrayList<TagNode>();
    /** Intended for JSON-LD script nodes; the code that filled it is disabled, so it stays empty. */
    private final List<TagNode> jsonldNodes = new ArrayList<TagNode>();
    /** Set once the first {@code <base href>} has been applied; later ones are ignored. */
    private boolean baseHrefApplied = false;

    /**
     * Walks the DOM below {@code htmlNode}, records every top-level microdata item root in
     * {@link #mfNodes} and absolutizes the URL attributes inside each recorded subtree.
     *
     * <p>An element is a top-level item root when it has {@code itemscope} or {@code itemtype}
     * and no non-empty {@code itemprop}.
     *
     * @param htmlNode node to start from; anything that is not a {@link TagNode} is ignored
     * @param baseUrl  base URL in effect before {@code htmlNode} is visited
     */
    private void visit(final HtmlNode htmlNode, String baseUrl) {
        visitTrackingBase(htmlNode, baseUrl);
    }

    /**
     * Recursive worker for {@link #visit}.
     *
     * <p>The base URL is threaded through the traversal as a return value. Assigning it to the
     * by-value parameter alone would be lost as soon as the frame visiting the {@code <base>}
     * element returns, so the base would never reach the rest of the document.
     *
     * @return the base URL in effect after this subtree has been visited
     */
    private String visitTrackingBase(final HtmlNode htmlNode, String baseUrl) {
        if (!(htmlNode instanceof TagNode)) {
            return baseUrl;
        }
        final TagNode node = (TagNode) htmlNode;
        boolean found = false;
        if ((node.hasAttribute("itemscope") || node.hasAttribute("itemtype")) &&
                (!node.hasAttribute("itemprop") || node.getAttributeByName("itemprop").isEmpty())) {
            mfNodes.add(node);
            found = true;
        }
        // Only the first base element carrying an href defines the document base URL.
        if (!baseHrefApplied && "base".equalsIgnoreCase(node.getName()) && node.hasAttribute("href")) {
            baseUrl = resolveBaseHref(node.getAttributeByName("href"), baseUrl);
            baseHrefApplied = true;
        }
        for (final TagNode child : node.getChildTags()) {
            baseUrl = visitTrackingBase(child, baseUrl);
        }
        if (found) {
            absolutizeTree(node, baseUrl);
        }
        return baseUrl;
    }

    /**
     * Resolves a {@code <base href>} value against the document URL.
     *
     * @param href        raw attribute value, may be {@code null}
     * @param documentUrl URL in effect before the base element, may be {@code null}
     * @return the resolved base URL; when resolution is impossible, the trimmed href if it is
     *         non-empty, otherwise {@code documentUrl}
     */
    private static String resolveBaseHref(final String href, final String documentUrl) {
        if (href == null) {
            return documentUrl;
        }
        final String trimmed = href.trim();
        final String fallback = trimmed.isEmpty() ? documentUrl : trimmed;
        if (documentUrl == null) {
            return fallback;
        }
        try {
            return new URI(documentUrl).resolve(trimmed).toString();
        } catch (URISyntaxException e) {
            return fallback;
        } catch (IllegalArgumentException e) {
            return fallback;
        }
    }

    /**
     * Absolutizes the {@code src} and {@code href} attributes of {@code root} and of all of its
     * descendants. An {@code <a class="include">} element is left untouched, but its
     * descendants are still processed.
     *
     * @param root    subtree root
     * @param baseUrl URL that relative references are resolved against
     */
    private void absolutizeTree(final TagNode root, final String baseUrl) {
        final boolean includeAnchor =
                "a".equalsIgnoreCase(root.getName()) && "include".equals(root.getAttributeByName("class"));
        if (!includeAnchor) {
            absolutizeAtt(root, baseUrl, "src", "href");
        }
        for (final TagNode descendant : root.getChildTags()) {
            absolutizeTree(descendant, baseUrl);
        }
    }

    /**
     * Absolutizes URL-valued attributes of a single element.
     *
     * <p>For {@code <meta itemprop="url">} the {@code content} attribute is rewritten first;
     * then every attribute named in {@code atts} that is present on the element is rewritten.
     *
     * @param root    element to modify in place
     * @param baseUrl URL that relative references are resolved against
     * @param atts    names of the attributes holding URLs
     */
    private void absolutizeAtt(final TagNode root, final String baseUrl, final String... atts) {
        final boolean urlMeta = "meta".equals(root.getName())
                && root.hasAttribute("itemprop")
                && "url".equals(root.getAttributeByName("itemprop"));
        if (urlMeta) {
            rewriteUrlAttribute(root, baseUrl, "content");
        }
        for (final String att : atts) {
            if (root.hasAttribute(att)) {
                rewriteUrlAttribute(root, baseUrl, att);
            }
        }
    }

    /**
     * Replaces one attribute value with its absolutized form. Missing values and
     * {@code mailto:} links are skipped, and the attribute is only written when the value changes.
     */
    private void rewriteUrlAttribute(final TagNode element, final String baseUrl, final String attName) {
        final String current = element.getAttributeByName(attName);
        if (current == null || current.startsWith("mailto:")) {
            return;
        }
        final String resolved = absolutize(current, baseUrl);
        if (!resolved.equals(current)) {
            element.addAttribute(attName, resolved);
        }
    }

    /**
     * Resolves {@code oldUrl} against {@code baseUrl}.
     *
     * <p>The URL is first passed through {@code URLUtils.cleanUrl}. When no usable base is
     * available or either value cannot be parsed as a URI, the cleaned URL is returned
     * unresolved instead of failing.
     *
     * @param oldUrl  URL taken from the document; callers pass a non-null value
     * @param baseUrl base URL, may be {@code null}
     * @return the resolved URL, or the cleaned (unresolved) URL when resolution is not possible
     */
    private String absolutize(final String oldUrl, final String baseUrl) {
        final String cleanedUrl = URLUtils.cleanUrl(oldUrl);
        if (cleanedUrl == null) {
            // NOTE(review): whether cleanUrl can return null is not visible here; fall back to
            // the raw value so callers that compare the result with equals() stay safe.
            return oldUrl;
        }
        if (baseUrl == null) {
            // new URI(null) would throw NullPointerException; without a base nothing can be resolved.
            return cleanedUrl;
        }
        try {
            // URI.resolve never returns null, so no null check is needed on the result.
            return new URI(baseUrl).resolve(cleanedUrl).toString();
        } catch (URISyntaxException e) {
            // The base URL is not a valid URI.
            return cleanedUrl;
        } catch (IllegalArgumentException e) {
            // The cleaned URL is not a valid URI reference.
            return cleanedUrl;
        }
    }

    /**
     * Tag provider that allows {@code meta} and {@code link} elements in both the head and
     * the body of a document.
     */
    static class HTML5TagProvider extends DefaultTagProvider {

        private final TagInfo metaTag;
        private final TagInfo linkTag;

        public HTML5TagProvider() {
            super();
            metaTag = allowInHeadAndBody("meta");
            linkTag = allowInHeadAndBody("link");
        }

        /** Looks the tag up in the default provider and marks it as valid in head and body. */
        private TagInfo allowInHeadAndBody(final String tagName) {
            final TagInfo info = super.getTagInfo(tagName);
            info.setBelongsTo(BelongsTo.HEAD_AND_BODY);
            return info;
        }

        /**
         * @return the relaxed info for {@code meta} and {@code link}, otherwise whatever the
         *         default provider returns
         */
        @Override
        public TagInfo getTagInfo(final String tagName) {
            return "meta".equals(tagName) ? metaTag
                    : "link".equals(tagName) ? linkTag
                    : super.getTagInfo(tagName);
        }
    }

    /** Tag provider instance passed to the HtmlCleaner created in {@code getInfo()}. */
    public static HTML5TagProvider provider = new HTML5TagProvider();

    /** Root of the parsed document; assigned in {@code getInfo()}. */
    TagNode doc;
    /** Serializer used by {@code serialize}; created in {@code getInfo()}. */
    Serializer serializer;
    /** Elements keyed by their {@code id} attribute; used to look up {@code itemref} targets. */
    Map<String, TagNode> id2node = new HashMap<String, TagNode>();

    /**
     * Parses the document on first use and extracts its microdata; later calls return the
     * cached, unmodifiable list.
     *
     * <p>Each top-level item root becomes one entry, or one entry per nested item when
     * {@code isExpandCardTree()} is set. In verifier or API mode, properties that do not belong
     * to any item are gathered into an extra entry flagged {@code ORPHANS}, which is placed first
     * when it is not empty.
     *
     * @return the extracted microdata
     */
    @Override
    public List<Microdata> getInfo() {
        if (result != null) {
            return result;
        }

        final HtmlCleaner cleaner = new HtmlCleaner(provider);
        final CleanerProperties props = cleaner.getProperties();
        props.setOmitXmlDeclaration(true);
        props.setNamespacesAware(false);
        props.setPruneTags("style");
        serializer = new CompactHtmlSerializer(props);

        final TagNode root = parseDocument(cleaner);

        // Index elements by id so that itemref targets can be looked up later.
        for (TagNode withId : root.getElementsHavingAttribute("id", true)) {
            id2node.put(withId.getAttributeByName("id"), withId);
        }
        doc = root;
        visit(root, documentProperties.getBaseUrl());

        final LinkedList<Microdata> collected = new LinkedList<Microdata>();
        for (final TagNode itemRoot : mfNodes) {
            final ComplexMicrodata item = getMicrodataFromDOM(itemRoot, 0);
            if (item == null) {
                continue;
            }
            if (documentProperties.isExpandCardTree()) {
                collected.addAll(getAllComplexMicrodataFromTree(item));
            } else {
                collected.add(item);
            }
        }

        if (documentProperties.isVerifier() || documentProperties.isAPI()) {
            final ComplexMicrodata orphans = new ComplexMicrodata("");
            orphans.location = EntityLocation.DEFAULT_LOCATION;
            orphans.flags |= Microdata.ORPHANS;
            traverseAndExtract(root, orphans, true, 0, root.getName().equals("noindex"));
            if (!orphans.isEmpty()) {
                collected.addFirst(orphans);
            }
        }

        this.result = Collections.unmodifiableList(collected);
        return result;
    }

    /**
     * Rewrites the noindex comment markers and runs the cleaner over the document bytes,
     * using the configured charset when there is one.
     */
    private TagNode parseDocument(final HtmlCleaner cleaner) {
        final byte[] prepared;
        try {
            prepared = findNiondexCommentNode(document);
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
        final ByteArrayInputStream input = new ByteArrayInputStream(prepared);
        try {
            if (documentProperties.getCharset() == null) {
                return cleaner.clean(input);
            }
            return cleaner.clean(input, documentProperties.getCharset());
        } catch (IOException e) {
            // Not expected for an in-memory stream.
            throw new RuntimeException(e);
        }
    }

    /**
     * Turns the comment markers {@code <!--noindex-->} and {@code <!--/noindex-->} into real
     * {@code <noindex>} / {@code </noindex>} tags so that they show up as elements in the DOM.
     *
     * <p>When a charset is configured the bytes are decoded and re-encoded with it. Without
     * one, ISO-8859-1 is used for the round trip: it maps every byte to exactly one character
     * and back, so all bytes other than the ASCII markers are preserved unchanged. The
     * platform default charset would silently replace bytes it cannot decode and make the
     * result depend on the JVM's locale settings.
     *
     * @param document raw document bytes
     * @return the document bytes with the markers replaced
     * @throws UnsupportedEncodingException if the configured charset name is not supported
     */
    private byte[] findNiondexCommentNode(byte[] document) throws UnsupportedEncodingException {
        String doc;
        if (documentProperties.getCharset() != null) {
            doc = new String(document, documentProperties.getCharset());
        } else {
            doc = new String(document, "ISO-8859-1");
        }
        doc = doc.replace("<!--noindex-->", "<noindex>");
        doc = doc.replace("<!--/noindex-->", "</noindex>");
        if (documentProperties.getCharset() != null) {
            return doc.getBytes(documentProperties.getCharset());
        }
        return doc.getBytes("ISO-8859-1");
    }

    /** Nodes currently being expanded; used to avoid following an {@code itemref} into a node already in progress. */
    private final HashSet<TagNode> visited = new HashSet<TagNode>();

    /**
     * Builds the microdata item rooted at {@code root}.
     *
     * <p>The item type is taken verbatim from {@code itemtype} (an earlier normalisation of
     * schema.org type URLs is disabled). A non-empty {@code itemid} is stored as the
     * {@code itemId} property. Properties are collected from every element referenced by
     * {@code itemref} and then from the children of {@code root}. In verifier or API mode an
     * item without {@code itemscope} gets a {@link NoItemscopeMicrodataValidatorException}
     * attached.
     *
     * @param root  element that starts the item
     * @param depth current nesting depth; {@code depth + 1} is passed to the traversal
     * @return the extracted item, never {@code null}
     */
    ComplexMicrodata getMicrodataFromDOM(final TagNode root, final int depth) {
        visited.add(root);

        final ComplexMicrodata item = new ComplexMicrodata(root.getAttributeByName("itemtype"));
        item.location = new EntityLocation(root.getRow(),root.getCol());

        final String itemId = root.getAttributeByName("itemid");
        if (itemId != null && !itemId.isEmpty()) {
            item.addPropValue("itemId", new TextMicrodata(itemId, null, itemId, item.location));
        }

        final String itemRef = root.getAttributeByName("itemref");
        if (itemRef != null && !itemRef.isEmpty()) {
            for (final String refId : SPLIT_SPACES.split(itemRef)) {
                final TagNode referenced = id2node.get(refId);
                if (referenced == null) {
                    continue;
                }
                // Mark the target so that the orphan pass skips it.
                referenced.addAttribute("itemrefed", "true");
                if (visited.add(referenced)) {
                    traverseAndExtract(referenced, item, false, depth + 1, root.getName().equals("noindex"));
                    visited.remove(referenced);
                }
            }
        }

        for (final TagNode child : root.getChildTags()) {
            traverseAndExtract(child, item, false, depth + 1, root.getName().equals("noindex"));
        }

        final boolean validating = documentProperties.isVerifier() || documentProperties.isAPI();
        if (!root.hasAttribute("itemscope") && validating) {
            item.addException(new NoItemscopeMicrodataValidatorException(true, "", item));
        }

        visited.remove(root);
        return item;
    }

    /**
     * Serializes a node to HTML with the serializer created in {@code getInfo()}.
     *
     * @param node node to serialize
     * @return the HTML text of the node
     * @throws RuntimeException wrapping any exception raised by the serializer
     */
    private String serialize(final TagNode node) {
        final String html;
        try {
            html = serializer.getAsString(node);
        } catch (Exception e) {
            throw new RuntimeException("HTML serialization error", e);
        }
        return html;
    }

    /**
     * Collects the microdata properties found at {@code node} and below into {@code result}.
     *
     * <p>Traversal stops below an element that starts its own item (one with
     * {@code itemscope} or {@code itemtype}); such an element still contributes its own
     * {@code itemprop}. Nodes deeper than {@link #MAX_DEPTH} are skipped.
     *
     * @param node        element to inspect
     * @param result      item that receives the properties; its {@code noindex} flag is set
     *                    when a {@code noindex} element is encountered
     * @param onlyOrphans when {@code true}, elements marked {@code itemrefed="true"} are skipped
     * @param depth       current nesting depth
     * @param noindex     passed unchanged to the recursive calls; not otherwise read here
     */
    private void traverseAndExtract(final TagNode node, final ComplexMicrodata result, final boolean onlyOrphans, final int depth, boolean noindex) {
        final EntityLocation location = new EntityLocation(node.getRow(), node.getCol());
        if (depth > MAX_DEPTH) {
            return;
        }
        if (onlyOrphans && "true".equals(node.getAttributeByName("itemrefed"))) {
            return;
        }
        // A validator warning for microdata inside noindex used to be added here; it is disabled.
        if (!result.noindex && node.getName().equals("noindex")) {
            result.noindex = true;
        }

        if (node.hasAttribute("itemprop") && !node.getAttributeByName("itemprop").isEmpty()) {
            final MicrodataProperty property = extractProperty(node, location, depth);
            if (property != null) {
                result.addPropValue(property);
            }
        }

        if (isEntityStarts(node)) {
            // Children belong to the nested item and were handled while building it.
            return;
        }
        for (final TagNode child : node.getChildTags()) {
            traverseAndExtract(child, result, onlyOrphans, depth + 1, noindex);
        }
    }

    /**
     * Builds the property carried by an element that has a non-empty {@code itemprop}.
     *
     * <p>The value source depends on the element: a nested item (or the element text when
     * that item is empty), {@code content} for {@code meta}, {@code datetime} for
     * {@code time}, {@code value} for {@code data}, then {@code src}, {@code href} or
     * {@code data} attributes, then {@code content}, and finally the stringified node.
     *
     * @return the property, or {@code null} when a {@code meta}, {@code time} or
     *         {@code data} element lacks its value attribute
     */
    private MicrodataProperty extractProperty(final TagNode node, final EntityLocation location, final int depth) {
        if (isEntityStarts(node)) {
            final ComplexMicrodata nested = getMicrodataFromDOM(node, depth + 1);
            if (!nested.isEmpty()) {
                return new MicrodataProperty(prop(node), nested);
            }
            return new MicrodataProperty(prop(node),
                    new TextMicrodata(getTextContent(node), null, serialize(node), location));
        }

        final String tagName = node.getName();
        if ("meta".equals(tagName)) {
            if (!node.hasAttribute("content")) {
                return null;
            }
            return new MicrodataProperty(prop(node),
                    new TextMicrodata(StringEscapeUtils.unescapeHtml4(node.getAttributeByName("content")),
                            null, serialize(node), location));
        }
        if ("time".equals(tagName)) {
            if (!node.hasAttribute("datetime")) {
                return null;
            }
            return new MicrodataProperty(prop(node),
                    new TextMicrodata(node.getAttributeByName("datetime"), null, serialize(node), location));
        }
        if ("data".equals(tagName)) {
            if (!node.hasAttribute("value")) {
                return null;
            }
            return new MicrodataProperty(prop(node),
                    new TextMicrodata(node.getAttributeByName("value"), null, serialize(node), location));
        }

        if (node.hasAttribute("src")) {
            return new MicrodataProperty(prop(node),
                    new TextMicrodata(node.hasAttribute("alt") ? node.getAttributeByName("alt") : "",
                            node.getAttributeByName("src"), serialize(node), location));
        }
        if (node.hasAttribute("href")) {
            return new MicrodataProperty(prop(node),
                    new TextMicrodata(MicrodataUtils.stringifyNode(node), node.getAttributeByName("href"),
                            serialize(node), location));
        }
        if (node.hasAttribute("data")) {
            return new MicrodataProperty(prop(node),
                    new TextMicrodata(MicrodataUtils.stringifyNode(node), node.getAttributeByName("data"),
                            serialize(node), location));
        }
        if (node.hasAttribute("content")) {
            return new MicrodataProperty(prop(node),
                    new TextMicrodata(StringEscapeUtils.unescapeHtml4(node.getAttributeByName("content")),
                            null, serialize(node), location));
        }
        return new MicrodataProperty(prop(node),
                new TextMicrodata(MicrodataUtils.stringifyNode(node), null, serialize(node), location));
    }

    /**
     * @param node element to read
     * @return the raw value of the element's {@code itemprop} attribute
     */
    private String prop(final TagNode node) {
        final String itemprop = node.getAttributeByName("itemprop");
        return itemprop;
    }

    /**
     * @param node element to test
     * @return {@code true} when the element has an {@code itemscope} or {@code itemtype} attribute
     */
    private boolean isEntityStarts(TagNode node) {
        return node.hasAttribute("itemscope") || node.hasAttribute("itemtype");
    }

    /**
     * Flattens an item and all items nested in its properties into one list.
     *
     * @param microdata item to start from
     * @return {@code microdata} first, followed by the flattened nested items in property order
     */
    public List<ComplexMicrodata> getAllComplexMicrodataFromTree(final ComplexMicrodata microdata) {
        final List<ComplexMicrodata> flattened = new LinkedList<ComplexMicrodata>();
        flattened.add(microdata);
        for (final String propertyName : microdata.getPropList()) {
            for (final Microdata value : microdata.getPropAsList(propertyName)) {
                if (!(value instanceof ComplexMicrodata)) {
                    continue;
                }
                final ComplexMicrodata nested = (ComplexMicrodata) value;
                flattened.addAll(getAllComplexMicrodataFromTree(nested));
            }
        }
        return flattened;
    }

    /**
     * @param node element to read
     * @return the text returned by the node's {@code getText()}, as a string
     */
    private String getTextContent(final TagNode node) {
        final CharSequence text = node.getText();
        return text.toString();
    }

    /**
     * Converts a top-level JSON-LD object into a {@link JSONLDEntity}, using its
     * {@code @context} to qualify type names.
     *
     * <p>A string context is parsed as a URI and used as the namespace. When that URI has a
     * host but neither a path nor a fragment, a trailing {@code "/"} is appended first. A
     * string that is not a valid URI means no namespace is applied. An object context is
     * converted on its own and attached to the result.
     *
     * @param obj JSON-LD object
     * @return the converted entity, or {@code null} when {@code @context} is missing or is
     *         neither a string nor an object
     */
    private JSONLDEntity extractLDRoot(final JSONObject obj) {
        final Object context;
        try {
            context = obj.get("@context");
        } catch (JSONException e1) {
            return null;
        }
        if (context instanceof String) {
            try {
                URI contextUri = new URI((String) context);
                // getHost() is null for relative or opaque URIs such as "schema.org";
                // those are used as given instead of failing with a NullPointerException.
                final String host = contextUri.getHost();
                if (host != null && !host.endsWith("/") && isEmpty(contextUri.getPath()) &&
                        contextUri.getFragment() == null) {
                    contextUri = new URI(context + "/");
                }
                return extractLD(obj, contextUri, null);
            } catch (URISyntaxException e) {
                return extractLD(obj, null, null);
            }
        }
        if (context instanceof JSONObject) {
            final JSONLDEntity contextObj = extractLD((JSONObject) context, null, null);
            return extractLD(obj, null, contextObj);
        }
        return null;
    }

    /**
     * Converts a JSON object into a {@link JSONLDEntity}.
     *
     * <p>The entity type is the object's {@code @type}, resolved against {@code namespace}
     * when one is given. Every key that does not start with {@code "@"} becomes a property:
     * nested objects are converted recursively, arrays contribute one value per element, and
     * any other value is stored as text.
     *
     * @param obj        JSON object to convert
     * @param namespace  URI that {@code @type} is resolved against, or {@code null}
     * @param contextObj already converted context stored under {@code @context}, or {@code null}
     * @return the converted entity, never {@code null}
     */
    private JSONLDEntity extractLD(final JSONObject obj, final URI namespace, final JSONLDEntity contextObj) {
        String type;
        try {
            type = obj.getString("@type");
        } catch (JSONException e) {
            type = null;
        }
        final String completeType = qualifyType(type, namespace);
        final Iterator<?> keys = obj.keys();
        try {
            final JSONLDEntity md = new JSONLDEntity(completeType);
            if (contextObj != null) {
                md.addPropValue("@context", contextObj);
            }
            while (keys.hasNext()) {
                final String key = (String) keys.next();
                if (key.startsWith("@")) {
                    continue;
                }
                final Object value = obj.get(key);
                if (value instanceof JSONArray) {
                    final JSONArray array = (JSONArray) value;
                    for (int i = 0; i < array.length(); i++) {
                        final Object element = array.get(i);
                        // Skip JSON nulls and nested arrays; everything else is a value.
                        if (element == JSONObject.NULL || element instanceof JSONArray) {
                            continue;
                        }
                        addLDValue(md, key, element, namespace);
                    }
                } else {
                    addLDValue(md, key, value, namespace);
                }
            }
            return md;
        } catch (JSONException e) {
            //cannot be
            throw new RuntimeException(e);
        }
    }

    /**
     * Resolves a type name against the namespace; the raw name is kept when there is no
     * namespace or the name is not a valid URI reference.
     */
    private static String qualifyType(final String type, final URI namespace) {
        if (type == null || namespace == null) {
            return type;
        }
        try {
            return namespace.resolve(type).toString();
        } catch (IllegalArgumentException e) {
            return type;
        }
    }

    /** Adds one value to {@code md}: nested objects as entities, anything else as text. */
    private void addLDValue(final JSONLDEntity md, final String key, final Object value, final URI namespace)
            throws JSONException {
        if (value instanceof JSONObject) {
            final JSONLDEntity field = extractLD((JSONObject) value, namespace, null);
            md.addPropValue(key, field);
        } else {
            md.addPropValue(key, new TextMicrodata(value.toString(), null, value.toString()));
        }
    }


    /**
     * @return a {@code FinalContext} wrapping the microdata returned by {@link #getInfo()}
     */
    @Override
    public Context<?> nextStep() {
        final List<Microdata> extracted = getInfo();
        return new FinalContext(extracted);
    }
}
