package ru.yandex.webmaster3.core.semantic.semantic_document_parser.rdfa.transformers;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.regex.Pattern;

import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.TreeNode;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.module.SimpleModule;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.NullNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.node.ValueNode;
import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.ResourceRequiredException;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import lombok.extern.slf4j.Slf4j;
import org.cyberneko.html.parsers.SAXParser;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.CompactHtmlSerializer;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.Serializer;
import org.htmlcleaner.TagNode;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Element;
import org.semarglproject.jena.core.sink.AbstractJenaSink;
import org.semarglproject.jena.core.sink.JenaSink;
import org.semarglproject.rdf.ParseException;
import org.semarglproject.rdf.rdfa.RdfaParser;
import org.semarglproject.ri.RIUtils;
import org.semarglproject.source.StreamProcessor;
import org.semarglproject.vocab.RDF;
import org.semarglproject.vocab.RDFa;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;

import ru.yandex.common.util.StringEscapeUtils;
import ru.yandex.common.util.Su;
import ru.yandex.common.util.URLUtils;
import ru.yandex.common.util.collections.MultiMap;
import ru.yandex.common.util.collections.Pair;
import ru.yandex.common.util.functional.Filter;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.location.EntityLocation;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.microdata.MicrodataUtils;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.microdata.transformer.AllByteContext;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.rdfa.data.JSONLDEntity;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.rdfa.data.RDFaComplexProperty;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.rdfa.data.RDFaEntity;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.rdfa.data.RDFaProperty;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.rdfa.data.RDFaValueProperty;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.rdfa.exceptions.JsonCollidingKeywordsRDFaException;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.rdfa.exceptions.JsonParsingRDFaException;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.rdfa.exceptions.RDFaException;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.rdfa.exceptions.UnknownPrefixRDFaException;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.rdfa.exceptions.UnknownVocabException;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.rdfa.exceptions.XmlnsPrefixRDFaException;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.rdfa.jsonld.RDFaEntityJsonLdDeserialilizer;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.serialize.util.APIVersion;

import static ru.yandex.common.util.StringUtils.isEmpty;
import static ru.yandex.common.util.collections.CollectionFactory.pair;

/**
 * Created by IntelliJ IDEA.
 * User: rasifiel
 * Date: 14.08.12
 * Time: 13:22
 */
@Slf4j
public class ExperimentalExtractor {

    // Regex applied with String.matches() to predicate IRIs in visitChildren: accepts "vk:...", "al:..."
    // and http(s) URLs that contain at least one dot.
    public static final String KNOWN_PREFIX = "(vk:.*|al:.*|https?://.*\\..*)";

    // Upper bound on the number of properties an entity may have after rdfa#copy expansion;
    // the constructor throws a RuntimeException when it is exceeded.
    private static final int MAX_PROPERTY_COUNT = 1000;
    // Prefixes reported as XmlnsPrefixRDFaException at the end of the constructor.
    // NOTE(review): nothing in the visible part of the file adds to this set - presumably filled elsewhere.
    private Set<String> xmlnsErrPrefixes = new HashSet<>();

    // Matches the rdfa#copy property IRI with either the http or the https scheme.
    // NOTE(review): the dots are not escaped, so each of them matches any character; the field is also not final.
    private static Pattern RDFA_COPY_PATTERN = Pattern.compile("https?://www.w3.org/ns/rdfa#copy");
    // NOTE(review): not referenced in the visible part of the file; not final.
    private static Pattern PROTOCOL_PREFIX_PATTERN = Pattern.compile("https?://(www.)?.*");
    // Filter that keeps every property whose propId is NOT the rdfa#copy IRI.
    private final static Filter<RDFaProperty>  NOT_COPY_FIELD = new Filter<RDFaProperty>() {
        @Override
        public boolean fits(final RDFaProperty entity) {
            return !RDFA_COPY_PATTERN.matcher(entity.propId).matches();
        }
    };
    // Maps "subject$predicate$object" keys to location strings; filled by semarglRDFaParser and
    // read by visitChildren.
    MultiMap<String,String> locationMap;

    // Open Graph property name -> sub-entity created for it; filled by extractOG and consulted by
    // visitChildren to skip predicates that the Open Graph pass already covered.
    HashMap<String,RDFaEntity> ogEntities = new HashMap<>();

    /**
     * Runs the whole extraction for one document and leaves the outcome in {@code entityList}
     * and {@code exceptions}.
     * <p>
     * Steps, in order: Open Graph extraction, cleaning the markup with HtmlCleaner and walking
     * the resulting tree ({@code visit}, which also handles JSON-LD script blocks), RDFa
     * extraction through semargl, reporting of unresolved prefixes, expansion of
     * {@code rdfa#copy} references and finally reporting of xmlns prefix problems.
     *
     * @param content markup of the document; content recognised by {@code isPureJSON} is wrapped
     *                with {@code addScriptTags} before cleaning
     * @param baseUrl URL of the document, handed to the Open Graph and RDFa parsers
     * @param version API version, forwarded to the Open Graph extraction
     * @throws RuntimeException if the document cannot be pre-processed or cleaned, or if an entity
     *                          exceeds {@code MAX_PROPERTY_COUNT} properties during copy expansion
     */
    private ExperimentalExtractor(final String content, final String baseUrl, APIVersion version) {
        locationMap = new MultiMap<>();

        // EXTRACT OG
        // StandardCharsets.UTF_8 cannot fail, so there is no longer a code path that hands a
        // null stream to the parsers after a swallowed UnsupportedEncodingException.
        extractOG(baseUrl, new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8)), version);

        // EXTRACT RDFA
        // A second, independent stream over the same bytes for the semargl pass below.
        final InputStream rdfaInput = new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8));

        final HtmlCleaner cleaner = new HtmlCleaner(AllByteContext.provider);
        final CleanerProperties props = cleaner.getProperties();
        props.setOmitXmlDeclaration(true);
        props.setNamespacesAware(false);
        props.setPruneTags("style");

        String newDocument = content;
        try {
            newDocument = findNiondexCommentNode(newDocument);
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
        if (isPureJSON(content)) {
            newDocument = addScriptTags(newDocument);
        }

        final TagNode doc;
        try {
            // NOTE(review): getBytes() uses the platform default charset here; left unchanged because
            // the charset HtmlCleaner decodes with is not visible from this code - confirm they agree.
            doc = cleaner.clean(new ByteArrayInputStream(newDocument.getBytes()));
        } catch (IOException e) {
            log.error("Cannot parse",e);
            throw new RuntimeException(e);
        }

        final RDFaEntity rootEntity = getById("", "", EntityLocation.DEFAULT_LOCATION);
        rootEntity.isRoot = true;
        rootEntity.setIdFromUrl(true);
        String vocab = null;
        if (hasVocab(doc)) {
            vocab = extractVocab(doc);
        }
        visit(doc, rootEntity, vocab, null);
        semarglRDFaParser(rdfaInput, baseUrl, rootEntity);
        for (final String prefix : unresolvedPrefixes) {
            exceptions.add(new UnknownPrefixRDFaException(true, rootEntity, prefix));
        }

        // xmlns problems are attached to the root entity, or to the second entity in the list
        // when the root entity is empty and another entity exists.
        RDFaEntity xmlnsErrorEntityHolder = rootEntity;
        if (rootEntity.isEmpty() && entityList.size() > 1) {
            xmlnsErrorEntityHolder = entityList.get(1);
        }

        // Index entities by id so that rdfa#copy values can be resolved to their target entity.
        Map<String,RDFaEntity> entityMap = new HashMap<>();
        for (RDFaEntity e : entityList) {
            if (e.id != null) {
                entityMap.put(e.id, e);
            }
        }
        for (RDFaEntity e : entityList) {
            for (final RDFaProperty prop : e.getProperty(MicrodataUtils.protocolAliases("http://www.w3.org/ns/rdfa#copy"))) {
                if (prop instanceof RDFaValueProperty) {
                    if (entityMap.containsKey(((RDFaValueProperty) prop).getValue())) {
                        e.appendProperties(entityMap.get(((RDFaValueProperty) prop).getValue()).getValuePairs());
                        if (e.getPropertyList().size() > MAX_PROPERTY_COUNT) {
                            throw new RuntimeException("Too large entity");
                        }
                    }
                }
            }
            // The rdfa#copy properties themselves are dropped once they have been expanded.
            e.filterEntity(NOT_COPY_FIELD);
        }

        for (final String xmlnsPrefix : xmlnsErrPrefixes) {
            exceptions.add(new XmlnsPrefixRDFaException(false, xmlnsErrorEntityHolder, xmlnsPrefix));
        }
    }

    /**
     * Parses Open Graph attributes from the stream and appends the resulting root entities to
     * {@code entityList}. Every property seen is also registered in {@code ogEntities}.
     * Any exception raised during the pass is logged as a warning and swallowed, so a failure
     * here never aborts the overall extraction.
     *
     * @param baseUrl URL handed to the OpenGraph parser
     * @param in      stream with the document content
     * @param version API version; for 0.1 and 1.0 values are always attached directly as "@value"
     */
    private void extractOG(String baseUrl, InputStream in, APIVersion version) {
        boolean oldVersions = version.equals(APIVersion.VERSION_0_1) || version.equals(APIVersion.VERSION_1_0);
        RootEntitiesBuilder rootEntitiesBuilder = new RootEntitiesBuilder(oldVersions);
        try {
            OpenGraph og = new OpenGraph(in, baseUrl, true);

            // property name -> true once some longer property name has been nested under it
            Map<String, Boolean> isStructuredProperty = new HashMap<>();
            // property name -> (sub-entity, pending "@value" property); the value is attached after
            // the loop, when it is known whether the property turned out to be structured
            MultiMap<String, Pair<RDFaEntity, RDFaValueProperty>> properties = new MultiMap<>();

            for (OpenGraph.Attribute attribute : og.attributes) {
                // Full property name: namespace schema URI followed by the attribute name.
                String name = attribute.element.getNamespace().getSchemaURI() + attribute.name;
                String value = attribute.element.getContent();
                String nameGroup = attribute.element.getNamespace().getNamespaceGroup();

                if (!attribute.name.contains(":")) {
                    // Attribute name without ':' - always becomes a new top-level property.

                    RDFaEntity subentity = new RDFaEntity("", "");
                    properties.append(name,
                            new Pair<>(subentity, new RDFaValueProperty("@value", value, null, value, attribute.location)));
                    if (!isStructuredProperty.containsKey(name)) {
                        isStructuredProperty.put(name, false);
                    }

                    rootEntitiesBuilder.appendProperty(nameGroup, new RDFaComplexProperty(name, subentity, attribute.location));

                    // The latest occurrence replaces any earlier sub-entity registered under this name.
                    ogEntities.put(name, subentity);
                } else {

                    boolean found = false;

                    if (ogEntities.containsKey(name)) {
                        // Repeated occurrence of an already known name: add another top-level property.

                        RDFaEntity subentity = new RDFaEntity("", "");
                        properties.append(name,
                                new Pair<>(subentity, new RDFaValueProperty("@value", value, null, value, attribute.location)));
                        if (!isStructuredProperty.containsKey(name)) {
                            isStructuredProperty.put(name, false);
                        }

                        rootEntitiesBuilder.appendProperty(nameGroup, new RDFaComplexProperty(name, subentity, attribute.location));

                        ogEntities.put(name, subentity);
                        found = true;
                    }


                    // Try every proper ':'-separated prefix of the full name that has at least two
                    // parts, longest first, and nest the value under each registered one.
                    // NOTE(review): the split is done on the full name including the schema URI, which
                    // presumably contains a ':' itself - that would explain the lower bound of two parts.
                    String[] parts = name.split(":");
                    String param;
                    for (int i = parts.length - 1; i > 1; i--) {
                        param = Su.join(Arrays.copyOfRange(parts, 0, i), ":");

                        if (ogEntities.containsKey(param)) {

                            RDFaEntity currentEntity = ogEntities.get(param);
                            RDFaEntity subentity = new RDFaEntity("", "");
                            subentity.addProperty(new RDFaValueProperty("@value", value, null, value, attribute.location));
                            currentEntity.appendProperty(new RDFaComplexProperty(name, subentity, attribute.location));
                            found = true;
                            // The parent now has nested properties.
                            isStructuredProperty.put(param, true);
                        }
                    }

                    if (!found) {
                        // Neither the name nor any of its prefixes is known: new top-level property.
                        RDFaEntity subentity = new RDFaEntity("", "");
                        properties.append(name,
                                new Pair<>(subentity, new RDFaValueProperty("@value", value, null, value, attribute.location)));
                        if (!isStructuredProperty.containsKey(name)) {
                            isStructuredProperty.put(name, false);
                        }

                        rootEntitiesBuilder.appendProperty(nameGroup, new RDFaComplexProperty(name, subentity, attribute.location));

                        ogEntities.put(name, subentity);
                    }
                }


            }
            // Attach the pending "@value" properties: directly for old API versions and for
            // non-structured properties, otherwise wrapped in a "_:content" complex property.
            for (Map.Entry<String, Boolean> entry : isStructuredProperty.entrySet()) {
                if (oldVersions || !entry.getValue()) {
                    for (Pair<RDFaEntity, RDFaValueProperty> entityProperty : properties.get(entry.getKey())) {
                        entityProperty.getFirst().addProperty(entityProperty.getSecond());
                    }
                } else {
                    for (Pair<RDFaEntity, RDFaValueProperty> entityProperty : properties.get(entry.getKey())) {
                        RDFaEntity inner = new RDFaEntity("", "");
                        inner.addProperty(entityProperty.getSecond());
                        entityProperty.getFirst()
                                .addProperty(new RDFaComplexProperty("_:content", inner));
                    }
                }
            }
            entityList.addAll(rootEntitiesBuilder.getEntities());


        } catch (Exception e) {
            // Deliberately best-effort: Open Graph problems must not break the other extraction passes.
            log.warn("opengraph parsing error, skipping...", e);
        }
    }

    /**
     * Parses the document with the semargl RDFa parser (through a NekoHTML SAX reader) into a Jena
     * model, records statement locations in {@code locationMap} and converts the remaining
     * statements into entities through {@code visitChildren}.
     *
     * @param in             stream with the document content
     * @param url            base URL of the document; an empty value is replaced with "http://localhost/"
     * @param metarootEntity entity that a parse failure is reported against
     */
    private void semarglRDFaParser(InputStream in, String url, RDFaEntity metarootEntity)  {
        if(url.isEmpty()){
            url = "http://localhost/";
        }
        Model model = ModelFactory.createDefaultModel();
        AbstractJenaSink tripleSink = (AbstractJenaSink) JenaSink.connect(model);
        RdfaParser rdfaParser = (RdfaParser) RdfaParser.connect(tripleSink);
        XMLReader htmlReader = new SAXParser();

        try {
            htmlReader.setFeature("http://cyberneko.org/html/features/override-namespaces", false) ;

            htmlReader.setFeature("http://cyberneko.org/html/features/balance-tags", true);
            htmlReader.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment",true);
            htmlReader.setFeature("http://cyberneko.org/html/features/scanner/allow-selfclosing-tags", true);
            htmlReader.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
        } catch (SAXNotRecognizedException | SAXNotSupportedException e) {
            // A reader that rejects one of the settings is still used; settings after the rejected one are skipped.
            log.error("{}", e.getMessage(), e);
        }


        StreamProcessor streamProcessor = new StreamProcessor(rdfaParser);
        streamProcessor.setProperty(StreamProcessor.XML_READER_PROPERTY, htmlReader);



        try {
            streamProcessor.process(in, url);
        } catch (ParseException e) {
            // Reported under the "json_parsing" key; whatever already reached the model is still processed below.
            exceptions.add(new RDFaException(true, metarootEntity, e.getMessage()) {
                @Override
                public String getKey() {
                    return "json_parsing";
                }
            });
        }

        StmtIterator iterator = model.listStatements();

        // subject -> all non-location statements with that subject
        MultiMap<String,Statement> propertyHashMap = new MultiMap<>();

        // Starts with every non-location statement; statements of nested subjects are removed further below.
        HashSet<Statement> roots = new HashSet<>();
        while (iterator.hasNext()) {
            Statement stmt = iterator.nextStatement();

            Resource subject = stmt.getSubject();
            Property predicate = stmt.getPredicate();
            RDFNode object = stmt.getObject();

//            System.out.println(subject.toString() +" "+predicate.toString()+" "+object.toString());
            if(predicate.toString().equals(RDF.DOCUMENT_LOCATION)){
                // The object string is read as "<location>$<predicate>$<object>": it is cut at the
                // first '$' and at the first '$' after that one.
                // NOTE(review): if the string holds no '$', first$ is -1 and the substring calls below throw.
                int first$ = object.toString().indexOf("$");
                String objectString = object.toString();
                String last = objectString.substring(first$+1);
                int second$ = last.indexOf("$");
                String secondPart = last.substring(second$+1);
                if(secondPart.startsWith("_:")) {
                    try {
                        // Blank-node labels are replaced with the hash supplied by the sink.
                        secondPart = tripleSink.getBNodeHash(secondPart);
                    } catch (NullPointerException e) {
                        // NOTE(review): the NPE is silently ignored and the original label is kept.

                    }
                }
                // key = "<subject>$<predicate>$<object>", the same shape visitChildren builds for its lookup
                String key = subject.toString()+object.toString().substring(first$,second$+first$+1)+"$"+secondPart;
                String value = object.toString().substring(0,first$);
                locationMap.append(key,value);
            }
            else {
                propertyHashMap.append(subject.toString(), stmt);
                roots.add(stmt);
            }
        }

        // Statements whose subject is the document URL; they are moved to the front of the root list.
        LinkedList<Statement> heads = new LinkedList<>();
        for(String resource : propertyHashMap.keySet()){

            if(resource.equals(url)) {
                for (Statement pred : propertyHashMap.get(resource)){
                        heads.addFirst(pred);
                }
            } else {
                for (Statement pred : propertyHashMap.get(resource)) {
                    try {
                        // A subject that is the object of some statement is nested, not a root.
                        if (model.contains(null, null, pred.getSubject())) {
                            roots.remove(pred);
                        }
                    } catch (ResourceRequiredException ignored) {

                    }
                }
            }
        }
        // NOTE(review): heads are also still contained in roots, so they occur twice in this list;
        // the visitedNodes guard in visitChildren makes the repeated subject a no-op.
        LinkedList<Statement>  listOfRoots= new LinkedList<>();
        listOfRoots.addAll(roots);
        for(Statement head : heads){
            listOfRoots.addFirst(head);
        }

        // NOTE(review): this list is filled but not read afterwards within this method.
        List<RDFaEntity> rootEntities = new ArrayList<>();
        for(Statement root: listOfRoots){
            RDFaEntity rootEntity = null;
            if (root.getSubject().isURIResource()) {
                rootEntity = new RDFaEntity("", root.getSubject().toString());

            } else {
                // Non-URI subjects get the document URL as their id.
                rootEntity = new RDFaEntity("", url);
                rootEntity.setIdFromUrl(true);
            }
            rootEntity.isRoot = true;
            if("http://localhost/".equals(rootEntity.id)){
                rootEntity.setIdFromUrl(true);
            }


            // NOTE(review): key is computed but never used.
            String key = root.getSubject().toString()+"$"+root.getPredicate().toString()+"$"+root.getObject().toString();



            visitChildren(root.getSubject().toString(), propertyHashMap, rootEntity);

            rootEntities.add(rootEntity);
        }

    }

    // Subjects already processed by visitChildren; each subject is expanded at most once.
    private HashSet<String> visitedNodes = new HashSet<>();

    /**
     * Converts all statements of one subject into properties of {@code rootEntity}, recursing into
     * objects that have statements of their own, and finally adds the entity to {@code entityList}
     * unless its type is one of the RDFa processor types rejected by {@code isOmitType}
     * (an entity with recorded problems is always added).
     * Does nothing if the subject has been processed before.
     *
     * @param localName       string form of the subject whose statements are processed
     * @param propertyHashMap subject -> statements, as built by semarglRDFaParser
     * @param rootEntity      entity that receives the properties
     */
    private void visitChildren(String localName, MultiMap<String, Statement> propertyHashMap, RDFaEntity rootEntity) {

        if(!visitedNodes.contains(localName)) {
            visitedNodes.add(localName);
            boolean hasExceptions = false;
            for (Statement statement : propertyHashMap.get(localName)) {
                // Same "<subject>$<predicate>$<object>" key that semarglRDFaParser stores locations under.
                String locationKey = statement.getSubject().toString()+"$"+statement.getPredicate().toString()+"$"+statement.getObject().toString();
                EntityLocation statementLocation = EntityLocation.DEFAULT_LOCATION;
                if(locationMap.containsKey(locationKey)) {
                    // Only the first recorded location is used.
                    statementLocation = new EntityLocation(locationMap.get(locationKey).get(0));
                }
                RDFNode obj = statement.getObject();
                Property predicate = statement.getPredicate();
                String pred = predicate.toString();

                // A predicate with a ':' that does not match KNOWN_PREFIX is reported as an unknown
                // prefix (the text before the first ':'), but processing of the statement continues.
                if (pred.contains(":") && !pred.matches(KNOWN_PREFIX)) {
                    exceptions.add(new UnknownPrefixRDFaException(true, rootEntity, pred.split(":")[0], statementLocation));
                    hasExceptions = true;
                }
                String ogName = pred;
                if (pred.matches(KNOWN_PREFIX)) {
                    // Skip the statement when the predicate, or any of its ':'-separated prefixes with
                    // at least two parts, was registered by the Open Graph pass.
                    String[] parts = pred.split(":");
                    boolean known = false;
                    for (int i = parts.length; i > 1; i--) {
                        ogName = Su.join(Arrays.copyOfRange(parts, 0, i), ":");
                        if (ogEntities.containsKey(ogName)) {
                            known = true;
                        }
                    }
                    if (known) {
                        continue;
                    }
                }

                if (obj.isResource()) {

                    // Candidate nested entity; only used when the object has statements of its own.
                    RDFaEntity child;
                    if (obj.isURIResource()) {
                        child = new RDFaEntity("", obj.toString());
                    } else {
                        child = new RDFaEntity("", null);
                    }


                    if (isTypeAttr(pred)) {
                        // rdf:type sets the entity type; the entity location is taken from the first
                        // type statement seen while it is still the default one.
                        String type = obj.toString();
                        rootEntity.setType(type);
                        if(rootEntity.getLocation().equals(EntityLocation.DEFAULT_LOCATION))
                        rootEntity.setLocation(statementLocation);
                        continue;
                    }
                    if (isVocabAttr(pred)) {
                        // The vocabulary statement only yields a problem report when its value is not an IRI.
                        if (!RIUtils.isIri(obj.toString())) {
                            exceptions.add(new UnknownVocabException(true, rootEntity, obj.toString(), statementLocation));
                            hasExceptions = true;
                        }
                        continue;
                    }
                    if (isContexOrXHTMLVocabtAttr(pred)) {
                        continue;
                    }
                    if (propertyHashMap.containsKey(obj.toString())) {
                        rootEntity.appendProperty(new RDFaComplexProperty(pred, child));
                        visitChildren(obj.toString(), propertyHashMap, child);
                    } else if (!isContexOrXHTMLVocabtAttr(pred)) {
                        // NOTE(review): this condition is always true here because of the 'continue' above.
                        if (obj.isURIResource()) {
                            rootEntity.appendProperty(new RDFaValueProperty(pred, null, extractValue(obj), extractValue(obj), statementLocation));

                        } else if (obj.isLiteral()) {
                            // NOTE(review): looks unreachable - the enclosing branch requires obj.isResource().
                            rootEntity.appendProperty(new RDFaValueProperty(pred, extractValue(obj), null, extractValue(obj), statementLocation));

                        }
                    }
                } else if (obj.isURIResource()) {
                    // NOTE(review): looks unreachable - a URI resource should already satisfy isResource() above.
                    rootEntity.appendProperty(new RDFaValueProperty(pred, null, extractValue(obj), extractValue(obj), statementLocation));

                } else if (obj.isLiteral()) {
                    if (isContexOrXHTMLVocabtAttr(pred)) {
                        continue;
                    }
                    rootEntity.appendProperty(new RDFaValueProperty(pred, extractValue(obj), null, extractValue(obj), statementLocation));

                }
            }

            if (!isOmitType(rootEntity.type) || hasExceptions) {
                entityList.add(rootEntity);
            }

        }
    }

    /**
     * Tells whether the predicate is the RDFa "uses vocabulary" property.
     *
     * @param pred predicate IRI, may be null
     * @return true only when {@code pred} equals {@code RDFa.USES_VOCABULARY}
     */
    private boolean isVocabAttr(String pred) {
        if (pred == null) {
            return false;
        }
        return pred.equals(RDFa.USES_VOCABULARY);
    }

    private static final String XHTML_VOCAB_PREFIX = "http://www.w3.org/1999/xhtml/vocab#";
    private static final Set<String> XHTML_VOCABS = new HashSet<>();
    static {
        XHTML_VOCABS.add(XHTML_VOCAB_PREFIX + "stylesheet");
        XHTML_VOCABS.add(XHTML_VOCAB_PREFIX + "icon");
        XHTML_VOCABS.add(XHTML_VOCAB_PREFIX + "prefetch");
    }

    /**
     * Tells whether the predicate is the RDFa context property or one of the XHTML vocabulary
     * terms collected in {@code XHTML_VOCABS}.
     *
     * @param pred predicate IRI
     * @return true when the predicate belongs to either group
     */
    private boolean isContexOrXHTMLVocabtAttr(String pred) {
        if (RDFa.CONTEXT.equals(pred)) {
            return true;
        }
        return XHTML_VOCABS.contains(pred);
    }

    /**
     * Returns the textual value of an RDF node.
     *
     * @param obj node to read
     * @return {@code getString()} for a {@link Literal}, otherwise the node's {@code toString()}
     */
    private String extractValue(RDFNode obj) {
        return obj instanceof Literal
                ? ((Literal) obj).getString()
                : obj.toString();
    }


    /**
     * Tells whether the type is one of the RDFa processor types (unresolved term/CURIE/prefix,
     * context, warning, error, prefix redefinition) for which visitChildren does not emit an entity.
     *
     * @param type entity type; must not be null
     * @return true when the type is one of the listed RDFa processor types
     */
    private boolean isOmitType(String type){
        // RDFa.USES_VOCABULARY is not part of this list.
        final String[] omittedTypes = {
                RDFa.UNRESOLVED_TERM,
                RDFa.CONTEXT,
                RDFa.WARNING,
                RDFa.PREFIX_REDEFINITION,
                RDFa.UNRESOLVED_CURIE,
                RDFa.ERROR,
                RDFa.UNRESOLVED_PREFIX
        };
        for (final String omittedType : omittedTypes) {
            if (type.equals(omittedType)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether the predicate is {@code rdf:type}.
     *
     * @param s predicate IRI, may be null
     * @return true only when {@code s} equals {@code RDF.TYPE}
     */
    private static boolean isTypeAttr(String s) {
        if (s == null) {
            return false;
        }
        return s.equals(RDF.TYPE);
    }


    // Entities created through getById, keyed by their (non-null) id.
    final HashMap<String, RDFaEntity> entities = new HashMap<>();
    // Every entity produced by the extraction passes, in insertion order; returned by getResults.
    final List<RDFaEntity> entityList = new LinkedList<>();
    // Problems collected during extraction; returned by getResults as a list copy.
    final Set<RDFaException> exceptions = new HashSet<>();

    /**
     * Entry point: runs the extraction over a document and returns what was found.
     *
     * @param content markup of the document
     * @param baseUrl URL of the document
     * @param version API version that controls the output layout
     * @return pair of (unmodifiable view of the extracted entities, unmodifiable copy of the collected problems)
     */
    public static Pair<List<RDFaEntity>, List<RDFaException>> getResults(final String content, final String baseUrl, APIVersion version) {
        final ExperimentalExtractor finished = new ExperimentalExtractor(content, baseUrl, version);
        final List<RDFaEntity> foundEntities = Collections.unmodifiableList(finished.entityList);
        final List<RDFaException> foundProblems =
                Collections.unmodifiableList(new ArrayList<RDFaException>(finished.exceptions));
        return pair(foundEntities, foundProblems);
    }



    /**
     * Returns the entity registered under the id, creating and registering a new one when there
     * is none. A new entity is always appended to {@code entityList}; it is indexed in
     * {@code entities} only when the id is not null.
     *
     * @param id   entity id, may be null
     * @param type type given to a newly created entity
     * @return the existing or the newly created entity
     */
    private RDFaEntity getById(final String id, final String type) {
        if (!entities.containsKey(id)) {
            final RDFaEntity created = new RDFaEntity(type, id);
            if (id != null) {
                entities.put(id, created);
            }
            entityList.add(created);
            return created;
        }
        return entities.get(id);
    }

    /**
     * Same lookup-or-create as the two-argument overload, but a newly created entity also
     * receives the given location. The location is ignored when the entity already exists.
     *
     * @param id       entity id, may be null
     * @param type     type given to a newly created entity
     * @param location location given to a newly created entity
     * @return the existing or the newly created entity
     */
    private RDFaEntity getById(final String id, final String type, EntityLocation location) {
        if (!entities.containsKey(id)) {
            final RDFaEntity created = new RDFaEntity(type, id, location);
            if (id != null) {
                entities.put(id, created);
            }
            entityList.add(created);
            return created;
        }
        return entities.get(id);
    }

    // Vocabulary in effect for the node most recently entered by visit(); overwritten on every visit() call.
    private String currentVocab = null;
    // One list of prefix declarations per enclosing element that declared any: pushed by
    // processPrefixes, popped by visit() when it leaves the element.
    private Stack<List<Prefix2URI>> prefixStack = new Stack<>();

    /**
     * Walks the cleaned HTML tree depth-first. For every node it updates the vocabulary and
     * {@code rel} context, extracts JSON-LD from {@code <script type="application/ld+json">}
     * elements and maintains the prefix stack.
     * <p>
     * JSON-LD failures never abort the walk: they are turned into entries of {@code exceptions}.
     *
     * @param node          node to process
     * @param currentEntity entity that is current for this node
     * @param vocab         vocabulary inherited from the parent, may be null
     * @param rel           resolved {@code rel} value inherited from the parent, may be null
     */
    private void visit(final TagNode node, final RDFaEntity currentEntity, String vocab, String rel) {
        if (hasVocab(node)) {
            vocab = extractVocab(node, currentEntity);
        }
        currentVocab = vocab;
        if (node.hasAttribute("rel") && !Su.isEmpty(node.getAttributeByName("rel")) && isCURIE(node.getAttributeByName("rel"))) {
            rel = resolveIds(node.getAttributeByName("rel"));
        }

        if ("script".equals(node.getName()) && "application/ld+json".equals(node.getAttributeByName("type"))) {
            String jsonString = node.getText().toString();
            try {
                extractLD("True".equals(node.getAttributeByName("fake")), jsonString, node);
            } catch (JsonProcessingException e) {
                // Covers JsonParseException as well (it is a subclass and was handled identically).
                // Not every JsonProcessingException carries a location; without one no code sample
                // can be built, so the problem is reported the same way as a plain I/O failure.
                if (e.getLocation() == null) {
                    exceptions.add(new JsonParsingRDFaException(true, null, ""));
                } else {
                    jsonldErrorProcess(jsonString, (int) e.getLocation().getCharOffset());
                }
            } catch (IOException e) {
                exceptions.add(new JsonParsingRDFaException(true, null, ""));
            }
        }

        final boolean hasPrefixes = processPrefixes(node);
        if (isPropertyNode(node)) {
            // Children of a property node are visited only when the node does not start an entity itself.
            if (!isStartingNode(node)) {
                for (final TagNode child : node.getChildTags()) {
                    visit(child, currentEntity, vocab, null);
                }
            }
        } else if (isStartingNode(node)) {
            // The subtree is walked with a fresh entity and a fresh unresolved-prefix set; the
            // previous set is restored afterwards. The fresh entity is not added to entityList here.
            final Set<String> savedPrefixes = unresolvedPrefixes;
            unresolvedPrefixes = new HashSet<>();
            final RDFaEntity entity = new RDFaEntity("","", new EntityLocation(node.getRow(),node.getCol()));
            for (final TagNode child : node.getChildTagList()) {
                visit(child, entity, vocab, rel);
            }
            unresolvedPrefixes = savedPrefixes;
        } else {
            for (final TagNode child : node.getChildTags()) {
                visit(child, currentEntity, vocab, rel);
            }
        }
        if (hasPrefixes) {
            // Prefixes declared on this node go out of scope together with the node.
            prefixStack.pop();
        }
    }

    /**
     * Parses a JSON-LD text (a single object or an array of objects) into entities and appends
     * them to {@code entityList} as roots. Colliding-keyword problems collected by the
     * deserializer are added to {@code exceptions} after a successful parse.
     * Empty input (no JSON token at all) is accepted and yields nothing.
     *
     * @param fake       when true, only entities that carry an "@context" property are accepted
     * @param jsonString JSON-LD text to parse
     * @param node       script node the text came from, handed to the deserializer
     * @throws JsonParseException if the text starts with anything but an object or an array
     * @throws IOException        on any other JSON parsing or mapping failure
     */
    public void extractLD(final boolean fake, String jsonString, TagNode node) throws IOException {
        List<JsonCollidingKeywordsRDFaException> duplicateFieldsExceptions = new ArrayList<>();

        // The mapper cannot be shared between calls: its deserializer is bound to this call's
        // problem list and node.
        ObjectMapper mapper = new ObjectMapper()
                .registerModule(new SimpleModule().addDeserializer(RDFaEntity.class,
                        new RDFaEntityJsonLdDeserialilizer(duplicateFieldsExceptions, node)));

        // try-with-resources: the parser is now closed on every path, including failures.
        try (JsonParser parser = mapper.getFactory().createParser(jsonString)) {
            JsonToken token = parser.nextToken();
            if (token == null) {
                return;
            }
            switch (token) {
                case START_ARRAY:
                    List<RDFaEntity> parsedEntities = parser.readValueAs(new TypeReference<List<RDFaEntity>>() {});
                    for (RDFaEntity entity : parsedEntities) {
                        registerJsonLdRoot(fake, entity);
                    }
                    break;
                case START_OBJECT:
                    registerJsonLdRoot(fake, parser.readValueAs(RDFaEntity.class));
                    break;
                default:
                    throw new JsonParseException("Unexpected token: " + parser.getCurrentToken(), parser.getCurrentLocation());
            }
        }

        exceptions.addAll(duplicateFieldsExceptions);
    }

    /**
     * Marks a parsed JSON-LD entity as root and appends it to {@code entityList}; in fake mode
     * only entities with an "@context" property are accepted. Null entities (for example a
     * {@code null} element of a JSON array) are skipped instead of causing a NullPointerException.
     */
    private void registerJsonLdRoot(final boolean fake, final RDFaEntity entity) {
        if (entity == null) {
            return;
        }
        if (fake && !entityContainsContext(entity)) {
            return;
        }
        entity.isRoot = true;
        entityList.add(entity);
    }

    /**
     * Converts an already parsed JSON tree into entities: one entity per element when the tree
     * is an array, otherwise a single entity for the tree itself.
     *
     * @param rr parsed JSON tree
     * @return list with the converted entities, in document order
     */
    public static List<RDFaEntity> extractAllLD(final TreeNode rr) {
        final List<RDFaEntity> converted = new ArrayList<>();
        if (!rr.isArray()) {
            converted.add(extractLD(rr));
            return converted;
        }
        for (final JsonNode element : (ArrayNode) rr) {
            converted.add(extractLD(element));
        }
        return converted;
    }

    /**
     * Tells whether the entity has an "@context" property.
     *
     * @param entity entity to inspect
     * @return true when "@context" is among the entity's property names
     */
    private boolean entityContainsContext(RDFaEntity entity) {
        return entity.getPropertyList().contains("@context");
    }

    /**
     * Records a JSON-LD parsing problem together with an HTML snippet showing up to ten characters
     * on either side of the offending character, which is underlined. The snippet is XML-escaped,
     * so the recorded exception is flagged as already escaped.
     * <p>
     * The offset is clamped into the string on both sides: the parser may report an offset past
     * the end or a negative one when the position is unknown. For an empty string the snippet is
     * empty.
     *
     * @param jsonString  JSON text that failed to parse
     * @param errorOffset character offset of the error as reported by the parser
     */
    private void jsonldErrorProcess(final String jsonString, int errorOffset) {
        String codeSample = "";
        if (!jsonString.isEmpty()) {
            errorOffset = Math.max(0, Math.min(errorOffset, jsonString.length() - 1));
            int min = Math.max(errorOffset - 10, 0);
            int max = Math.min(errorOffset + 11, jsonString.length());

            codeSample =
                    String.format("%s%s<span style=\"text-decoration: underline;\">%s</span>%s%s", min > 0 ? "..." : "",
                            StringEscapeUtils.escapeXml(jsonString.substring(min, errorOffset)),
                            StringEscapeUtils.escapeXml(jsonString.charAt(errorOffset) + ""),
                            StringEscapeUtils.escapeXml(jsonString.substring(errorOffset + 1, max)),
                            max < jsonString.length() ? "..." : "").trim();
        }
        JsonParsingRDFaException ex = new JsonParsingRDFaException(true, null, codeSample, EntityLocation.DEFAULT_LOCATION);
        ex.isEscapedMessage = true;
        exceptions.add(ex);
    }

    // Shape of a CURIE prefix as checked by isCURIE: an ASCII letter or '_' followed by any number
    // of ASCII letters, digits, '.', '-', '_' or the listed Unicode characters.
    // NOTE(review): the pattern is compiled without the COMMENTS flag, so the tab escape and the
    // spaces around '|' from U+0300 onwards are literal; those alternatives only match when the
    // character is surrounded by that whitespace, which is probably unintended.
    private static final Pattern CURIE_PREFIX = Pattern.compile(
            "([a-zA-Z]|_)([a-zA-Z0-9\\.\\-_]|\u00B7|\u02D0|\u02D1|\u0387|\u0640|\u0E46|\u0EC6|\u3005|[\u3031-\u3035]|\u309D|\u309E|[\u30FC-\u30FE]|\t[\u0300-\u0345] | [\u0360-\u0361] | [\u0483-\u0486] | [\u0591-\u05A1] | [\u05A3-\u05B9] | [\u05BB-\u05BD] | \u05BF | [\u05C1-\u05C2] | \u05C4 | [\u064B-\u0652] | \u0670 | [\u06D6-\u06DC] | [\u06DD-\u06DF] | [\u06E0-\u06E4] | [\u06E7-\u06E8] | [\u06EA-\u06ED] | [\u0901-\u0903] | \u093C | [\u093E-\u094C] | \u094D | [\u0951-\u0954] | [\u0962-\u0963] | [\u0981-\u0983] | \u09BC | \u09BE | \u09BF | [\u09C0-\u09C4] | [\u09C7-\u09C8] | [\u09CB-\u09CD] | \u09D7 | [\u09E2-\u09E3] | \u0A02 | \u0A3C | \u0A3E | \u0A3F | [\u0A40-\u0A42] | [\u0A47-\u0A48] | [\u0A4B-\u0A4D] | [\u0A70-\u0A71] | [\u0A81-\u0A83] | \u0ABC | [\u0ABE-\u0AC5] | [\u0AC7-\u0AC9] | [\u0ACB-\u0ACD] | [\u0B01-\u0B03] | \u0B3C | [\u0B3E-\u0B43] | [\u0B47-\u0B48] | [\u0B4B-\u0B4D] | [\u0B56-\u0B57] | [\u0B82-\u0B83] | [\u0BBE-\u0BC2] | [\u0BC6-\u0BC8] | [\u0BCA-\u0BCD] | \u0BD7 | [\u0C01-\u0C03] | [\u0C3E-\u0C44] | [\u0C46-\u0C48] | [\u0C4A-\u0C4D] | [\u0C55-\u0C56] | [\u0C82-\u0C83] | [\u0CBE-\u0CC4] | [\u0CC6-\u0CC8] | [\u0CCA-\u0CCD] | [\u0CD5-\u0CD6] | [\u0D02-\u0D03] | [\u0D3E-\u0D43] | [\u0D46-\u0D48] | [\u0D4A-\u0D4D] | \u0D57 | \u0E31 | [\u0E34-\u0E3A] | [\u0E47-\u0E4E] | \u0EB1 | [\u0EB4-\u0EB9] | [\u0EBB-\u0EBC] | [\u0EC8-\u0ECD] | [\u0F18-\u0F19] | \u0F35 | \u0F37 | \u0F39 | \u0F3E | \u0F3F | [\u0F71-\u0F84] | [\u0F86-\u0F8B] | [\u0F90-\u0F95] | \u0F97 | [\u0F99-\u0FAD] | [\u0FB1-\u0FB7] | \u0FB9 | [\u20D0-\u20DC] | \u20E1 | [\u302A-\u302F] | \u3099 | \u309A )*");

    /**
     * Tells whether {@code rel} has the shape of a CURIE: it must contain a colon and the text
     * before the first colon must fully match {@link #CURIE_PREFIX}.
     *
     * @param rel attribute value to inspect
     * @return {@code true} if a colon is present and the part before it is a valid prefix
     */
    private boolean isCURIE(final String rel) {
        final int colonAt = rel.indexOf(':');
        if (colonAt < 0) {
            return false;
        }
        final String candidatePrefix = rel.substring(0, colonAt);
        return CURIE_PREFIX.matcher(candidatePrefix).matches();
    }

    /** Name of the attribute whose value is split into prefix/URI pairs by {@code processPrefixes}. */
    private static final String PREFIX_ATTR = "prefix";



    /**
     * A single prefix declaration: {@code first} holds the prefix and {@code second} the URI
     * it is bound to.
     */
    public static class Prefix2URI extends Pair<String, String> {

        /** {@code true} when the pair was created from an {@code xmlns:*} attribute. */
        public final boolean isXmlns;

        /**
         * Creates a mapping that is not marked as an {@code xmlns} declaration.
         *
         * @param first  the prefix
         * @param second the URI bound to the prefix
         */
        public Prefix2URI(String first, String second) {
            this(first, second, false);
        }

        /**
         * Creates a mapping, recording explicitly whether it is an {@code xmlns} declaration.
         */
        private Prefix2URI(final String first, final String second, final boolean fromXmlns) {
            super(first, second);
            this.isXmlns = fromXmlns;
        }
    }

    /**
     * Collects the prefix declarations of a jsoup element - those listed in its
     * {@code prefix} attribute followed by every {@code xmlns:*} attribute - and pushes them
     * onto {@code prefixStack} as one frame.
     *
     * @param node element to inspect
     * @return {@code true} if at least one declaration was found and a frame was pushed;
     *         {@code false} if nothing was pushed
     */
    private boolean processPrefixes(final Element node) {
        final List<Prefix2URI> declared;
        if (node.hasAttr(PREFIX_ATTR)) {
            declared = splitPrefixes(node.attr(PREFIX_ATTR));
        } else {
            declared = new LinkedList<>();
        }

        for (final Attribute attribute : node.attributes()) {
            final String attrName = attribute.getKey();
            if (!attrName.startsWith("xmlns:")) {
                continue;
            }
            // Drop the 6-character "xmlns:" marker to obtain the prefix itself.
            declared.add(new Prefix2URI(attrName.substring(6), attribute.getValue(), true));
        }

        if (declared.isEmpty()) {
            return false;
        }
        prefixStack.push(declared);
        return true;
    }

    /**
     * Collects the prefix declarations of an HtmlCleaner node - those listed in its
     * {@code prefix} attribute followed by every {@code xmlns:*} attribute - and pushes them
     * onto {@code prefixStack} as one frame.
     *
     * @param node node to inspect
     * @return {@code true} if at least one declaration was found and a frame was pushed;
     *         {@code false} if nothing was pushed
     */
    private boolean processPrefixes(final TagNode node) {
        final List<Prefix2URI> declared;
        if (node.hasAttribute(PREFIX_ATTR)) {
            declared = splitPrefixes(node.getAttributeByName(PREFIX_ATTR));
        } else {
            declared = new LinkedList<>();
        }

        // Read the attribute map once and walk its entries instead of looking each key up again.
        final Map<String, String> attributes = node.getAttributes();
        for (final Map.Entry<String, String> attribute : attributes.entrySet()) {
            final String attrName = attribute.getKey();
            if (!attrName.startsWith("xmlns:")) {
                continue;
            }
            // Drop the 6-character "xmlns:" marker to obtain the prefix itself.
            declared.add(new Prefix2URI(attrName.substring(6), attribute.getValue(), true));
        }

        if (declared.isEmpty()) {
            return false;
        }
        prefixStack.push(declared);
        return true;
    }


    /** Name of the attribute read by the {@code extractVocab} / {@code hasVocab} helpers. */
    private static final String VOCAB_ATTR = "vocab";



    /**
     * Reads the raw {@code vocab} attribute of a jsoup element, without any validation.
     *
     * @param node element to read from
     * @return whatever {@code node.attr} yields for the {@code vocab} attribute
     */
    private String extractVocab(final Element node) {
        final String vocab = node.attr(VOCAB_ATTR);
        return vocab;
    }

    /**
     * Reads the raw {@code vocab} attribute of an HtmlCleaner node, without any validation.
     *
     * @param node node to read from
     * @return whatever {@code node.getAttributeByName} yields for the {@code vocab} attribute
     */
    private String extractVocab(final TagNode node) {
        final String vocab = node.getAttributeByName(VOCAB_ATTR);
        return vocab;
    }

    /**
     * Reads the {@code vocab} attribute of a jsoup element and reduces it to a single token.
     * A value containing a space character is reported through {@code exceptions} as an
     * {@link UnknownVocabException}, and the first non-empty whitespace-separated token is
     * used instead.
     *
     * @param node   element to read from
     * @param entity entity attached to the reported exception
     * @return the attribute value, or its first non-empty token when it contains a space;
     *         the untouched value if no non-empty token exists
     */
    private String extractVocab(final Element node, RDFaEntity entity) {
        final String rawVocab = node.attr(VOCAB_ATTR);
        if (rawVocab.indexOf(' ') < 0) {
            // No space at all: the value is already a single token.
            return rawVocab;
        }
        exceptions.add(new UnknownVocabException(false, entity, rawVocab));
        for (final String token : rawVocab.split("\\s")) {
            if (!token.isEmpty()) {
                return token;
            }
        }
        return rawVocab;
    }
    /**
     * Reads the {@code vocab} attribute of an HtmlCleaner node and reduces it to a single token.
     * A value containing a space character is reported through {@code exceptions} as an
     * {@link UnknownVocabException}, and the first non-empty whitespace-separated token is
     * used instead.
     *
     * @param node   node to read from
     * @param entity entity attached to the reported exception
     * @return {@code null} when the node has no {@code vocab} attribute; otherwise the attribute
     *         value, or its first non-empty token when it contains a space (the untouched value
     *         if no non-empty token exists)
     */
    private String extractVocab(final TagNode node, RDFaEntity entity) {
        final String rawVocab = node.getAttributeByName(VOCAB_ATTR);
        // getAttributeByName yields null for a missing attribute; pass that through instead of
        // failing with a NullPointerException, mirroring the one-argument overload.
        if (rawVocab == null || rawVocab.indexOf(' ') < 0) {
            return rawVocab;
        }
        exceptions.add(new UnknownVocabException(false, entity, rawVocab));
        for (final String token : rawVocab.split("\\s")) {
            if (!token.isEmpty()) {
                return token;
            }
        }
        return rawVocab;
    }

    /**
     * Checks whether a jsoup element carries a non-empty {@code vocab} attribute.
     *
     * @param node element to inspect
     * @return {@code true} if the attribute key is present and its value is not empty
     */
    private boolean hasVocab(final Element node) {
        if (!node.attributes().hasKey(VOCAB_ATTR)) {
            return false;
        }
        return !node.attr(VOCAB_ATTR).isEmpty();
    }


    /**
     * Checks whether an HtmlCleaner node carries a non-empty {@code vocab} attribute.
     *
     * @param node node to inspect
     * @return {@code true} if the attribute is present and its value is not empty
     */
    private boolean hasVocab(final TagNode node) {
        if (!node.hasAttribute(VOCAB_ATTR)) {
            return false;
        }
        return !node.getAttributeByName(VOCAB_ATTR).isEmpty();
    }


    /** Prefixes for which {@code resolveVocab} found no matching declaration. */
    private Set<String> unresolvedPrefixes = new HashSet<>();

    /**
     * Resolves every token of a multi-valued attribute through {@code resolveId} and joins the
     * results with a single space. Tokens that resolve to {@code null} are dropped.
     *
     * @param props attribute value, tokenised with {@code Su.split}
     * @return the space-joined resolved identifiers
     */
    private String resolveIds(final String props) {
        final List<String> resolvedIds = new LinkedList<>();
        for (final String token : Su.split(props)) {
            final String resolved = resolveId(token);
            if (resolved == null) {
                continue;
            }
            resolvedIds.add(resolved);
        }
        return Su.join(resolvedIds, " ");
    }


    /**
     * Resolves a single identifier: splits it into prefix and name, looks the prefix up via
     * {@code resolveVocab} when one is present, and lets {@code buildPropId} assemble the result.
     *
     * @param id identifier as written in the attribute
     * @return the identifier produced by {@code buildPropId}
     */
    private String resolveId(final String id) {
        final Pair<String, String> parts = splitPropAttr(id);
        final String prefix = parts.first;
        final String namespace = (prefix == null) ? null : resolveVocab(prefix);
        return buildPropId(namespace, parts.second, prefix);
    }

    /**
     * Builds the full identifier of a property.
     * <ol>
     *   <li>If the prefix was resolved, the name is appended to the resolved namespace.</li>
     *   <li>Otherwise, if the name itself is a valid HTTP URL, it is returned unchanged.</li>
     *   <li>Otherwise, if {@code currentVocab} is set, the name is appended to it.</li>
     *   <li>Otherwise the original {@code prefix:name} form (or just the name when the prefix
     *       is empty) is returned.</li>
     * </ol>
     *
     * @param resolved_prefix namespace the prefix resolved to, or {@code null} if unresolved/absent
     * @param propName        local name of the property
     * @param prefix          prefix as written in the source, may be {@code null}
     * @return the assembled identifier
     */
    private String buildPropId(final String resolved_prefix, final String propName, final String prefix) {
        if (resolved_prefix != null) {
            return joinNamespaceAndName(resolved_prefix, propName);
        }
        if (URLUtils.isValidHttpURL(propName)) {
            return propName;
        }
        if (currentVocab != null) {
            return joinNamespaceAndName(currentVocab, propName);
        }
        return (isEmpty(prefix) ? "" : prefix + ":") + propName;
    }

    /**
     * Appends a local name to a namespace, inserting {@code '#'} unless the namespace already
     * ends with {@code '#'} or {@code '/'}. Uses {@code endsWith} so that an empty namespace
     * (e.g. from {@code xmlns:x=""}) does not raise {@code StringIndexOutOfBoundsException}.
     */
    private static String joinNamespaceAndName(final String namespace, final String localName) {
        if (namespace.endsWith("#") || namespace.endsWith("/")) {
            return namespace + localName;
        }
        return namespace + "#" + localName;
    }

    /**
     * Looks a prefix up in the declared prefix scopes and, after them, in the default prefix
     * list supplied by {@code DefaultPrefixProvider}.
     * <p>
     * Scopes are searched in the iteration order of {@code prefixStack}; the defaults are
     * consulted last. The lookup works on a snapshot and never modifies {@code prefixStack}:
     * previously the default list was pushed onto the stack and only popped again when the
     * prefix was <em>not</em> found, so every successful lookup left an extra frame behind.
     * <p>
     * Side effects: a match that originates from an {@code xmlns:*} declaration is recorded in
     * {@code xmlnsErrPrefixes}; a prefix with no match is recorded in {@code unresolvedPrefixes}.
     *
     * @param prefix prefix to resolve
     * @return the URI bound to the prefix, or {@code null} if no declaration matches
     */
    private String resolveVocab(final String prefix) {
        final List<List<Prefix2URI>> scopes = new ArrayList<>(prefixStack);
        scopes.add(DefaultPrefixProvider.instance.getPrefixList());
        for (final List<Prefix2URI> scope : scopes) {
            for (final Prefix2URI declaration : scope) {
                if (!declaration.first.equals(prefix)) {
                    continue;
                }
                if (declaration.isXmlns) {
                    xmlnsErrPrefixes.add(declaration.first);
                }
                return declaration.second;
            }
        }
        unresolvedPrefixes.add(prefix);
        return null;
    }

    /**
     * Parses the value of a {@code prefix} attribute into prefix/URI pairs. A token ending with
     * a colon is remembered as the pending prefix; the next token that does not end with a colon
     * is taken as its URI. Tokens without a pending prefix are ignored, and a later colon token
     * replaces an earlier one that has not been paired yet.
     *
     * @param prefixes attribute value, tokenised with {@code Su.split}
     * @return the parsed pairs in order of appearance
     */
    private List<Prefix2URI> splitPrefixes(final String prefixes) {
        final List<Prefix2URI> mappings = new LinkedList<>();
        String pendingAbbr = null;
        for (final String token : Su.split(prefixes)) {
            final boolean endsWithColon = token.charAt(token.length() - 1) == ':';
            if (endsWithColon) {
                pendingAbbr = token;
                continue;
            }
            if (pendingAbbr == null) {
                continue;
            }
            // Strip the trailing colon from the remembered prefix token.
            final String abbr = pendingAbbr.substring(0, pendingAbbr.length() - 1);
            mappings.add(new Prefix2URI(abbr, token));
            pendingAbbr = null;
        }
        return mappings;
    }

    /**
     * Splits an identifier into an optional prefix and a name. Valid HTTP URLs and values
     * without a colon are returned whole with a {@code null} prefix; anything else is split at
     * the first colon via {@code Su.split(prop_attr, ':', 2)}.
     *
     * @param prop_attr identifier to split
     * @return pair of (prefix or {@code null}, name)
     */
    private Pair<String, String> splitPropAttr(final String prop_attr) {
        final boolean unprefixed = URLUtils.isValidHttpURL(prop_attr) || prop_attr.indexOf(':') < 0;
        if (unprefixed) {
            return pair(null, prop_attr);
        }
        final String[] pieces = Su.split(prop_attr, ':', 2);
        return pair(pieces[0], pieces[1]);
    }

    /** Name of the attribute checked by {@code isPropertyNode}. */
    private static final String PROPERTY_ATTR = "property";

    /**
     * Checks whether a jsoup element carries a non-empty {@code property} attribute.
     *
     * @param node element to inspect
     * @return {@code true} if the attribute is present and its value is not empty
     */
    private boolean isPropertyNode(final Element node) {
        if (!node.hasAttr(PROPERTY_ATTR)) {
            return false;
        }
        return !node.attr(PROPERTY_ATTR).isEmpty();
    }

    /**
     * Checks whether an HtmlCleaner node carries a non-empty {@code property} attribute.
     *
     * @param node node to inspect
     * @return {@code true} if the attribute is present and its value is not empty
     */
    private boolean isPropertyNode(final TagNode node) {
        if (!node.hasAttribute(PROPERTY_ATTR)) {
            return false;
        }
        return !node.getAttributeByName(PROPERTY_ATTR).isEmpty();
    }


    /** Name of the {@code resource} attribute checked by {@code isStartingNode}. */
    private static final String RESOURCE_ATTR = "resource";
    /** Name of the {@code typeof} attribute checked by {@code isStartingNode}. */
    private static final String TYPE_ATTR = "typeof";

    /**
     * Checks whether a jsoup element is a starting node: either it has a non-empty
     * {@code resource} attribute and is not a property node, or it has a non-empty
     * {@code typeof} attribute.
     *
     * @param node element to inspect
     * @return {@code true} if either condition holds
     */
    private boolean isStartingNode(final Element node) {
        final boolean hasResource = node.hasAttr(RESOURCE_ATTR) && !node.attr(RESOURCE_ATTR).isEmpty();
        if (hasResource && !isPropertyNode(node)) {
            return true;
        }
        if (!node.hasAttr(TYPE_ATTR)) {
            return false;
        }
        return !node.attr(TYPE_ATTR).isEmpty();
    }

    /**
     * Checks whether an HtmlCleaner node is a starting node: either it has a non-empty
     * {@code resource} attribute and is not a property node, or it has a non-empty
     * {@code typeof} attribute.
     *
     * @param node node to inspect
     * @return {@code true} if either condition holds
     */
    private boolean isStartingNode(final TagNode node) {
        final boolean hasResource =
                node.hasAttribute(RESOURCE_ATTR) && !node.getAttributeByName(RESOURCE_ATTR).isEmpty();
        if (hasResource && !isPropertyNode(node)) {
            return true;
        }
        if (!node.hasAttribute(TYPE_ATTR)) {
            return false;
        }
        return !node.getAttributeByName(TYPE_ATTR).isEmpty();
    }

    /**
     * Renders a JSON node as text.
     * <ul>
     *   <li>{@code null} gives {@code null};</li>
     *   <li>a value node gives its {@code asText()};</li>
     *   <li>an array gives the {@code asText()} of its value-node elements joined with a space
     *       (non-value elements are skipped);</li>
     *   <li>anything else gives {@code null}.</li>
     * </ul>
     *
     * @param t node to render, may be {@code null}
     * @return the textual form, or {@code null} as described above
     */
    private static String getTextOrNull(TreeNode t) {
        if (t instanceof ValueNode) {
            return ((ValueNode) t).asText();
        }
        if (!(t instanceof ArrayNode)) {
            // Covers null as well as node kinds that are neither values nor arrays.
            return null;
        }
        final List<String> texts = new ArrayList<>();
        for (final JsonNode element : (ArrayNode) t) {
            if (element instanceof ValueNode) {
                texts.add(element.asText());
            }
        }
        return Su.join(texts, " ");
    }

    /**
     * Converts a JSON-LD node into an {@link RDFaEntity}, field by field:
     * <ul>
     *   <li>an object value becomes a complex property built recursively;</li>
     *   <li>an array value contributes one appended property per element - a value property for
     *       scalar elements, a recursively built complex property otherwise;</li>
     *   <li>a JSON {@code null} becomes a value property with a {@code null} value and the
     *       text {@code "null"};</li>
     *   <li>any other scalar becomes a value property carrying its {@code asText()}.</li>
     * </ul>
     *
     * @param obj node to convert; must not be {@code null}
     * @return the populated entity (never {@code null})
     */
    public static RDFaEntity extractLD(final TreeNode obj) {
        final Iterator<String> keys = obj.fieldNames();
        final RDFaEntity md = new JSONLDEntity(null, null);
        while (keys.hasNext()) {
            final String key = keys.next();
            final TreeNode value = obj.get(key);
            if (value instanceof ObjectNode) {
                md.addProperty(new RDFaComplexProperty(key, extractLD(value)));
            } else if (value instanceof ArrayNode) {
                for (final JsonNode element : (ArrayNode) value) {
                    if (element instanceof ValueNode) {
                        final String text = element.asText();
                        md.appendProperty(new RDFaValueProperty(key, text, null, text));
                    } else {
                        md.appendProperty(new RDFaComplexProperty(key, extractLD(element)));
                    }
                }
            } else if (value instanceof NullNode) {
                md.addProperty(new RDFaValueProperty(key, null, null, "null"));
            } else if (value instanceof ValueNode) {
                final String text = ((ValueNode) value).asText();
                md.addProperty(new RDFaValueProperty(key, text, null, text));
            }
        }
        return md;
    }
    /**
     * Converts a JSON-LD node into an {@link RDFaEntity}, passing every scalar value through
     * {@code setVal} together with {@code prefix}:
     * <ul>
     *   <li>an object value becomes a complex property built recursively;</li>
     *   <li>an array value contributes one appended property per element - a value property for
     *       scalar elements, a recursively built complex property otherwise;</li>
     *   <li>a JSON {@code null} becomes a value property with a {@code null} value and the
     *       text {@code "null"};</li>
     *   <li>any other scalar becomes a value property carrying {@code setVal(value, prefix)}.</li>
     * </ul>
     * Scalar array elements are converted from the element itself; the array node must never
     * be cast to {@code ValueNode}, because that cast fails with {@code ClassCastException}.
     *
     * @param obj    node to convert; must not be {@code null}
     * @param prefix prefix handed to {@code setVal} for every scalar value
     * @return the populated entity (never {@code null})
     */
    public static RDFaEntity extractLD(TreeNode obj, String prefix) {
        final Iterator<String> keys = obj.fieldNames();
        final RDFaEntity md = new JSONLDEntity(null, null);
        while (keys.hasNext()) {
            final String key = keys.next();
            final TreeNode value = obj.get(key);
            if (value instanceof ObjectNode) {
                md.addProperty(new RDFaComplexProperty(key, extractLD(value, prefix)));
            } else if (value instanceof ArrayNode) {
                for (final JsonNode element : (ArrayNode) value) {
                    if (element instanceof ValueNode) {
                        // Convert the element, not the enclosing array.
                        final String text = setVal((ValueNode) element, prefix);
                        md.appendProperty(new RDFaValueProperty(key, text, null, text));
                    } else {
                        md.appendProperty(new RDFaComplexProperty(key, extractLD(element, prefix)));
                    }
                }
            } else if (value instanceof NullNode) {
                md.addProperty(new RDFaValueProperty(key, null, null, "null"));
            } else if (value instanceof ValueNode) {
                final String text = setVal((ValueNode) value, prefix);
                md.addProperty(new RDFaValueProperty(key, text, null, text));
            }
        }
        return md;
    }

    /**
     * Matches text that, ignoring surrounding whitespace, starts with '{' and ends with '}' or
     * starts with '[' and ends with ']'. DOTALL lets the content span several lines. Only the
     * outer delimiters are checked; the content in between is not validated.
     */
    private final static Pattern PURE_JSON_REGEXP = Pattern.compile("^\\s*(\\{.*}|\\[.*])\\s*$", Pattern.DOTALL);

    /**
     * Tells whether the whole of {@code content} matches {@link #PURE_JSON_REGEXP}.
     *
     * @param content text to test
     * @return {@code true} if the entire text matches the pattern
     */
    private boolean isPureJSON(String content) {
        final boolean wholeTextMatches = PURE_JSON_REGEXP.matcher(content).matches();
        return wholeTextMatches;
    }

    /**
     * Wraps {@code content} in a {@code <script type="application/ld+json">} element that is
     * additionally marked with {@code fake="True"}.
     *
     * @param content text to wrap
     * @return the content surrounded by the opening and closing script tags
     */
    private String addScriptTags(String content) {
        return "<script type=\"application/ld+json\" fake=\"True\">" + content + "</script>";
    }

    /**
     * Builds the textual value of a scalar node: the node text passed through
     * {@code MicrodataUtils.cutPrefix}, preceded by {@code prefix} unless the text starts
     * with {@code '@'}, in which case nothing is prepended.
     *
     * @param val    scalar node to render
     * @param prefix prefix to prepend to texts that do not start with {@code '@'}
     * @return the assembled value
     */
    private static String setVal(ValueNode val,String prefix){
        final String text = val.asText();
        final String lead = text.startsWith("@") ? "" : prefix;
        return lead + MicrodataUtils.cutPrefix(text);
    }

    /**
     * Rewrites noindex comment markers into element tags: {@code <!--noindex-->} becomes
     * {@code <noindex>} and {@code <!--/noindex-->} becomes {@code </noindex>}.
     *
     * @param document markup to rewrite
     * @return the markup with both markers replaced
     * @throws UnsupportedEncodingException declared by the signature; the body shown here
     *                                      performs plain string replacement only
     */
    private String findNiondexCommentNode(String document) throws UnsupportedEncodingException {
        return document
                .replace("<!--noindex-->", "<noindex>")
                .replace("<!--/noindex-->", "</noindex>");
    }


    /**
     * Accumulates root entities keyed by group name. In single-RDFa mode there is exactly one
     * root, created up front and stored under the {@code null} key, and every property goes to
     * it. Otherwise one root per group name is created on first use.
     */
    private static class RootEntitiesBuilder {
        private final Map<String, RDFaEntity> entities = new HashMap<>();
        private final boolean isSingleRdfaMode;

        /**
         * @param isSingleRdfaMode whether all properties are collected under one shared root
         */
        public RootEntitiesBuilder(boolean isSingleRdfaMode) {
            this.isSingleRdfaMode = isSingleRdfaMode;
            if (!isSingleRdfaMode) {
                return;
            }
            entities.put(null, createRoot(""));
        }

        /** Returns the live view of all root entities created so far. */
        public Collection<RDFaEntity> getEntities() {
            return entities.values();
        }

        /**
         * Appends a property to the root that owns it: the shared root in single-RDFa mode,
         * otherwise the root registered for {@code nameGroup} (created if missing).
         *
         * @param nameGroup           group name selecting the root (ignored in single-RDFa mode)
         * @param rdFaComplexProperty property to append
         */
        public void appendProperty(String nameGroup, RDFaComplexProperty rdFaComplexProperty) {
            if (isSingleRdfaMode) {
                entities.get(null).appendProperty(rdFaComplexProperty);
                return;
            }
            RDFaEntity groupRoot = entities.get(nameGroup);
            if (groupRoot == null) {
                groupRoot = createRoot(nameGroup);
                entities.put(nameGroup, groupRoot);
            }
            groupRoot.appendProperty(rdFaComplexProperty);
        }

        /** Creates an entity flagged as root, with the given first argument and an empty second one. */
        private static RDFaEntity createRoot(final String group) {
            final RDFaEntity root = new RDFaEntity(group, "");
            root.isRoot = true;
            return root;
        }
    }
}
