package ru.yandex.webmaster3.tanker.digest.html;


import org.apache.xerces.dom.DocumentImpl;
import org.apache.xerces.dom.DocumentTypeImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.DOMConfiguration;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.bootstrap.DOMImplementationRegistry;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSOutput;
import org.w3c.dom.ls.LSSerializer;

import java.io.StringWriter;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

/**
 * Created by ifilippov5 on 24.09.17.
 *
 * <p>Static helpers that wrap the DOM produced by an {@link HtmlNode} into a complete
 * HTML page skeleton (doctype, {@code <head>}, {@code <body>}) and serialize it to a string.
 */
public class HtmlDomUtil {
    private static final Logger log = LoggerFactory.getLogger(HtmlDomUtil.class);
    /** Load/Save DOM implementation used to create serializers and outputs. */
    private static final DOMImplementationLS DOM_IMPLEMENTATION;
    /** Entity names that are emitted as entity reference nodes instead of plain text. */
    private static final Set<String> KNOWN_ENTITIES = Collections.unmodifiableSet(new HashSet<>(
            Arrays.asList("nbsp", "lt", "gt", "amp", "quot", "apos", "bull")));

    static {
        DOMImplementationRegistry registry;
        try {
            registry = DOMImplementationRegistry.newInstance();
        } catch (Exception e) {
            throw new RuntimeException("Failed to create DOMImplementationRegistry", e);
        }
        DOM_IMPLEMENTATION = (DOMImplementationLS) registry.getDOMImplementation("LS");
    }

    /**
     * Builds the full page for the given node and serializes it to a string.
     *
     * <p>The serializer is configured with pretty-printing enabled and the XML declaration
     * disabled; the output encoding is declared as UTF-8.
     *
     * @param node root content node that is placed into the page body
     * @return the serialized page markup
     */
    public static String makePageHtml(HtmlNode node) {
        Node page = makePageDom(node);
        LSSerializer lsSerializer = DOM_IMPLEMENTATION.createLSSerializer();
        DOMConfiguration domConfig = lsSerializer.getDomConfig();
        domConfig.setParameter("format-pretty-print", true);  // pretty, indented output
        domConfig.setParameter("xml-declaration", false);

        LSOutput lsOutput = DOM_IMPLEMENTATION.createLSOutput();
        lsOutput.setEncoding("UTF-8");
        StringWriter sw = new StringWriter();
        lsOutput.setCharacterStream(sw);
        lsSerializer.write(page, lsOutput);
        return sw.toString();
    }

    /**
     * Creates a new document, converts the given node to DOM inside it and wraps the result
     * into the page skeleton built by {@link #buildMainDom(Document, Node)}.
     *
     * @param node root content node that is placed into the page body
     * @return the populated document
     */
    public static Node makePageDom(HtmlNode node) {
        Document document = new MyHTMLDocumentImpl();
        buildMainDom(document, makeNodeDom(document, node));
        return document;
    }

    /** Converts the given node to its DOM representation owned by {@code document}. */
    private static Node makeNodeDom(Document document, HtmlNode node) {
        return node.toDom(document);
    }

    /**
     * Splits {@code text} into a document fragment of text nodes and entity reference nodes.
     *
     * <p>A sequence {@code &name;} whose name is listed in {@link #KNOWN_ENTITIES} becomes an
     * entity reference node; any other {@code &...;} sequence, as well as an unterminated
     * trailing {@code &...}, is kept as literal text.
     *
     * @param document document used as the factory for the created nodes
     * @param text     source text that may contain entity references
     * @return a document fragment holding the created nodes (empty for empty text)
     */
    static Node createUnescapedTextNode(Document document, String text) {
        Node root = document.createDocumentFragment();
        StringBuilder sb = new StringBuilder();
        boolean inEntity = false;
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c == '&') {
                if (inEntity) {
                    // A new '&' cannot belong to the pending entity name, so the previous '&'
                    // and everything after it was literal text. Flushing it here keeps a stray
                    // ampersand from swallowing the entity that follows (e.g. "A & B&nbsp;C").
                    root.appendChild(document.createTextNode("&" + sb));
                } else if (sb.length() > 0) {
                    root.appendChild(document.createTextNode(sb.toString()));
                }
                sb.setLength(0);
                inEntity = true;
            } else if (inEntity && c == ';') {
                String entityName = sb.toString();
                if (KNOWN_ENTITIES.contains(entityName)) {
                    root.appendChild(document.createEntityReference(entityName));
                } else {
                    // Unknown entity: keep the original characters as plain text.
                    root.appendChild(document.createTextNode("&" + entityName + ";"));
                }
                sb.setLength(0);
                inEntity = false;
            } else {
                sb.append(c);
            }
        }
        if (inEntity || sb.length() > 0) {
            // Trailing plain text, or an entity candidate that was never closed with ';'.
            root.appendChild(document.createTextNode(inEntity ? "&" + sb : sb.toString()));
        }
        return root;
    }

    /**
     * Appends the page skeleton to {@code document}: an XHTML 1.0 Strict doctype followed by
     * an {@code <html lang="ru">} element with a {@code <head>} (four meta tags and the title
     * "Digest") and a {@code <body>} containing {@code node}.
     *
     * @param document document to populate; the doctype and the root element are appended to it
     * @param node     content appended as the only child of {@code <body>}
     */
    public static void buildMainDom(Document document, Node node) {
        DocumentType docType = new DocumentTypeImpl(null, "html",
                "-//W3C//DTD XHTML 1.0 Strict//EN",
                "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
        document.appendChild(docType);
        Element html = document.createElement("html");
        html.setAttribute("lang", "ru");

        Element head = document.createElement("head");
        head.appendChild(createMeta(document, "charset", "UTF-8"));
        head.appendChild(createMeta(document,
                "name", "viewport",
                "content", "width=device-width, initial-scale=1.0"));
        head.appendChild(createMeta(document,
                "http-equiv", "X-UA-Compatible",
                "content", "ie=edge"));
        head.appendChild(createMeta(document,
                "name", "format-detection",
                "content", "telephone=no,date=no,address=no,email=no,url=no"));
        Element title = document.createElement("title");
        title.setTextContent("Digest");
        head.appendChild(title);

        Element body = document.createElement("body");
        body.setAttribute("style", "margin: 0; padding: 0");
        body.appendChild(node);

        html.appendChild(head);
        html.appendChild(body);

        document.appendChild(html);
    }

    /**
     * Creates a {@code <meta>} element with the given attributes.
     *
     * @param document          document used as the element factory
     * @param attributeNameValue attribute names and values as alternating name/value pairs
     */
    private static Element createMeta(Document document, String... attributeNameValue) {
        Element meta = document.createElement("meta");
        for (int i = 0; i + 1 < attributeNameValue.length; i += 2) {
            meta.setAttribute(attributeNameValue[i], attributeNameValue[i + 1]);
        }
        return meta;
    }

    /** Document implementation used for generated pages; adds nothing to {@link DocumentImpl}. */
    public static class MyHTMLDocumentImpl extends DocumentImpl {

    }
}
