package ru.yandex.chemodan.app.docviewer.utils.html;

import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;

import org.dom4j.Attribute;
import org.dom4j.Branch;
import org.dom4j.CDATA;
import org.dom4j.CharacterData;
import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import ru.yandex.bolts.collection.Option;
import ru.yandex.chemodan.app.docviewer.utils.UriUtils;
import ru.yandex.misc.lang.StringUtils;

/**
 * Whitelist-based sanitizer that rewrites a dom4j {@link Document} holding parsed HTML in place.
 *
 * <p>{@link #process()} drops the doctype, processing instructions and every non-element node
 * at document level, keeps only the {@code HEAD} and {@code BODY} children of the root element,
 * and then filters elements, attributes and CSS declarations against the whitelists declared
 * below. CSS rules found in {@code STYLE} elements of the head are filtered and re-emitted
 * as a single new {@code STYLE} element.
 *
 * <p>An instance accumulates state ({@link #cssManual}) while processing, so it is meant to be
 * used for one document only.
 */
public class HtmlFilter {

    private static final Logger logger = LoggerFactory.getLogger(HtmlFilter.class);

    /** URL schemes accepted by {@link #outputElementAttr(Element, Attribute)}. */
    static final Set<String> ALLOWED_PROTOCOLS = Collections.unmodifiableSet(new HashSet<>(
            Arrays.asList("http", "https", "ftp")));

    /** Lower-case names of the attributes that are kept as is. */
    private static final Set<String> ATTRIBUTES_ALLOWED = Collections
            .unmodifiableSet(new HashSet<>(Arrays.asList("align", "alt", "border",
                    "cellspacing", "charset", "color", "cols", "colspan", "class", "face", "frame",
                    "height", "id", "lang", "name", "rowspan", "rules", "size", "start", "title",
                    "type", "valign", "width")));

    /** Upper-case names of the elements that are kept (after filtering their attributes). */
    private static final Set<String> ELEMENTS_ALLOWED = Collections
            .unmodifiableSet(new HashSet<>(Arrays.asList(
                    "A", "ABBR", "ACRONYM", "ADDRESS", "ASIDE", "ARTICLE",
                    "B", "BR", "BLOCKQUOTE", "CAPTION", "CITE", "CODE", "COL", "DD", "DIV", "DL",
                    "DT", "EM", "FONT", "I", "IMG", "LI", "H1", "H2", "H3", "H4", "H5", "H6", "HR", "NAV",
                    "OL", "P", "PRE", "SECTION", "SPAN", "STRIKE", "STRONG", "SUB", "SUP", "TABLE", "TBODY",
                    "TD", "TFOOT", "TH", "THEAD", "TR", "U", "UL")));

    /** Upper-case names of the elements that are dropped together with their whole subtree. */
    private static final Set<String> ELEMENTS_DISABLED = Collections
            .unmodifiableSet(new HashSet<>(Arrays.asList("COMMENT", "IFRAME", "LINK",
                    "OBJECT", "SCRIPT", "STYLE", "TITLE", "XML")));

    /** A single simple selector: {@code tag}, {@code .class} or {@code tag.class}, optionally with a pseudo-class. */
    private static final String PATTERN_SELECTO = "\\s*(\\.?\\w*|\\w*\\.\\w*)\\:?\\w*\\s*";

    /** A comma-separated list of simple selectors; rules with any other selector are discarded. */
    static final Pattern PATTERN_SELECTOR = Pattern.compile("\\s*" + PATTERN_SELECTO + "(\\s*\\,"
            + PATTERN_SELECTO + ")*\\s*", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);

    /** Lower-case names of the CSS properties that may appear in the output. */
    private static final Set<String> PROPERTIES_ALLOWED = Collections
            .unmodifiableSet(new HashSet<>(Arrays.asList(
                    "background-color", "border", "border-bottom", "border-left", "border-top",
                    "border-right",

                    "color",

                    "font-family", "font-size", "font-variant", "font-weight",

                    "margin", "margin-left", "margin-right", "margin-top", "margin-bottom",

                    "padding", "padding-left", "padding-right", "padding-top", "padding-bottom",

                    "text-decoration", "text-shadow", "text-transform",

                    "vertical-align", "visibility"

            )));

    /**
     * Matches CSS values that must not be copied to the output: markup characters (they could
     * terminate the generated STYLE comment/element), braces, backslash escapes, and constructs
     * that can load resources or run script.
     */
    private static final Pattern UNSAFE_CSS_VALUE = Pattern.compile(
            "[<>{}\\\\]|expression\\s*\\(|url\\s*\\(|javascript\\s*:",
            Pattern.CASE_INSENSITIVE);

    /** Filtered CSS rules collected from the STYLE elements of the head. */
    private final StringBuilder cssManual = new StringBuilder();

    private final Document document;

    /**
     * @param document the document to sanitize; it is modified in place by {@link #process()}
     */
    HtmlFilter(Document document) {
        this.document = document;
    }

    /**
     * Removes control characters below U+0020 other than TAB, LF and CR.
     *
     * @param text the text to clean
     * @return the text without the rejected characters
     */
    private String filterText(String text) {
        final StringBuilder cleaned = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); i++) {
            final char c = text.charAt(i);
            if (c >= 0x20 || c == 0x09 || c == 0x0A || c == 0x0D) {
                cleaned.append(c);
            }
        }
        return cleaned.toString();
    }

    /**
     * Decides whether an attribute is kept.
     *
     * <p>Attributes listed in {@code HtmlUtils.ATTRIBUTES_PROTOCOLS} are kept when the value is
     * an in-page anchor ({@code href="#..."} on an {@code A} element), matches
     * {@code HyperlinkProcessor.LOCAL_FILE_PATTERN}, or parses as a URL whose protocol is in
     * {@link #ALLOWED_PROTOCOLS}. Any other attribute is kept only when its name is whitelisted.
     *
     * @param element the element owning the attribute
     * @param attr the attribute to check
     * @return {@code true} to keep the attribute, {@code false} to remove it
     */
    protected boolean outputElementAttr(Element element, Attribute attr) {
        // Locale.ROOT: the default locale must not influence security-relevant name matching.
        final String attrName = attr.getName().toLowerCase(Locale.ROOT);

        if (HtmlUtils.ATTRIBUTES_PROTOCOLS.contains(attrName)) {
            String value = attr.getValue();

            if (StringUtils.equalsIgnoreCase("A", element.getName())
                    && StringUtils.equalsIgnoreCase("href", attr.getName())
                    && StringUtils.startsWith(value, "#"))
            {
                return true;
            }

            if (HyperlinkProcessor.LOCAL_FILE_PATTERN.matcher(value).matches()) {
                return true;
            }

            try {
                URL url = UriUtils.toUrl(value);
                return ALLOWED_PROTOCOLS.contains(url.getProtocol());
            } catch (Exception exc) {
                // An unparsable URL is treated as not allowed.
                logger.trace("Unable to parse URL '" + value + "': " + exc, exc);
                return false;
            }
        }

        return ATTRIBUTES_ALLOWED.contains(attrName);
    }

    /**
     * @param element the element to check
     * @return {@code false} when the element's subtree must be dropped entirely
     */
    protected boolean outputElementChildren(Element element) {
        return !ELEMENTS_DISABLED.contains(element.getName().toUpperCase(Locale.ROOT));
    }

    /**
     * @param element the element to check
     * @return {@code true} when the element itself is whitelisted
     */
    protected boolean outputElementSelf(Element element) {
        return ELEMENTS_ALLOWED.contains(element.getName().toUpperCase(Locale.ROOT));
    }

    /**
     * Parses the text of a STYLE element as a flat sequence of {@code selector { declarations }}
     * rules and appends the accepted, filtered rules to {@link #cssManual}.
     *
     * <p>A leading {@code <!--} and a trailing {@code -->} are stripped. Parsing stops at the
     * first position where no well-ordered <code>{ ... }</code> pair can be found.
     *
     * @param originalText raw text content of a STYLE element
     */
    private void parseStyle(String originalText) {
        String text = StringUtils.trimToEmpty(originalText);
        if (text.startsWith("<!--")) {
            text = StringUtils.substringAfter(text, "<!--");
        }
        if (text.endsWith("-->")) {
            text = StringUtils.substringBeforeLast(text, "-->");
        }

        int current = 0;
        while (current < text.length()) {
            int nextStart = text.indexOf("{", current);
            int nextEnd = text.indexOf("}", current);
            if (nextStart == -1 || nextEnd == -1 || nextEnd <= nextStart) {
                break;
            }
            String selector = StringUtils.trimToEmpty(text.substring(current, nextStart));
            String oldStyle = text.substring(nextStart + 1, nextEnd);

            current = nextEnd + 1;

            // Only rules whose selector we fully understand are copied to the output.
            if (PATTERN_SELECTOR.matcher(selector).matches()) {
                String newStyle = processStyle(oldStyle);

                cssManual.append(selector);
                cssManual.append("{");
                cssManual.append(newStyle);
                cssManual.append("}\n");
            }
        }
    }

    /**
     * Sanitizes the document in place.
     *
     * <p>Does nothing beyond the document-level cleanup when the document has no root element.
     *
     * @throws IllegalArgumentException if a content list contains an object that is not a
     *         {@link Node}
     * @throws AssertionError if an overridden {@link #outputElementSelf(Element)} /
     *         {@link #outputElementChildren(Element)} pair asks to keep an element but drop
     *         its children
     */
    void process() {
        document.setDocType(null);
        final Element rootElement = document.getRootElement();

        {
            List<Node> toRemove = new ArrayList<Node>(document.processingInstructions());
            removeAll(document, toRemove);
        }

        {
            // Everything at document level except the root element goes away.
            List<Node> toRemove = new ArrayList<>();
            for (Object rootNode : document.content()) {
                if (!(rootNode instanceof Node)) {
                    throw new IllegalArgumentException("Unknown child type in doc: "
                            + rootNode.getClass().getName());
                }

                if (!(rootNode instanceof Element)) {
                    toRemove.add((Node) rootNode);
                }
            }
            // These nodes are children of the document, not of the root element.
            removeAll(document, toRemove);
        }

        if (rootElement == null) {
            return;
        }

        {
            List<Node> toRemove = new ArrayList<>();
            for (Object childNode : rootElement.content()) {
                if (!(childNode instanceof Node)) {
                    throw new IllegalArgumentException("Unknown child type in root: "
                            + childNode.getClass().getName());
                }

                // Text, comments etc. directly under the root are dropped.
                if (!(childNode instanceof Element)) {
                    toRemove.add((Node) childNode);
                    continue;
                }

                final Element childElement = (Element) childNode;
                final String childName = childElement.getName();

                if (StringUtils.equalsIgnoreCase("HEAD", childName)) {
                    processHead(childElement);
                    processAttributes(childElement);
                    continue;
                }

                if (StringUtils.equalsIgnoreCase("BODY", childName)) {
                    processElement(childElement);
                    // The container's own attributes (e.g. event handlers) are filtered too.
                    processAttributes(childElement);
                    continue;
                }

                toRemove.add(childElement);
            }
            removeAll(rootElement, toRemove);
            processAttributes(rootElement);
        }

        if (this.cssManual.length() > 0) {
            Option<Element> head = HtmlUtils.findHead(document);
            if (!head.isPresent()) {
                Element newHead = DocumentHelper.createElement("HEAD");
                // content() is a live view of the root's children: inserting into it attaches
                // the new element to the root.
                List<Node> content = rootElement.content();
                content.add(0, newHead);
                head = Option.of(newHead);
            }
            final Element styleElement = DocumentHelper.createElement("STYLE");
            styleElement.add(DocumentHelper.createComment(this.cssManual.toString()));
            head.get().add(styleElement);
        }
    }

    /** Removes each of the given nodes from {@code parent}. */
    private static void removeAll(final Branch parent, List<Node> toRemove) {
        for (Node node : toRemove) {
            parent.remove(node);
        }
    }

    /**
     * Filters the attributes of an element: the {@code style} attribute is rewritten through
     * {@link #processStyle(String)}, every other attribute is removed unless
     * {@link #outputElementAttr(Element, Attribute)} accepts it.
     */
    private void processAttributes(Element element) {
        // Iterate over a copy because attributes are removed from the element while looping.
        List<Attribute> attributes = new ArrayList<Attribute>(element.attributes());
        for (Attribute attribute : attributes) {
            if (StringUtils.equalsIgnoreCase("style", attribute.getName())) {
                String oldValue = StringUtils.trimToEmpty(attribute.getValue());
                String newValue = processStyle(oldValue);
                attribute.setValue(newValue);
                continue;
            }

            if (!outputElementAttr(element, attribute)) {

                logger.trace(
                        "Removing attribute with name '{}' and value '{}' from element with name '{}'",
                        attribute.getName(), attribute.getValue(), element.getName());

                element.remove(attribute);
            }
        }
    }

    /** Replaces the content of {@code element} with its filtered content. */
    private void processElement(Element element) {
        final List<Node> newContent = new ArrayList<>(element.content().size());
        final List<Node> oldContent = new ArrayList<Node>(element.content());
        processElementChildren(oldContent, newContent);
        element.setContent(newContent);
    }

    /**
     * Filters {@code oldContent} and appends the surviving nodes to {@code newContent}.
     *
     * <p>Text and CDATA nodes are kept after {@link #filterText(String)} unless they become
     * empty; other non-element nodes are dropped. Elements are kept, unwrapped (children kept,
     * element dropped) or removed with their subtree, depending on
     * {@link #outputElementSelf(Element)} and {@link #outputElementChildren(Element)}.
     */
    private void processElementChildren(List<Node> oldContent, List<Node> newContent) {
        for (Node childNode : oldContent) {
            if (childNode instanceof CDATA || childNode instanceof Text) {
                final String newText = filterText(childNode.getText());
                // DOM4J bug workaround
                if (StringUtils.isNotEmpty(newText)) {
                    childNode.setText(newText);
                    newContent.add(childNode);
                }
                continue;
            }

            if (!(childNode instanceof Element)) {
                continue;
            }

            final Element childElement = (Element) childNode;

            final boolean outputChildSelf = outputElementSelf(childElement);
            final boolean outputChildChildren = outputElementChildren(childElement);

            if (!outputChildChildren) {
                if (outputChildSelf) {
                    // The whitelists must never ask for an element without its children.
                    throw new AssertionError(childElement.getName());
                }

                logger.trace("Removing element with name '{}' and it's children",
                        childElement.getName());
                continue;
            }

            if (!outputChildSelf) {
                logger.trace("Removing element with name '{}' (but preserving children)",
                        childElement.getName());

                // Unwrap: the filtered children take the element's place in the parent.
                processElementChildren(childElement.content(), newContent);
                continue;
            }

            processElement(childElement);
            processAttributes(childElement);
            newContent.add(childElement);
        }
    }

    /**
     * Filters the children of the HEAD element.
     *
     * <p>STYLE elements are parsed into {@link #cssManual} and removed; TITLE is kept with its
     * content reduced to a single text node; META is kept only when it declares a
     * {@code Content-Type} starting with {@code text/}. Everything else is removed.
     *
     * @throws IllegalArgumentException if the content list contains an object that is not a
     *         {@link Node}
     */
    private void processHead(Element element) {
        List<Node> toRemove = new ArrayList<>();
        for (Object childNode : element.content()) {
            if (!(childNode instanceof Node)) {
                throw new IllegalArgumentException("Unknown child type in HEAD: "
                        + childNode.getClass().getName());
            }

            if (!(childNode instanceof Element)) {
                toRemove.add((Node) childNode);
                continue;
            }

            final Element childElement = (Element) childNode;
            final String childName = childElement.getName().toUpperCase(Locale.ROOT);

            if ("STYLE".equals(childName)) {
                for (Object obj : childElement.content()) {
                    if (obj instanceof CharacterData) {
                        parseStyle(((CharacterData) obj).getStringValue());
                    }
                }
                // Falls through to removal: the original STYLE element is never kept.
            }

            if ("TITLE".equals(childName)) {
                processAttributes(childElement);
                childElement.setContent(Collections.singletonList(DocumentHelper.createText(childElement
                        .getText())));
                continue;
            }

            if ("META".equals(childName)) {
                final String httpEquiv = childElement.attributeValue("http-equiv");
                final String content = childElement.attributeValue("content");

                if (StringUtils.equalsIgnoreCase("Content-Type", httpEquiv) && content != null
                        && content.toLowerCase(Locale.ROOT).startsWith("text/"))
                {
                    continue;
                }
            }

            toRemove.add(childElement);
        }

        removeAll(element, toRemove);
    }

    /**
     * Filters a {@code ;}-separated list of CSS declarations.
     *
     * <p>A declaration is kept only when its property name is in {@link #PROPERTIES_ALLOWED}
     * and its value does not match {@link #UNSAFE_CSS_VALUE}.
     *
     * @param oldStyle declarations as found in a {@code style} attribute or a CSS rule body
     * @return the accepted declarations, each rendered as {@code name:value;}
     */
    private String processStyle(String oldStyle) {
        StringBuilder result = new StringBuilder();
        for (String token : StringUtils.split(oldStyle, ";")) {
            if (StringUtils.isEmpty(StringUtils.trim(token))) {
                continue;
            }

            String name = StringUtils.trimToEmpty(StringUtils.substringBefore(token, ":"));
            String value = StringUtils.trimToEmpty(StringUtils.substringAfter(token, ":"));

            if (!PROPERTIES_ALLOWED.contains(name.toLowerCase(Locale.ROOT))) {
                continue;
            }

            // The value ends up inside a STYLE comment or a style attribute; refuse anything
            // that could break out of that context or pull in active content.
            if (UNSAFE_CSS_VALUE.matcher(value).find()) {
                logger.trace("Removing CSS property '{}' with unsafe value '{}'", name, value);
                continue;
            }

            result.append(name);
            result.append(':');
            result.append(value);
            result.append(';');
        }
        return result.toString();
    }
}
