package ru.yandex.chemodan.app.docviewer.utils.html;

import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringEscapeUtils;
import org.dom4j.Attribute;
import org.dom4j.CDATA;
import org.dom4j.Comment;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.Text;
import org.dom4j.io.OutputFormat;

import ru.yandex.bolts.collection.Cf;
import ru.yandex.bolts.collection.ListF;
import ru.yandex.bolts.collection.MapF;
import ru.yandex.bolts.collection.Option;
import ru.yandex.bolts.collection.SetF;
import ru.yandex.bolts.function.Function;
import ru.yandex.chemodan.app.docviewer.utils.DataSizeUtils;
import ru.yandex.misc.dataSize.DataSize;
import ru.yandex.misc.lang.StringUtils;
import ru.yandex.misc.lang.Validate;
import ru.yandex.misc.xml.dom4j.Dom4jUtils;
import ru.yandex.misc.xml.stream.XmlWriter;
import ru.yandex.misc.xml.stream.XmlWriterFactory;

/**
 * Splits a parsed HTML document into one or more stand-alone HTML parts.
 *
 * <p>The worker first tries to serialize the whole document as a single part
 * ({@link #trySingle()}); when that exceeds {@code maxSizeSingle} it walks the direct
 * children of BODY and starts a new part whenever the current one grows past
 * {@code maxSizePart}. {@code table} and {@code ul} children of BODY may additionally be
 * cut between rows / list items, so one big table or list can span several parts.
 *
 * <p>CSS rules from the HEAD {@code <style>} elements whose selector is a plain
 * {@code .class} or {@code element.class} are copied into a part only when that part
 * actually uses the class; all other CSS text is copied into every part verbatim.
 *
 * <p>Instances keep the state of the part currently being written in mutable fields.
 */
class HtmlSplitterWorker {

    private static final String ATTR_ID = "ID";

    /** Marker id of the placeholder {@code <style>} element that {@link #toByteArray()} replaces. */
    private static final String ID_TEMPCSS = "ru.yandex.chemodan.app.docviewer.utils.html.tempcss";

    /** Selector consisting of a class name, optionally preceded by word characters. */
    private static final Pattern PATTERN_CLASS_ONLY = Pattern.compile("\\w*\\.([a-z0-9_]*)\\s*",
            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);

    /** Selector of the form {@code element.class}, surrounded by optional whitespace. */
    private static final Pattern PATTERN_ELEMENT_CLASS = Pattern.compile(
            "\\s*([a-z0-9_]*)\\.([a-z0-9_]*)\\s*", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);

    private static final String TAG_STYLE = "style";

    /** Returns {@code true} for list children that must be skipped, i.e. everything except {@code li}. */
    private static boolean isIgnoreListChild(Element child) {
        // Locale.ROOT: tag names must not be lower-cased with locale-specific rules
        // (e.g. Turkish "LI" -> "lı").
        return !"li".equals(child.getName().toLowerCase(Locale.ROOT));
    }

    private static Function<Element, Boolean> isIgnoreListChildF() {
        return HtmlSplitterWorker::isIgnoreListChild;
    }

    /**
     * Returns {@code true} for table-body children that must be skipped, i.e. everything
     * except {@code th} and {@code tr}.
     */
    private static boolean isIgnoreTableBodyChild(Element child) {
        final String childName = child.getName().toLowerCase(Locale.ROOT);
        return !("th".equals(childName) || "tr".equals(childName));
    }

    private static Function<Element, Boolean> isIgnoreTableBodyChildF() {
        return HtmlSplitterWorker::isIgnoreTableBodyChild;
    }

    /** CSS text that is copied into every part (everything that is not a recognized class rule). */
    private String cssFixed;

    /** Class rules referenced so far by the part that is currently being written. */
    private StringBuilder cssNonFixed;

    /** Byte sink of the part that is currently being written. */
    private ByteArrayOutputStream currentResult;

    /** Writer of the part that is currently being written; {@code null} between parts. */
    private XmlWriter currentXmlWriter;

    private final String encoding;

    private final DataSize maxSizePart;

    private final DataSize maxSizeSingle;

    /** Lower-cased, trimmed selector -> full text of the rule(s) declared for it. */
    private final Map<String, String> selectorToStyle = new LinkedHashMap<>();

    private final Element sourceElementBody;

    private final Option<Element> sourceElementHead;

    private final Element sourceElementHtml;

    /** Selectors already appended to {@link #cssNonFixed} for the current part. */
    private final Set<String> usedStyles = new LinkedHashSet<>();

    private final XmlWriterFactory xmlWriterFactory;

    // XXX apply it to every output xml part. At the moment, for performance reasons,
    // it is only applied in #processAnchors() to modified xml parts (to avoid spaces).
    private final OutputFormat xmlPreferredOutputFormat;

    private final boolean processAnchors;

    /**
     * Creates a worker for the given document and parses its HEAD styles.
     *
     * @param sourceHtml parsed source document
     * @param xmlWriterFactory factory of the writers used to serialize each part
     * @param xmlPreferDefaultFormat selects the default ({@code true}) or the pretty
     *        ({@code false}) dom4j output format for parts rewritten by anchor processing
     * @param encoding encoding passed to the XML declaration of every part
     * @param maxSizePart size after which a new part is started
     * @param maxSizeSingle size up to which the document is returned as one part
     * @param processAnchors whether in-document links are post-processed after splitting
     * @throws IllegalArgumentException if the document has no BODY element
     */
    HtmlSplitterWorker(Document sourceHtml,
            XmlWriterFactory xmlWriterFactory, boolean xmlPreferDefaultFormat, String encoding,
            DataSize maxSizePart, DataSize maxSizeSingle, boolean processAnchors)
    {
        this.xmlWriterFactory = xmlWriterFactory;
        this.xmlPreferredOutputFormat = xmlPreferDefaultFormat ?
                Dom4jUtils.createDefaultFormat() : Dom4jUtils.createPrettyFormat();
        this.encoding = encoding;
        this.maxSizePart = maxSizePart;
        this.maxSizeSingle = maxSizeSingle;
        this.processAnchors = processAnchors;

        this.sourceElementHtml = sourceHtml.getRootElement();
        this.sourceElementHead = HtmlUtils.findHead(sourceHtml);
        this.sourceElementBody = HtmlUtils.findBody(sourceHtml).getOrThrow(
                () -> new IllegalArgumentException("Unsupported HTML document: no BODY element found"));

        Validate.isTrue(sourceElementBody != null, "Document doesn't have BODY");

        parseStyles();
    }

    /**
     * Marks a selector as used by the current part. The rule text is appended to the
     * part's CSS the first time a known selector is seen; unknown selectors are ignored.
     */
    private void addUsedStyle(final String selector) {
        final String key = selector.trim().toLowerCase(Locale.ROOT);
        final String style = selectorToStyle.get(key);
        if (style != null && usedStyles.add(key)) {
            cssNonFixed.append(style);
        }
    }

    /**
     * Returns the size of the current part: bytes written so far plus the length (in
     * characters) of the CSS that will be injected by {@link #toByteArray()}.
     */
    private DataSize getResultSize() {
        Validate.isTrue(currentXmlWriter != null, "XmlWriter is not created yet");
        currentXmlWriter.flush();

        return DataSize.fromBytes(currentResult.size() + cssFixed.length() + cssNonFixed.length());
    }

    /**
     * Writes one attribute of {@code element} and, for a {@code class} attribute, registers
     * every listed class as used (both as {@code .class} and as {@code element.class}).
     */
    void outputAttribute(Element element, Attribute attr) {
        if (StringUtils.isNotEmpty(attr.getNamespaceURI())) {
            currentXmlWriter.addNamespace(attr.getNamespacePrefix(), attr.getNamespaceURI());
            currentXmlWriter.addAttribute(attr.getNamespacePrefix(), attr.getNamespaceURI(),
                    attr.getName(), attr.getValue());
        } else {
            currentXmlWriter.addAttribute(attr.getName(), attr.getValue());
        }

        if (attr.getName().equalsIgnoreCase("class")) {
            // TODO: anything else here?
            String[] classes = StringUtils.split(attr.getValue(), " ,;\t\r\n");
            for (String className : classes) {
                addUsedStyle("." + className);
                addUsedStyle(element.getName() + "." + className);
            }
        }
    }

    /**
     * Writes children of a table body or list starting at {@code nextSubchildIndex} and
     * stops right after the element that makes the current part exceed {@code maxSizePart}.
     *
     * @param nextSubchildIndex index of the first child to write
     * @param subchildNodes all children of the container
     * @param isIgnoreChildFunction returns {@code true} for elements that must be skipped
     * @return index of the first child that has not been processed yet; equals
     *         {@code subchildNodes.size()} when the container is finished
     */
    private int outputChildren(int nextSubchildIndex, List<?> subchildNodes,
            Function<Element, Boolean> isIgnoreChildFunction)
    {
        int newNextSubchildIndex = nextSubchildIndex;
        for (int subchildIndex = nextSubchildIndex; subchildIndex < subchildNodes.size(); subchildIndex++)
        {
            newNextSubchildIndex = subchildIndex + 1;

            Node subchildNode = (Node) subchildNodes.get(subchildIndex);
            if (subchildNode instanceof Element) {
                Element subchildElement = (Element) subchildNode;

                if (isIgnoreChildFunction.apply(subchildElement)) {
                    continue;
                }

                outputElementR(subchildElement);

                if (getResultSize().gt(maxSizePart)) {
                    break;
                }
            } else {
                outputIfTextOrComment(subchildNode);
            }
        }

        return newNextSubchildIndex;
    }

    /** Writes all attributes of {@code element} to the currently open start/empty tag. */
    void outputElementAttributes(Element element) {
        for (Object attributeNode : element.attributes()) {
            if (attributeNode instanceof Attribute) {
                outputAttribute(element, (Attribute) attributeNode);
            }
        }
    }

    /** Writes the named child of {@code element} with its whole subtree, if there is one. */
    private void outputElementIfPresent(Element element, String childElementName) {
        Option<Element> childElement = HtmlUtils.findChildElement(element, childElementName);
        if (childElement.isPresent()) {
            outputElementR(childElement.get());
        }
    }

    /** Recursively writes {@code element} together with its attributes and content. */
    private void outputElementR(Element element) {
        if (!element.hasContent()) {
            if (StringUtils.isNotEmpty(element.getNamespaceURI())) {
                currentXmlWriter.emptyElement(element.getNamespacePrefix(), element.getName(),
                        element.getNamespaceURI());
            } else {
                currentXmlWriter.emptyElement(element.getName());
            }

            outputElementAttributes(element);
            return;
        }

        outputElementUnconditionally(element);

        for (Object objChild : element.content()) {
            if (!(objChild instanceof Node)) {
                continue;
            }

            Node child = (Node) objChild;
            if (child instanceof Element) {
                outputElementR((Element) child);
            } else {
                outputIfTextOrComment(child);
            }
        }

        currentXmlWriter.endElement();
    }

    /**
     * Writes the start tag of {@code element} with its attributes. The element is left
     * open: the caller is responsible for the matching {@code endElement()}.
     */
    private void outputElementUnconditionally(Element element) {
        if (StringUtils.isNotEmpty(element.getNamespaceURI())) {
            currentXmlWriter.startElement(element.getNamespacePrefix(), element.getName(),
                    element.getNamespaceURI());
        } else {
            currentXmlWriter.startElement(element.getName());
        }

        outputElementAttributes(element);
    }

    /**
     * Writes the HEAD of the current part: the source HEAD (or a synthetic {@code HEAD}
     * when the source has none), a placeholder {@code <style>} element that is later
     * replaced with the real CSS, and every source HEAD child except {@code style} elements.
     */
    void outputHead() {
        if (sourceElementHead.isPresent()) {
            outputElementUnconditionally(sourceElementHead.get());
        } else {
            currentXmlWriter.startElement("HEAD");
        }

        // Placeholder; see toByteArray().
        currentXmlWriter.startElement(TAG_STYLE);
        currentXmlWriter.addAttribute(ATTR_ID, ID_TEMPCSS);
        currentXmlWriter.endElement();

        if (sourceElementHead.isPresent()) {
            for (Object objChild : sourceElementHead.get().content()) {
                if (!(objChild instanceof Node)) {
                    continue;
                }

                Node child = (Node) objChild;
                if (child instanceof Element) {
                    // Source styles are re-emitted through the placeholder instead.
                    if (!child.getName().equals(TAG_STYLE)) {
                        outputElementR((Element) child);
                    }
                } else {
                    outputIfTextOrComment(child);
                }
            }
        }

        currentXmlWriter.endElement();
    }

    /**
     * Writes {@code node} if it is a non-empty comment, CDATA section or text node;
     * any other node type is ignored.
     */
    private void outputIfTextOrComment(Node node) {
        final String stringValue = node.getStringValue();
        if (StringUtils.isEmpty(stringValue)) {
            return;
        }

        if (node instanceof Comment) {
            currentXmlWriter.addComment(stringValue);
        } else if (node instanceof CDATA) {
            // CDATA and Text are sibling interfaces in dom4j (both extend CharacterData),
            // so CDATA has to be tested on its own rather than inside the Text branch.
            currentXmlWriter.addCData(stringValue);
        } else if (node instanceof Text) {
            currentXmlWriter.addCharacters(stringValue);
        }
    }

    /**
     * Parses the text of every HEAD {@code <style>} element into {@link #selectorToStyle}
     * (rules whose selector is {@code .class} / {@code element.class}) and
     * {@link #cssFixed} (everything else). The parser is a plain scan for
     * <code>{</code> / <code>}</code> pairs; it stops at the first position where no
     * well-ordered pair follows and keeps the remaining text as fixed CSS.
     */
    private void parseStyles() {
        if (!sourceElementHead.isPresent()) {
            this.cssFixed = "";
            return;
        }

        StringBuilder unparsed = new StringBuilder();

        for (Element style : HtmlUtils.findChildElements(sourceElementHead.get(), TAG_STYLE)) {
            String text = style.getText();

            int current = 0;
            while (current < text.length()) {
                int nextStart = text.indexOf("{", current);
                int nextEnd = text.indexOf("}", current);
                if (nextStart == -1 || nextEnd == -1 || nextEnd <= nextStart) {
                    break;
                }
                // name is the selector, content is the whole rule including the selector.
                String name = text.substring(current, nextStart);
                String content = text.substring(current, nextEnd + 1);
                current = nextEnd + 1;

                Matcher classOnly = PATTERN_CLASS_ONLY.matcher(name);
                Matcher elementClass = PATTERN_ELEMENT_CLASS.matcher(name);

                if (classOnly.matches() || elementClass.matches()) {
                    // Several rules for the same selector are concatenated in source order.
                    selectorToStyle.merge(name.trim().toLowerCase(Locale.ROOT), content, String::concat);
                } else {
                    unparsed.append(content);
                }
            }

            unparsed.append(text.substring(current));
        }

        this.cssFixed = unparsed.toString();
    }

    /**
     * Starts a new part: resets the per-part state, writes the XML declaration, opens the
     * root element and writes the HEAD. The root element is left open.
     *
     * @param capacityHint expected size of the part; the buffer is pre-sized to twice that
     */
    private void openPart(DataSize capacityHint) {
        this.currentResult = new ByteArrayOutputStream(DataSizeUtils.toBytesInt(capacityHint) << 1);
        this.currentXmlWriter = xmlWriterFactory.createXmlWriter(this.currentResult);
        this.cssNonFixed = new StringBuilder();
        this.usedStyles.clear();

        currentXmlWriter.startDocument(encoding, "1.0");
        outputElementUnconditionally(sourceElementHtml);
        // outputHead() itself copes with a missing source HEAD.
        outputHead();
    }

    /** Closes the open BODY, finishes the document and returns the bytes of the part. */
    private byte[] closePart() {
        currentXmlWriter.endElement();
        currentXmlWriter.endDocument();
        currentXmlWriter.close();
        this.currentXmlWriter = null;

        return toByteArray();
    }

    /**
     * Splits the document.
     *
     * @return a single part when the whole document fits into {@code maxSizeSingle},
     *         otherwise the list of parts in document order
     * @throws IllegalArgumentException if the HEAD alone is bigger than {@code maxSizePart}
     */
    List<byte[]> splitHtml() {
        Option<byte[]> single = trySingle();
        if (single.isPresent()) {
            return single;
        }

        List<byte[]> resultList = new ArrayList<>();
        List<Object> bodyContent = sourceElementBody.content();
        // Position inside a table/list that was cut by the previous part; 0 when the
        // next BODY child starts from its beginning.
        int nextSubchildIndex = 0;
        for (int i = 0; i < bodyContent.size(); i++) {

            openPart(maxSizePart);
            outputElementUnconditionally(sourceElementBody);

            for (; i < bodyContent.size(); i++) {
                Node child = (Node) bodyContent.get(i);
                if (child instanceof Element) {

                    Element childElement = (Element) child;
                    String localName = childElement.getName().toLowerCase(Locale.ROOT);

                    switch (localName) {
                        case "table": {
                            outputElementUnconditionally(childElement);

                            // Table header parts are repeated in every part of a cut table.
                            outputElementIfPresent(childElement, "caption");
                            outputElementIfPresent(childElement, "colgroup");
                            outputElementIfPresent(childElement, "thead");

                            final List<Object> subchildNodes;
                            final Option<Element> tBodyO = HtmlUtils.findChildElement(childElement,
                                    "tbody");
                            if (tBodyO.isPresent()) {
                                subchildNodes = tBodyO.get().content();
                                outputElementUnconditionally(tBodyO.get());
                            } else {
                                currentXmlWriter.startElement("tbody");
                                subchildNodes = childElement.content();
                            }

                            nextSubchildIndex = outputChildren(nextSubchildIndex, subchildNodes,
                                    isIgnoreTableBodyChildF());

                            if (nextSubchildIndex >= subchildNodes.size()) {
                                nextSubchildIndex = 0;
                            } else {
                                // Table is not finished: revisit the same BODY child in the next part.
                                i--;
                            }

                            currentXmlWriter.endElement();
                            outputElementIfPresent(childElement, "tfoot");
                            currentXmlWriter.endElement();
                            break;
                        }
                        case "ul": {
                            outputElementUnconditionally(childElement);

                            final List<?> subchildNodes = childElement.content();
                            nextSubchildIndex = outputChildren(nextSubchildIndex, subchildNodes,
                                    isIgnoreListChildF());

                            if (nextSubchildIndex >= subchildNodes.size()) {
                                nextSubchildIndex = 0;
                            } else {
                                // List is not finished: revisit the same BODY child in the next part.
                                i--;
                            }

                            currentXmlWriter.endElement();
                            break;
                        }
                        default:
                            nextSubchildIndex = 0;
                            outputElementR(childElement);
                            break;
                    }
                } else {
                    outputIfTextOrComment(child);
                }

                if (getResultSize().gt(maxSizePart)) {
                    break;
                }
            }

            resultList.add(closePart());
        }

        if (processAnchors) {
            resultList = processAnchors(resultList, xmlPreferredOutputFormat);
        }

        return resultList;
    }

    /**
     * Post-processes in-document links after splitting.
     *
     * <p>Every {@code id} attribute value and every {@code a/@name} value is mapped to the
     * 1-based number of the part that contains it (for duplicates the last part wins).
     * Each {@code a} element whose {@code href} starts with {@code #} and targets a known
     * value is then passed to {@code HtmlPostprocessor.addPageNumberRef} together with
     * that part number.
     *
     * @param resultList serialized parts
     * @param xmlPreferredOutputFormat format used to re-serialize the parts that were changed
     * @return the parts in the same order; untouched parts are returned as the original bytes
     */
    static List<byte[]> processAnchors(List<byte[]> resultList, OutputFormat xmlPreferredOutputFormat) {
        final MapF<String, Integer> idToPageMap = Cf.hashMap();
        final ListF<Document> parts = Cf.arrayList();

        for (int i = 0; i < resultList.size(); i++) {
            Document doc = Dom4jUtils.read(resultList.get(i));
            parts.add(doc);
            List<Attribute> idAttrs = Cf.toList(doc.selectNodes("//*/@id"))
                    .plus(doc.selectNodes("//a/@name"));
            for (Attribute id : idAttrs) {
                idToPageMap.put(id.getValue(), i + 1);
            }
        }

        final SetF<Integer> modifiedParts = Cf.hashSet();
        for (int i = 0; i < resultList.size(); i++) {
            final int currentPartNum = i;
            new AbstractDom4jVisitor() {
                protected void visit(Element element) {
                    if (StringUtils.equalsIgnoreCase("a", element.getName())) {
                        String url = element.attributeValue("href");
                        if (url != null && url.startsWith("#")) {
                            String id = StringUtils.substringAfter(url, "#");
                            if (idToPageMap.containsKeyTs(id)) {
                                HtmlPostprocessor.addPageNumberRef(element, idToPageMap.getTs(id));
                                modifiedParts.add(currentPartNum);
                            }
                        }
                    }

                }
            }.visit(parts.get(i));
        }

        ListF<byte[]> result = Cf.arrayList();
        for (int i = 0; i < parts.size(); i++) {
            if (modifiedParts.containsTs(i)) {
                result.add(Dom4jUtils.write(parts.get(i), xmlPreferredOutputFormat));
            } else {
                result.add(resultList.get(i));
            }
        }

        return result;
    }

    /**
     * Returns the bytes of the current part with the placeholder {@code <style>} element
     * replaced by the XML-escaped CSS of the part (used class rules followed by the
     * fixed CSS).
     *
     * <p>The replacement is done on the serialized text ("less safe, faster"). The safer
     * but slower alternative would be to re-parse the part, locate the placeholder by
     * its id and set its text through the DOM.
     */
    byte[] toByteArray() {
        // NOTE(review): both conversions use the platform default charset rather than
        // the document encoding; confirm which charset the XmlWriter actually emits
        // before switching to an explicit one.
        String result = new String(currentResult.toByteArray());

        final String styleElement =
                "<style>" + StringEscapeUtils.escapeXml(cssNonFixed + cssFixed) + "</style>";

        // The writer may serialize the empty placeholder in either form.
        for (String s : Cf.list("<style ID=\"" + ID_TEMPCSS + "\"></style>", "<style ID=\"" + ID_TEMPCSS + "\"/>")) {
            result = StringUtils.replace(result, s, styleElement);
        }

        return result.getBytes();
    }

    /**
     * Tries to serialize the whole document as one part.
     *
     * @return the part, or an empty option as soon as the output exceeds {@code maxSizeSingle}
     * @throws IllegalArgumentException if the HEAD alone is bigger than {@code maxSizePart}
     */
    private Option<byte[]> trySingle() {
        openPart(maxSizeSingle);

        // Every part repeats the HEAD, so a HEAD that does not fit makes splitting impossible.
        if (getResultSize().gt(maxSizePart)) {
            throw new IllegalArgumentException("Document HEAD (" + getResultSize()
                    + ") is bigger than required single part length (" + maxSizePart + ")");
        }

        outputElementUnconditionally(sourceElementBody);

        List<?> childNodes = sourceElementBody.content();
        for (int i = 0; i < childNodes.size(); i++) {
            Object child = childNodes.get(i);
            if (child instanceof Element) {
                outputElementR((Element) child);
            } else if (child instanceof Node) {
                // Keep text and comments that sit directly in BODY, as splitHtml() does.
                outputIfTextOrComment((Node) child);
            }

            if (getResultSize().gt(maxSizeSingle)) {
                return Option.empty();
            }
        }

        return Option.of(closePart());
    }

}
