package ru.yandex.chemodan.app.docviewer.utils.pdf.text;

import java.awt.geom.Rectangle2D;
import java.io.IOException;
import java.util.List;

import org.apache.commons.io.output.NullWriter;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSFloat;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import ru.yandex.bolts.collection.Cf;
import ru.yandex.bolts.collection.ListF;
import ru.yandex.bolts.collection.Option;
import ru.yandex.bolts.function.Function;
import ru.yandex.chemodan.app.docviewer.utils.pdf.text.Document.LinePosition;
import ru.yandex.chemodan.app.docviewer.utils.pdf.text.Document.Page;
import ru.yandex.chemodan.app.docviewer.utils.pdf.text.Document.WordPosition;
import ru.yandex.misc.ExceptionUtils;
import ru.yandex.misc.io.IoUtils;
import ru.yandex.misc.xml.XmlUtils;

/**
 * Collects the words and lines of a PDF document, together with their bounding boxes,
 * into a {@link Document} model (one {@link Page} per processed PDF page).
 *
 * <p>Words arrive through {@link #onWord} and line breaks through {@link #onNewLine};
 * both are overrides of hooks declared by the base class and are presumably fired while
 * {@code writeText} runs (the base class is not visible from this file).
 *
 * <p>An instance keeps unsynchronized, reusable per-page buffers, so it is meant to be
 * used by one thread at a time. The static {@code getDocumentWithExtractedWords*} factories
 * create a fresh instance per call.
 *
 * @author vlsergey
 * @author akirakozov
 */
public class PdfPageWordsExtractor extends PdfTextStripperImpl {

    private static final Logger logger = LoggerFactory.getLogger(PdfPageWordsExtractor.class);

    /** Share of the word height by which the highlight box is extended beyond {@code minY}. */
    private static final float MIN_Y_EXTENSION_RATIO = 0.6f;

    /** Share of the word height by which the highlight box is extended beyond {@code maxY}. */
    private static final float MAX_Y_EXTENSION_RATIO = 0.3f;

    private final PDDocument pdDocument;

    /** Words of the line that is currently being assembled. */
    private final ListF<WordPosition> curWords = Cf.arrayList();
    /** Completed lines of the page that is currently being processed. */
    private final ListF<LinePosition> curLines = Cf.arrayList();
    /** Bounding box of all words collected in {@link #curWords}. */
    private final Rectangle curLineRegion = new Rectangle();

    /**
     * Creates an extractor bound to the given document and configures the base stripper
     * (forced parsing, sorting by position, separation by beads).
     *
     * @param pdDocument document whose pages will be processed
     * @throws IOException declared for the implicit base-class constructor call
     */
    protected PdfPageWordsExtractor(PDDocument pdDocument) throws IOException {
        this.pdDocument = pdDocument;
        setForceParsing(true);
        setSortByPosition(true);
        setShouldSeparateByBeads(true);
    }

    /**
     * Mutable integer bounding box that grows as regions are included.
     * After {@link #reset()} it is "inverted" (min above max), so the first included
     * region always replaces all four bounds.
     */
    private static class Rectangle {
        public int minX;
        public int minY;
        public int maxX;
        public int maxY;

        public Rectangle() {
            reset();
        }

        /** Forgets every region included so far. */
        public void reset() {
            this.minX = Integer.MAX_VALUE;
            this.minY = Integer.MAX_VALUE;
            this.maxX = Integer.MIN_VALUE;
            this.maxY = Integer.MIN_VALUE;
        }

        /** Enlarges the box, if needed, so that it covers the given region. */
        public void include(int left, int top, int right, int bottom) {
            this.minX = Math.min(this.minX, left);
            this.minY = Math.min(this.minY, top);
            this.maxX = Math.max(this.maxX, right);
            this.maxY = Math.max(this.maxY, bottom);
        }

        /** Horizontal extent; only meaningful after at least one {@link #include}. */
        public int width() {
            return maxX - minX;
        }

        /** Vertical extent; only meaningful after at least one {@link #include}. */
        public int height() {
            return maxY - minY;
        }
    }

    /** Converts a PDFBox rectangle to an AWT one so that AWT geometry operations can be applied. */
    private static Rectangle2D.Float toRectangle(PDRectangle r) {
        return new Rectangle2D.Float(r.getLowerLeftX(), r.getLowerLeftY(), r.getWidth(), r.getHeight());
    }

    /**
     * Maps a page rotation to the equivalent angle in {@code [0, 360)}, so that
     * values such as -90 or 450 are recognised as 270 and 90 respectively.
     */
    private static int normalizeRotation(int rotation) {
        return ((rotation % 360) + 360) % 360;
    }

    /**
     * Returns an independent copy of a buffer. The buffers of this class are cleared and
     * reused, while the constructors of the result model are not visible from this file,
     * so the result must never share a list instance with a buffer.
     */
    private static <T> ListF<T> snapshot(ListF<T> source) {
        ListF<T> copy = Cf.arrayList();
        copy.addAll(source);
        return copy;
    }

    /**
     * Extracts words and lines from the requested pages.
     *
     * <p>The requested range is clamped to the pages that actually exist: the default upper
     * bound used by the static factories comes from {@code getNumberOfPages()}, whereas the
     * pages are taken from {@code getAllPages()}, and the range may also be supplied by a caller.
     * A document without pages, or an empty range, yields a document without pages.
     *
     * @param fromPage zero-based index of the first page, inclusive
     * @param toPage zero-based index of the last page, inclusive
     * @return document model holding one {@link Page} per processed page; page numbers
     *         in the model are one-based
     * @throws RuntimeException any failure, translated by {@code ExceptionUtils.translate}
     */
    protected Document getWords(int fromPage, int toPage) {
        try {
            List<PDPage> allPages = pdDocument.getDocumentCatalog().getAllPages();
            // Local on purpose: a repeated call must not see pages of a previous call
            ListF<Page> pages = Cf.arrayList();

            int firstPage = Math.max(fromPage, 0);
            int lastPage = Math.min(toPage, allPages.size() - 1);
            if (firstPage != fromPage || lastPage != toPage) {
                logger.warn("Requested page range [{}, {}] is adjusted to [{}, {}]",
                        new Object[] { fromPage, toPage, firstPage, lastPage });
            }

            for (int p = firstPage; p <= lastPage; p++) {
                curWords.clear();
                curLines.clear();
                curLineRegion.reset();

                // Restrict the stripper to this single page; it receives the index shifted by one
                int pageNumber = p + 1;
                setStartPage(pageNumber);
                setEndPage(pageNumber);

                PDPage pdPage = allPages.get(p);
                fixCropBoxIfNeeded(pdPage);

                PDRectangle cropBox = pdPage.findCropBox();
                int rotation = normalizeRotation(pdPage.findRotation());

                // A page turned by a quarter turn is reported with swapped dimensions
                boolean sideways = rotation == 90 || rotation == 270;
                float width = sideways ? cropBox.getHeight() : cropBox.getWidth();
                float height = sideways ? cropBox.getWidth() : cropBox.getHeight();

                // Only the word/line callbacks matter, the produced text is discarded
                writeText(pdDocument, new NullWriter());
                // Flush the words collected after the last line break
                addLine();

                pages.add(new Page(
                        pageNumber,
                        Math.round(cropBox.getLowerLeftX()), Math.round(cropBox.getLowerLeftY()),
                        Math.round(width), Math.round(height),
                        snapshot(curLines)));
            }

            return new Document(pages);
        } catch (Exception exc) {
            throw ExceptionUtils.translate(exc);
        }
    }

    /**
     * Replaces the crop box of the page with its intersection with the media box
     * when the crop box is not fully contained in the media box.
     */
    private void fixCropBoxIfNeeded(PDPage pdPage) {
        // In some PDFs the cropBox is greater than the mediaBox (DOCVIEWER-1277).
        // PDF reference, section 14.11.2.1: “The crop, bleed, trim, and art boxes shall not
        // ordinarily extend beyond the boundaries of the media box. If they do, they are effectively
        // reduced to their intersection with the media box.”
        //
        // The crop box returned by the page is not reduced to the media box, so it is done here.

        Rectangle2D.Float cropBox = toRectangle(pdPage.findCropBox());
        Rectangle2D.Float mediaBox = toRectangle(pdPage.findMediaBox());

        if (mediaBox.contains(cropBox)) {
            return;
        }

        // createIntersection() of a Float rectangle with a Float argument returns a Float rectangle
        Rectangle2D.Float clipped = (Rectangle2D.Float) cropBox.createIntersection(mediaBox);

        // Coordinates order: lower-left x, lower-left y, upper-right x, upper-right y
        COSArray coordinates = new COSArray();
        coordinates.add(new COSFloat(clipped.x));
        coordinates.add(new COSFloat(clipped.y));
        coordinates.add(new COSFloat(clipped.x + clipped.width));
        coordinates.add(new COSFloat(clipped.y + clipped.height));
        pdPage.setCropBox(new PDRectangle(coordinates));
    }

    /**
     * Finishes the current line: when at least one word was collected, stores the line with
     * the bounding box of its words; in any case starts a new empty line.
     */
    private void addLine() {
        if (curWords.isNotEmpty()) {
            curLines.add(new LinePosition(
                    snapshot(curWords),
                    curLineRegion.minX, curLineRegion.minY,
                    curLineRegion.width(), curLineRegion.height()));
        }
        curWords.clear();
        curLineRegion.reset();
    }

    /**
     * Records a word of the current line and extends the bounding box of the line.
     *
     * @param text word text; characters not allowed in XML are replaced with spaces
     * @param minX smaller x bound of the word
     * @param minY smaller y bound of the word
     * @param maxX larger x bound of the word
     * @param maxY larger y bound of the word
     */
    @Override
    protected void onWord(String text, float minX, float minY, float maxX, float maxY) {
        // PDFBox uses the average font height to calculate y coordinates of glyphs;
        // for text highlighting the bounding box has to be extended, otherwise
        // glyphs are highlighted only partially (DOCVIEWER-1268)
        float height = maxY - minY;
        float highlightMinY = minY - height * MIN_Y_EXTENSION_RATIO;
        float highlightMaxY = maxY + height * MAX_Y_EXTENSION_RATIO;

        int targetLeft = Math.round(minX);
        int targetTop = Math.round(highlightMinY);
        int targetWidth = Math.round(maxX - minX);
        int targetHeight = Math.round(highlightMaxY - highlightMinY);

        String cleaned = XmlUtils.invalidCharsToSpaces(text).toString();

        curWords.add(new WordPosition(cleaned, targetLeft, targetTop, targetWidth, targetHeight));

        curLineRegion.include(
                targetLeft, targetTop,
                Math.round(maxX), Math.round(highlightMaxY));
    }

    /** Closes the line assembled so far when a line break is reported. */
    @Override
    protected void onNewLine() {
        addLine();
    }

    /**
     * Extracts words from all pages of the document.
     *
     * @param pdDocument document to process
     * @return document model with extracted words
     */
    public static Document getDocumentWithExtractedWords(PDDocument pdDocument) {
        return getDocumentWithExtractedWords(pdDocument, Option.empty(), Option.empty());
    }

    /**
     * Extracts words from the given range of pages.
     *
     * @param pdDocument document to process
     * @param fromPage zero-based index of the first page, inclusive
     * @param toPage zero-based index of the last page, inclusive
     * @return document model with extracted words
     */
    public static Document getDocumentWithExtractedWords(PDDocument pdDocument, int fromPage, int toPage) {
        return getDocumentWithExtractedWords(pdDocument, Option.of(fromPage), Option.of(toPage));
    }

    /**
     * Extracts words from the given range of pages; an absent bound means the first
     * page ({@code 0}) or the last page ({@code getNumberOfPages() - 1}) respectively.
     *
     * @param pdDocument document to process
     * @param fromPage optional zero-based index of the first page, inclusive
     * @param toPage optional zero-based index of the last page, inclusive
     * @return document model with extracted words
     * @throws RuntimeException an {@link IOException} of the extractor construction,
     *         translated by {@code IoUtils.translate}
     */
    public static Document getDocumentWithExtractedWords(PDDocument pdDocument,
            Option<Integer> fromPage, Option<Integer> toPage)
    {
        try {
            return new PdfPageWordsExtractor(pdDocument).getWords(
                    fromPage.getOrElse(0), toPage.getOrElse(pdDocument.getNumberOfPages() - 1));
        } catch (IOException e) {
            throw IoUtils.translate(e);
        }
    }

    /**
     * @return function form of {@link #getDocumentWithExtractedWords(PDDocument)}
     */
    public static Function<PDDocument, Document> getDocumentWithExtractedWordsF() {
        return PdfPageWordsExtractor::getDocumentWithExtractedWords;
    }

}
