package ru.yandex.chemodan.app.docviewer.utils.pdf.text;

import java.awt.Color;
import java.awt.Graphics;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.net.URL;
import java.util.List;

import javax.imageio.ImageIO;

import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;

import ru.yandex.chemodan.app.docviewer.DocviewerSpringTestBase;
import ru.yandex.chemodan.app.docviewer.TestResources;
import ru.yandex.chemodan.app.docviewer.adapters.poppler.ResizeOption;
import ru.yandex.chemodan.app.docviewer.utils.DimensionO;
import ru.yandex.chemodan.app.docviewer.utils.FileUtils;
import ru.yandex.chemodan.app.docviewer.utils.pdf.PdfUtils;
import ru.yandex.chemodan.app.docviewer.utils.pdf.image.PdfRenderer;
import ru.yandex.chemodan.app.docviewer.utils.pdf.text.Document.LinePosition;
import ru.yandex.chemodan.app.docviewer.utils.pdf.text.Document.Page;
import ru.yandex.chemodan.app.docviewer.utils.pdf.text.Document.WordPosition;
import ru.yandex.misc.io.IoUtils;
import ru.yandex.misc.io.file.FileOutputStreamSource;
import ru.yandex.misc.io.url.UrlInputStreamSource;
import ru.yandex.misc.test.Assert;

/**
 * Tests for {@link PdfPageWordsExtractor}: checks the geometry reported for extracted words,
 * lines and pages, and writes the word outlines on top of a rendered page into a temporary PNG
 * so the result can be inspected by eye.
 */
public class PdfPageWordsExtractorTest extends DocviewerSpringTestBase {

    private static final Logger logger = LoggerFactory.getLogger(PdfPageWordsExtractorTest.class);

    private static final float EPS = 0.001f;

    @Autowired
    @Qualifier("pdfRenderer")
    private PdfRenderer pdfRenderer;

    /**
     * Extracts words from the first extracted page of the document and asserts that the first
     * word whose text equals {@code expected.getWord()} has the expected left/top/width/height.
     * Fails if no such word is found.
     *
     * @param documentUrl location of the PDF document
     * @param expected word text together with its expected geometry
     */
    private void extractTextAndCheckWordPosition(URL documentUrl, final WordPosition expected) {
        final UrlInputStreamSource pdfInputStreamSource = new UrlInputStreamSource(documentUrl);

        PdfUtils.withExistingDocument(pdfInputStreamSource, true, d -> {
            boolean found = false;
            Page p = PdfPageWordsExtractor.getDocumentWithExtractedWords(d).getPage(0);
            for (WordPosition w : p.getAllWordPositions()) {
                if (expected.getWord().equals(w.getWord())) {
                    Assert.equals(expected.getLeft(), w.getLeft());
                    Assert.equals(expected.getTop(), w.getTop());
                    Assert.equals(expected.getWidth(), w.getWidth());
                    Assert.equals(expected.getHeight(), w.getHeight());

                    // Only the first occurrence of the word is compared.
                    found = true;
                    break;
                }
            }

            Assert.isTrue(found, "Word '" + expected.getWord() + "' not found in text");
        });
    }

    /**
     * Extracts the words of a single page, renders that page to a temporary PNG, asserts that
     * every word starts inside the rendered image and draws the word rectangles over the image.
     *
     * @param url location of the PDF document
     * @param page page number as accepted by the renderer; the extractor is called with
     *             {@code page - 1} (presumably 1-based vs. 0-based numbering)
     */
    private void test(final URL url, final int page) {
        final UrlInputStreamSource pdfInputStreamSource = new UrlInputStreamSource(url);

        PdfUtils.withExistingDocument(pdfInputStreamSource, true, document -> {
            // A single-page range is extracted, so the requested page is at index 0 of the result.
            final Page p = PdfPageWordsExtractor
                    .getDocumentWithExtractedWords(document, page - 1, page - 1)
                    .getPage(0);
            final List<WordPosition> pageWordPositions = p.getAllWordPositions();

            FileUtils.withEmptyTemporaryFile("temp", ".png", temporaryPngFile -> {
                try {
                    // Render at the extracted page width so word coordinates can be drawn as-is.
                    pdfRenderer.render(pdfInputStreamSource, page, ResizeOption.scale(DimensionO.cons(p.getWidth())),
                            new FileOutputStreamSource(temporaryPngFile));

                    BufferedImage bufferedImage = ImageIO.read(temporaryPngFile.getFile());
                    // ImageIO.read returns null when no registered reader can decode the file.
                    Assert.isTrue(bufferedImage != null,
                            "Rendered page image could not be decoded: " + temporaryPngFile.getAbsolutePath());

                    checkAndOutlineWords(bufferedImage, pageWordPositions);

                    ImageIO.write(bufferedImage, "png", temporaryPngFile.getFile());
                } catch (IOException exc) {
                    throw IoUtils.translate(exc);
                }

                logger.info("PNG file with text borders saved in {}", temporaryPngFile.getAbsolutePath());
            });
        });
    }

    /**
     * Asserts that the left/top corner of every word lies within the image bounds and draws a
     * blue rectangle around each word. The graphics context is always released, even when an
     * assertion fails.
     */
    private static void checkAndOutlineWords(BufferedImage image, List<WordPosition> words) {
        final int pageWidth = image.getWidth();
        final int pageHeight = image.getHeight();

        Graphics graphics = image.getGraphics();
        try {
            graphics.setColor(Color.BLUE);

            for (WordPosition position : words) {
                Assert.assertTrue("Text outside the page (" + position.getLeft() + " > "
                        + pageWidth + ")", position.getLeft() <= pageWidth);
                Assert.assertTrue("Text outside the page (" + position.getTop() + " > "
                        + pageHeight + ")", position.getTop() <= pageHeight);

                graphics.drawRect(position.getLeft(), position.getTop(), position.getWidth(), position.getHeight());
            }
        } finally {
            graphics.dispose();
        }
    }

    /** Runs the render-and-outline check on page 34 of the 114-page test document. */
    @Test
    public void testFormulas() {
        test(TestResources.Adobe_Acrobat_1_5_114p, 34);
    }

    /** Runs the render-and-outline check on page 2 of the 4-page test document. */
    @Test
    public void testJapaneese() {
        test(TestResources.Adobe_Acrobat_1_4_004p, 2);
    }

    /** Extracting the page range 10..14 is expected to yield exactly five pages. */
    @Test
    public void extractPageRange() {
        final UrlInputStreamSource pdfInputStreamSource =
                new UrlInputStreamSource(TestResources.Adobe_Acrobat_1_5_114p);

        PdfUtils.withExistingDocument(pdfInputStreamSource, true, d -> {
            Document doc = PdfPageWordsExtractor.getDocumentWithExtractedWords(d, 10, 14);
            Assert.hasSize(5, doc.getPages());
        });
    }

    /** Checks the position of the word "Best" in a document with an offset crop box. */
    @Test
    public void extractText() {
        // PDF file whose cropBox differs from its mediaBox,
        // with the cropBox (left, top) corner moved away from (0, 0)
        extractTextAndCheckWordPosition(
                TestResources.Adobe_Acrobat_Docviewer_1277,
                new WordPosition("Best", 38, 98, 84, 56));
    }

    /** Checks the position of the word "Using" in a document with an oversized crop box. */
    @Test
    public void extractText2() {
        // PDF file with a cropBox larger than its mediaBox
        extractTextAndCheckWordPosition(
                TestResources.Adobe_Acrobat_Docviewer_1277_2,
                new WordPosition("Using", 299, 144, 58, 36));
    }

    /**
     * Verifies the page, line and word structure extracted from {@code words_extraction.pdf},
     * including the exact geometry of the page, of one word and of one line.
     */
    @Test
    public void extractWords() {
        final URL documentUrl = TestResources.class.getResource("test/pdf/words_extraction.pdf");
        final UrlInputStreamSource pdfInputStreamSource = new UrlInputStreamSource(documentUrl);

        test(documentUrl, 1);
        PdfUtils.withExistingDocument(pdfInputStreamSource, true, d -> {
            Document doc = PdfPageWordsExtractor.getDocumentWithExtractedWords(d);

            Assert.hasSize(1, doc.getPages());
            Page page = doc.getPage(0);
            Assert.hasSize(2, page.getLines());
            Assert.hasSize(3, page.getLine(0).getWords());

            Assert.equals(0, page.getLeft());
            Assert.equals(0, page.getTop());
            Assert.equals(612, page.getWidth());
            Assert.equals(792, page.getHeight());

            // Second word of the first line.
            WordPosition word = page.getLine(0).getWords().get(1);
            Assert.equals(82, word.getLeft());
            Assert.equals(55, word.getTop());
            Assert.equals(21, word.getWidth());
            Assert.equals(15, word.getHeight());

            // Second line of the page.
            LinePosition line = page.getLine(1);
            Assert.equals(57, line.getLeft());
            Assert.equals(193, line.getTop());
            Assert.equals(80, line.getWidth());
            Assert.equals(15, line.getHeight(), EPS);
        });
    }

    /** The first page of the two-column test document is expected to produce four lines. */
    @Test
    public void extractTextFromMultiColumnDocument() {
        final UrlInputStreamSource pdfInputStreamSource = new UrlInputStreamSource(
                TestResources.class.getResource("test/pdf/2columns.pdf"));

        PdfUtils.withExistingDocument(pdfInputStreamSource, true, d -> {
            Document doc = PdfPageWordsExtractor.getDocumentWithExtractedWords(d);
            Assert.sizeIs(4, doc.getPage(0).getLines());
        });
    }

}
