/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package ru.yandex.chemodan.app.docviewer.utils.pdf.text;

import java.io.IOException;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.pdfbox.pdmodel.font.PDFontDescriptor;
import org.apache.pdfbox.util.QuickSort;
import org.apache.pdfbox.util.TextNormalize;
import org.apache.pdfbox.util.TextPosition;
import org.apache.pdfbox.util.TextPositionComparator;

import ru.yandex.chemodan.app.docviewer.convert.pdf.TextPositionWrapper;

/**
 * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
 * @author akirakozov
 * @version $Revision: 1.70 $
 */
abstract class PdfTextStripperImpl extends org.apache.pdfbox.util.PDFTextStripper {

    // Reflective handles to the "endX", "endY" and "rot" fields declared on TextPosition.
    // They are resolved and made accessible in the static initializer below.
    // NOTE(review): nothing else in this class reads them — confirm whether they are still needed.
    private static Field fEndX;
    private static Field fEndY;
    private static Field fRot;

    // Sentinel values used by writePage() to mark its per-line tracking variables as "not set yet".
    // "No previous text on this line": initial value of endOfLastTextX.
    private static final float RESET_VALUE_ENDOFLASTTEXTX = -1;
    // "No expected word start known": suppresses word-separator insertion.
    private static final float RESET_VALUE_EXPECTEDSTARTOFNEXTWORDX = -Float.MAX_VALUE;
    // "No previous word spacing": a negative value makes writePage() use the current spacing alone.
    private static final float RESET_VALUE_LASTWORDSPACING = -1;
    // Starting value for the running maximum of glyph heights on the current line.
    private static final float RESET_VALUE_MAXHEIGHTFORLINE = -1;
    // Starting value for the running maximum of Y on the current line.
    private static final float RESET_VALUE_MAXYFORLINE = -Float.MAX_VALUE;
    // Starting value for the running minimum of (Y - height) on the current line.
    private static final float RESET_VALUE_MINYTOPFORLINE = Float.MAX_VALUE;

    // Comparator passed to QuickSort when writePage() sorts an article's glyphs by position.
    private static final TextPositionComparator comparator = new TextPositionComparator();

    // Resolve the reflective TextPosition field handles once, at class-load time.
    // Any failure aborts class initialization with the original exception as the cause.
    static {
        try {
            fRot = accessibleField("rot");
            fEndX = accessibleField("endX");
            fEndY = accessibleField("endY");
        } catch (Exception exc) {
            throw new ExceptionInInitializerError(exc);
        }
    }

    /**
     * Looks up a field declared on {@link TextPosition} and makes it accessible.
     *
     * @param name name of the declared field
     * @return the accessible field handle
     * @throws NoSuchFieldException if {@link TextPosition} declares no such field
     */
    private static Field accessibleField(String name) throws NoSuchFieldException {
        Field field = TextPosition.class.getDeclaredField(name);
        field.setAccessible(true);
        return field;
    }

    /**
     * Tells whether two vertical extents share some Y range and so can be treated as
     * belonging to the same line. Each extent spans from {@code y - height} to {@code y}.
     *
     * @param y1 Y coordinate of the first extent
     * @param height1 height of the first extent
     * @param y2 Y coordinate of the second extent
     * @param height2 height of the second extent
     * @return {@code true} if the Y values are within 0.1 of each other, or if either Y
     *         lies inside the other extent
     */
    private static boolean overlap(float y1, float height1, float y2, float height2) {
        if (within(y1, y2, .1f)) {
            return true;
        }
        boolean secondInsideFirst = y2 <= y1 && y2 >= y1 - height1;
        boolean firstInsideSecond = y1 <= y2 && y1 >= y2 - height2;
        return secondInsideFirst || firstInsideSecond;
    }

    /**
     * Checks whether {@code second} lies strictly inside the open interval
     * {@code (first - variance, first + variance)}.
     *
     * @param first centre of the interval
     * @param second value being tested
     * @param variance half-width of the interval (bounds are exclusive)
     * @return {@code true} if {@code second} is strictly between the two bounds
     */
    private static boolean within(float first, float second, float variance) {
        float upperBound = first + variance;
        float lowerBound = first - variance;
        return second < upperBound && second > lowerBound;
    }

    /**
     * Decides whether a glyph ends the current word.
     *
     * @param text glyph to inspect
     * @return {@code true} for a synthetic {@link WordSeparator}, or for a glyph whose
     *         character string is exactly one whitespace character
     */
    private static boolean isWordSeparator(TextPosition text) {
        if (text instanceof WordSeparator) {
            return true;
        }
        String character = text.getCharacter();
        return character.length() == 1 && Character.isWhitespace(character.charAt(0));
    }

    /** Normalizer applied to every word before it is passed to {@link #onWord}. */
    private TextNormalize normalize;

    /**
     * Creates the stripper and a text normalizer bound to the inherited output encoding.
     *
     * @throws IOException if the superclass constructor fails
     */
    public PdfTextStripperImpl() throws IOException {
        // The superclass no-arg constructor is invoked implicitly.
        this.normalize = new TextNormalize(outputEncoding);
    }

    /**
     * Splits one line of glyphs into words and reports each non-empty word, together with
     * its bounding box, through {@link #addWord}. {@link #onNewLine()} is signalled first.
     *
     * <p>A word ends at every glyph for which {@link #isWordSeparator} returns {@code true};
     * separators themselves contribute neither text nor bounds.
     *
     * @param line glyphs of the line, in output order
     * @param isRtlDominant whether right-to-left characters dominate the article
     * @param hasRtl whether the article contains any right-to-left character
     */
    private void onLine(List<TextPosition> line, boolean isRtlDominant, boolean hasRtl) {
        onNewLine();

        StringBuilder wordBuilder = new StringBuilder();
        // Running minima start at +MAX and running maxima at -MAX.
        // Float.MIN_VALUE must not be used for the maxima: it is the smallest *positive*
        // float, so a word with only negative coordinates would report a wrong maximum.
        float minX = Float.MAX_VALUE;
        float minY = Float.MAX_VALUE;
        float maxX = -Float.MAX_VALUE;
        float maxY = -Float.MAX_VALUE;

        for (TextPosition text : line) {
            if (isWordSeparator(text)) {
                // Flush the word collected so far and start a new one with fresh bounds.
                addWord(wordBuilder, isRtlDominant, hasRtl, minY, maxY, minX, maxX);

                wordBuilder = new StringBuilder();
                minX = Float.MAX_VALUE;
                minY = Float.MAX_VALUE;
                maxX = -Float.MAX_VALUE;
                maxY = -Float.MAX_VALUE;
            } else {
                wordBuilder.append(text.getCharacter());
                // The glyph box spans [x, x + width] horizontally and [y - height, y] vertically.
                minX = Math.min(minX, text.getX());
                minY = Math.min(minY, text.getY() - text.getHeight());
                maxX = Math.max(maxX, text.getX() + text.getWidth());
                maxY = Math.max(maxY, text.getY());
            }
        }

        // Flush the trailing word (no-op when the line ended with a separator).
        addWord(wordBuilder, isRtlDominant, hasRtl, minY, maxY, minX, maxX);
    }

    /**
     * Normalizes the collected word text and forwards it to {@link #onWord}.
     * Does nothing when the builder is empty.
     *
     * @param lineBuilder characters collected for the word
     * @param isRtlDominant passed to the normalizer when reordering right-to-left text
     * @param hasRtl whether logical reordering should be applied at all
     * @param minY smallest Y of the word's bounding box
     * @param maxY largest Y of the word's bounding box
     * @param minX smallest X of the word's bounding box
     * @param maxX largest X of the word's bounding box
     */
    private void addWord(StringBuilder lineBuilder, boolean isRtlDominant,
            boolean hasRtl, float minY, float maxY, float minX, float maxX)
    {
        if (lineBuilder.length() == 0) {
            return;
        }

        String raw = lineBuilder.toString();
        String ordered = hasRtl ? normalize.makeLineLogicalOrder(raw, isRtlDominant) : raw;
        String word = normalize.normalizePres(ordered);
        onWord(word, minX, minY, maxX, maxY);
    }

    /**
     * Callback invoked once for every non-empty word found on a line.
     *
     * @param text the word text after normalization
     * @param minX smallest glyph X within the word
     * @param minY smallest value of (glyph Y - glyph height) within the word
     * @param maxX largest value of (glyph X + glyph width) within the word
     * @param maxY largest glyph Y within the word
     */
    protected abstract void onWord(String text, float minX, float minY, float maxX, float maxY);

    /**
     * Callback invoked at the start of every line, before any of its words are reported.
     */
    protected abstract void onNewLine();

    /**
     * Wraps every incoming text position so that its reported height can be corrected
     * for 'Cambria' fonts, then hands the wrapper to the superclass.
     *
     * @param textPosition the text position delivered by the PDF content stream processing
     */
    @Override
    protected void processTextPosition(TextPosition textPosition) {
        super.processTextPosition(new TextPositionWrapper(textPosition) {
            /**
             * XXX: Max height of 'Cambria' font hack.
             * PdfBox calculates the average font height from the font bounding box
             * (see PDSimpleFont.getFontHeight), but the bounding box of the 'Cambria' font
             * is far too tall. For such fonts the height is rescaled by the ratio of
             * capHeight to half the bounding-box height (DOCVIEWER-1279).
             *
             * <p>Falls back to the unmodified height when the font is not Cambria, when
             * descriptor data is missing, or when the cap height or bounding-box height is
             * not strictly positive. Without that last check the rescaling would yield
             * zero, Infinity or NaN and break line detection.
             *
             * @return the corrected glyph height
             */
            @Override
            public float getHeight() {
                PDFontDescriptor desc = getFont().getFontDescriptor();
                if (desc == null
                        || desc.getFontName() == null
                        || !desc.getFontName().contains("Cambria")
                        || desc.getFontBoundingBox() == null)
                {
                    return super.getHeight();
                }

                float boundingBoxBasedHeight = desc.getFontBoundingBox().getHeight() / 2;
                float capHeight = desc.getCapHeight();
                // Negated comparisons so that NaN is rejected as well.
                if (!(boundingBoxBasedHeight > 0) || !(capHeight > 0)) {
                    return super.getHeight();
                }
                return super.getHeight() * capHeight / boundingBoxBasedHeight;
            }

            /**
             * Uses the corrected height for the direction-adjusted variant as well.
             *
             * @return same value as {@link #getHeight()}
             */
            @Override
            public float getHeightDir() {
                return getHeight();
            }
        });
    }

    /**
     * Walks the glyphs collected for the current page, article by article, groups them
     * into lines and passes each finished line to {@link #onLine}.
     *
     * <p>Each article is processed in two passes: the first counts left-to-right and
     * right-to-left characters to determine the dominant direction; the second walks the
     * glyphs, decides where lines break and where synthetic word separators go.
     *
     * <p>The tracking variables declared before the article loop (last position, line
     * extents, last word spacing) are not reset between articles.
     *
     * @throws IOException declared by this method; presumably raised by the inherited
     *         page/paragraph/article hooks it calls — confirm against the superclass
     */
    @SuppressWarnings("unchecked")
    protected void writePage() throws IOException {
        float maxYForLine = RESET_VALUE_MAXYFORLINE;
        // NOTE(review): minYTopForLine is assigned below but never read in this method.
        float minYTopForLine = RESET_VALUE_MINYTOPFORLINE;
        float endOfLastTextX = RESET_VALUE_ENDOFLASTTEXTX;
        float lastWordSpacing = RESET_VALUE_LASTWORDSPACING;
        float maxHeightForLine = RESET_VALUE_MAXHEIGHTFORLINE;
        PositionWrapperImpl lastPosition = null;
        PositionWrapperImpl lastLineStartPosition = null;

        boolean startOfPage = true;// flag to indicate start of page
        boolean startOfArticle = true;
        // The page-start hook runs only when there is at least one article;
        // writePageEnd() at the bottom runs unconditionally.
        if (charactersByArticle.size() > 0) {
            writePageStart();
        }

        for (int i = 0; i < charactersByArticle.size(); i++) {
            List<TextPosition> textList = charactersByArticle.get(i);
            if (getSortByPosition()) {
                QuickSort.sort(textList, comparator);
            }

            Iterator<TextPosition> textIter = textList.iterator();

            // Pass 1: count strongly directional characters to find the dominant direction.
            int ltrCnt = 0;
            int rtlCnt = 0;

            while (textIter.hasNext()) {
                TextPosition position = textIter.next();
                String stringValue = position.getCharacter();
                for (int a = 0; a < stringValue.length(); a++) {
                    byte dir = Character.getDirectionality(stringValue.charAt(a));
                    if ((dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT)
                            || (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING)
                            || (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE))
                    {
                        ltrCnt++;
                    } else if ((dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
                            || (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
                            || (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING)
                            || (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE))
                    {
                        rtlCnt++;
                    }
                }
            }

            // A tie counts as left-to-right.
            boolean isRtlDominant = rtlCnt > ltrCnt;

            startArticle(!isRtlDominant);
            startOfArticle = true;
            boolean hasRtl = rtlCnt > 0;

            // Glyphs (and synthetic separators) of the line currently being assembled.
            List<TextPosition> line = new ArrayList<>();

            // Pass 2: assemble lines and words.
            textIter = textList.iterator();
            float previousAveCharWidth = -1;
            while (textIter.hasNext()) {
                TextPosition position = textIter.next();
                PositionWrapperImpl current = new PositionWrapperImpl(position);
                String characterValue = position.getCharacter();

                // A change of font object or font size restarts the running average char width.
                if (lastPosition != null
                        && ((position.getFont() != lastPosition.getTextPosition().getFont()) || (position
                                .getFontSize() != lastPosition.getTextPosition().getFontSize())))
                {
                    previousAveCharWidth = -1;
                }

                float positionX;
                float positionY;
                float positionWidth;
                float positionHeight;

                // Use direction-adjusted coordinates when sorting by position, raw ones otherwise.
                if (getSortByPosition()) {
                    positionX = position.getXDirAdj();
                    positionY = position.getYDirAdj();
                    positionWidth = position.getWidthDirAdj();
                    positionHeight = position.getHeightDir();
                } else {
                    positionX = position.getX();
                    positionY = position.getY();
                    positionWidth = position.getWidth();
                    positionHeight = position.getHeight();
                }

                // NOTE(review): if getIndividualWidths() can be empty, the divisions by
                // wordCharCount below produce Infinity/NaN — confirm it is always non-empty.
                int wordCharCount = position.getIndividualWidths().length;

                // Gap threshold derived from the width of a space, averaged with the previous
                // glyph's value when there is one. An unusable space width disables it.
                float wordSpacing = position.getWidthOfSpace();
                float deltaSpace;
                if ((wordSpacing == 0) || Float.isNaN(wordSpacing)) {
                    deltaSpace = Float.MAX_VALUE;
                } else {
                    if (lastWordSpacing < 0) {
                        deltaSpace = (wordSpacing * getSpacingTolerance());
                    } else {
                        deltaSpace = (((wordSpacing + lastWordSpacing) / 2f) * getSpacingTolerance());
                    }
                }

                // Gap threshold derived from the average character width, smoothed the same way.
                float averageCharWidth = -1;
                if (previousAveCharWidth < 0) {
                    averageCharWidth = (positionWidth / wordCharCount);
                } else {
                    averageCharWidth = (previousAveCharWidth + (positionWidth / wordCharCount)) / 2f;
                }
                float deltaCharWidth = (averageCharWidth * getAverageCharTolerance());

                // The next word is expected to start past the end of the previous glyph plus
                // the smaller of the two thresholds.
                float expectedStartOfNextWordX = RESET_VALUE_EXPECTEDSTARTOFNEXTWORDX;
                if (endOfLastTextX != RESET_VALUE_ENDOFLASTTEXTX) {
                    if (deltaCharWidth > deltaSpace) {
                        expectedStartOfNextWordX = endOfLastTextX + deltaSpace;
                    } else {
                        expectedStartOfNextWordX = endOfLastTextX + deltaCharWidth;
                    }
                }

                if (lastPosition != null) {
                    if (startOfArticle) {
                        lastPosition.setArticleStart();
                        startOfArticle = false;
                    }

                    // Line break: the glyph does not vertically overlap the current line, or
                    // it sits more than five space-thresholds to the right of the previous glyph.
                    if (!overlap(positionY, positionHeight, maxYForLine, maxHeightForLine)
                        || (positionX - endOfLastTextX > deltaSpace * 5))
                    {
                        onLine(line, isRtlDominant, hasRtl);
                        line.clear();

                        lastLineStartPosition = (PositionWrapperImpl) handleLineSeparation(current,
                                lastPosition, lastLineStartPosition, maxHeightForLine);

                        // Reset per-line tracking; this also suppresses the separator check below.
                        expectedStartOfNextWordX = RESET_VALUE_EXPECTEDSTARTOFNEXTWORDX;
                        maxYForLine = RESET_VALUE_MAXYFORLINE;
                        maxHeightForLine = RESET_VALUE_MAXHEIGHTFORLINE;
                        minYTopForLine = RESET_VALUE_MINYTOPFORLINE;
                    }

                    // Word break: the glyph starts beyond the expected position and the
                    // previous glyph's text does not already end with a space.
                    if (expectedStartOfNextWordX != RESET_VALUE_EXPECTEDSTARTOFNEXTWORDX
                            && expectedStartOfNextWordX < positionX
                            && lastPosition.getTextPosition().getCharacter() != null
                            && !lastPosition.getTextPosition().getCharacter().endsWith(" "))
                    {
                        line.add(WordSeparator.getSeparator());
                    }
                }

                if (positionY >= maxYForLine) {
                    maxYForLine = positionY;
                }

                endOfLastTextX = positionX + positionWidth;

                if (characterValue != null) {
                    // Only the very first glyph of the page triggers the paragraph-start hook here.
                    if (startOfPage && lastPosition == null) {
                        writeParagraphStart();
                    }
                    line.add(position);
                }
                maxHeightForLine = Math.max(maxHeightForLine, positionHeight);
                minYTopForLine = Math.min(minYTopForLine, positionY - positionHeight);
                lastPosition = current;
                if (startOfPage) {
                    lastPosition.setParagraphStart();
                    lastPosition.setLineStart();
                    lastLineStartPosition = lastPosition;
                    startOfPage = false;
                }
                lastWordSpacing = wordSpacing;
                previousAveCharWidth = averageCharWidth;
            }

            // Flush whatever remains of the last line of this article.
            if (line.size() > 0) {
                onLine(line, isRtlDominant, hasRtl);
                writeParagraphEnd();
            }
            endArticle();
        }
        writePageEnd();
    }

}
