package ru.yandex.chemodan.app.docviewer.convert.pdf;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

import org.apache.pdfbox.pdmodel.common.COSObjectable;
import org.apache.pdfbox.pdmodel.graphics.color.PDColorState;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.pdfbox.util.ResourceLoader;
import org.apache.pdfbox.util.TextNormalize;
import org.apache.pdfbox.util.TextPosition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import ru.yandex.misc.lang.ObjectUtils;
import ru.yandex.misc.lang.StringUtils;

abstract class AbstractPdfTextStripper extends PDFTextStripper {
    // Class-wide SLF4J logger; used for trace output while a line is split into segments.
    private static final Logger logger = LoggerFactory.getLogger(AbstractPdfTextStripper.class);

    // The constants below are the initial/reset values of the per-line tracking
    // variables kept by writePage(). All but LASTWORDSPACING are re-applied there
    // whenever a new line starts.

    // Marks "no text has ended on the current line yet".
    private static final float ENDOFLASTTEXTX_RESET_VALUE = -1;
    // Marks "no expected start of the next word has been computed".
    private static final float EXPECTEDSTARTOFNEXTWORDX_RESET_VALUE = -Float.MAX_VALUE;
    // Initial value of the previous word spacing; writePage() treats a negative
    // value as "no previous spacing available".
    private static final float LASTWORDSPACING_RESET_VALUE = -1;
    // Initial value of the tallest glyph height seen on the current line.
    private static final float MAXHEIGHTFORLINE_RESET_VALUE = -1;
    // Initial value of the largest Y seen on the current line (any real Y is >= it).
    private static final float MAXYFORLINE_RESET_VALUE = -Float.MAX_VALUE;
    // Initial value of the smallest (Y - height) seen on the current line.
    private static final float MINYTOPFORLINE_RESET_VALUE = Float.MAX_VALUE;

    // Text normalizer (constructed with the "utf-8" encoding name); used when a
    // segment is emitted to reorder right-to-left text and to normalize presentation forms.
    private final TextNormalize normalize = new TextNormalize("utf-8");

    protected AbstractPdfTextStripper() throws IOException {
        super(ResourceLoader.loadProperties(
                "ru/yandex/chemodan/app/docviewer/convert/pdf/PDFTextStripperImpl.properties", true));
    }

    /**
     * Decides whether {@code text} must start a new output segment because its
     * "format" differs from that of the preceding position.
     *
     * <p>A difference is reported when any of the following holds:
     * <ul>
     * <li>either character is not alphanumeric;</li>
     * <li>one character is numeric and the other is not, or one is alphabetic and the other is not;</li>
     * <li>a non-upper-case character is followed by an upper-case one;</li>
     * <li>the two positions are instances of different classes;</li>
     * <li>for {@link ColoredTextPosition}s: the rendering mode, the non-stroking colour
     *     or the stroking colour differ;</li>
     * <li>the Y coordinate, the font size or the base font name differ.</li>
     * </ul>
     *
     * @param prev the previously processed position, or {@code null} at the start of a line
     * @param text the position being processed
     * @return {@code true} if the two positions should not share a segment;
     *         {@code false} if they match or if {@code prev} is {@code null}
     */
    private boolean isNotSameFormat(TextPosition prev, TextPosition text) {
        if (prev == null) {
            return false;
        }

        final String prevCharacter = prev.getCharacter();
        final String thisCharacter = text.getCharacter();

        if (!StringUtils.isAlphanumeric(prevCharacter)
                || !StringUtils.isAlphanumeric(thisCharacter))
        {
            return true;
        }
        if (StringUtils.isNumeric(prevCharacter) != StringUtils.isNumeric(thisCharacter)) {
            return true;
        }
        if (StringUtils.isAlpha(prevCharacter) != StringUtils.isAlpha(thisCharacter)) {
            return true;
        }
        // Lower-to-upper transition (e.g. "fooBar") starts a new segment; the reverse does not.
        if (!StringUtils.isUpperCase(prevCharacter) && StringUtils.isUpperCase(thisCharacter)) {
            return true;
        }

        if (!prev.getClass().equals(text.getClass())) {
            return true;
        }

        // Classes are equal at this point, so the second cast is safe whenever the first is.
        if (prev instanceof ColoredTextPosition) {
            final ColoredTextPosition prevColored = (ColoredTextPosition) prev;
            final ColoredTextPosition textColored = (ColoredTextPosition) text;

            if (prevColored.getTextRenderingMode() != textColored.getTextRenderingMode()
                    || !isSameColorState(prevColored.getNonStrokingColor(), textColored.getNonStrokingColor())
                    || !isSameColorState(prevColored.getStrokingColor(), textColored.getStrokingColor()))
            {
                return true;
            }
        }

        // Compare the baseline of the previous position with the current one.
        // (The earlier code compared prev.getY() with itself, which could never differ.)
        return prev.getY() != text.getY()
                || prev.getFontSize() != text.getFontSize()
                || !StringUtils.equals(prev.getFont().getBaseFont(), text.getFont().getBaseFont());
    }

    /**
     * Approximate equality check for two colour states.
     *
     * <p>XXX: this is not a fully correct equality check, but it is good enough to be
     * helpful in many cases. Two states are considered the same when their colour
     * spaces are equal, their patterns are equal, and their colour component values
     * (as float arrays) are equal. Evaluation stops at the first mismatch.
     *
     * @param state1 first colour state
     * @param state2 second colour state
     * @return {@code true} if all three comparisons succeed
     */
    private boolean isSameColorState(PDColorState state1, PDColorState state2) {
        return ObjectUtils.equals(state1.getColorSpace(), state2.getColorSpace())
                && ObjectUtils.equals(state1.getPattern(), state2.getPattern())
                && Arrays.equals(
                        state1.getCOSColorSpaceValue().toFloatArray(),
                        state2.getCOSColorSpaceValue().toFloatArray());
    }

    /**
     * Tells whether two vertical extents share a line.
     *
     * <p>They do when the two Y values are within 0.1 of each other, when
     * {@code y2} lies in {@code [y1 - height1, y1]}, or when {@code y1} lies in
     * {@code [y2 - height2, y2]}.
     *
     * @param y1 Y coordinate of the first extent
     * @param height1 height of the first extent
     * @param y2 Y coordinate of the second extent
     * @param height2 height of the second extent
     * @return {@code true} if any of the three conditions holds
     */
    private boolean overlap(float y1, float height1, float y2, float height2) {
        if (within(y1, y2, .1f)) {
            return true;
        }
        if (y2 <= y1 && y2 >= y1 - height1) {
            return true;
        }
        return y1 <= y2 && y1 >= y2 - height2;
    }

    /**
     * Processes the given pages by delegating directly to the superclass implementation.
     *
     * <p>NOTE(review): this override adds no behaviour of its own; presumably it exists
     * only to make the method {@code public} for callers of this class - confirm
     * against the superclass declaration.
     *
     * @param pages the pages to process
     * @throws IOException propagated from the superclass
     */
    @Override
    public void processPages(List<COSObjectable> pages) throws IOException {
        super.processPages(pages);
    }

    /**
     * Wraps every incoming text position in a {@link ColoredTextPosition} that also
     * carries the current text rendering mode and copies of the current stroking and
     * non-stroking colour states, then hands the wrapper to the superclass.
     *
     * @param text the text position reported by the content-stream processor
     */
    @Override
    protected void processTextPosition(TextPosition text) {
        // The colour states are cloned, presumably so that later graphics-state
        // changes cannot alter what was captured for this position.
        final PDColorState strokingSnapshot =
                (PDColorState) getGraphicsState().getStrokingColor().clone();
        final PDColorState nonStrokingSnapshot =
                (PDColorState) getGraphicsState().getNonStrokingColor().clone();

        final TextPosition colored = new ColoredTextPosition(text,
                getGraphicsState().getTextState().getRenderingMode(),
                strokingSnapshot,
                nonStrokingSnapshot);
        super.processTextPosition(colored);
    }

    /**
     * Checks whether {@code second} lies strictly inside the open interval
     * {@code (first - variance, first + variance)}.
     *
     * @param first centre of the interval
     * @param second value to test
     * @param variance half-width of the interval
     * @return {@code true} if {@code second} is strictly between the two bounds
     */
    private boolean within(float first, float second, float variance) {
        final float upperBound = first + variance;
        final float lowerBound = first - variance;
        return second < upperBound && second > lowerBound;
    }

    /**
     * Splits one visual line into segments and passes each segment to
     * {@link #writeLine(ColoredTextPosition, float, float, String)}.
     *
     * <p>A new segment starts when a word separator is met, when
     * {@link #isNotSameFormat(TextPosition, TextPosition)} reports a format change,
     * or when the X coordinate goes backwards relative to the previous element.
     * A word separator contributes {@code " \u00a0"} to the end of the segment it closes.
     * Segments that contain no real text position are not emitted.
     *
     * @param line the positions (and word separators) of one line, in output order
     * @param isRtlDominant whether right-to-left characters dominate the article
     * @param hasRtl whether the article contains any right-to-left characters
     */
    private void writeLine(List<ColoredTextPosition> line, boolean isRtlDominant, boolean hasRtl) {
        // Checked once so that trace messages are not built when tracing is off.
        final boolean traceEnabled = logger.isTraceEnabled();

        ColoredTextPosition first = null;
        ColoredTextPosition prev = null;
        StringBuilder lineBuilder = new StringBuilder();

        if (traceEnabled) {
            logger.trace("Writing line: " + line.size() + " elements");
        }
        for (ColoredTextPosition text : line) {
            // Evaluated once per element instead of on every use.
            final boolean separator = isWordSeparator(text);

            if (traceEnabled) {
                if (separator) {
                    logger.trace("\t words separator");
                } else {
                    logger.trace("\t" + text.getX() + "; " + text.getY() + " - " + text.getCharacter());
                }
            }

            if (separator || isNotSameFormat(prev, text)
                    || (prev != null && prev.getX() > text.getX()))
            {
                if (separator) {
                    lineBuilder.append(" \u00a0");
                }

                emitSegment(first, lineBuilder, isRtlDominant, hasRtl);

                lineBuilder = new StringBuilder();
                first = null;
            }

            if (!separator) {
                if (first == null) {
                    first = text;
                }
                lineBuilder.append(text.getCharacter());
            }

            prev = text;
        }

        // Flush whatever is left after the last element.
        if (lineBuilder.length() > 0) {
            emitSegment(first, lineBuilder, isRtlDominant, hasRtl);
        }
    }

    /** Tells whether the element is a word-separator marker, judged by its class name. */
    private static boolean isWordSeparator(ColoredTextPosition position) {
        return position.getClass().getName().endsWith("WordSeparator");
    }

    /**
     * Normalizes the accumulated segment text and reports it through the abstract
     * writeLine callback; does nothing when the segment has no starting position.
     */
    private void emitSegment(ColoredTextPosition first, CharSequence rawText,
            boolean isRtlDominant, boolean hasRtl)
    {
        if (first == null) {
            // Nothing to attribute the text to; normalizing it would be wasted work.
            return;
        }

        String lineStr = rawText.toString();
        if (hasRtl) {
            lineStr = normalize.makeLineLogicalOrder(lineStr, isRtlDominant);
        }
        lineStr = normalize.normalizePres(lineStr);

        writeLine(first, first.getX(), first.getY(), lineStr);
    }

    /**
     * Receives one segment of extracted text.
     *
     * <p>Called by this class with the first text position of the segment, that
     * position's X and Y coordinates, and the segment text after normalization.
     *
     * @param formatHolder the first position of the segment
     * @param x value of {@code formatHolder.getX()}
     * @param y value of {@code formatHolder.getY()}
     * @param text the normalized text of the segment
     */
    protected abstract void writeLine(ColoredTextPosition formatHolder, float x, float y,
            String text);

    /**
     * Writes the text of the current page, article by article.
     *
     * <p>For every article the method first counts left-to-right and right-to-left
     * characters to decide the dominant direction, then walks the positions again,
     * grouping them into lines (by vertical overlap) and inserting word separators
     * where the horizontal gap exceeds the expected spacing. Each completed line is
     * passed to {@link #writeLine(List, boolean, boolean)}.
     *
     * <p>NOTE(review): {@code writePageStart()} and {@code startArticle(...)} are
     * invoked here, but no matching end-of-article / end-of-page hooks are called -
     * confirm that this is intentional.
     *
     * @throws IOException propagated from the superclass hooks
     */
    @Override
    protected void writePage() throws IOException {
        float maxYForLine = MAXYFORLINE_RESET_VALUE;
        // Maintained alongside the other per-line values; nothing in this method reads it.
        float minYTopForLine = MINYTOPFORLINE_RESET_VALUE;
        float endOfLastTextX = ENDOFLASTTEXTX_RESET_VALUE;
        float lastWordSpacing = LASTWORDSPACING_RESET_VALUE;
        float maxHeightForLine = MAXHEIGHTFORLINE_RESET_VALUE;
        PositionWrapperImpl lastPosition = null;
        PositionWrapperImpl lastLineStartPosition = null;

        boolean startOfPage = true;
        boolean startOfArticle = true;
        if (charactersByArticle.size() > 0) {
            writePageStart();
        }

        for (int i = 0; i < charactersByArticle.size(); i++) {
            List<TextPosition> textList = charactersByArticle.get(i);
            Iterator<TextPosition> textIter = textList.iterator();

            // First pass: count strongly directional characters in the article.
            int ltrCnt = 0;
            int rtlCnt = 0;

            while (textIter.hasNext()) {
                TextPosition position = textIter.next();
                String stringValue = position.getCharacter();
                for (int a = 0; a < stringValue.length(); a++) {
                    byte dir = Character.getDirectionality(stringValue.charAt(a));
                    if ((dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT)
                            || (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING)
                            || (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE))
                    {
                        ltrCnt++;
                    } else if ((dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
                            || (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
                            || (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING)
                            || (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE))
                    {
                        rtlCnt++;
                    }
                }
            }

            boolean isRtlDominant = rtlCnt > ltrCnt;

            startArticle(!isRtlDominant);
            startOfArticle = true;
            boolean hasRtl = rtlCnt > 0;

            List<ColoredTextPosition> line = new ArrayList<>();

            // Second pass: group positions into lines and detect word gaps.
            textIter = textList.iterator();
            float previousAveCharWidth = -1;
            while (textIter.hasNext()) {
                ColoredTextPosition position = (ColoredTextPosition) textIter.next();
                PositionWrapperImpl current = new PositionWrapperImpl(position);
                String characterValue = position.getCharacter();

                // A change of font object or font size invalidates the running average width.
                if (lastPosition != null
                        && ((position.getFont() != lastPosition.getTextPosition().getFont())
                                || (position.getFontSize() != lastPosition.getTextPosition().getFontSize())))
                {
                    previousAveCharWidth = -1;
                }

                float positionX;
                float positionY;
                float positionWidth;
                float positionHeight;

                // Direction-adjusted coordinates are used when sorting by position is on.
                if (getSortByPosition()) {
                    positionX = position.getXDirAdj();
                    positionY = position.getYDirAdj();
                    positionWidth = position.getWidthDirAdj();
                    positionHeight = position.getHeightDir();
                } else {
                    positionX = position.getX();
                    positionY = position.getY();
                    positionWidth = position.getWidth();
                    positionHeight = position.getHeight();
                }

                int wordCharCount = position.getIndividualWidths().length;
                float wordSpacing = position.getWidthOfSpace();

                // Gap threshold derived from the width of a space.
                float deltaSpace;
                // Float.isNaN is required here: "x == Float.NaN" is false for every x.
                if (wordSpacing == 0 || Float.isNaN(wordSpacing)) {
                    deltaSpace = Float.MAX_VALUE;
                } else if (lastWordSpacing < 0) {
                    deltaSpace = wordSpacing * getSpacingTolerance();
                } else {
                    deltaSpace = ((wordSpacing + lastWordSpacing) / 2f) * getSpacingTolerance();
                }

                // Gap threshold derived from the (running) average character width.
                float averageCharWidth;
                if (previousAveCharWidth < 0) {
                    averageCharWidth = positionWidth / wordCharCount;
                } else {
                    averageCharWidth = (previousAveCharWidth + (positionWidth / wordCharCount)) / 2f;
                }
                float deltaCharWidth = averageCharWidth * getAverageCharTolerance();

                // The smaller of the two thresholds decides where the next word may start.
                float expectedStartOfNextWordX = EXPECTEDSTARTOFNEXTWORDX_RESET_VALUE;
                if (endOfLastTextX != ENDOFLASTTEXTX_RESET_VALUE) {
                    if (deltaCharWidth > deltaSpace) {
                        expectedStartOfNextWordX = endOfLastTextX + deltaSpace;
                    } else {
                        expectedStartOfNextWordX = endOfLastTextX + deltaCharWidth;
                    }
                }

                if (lastPosition != null) {
                    if (startOfArticle) {
                        lastPosition.setArticleStart();
                        startOfArticle = false;
                    }

                    // No vertical overlap with the current line: flush it and start a new one.
                    if (!overlap(positionY, positionHeight, maxYForLine, maxHeightForLine)) {
                        writeLine(line, isRtlDominant, hasRtl);
                        line.clear();

                        lastLineStartPosition = (PositionWrapperImpl) handleLineSeparation(current,
                                lastPosition, lastLineStartPosition, maxHeightForLine);

                        endOfLastTextX = ENDOFLASTTEXTX_RESET_VALUE;
                        expectedStartOfNextWordX = EXPECTEDSTARTOFNEXTWORDX_RESET_VALUE;
                        maxYForLine = MAXYFORLINE_RESET_VALUE;
                        maxHeightForLine = MAXHEIGHTFORLINE_RESET_VALUE;
                        minYTopForLine = MINYTOPFORLINE_RESET_VALUE;
                    }

                    // Insert a word separator when the gap is wider than expected and the
                    // previous text does not already end with a space.
                    if (expectedStartOfNextWordX != EXPECTEDSTARTOFNEXTWORDX_RESET_VALUE
                            && expectedStartOfNextWordX < positionX
                            && lastPosition.getTextPosition().getCharacter() != null
                            && !lastPosition.getTextPosition().getCharacter().endsWith(" "))
                    {
                        line.add(WordSeparator.getSeparator());
                    }
                }

                if (positionY >= maxYForLine) {
                    maxYForLine = positionY;
                }

                endOfLastTextX = positionX + positionWidth;

                if (characterValue != null) {
                    line.add(position);
                }
                maxHeightForLine = Math.max(maxHeightForLine, positionHeight);
                minYTopForLine = Math.min(minYTopForLine, positionY - positionHeight);
                lastPosition = current;
                if (startOfPage) {
                    lastPosition.setParagraphStart();
                    lastPosition.setLineStart();
                    lastLineStartPosition = lastPosition;
                    startOfPage = false;
                }
                lastWordSpacing = wordSpacing;
                previousAveCharWidth = averageCharWidth;
            }

            // Flush the last line of the article.
            if (line.size() > 0) {
                writeLine(line, isRtlDominant, hasRtl);
            }
        }
    }
}
