package ru.yandex.chemodan.app.docviewer.utils.pdf.text;

import java.io.IOException;
import java.lang.reflect.Method;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import javax.annotation.PostConstruct;

import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.encoding.DictionaryEncoding;
import org.apache.pdfbox.encoding.StandardEncoding;
import org.apache.pdfbox.encoding.WinAnsiEncoding;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageNode;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDFontFactory;
import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import ru.yandex.bolts.function.Function1V;
import ru.yandex.misc.ExceptionUtils;
import ru.yandex.misc.io.RuntimeIoException;
import ru.yandex.misc.lang.StringUtils;

/**
 * Replaces the encoding of selected TrueType fonts in a PDF document with a
 * Cyrillic (Windows-1251 based) dictionary encoding.
 *
 * <p>A font is rewritten only when all of the following hold: its base font name
 * matches one of the configured {@link #setCyrillicFontNames(List) patterns}, it is
 * a {@link PDTrueTypeFont}, and its current encoding is {@link WinAnsiEncoding}.
 *
 * <p>{@link #afterPropertiesSet()} must have been invoked before
 * {@link #fixFonts(PDDocument)}; otherwise the reflective setter is unavailable and
 * every attempted font fix is logged as a warning and skipped.
 */
public class PdfEncodingFixer {

    private static final Logger logger = LoggerFactory.getLogger(PdfEncodingFixer.class);

    private static final char[] CYRILLIC_ABC_LOWERCASE = "абвгдеёжзийклмнопрстуфхцчшщъыьэюяґђѓєѕіїјљњћќў"
            .toCharArray();

    private static final char[] CYRILLIC_ABC_UPPERCASE = "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯҐЂЃЄЅІЇЈЉЊЋЌЎ"
            .toCharArray();

    /** Number used in the glyph name of the first uppercase letter ({@code afii10017}). */
    private static final int FIRST_UPPERCASE_GLYPH_NUMBER = 17;

    /** Number used in the glyph name of the first lowercase letter ({@code afii10065}). */
    private static final int FIRST_LOWERCASE_GLYPH_NUMBER = 65;

    /** Maps each supported Cyrillic character to its {@code afii10NNN} glyph name. */
    private static final Map<Character, String> UNICODE_TO_GLYPH_NAME = buildGlyphNames();

    /**
     * Dictionary encoding whose {@code Differences} array maps the Windows-1251 code
     * of every supported Cyrillic letter to the corresponding glyph name, on top of
     * the standard base encoding.
     */
    private static final class CyrillicEncoding extends DictionaryEncoding {

        private static final COSDictionary DICTIONARY = buildDictionary();

        static final CyrillicEncoding INSTANCE;

        static {
            try {
                INSTANCE = new CyrillicEncoding();
            } catch (Exception exc) {
                throw new RuntimeIoException(exc);
            }
        }

        private CyrillicEncoding() throws IOException {
            super(DICTIONARY);
        }

        /** Builds the encoding dictionary: base encoding plus the Cyrillic differences. */
        private static COSDictionary buildDictionary() {
            COSDictionary dictionary = new COSDictionary();
            dictionary.setItem(COSName.BASE_ENCODING, StandardEncoding.INSTANCE);

            COSArray differences = new COSArray();
            Charset windows1251 = Charset.forName("Windows-1251");
            // Uppercase entries first, then lowercase.
            appendDifferences(differences, CYRILLIC_ABC_UPPERCASE, windows1251);
            appendDifferences(differences, CYRILLIC_ABC_LOWERCASE, windows1251);

            dictionary.setItem(COSName.DIFFERENCES, differences);
            return dictionary;
        }

        /**
         * Appends a {@code code /glyphName} pair for every character of {@code alphabet}
         * that has a known glyph name. A failure on one character is logged and does
         * not prevent the remaining characters from being added.
         */
        private static void appendDifferences(COSArray differences, char[] alphabet,
                Charset charset)
        {
            for (char c : alphabet) {
                try {
                    String name = UNICODE_TO_GLYPH_NAME.get(c);
                    if (StringUtils.isEmpty(name)) {
                        continue;
                    }

                    byte[] enc = new String(new char[] { c }).getBytes(charset);
                    // Mask to get the unsigned single-byte code.
                    differences.add(COSInteger.get(enc[0] & 0xff));
                    differences.add(COSName.getPDFName(name));
                } catch (Exception exc) {
                    logger.warn("Unable to add mapping for char '" + c + "': " + exc, exc);
                }
            }
        }
    }

    /** Builds the character-to-glyph-name table for both alphabets. */
    private static Map<Character, String> buildGlyphNames() {
        Map<Character, String> names = new HashMap<>(CYRILLIC_ABC_UPPERCASE.length
                + CYRILLIC_ABC_LOWERCASE.length);
        putGlyphNames(names, CYRILLIC_ABC_UPPERCASE, FIRST_UPPERCASE_GLYPH_NUMBER);
        putGlyphNames(names, CYRILLIC_ABC_LOWERCASE, FIRST_LOWERCASE_GLYPH_NUMBER);
        return names;
    }

    /**
     * Assigns consecutive glyph names {@code afii10NNN} to the characters of
     * {@code alphabet}, starting at {@code firstNumber}; the number is zero-padded to
     * three digits.
     */
    private static void putGlyphNames(Map<Character, String> target, char[] alphabet,
            int firstNumber)
    {
        for (int i = 0; i < alphabet.length; i++) {
            target.put(alphabet[i], "afii10"
                    + StringUtils.leftPad(String.valueOf(firstNumber + i), 3, '0'));
        }
    }

    // Populated through setCyrillicFontNames(List); may be null until then.
    private List<Pattern> cyrillicFontNames;

    /** Reflective handle to the non-public {@code PDFont.setEncoding(COSBase)}. */
    private Method setEncoding;

    /**
     * Looks up {@code PDFont.setEncoding(COSBase)} reflectively and makes it accessible.
     *
     * @throws Exception if the method cannot be found or made accessible
     */
    @PostConstruct
    public void afterPropertiesSet() throws Exception {
        this.setEncoding = PDFont.class.getDeclaredMethod("setEncoding", COSBase.class);
        this.setEncoding.setAccessible(true);
    }

    /**
     * Switches every eligible font listed in {@code resources} to the Cyrillic encoding.
     *
     * @param fontCache cache passed through to {@link PDFontFactory}
     * @param resources resource dictionary whose {@code /Font} entry is inspected
     * @return {@code true} if at least one font was changed
     * @throws IOException if a font object cannot be created
     */
    private boolean fixFonts(final Map<String, PDFont> fontCache, final PDResources resources)
            throws IOException
    {
        COSBase fontsEntry = resources.getCOSDictionary().getDictionaryObject(COSName.FONT);
        // Covers both a missing entry (null) and a malformed one that is not a dictionary.
        if (!(fontsEntry instanceof COSDictionary)) {
            return false;
        }
        COSDictionary fonts = (COSDictionary) fontsEntry;

        boolean hasChanges = false;

        // Iterate over a snapshot of the keys.
        for (COSName fontName : new LinkedHashSet<>(fonts.keySet())) {
            COSBase font = fonts.getDictionaryObject(fontName);

            /*
             * data-000174.pdf contains a font that is a COSArray, looks to be
             * an error in the PDF, we will just ignore entries that are not
             * dictionaries.
             */
            if (!(font instanceof COSDictionary)) {
                continue;
            }

            final PDFont pdFont = PDFontFactory.createFont((COSDictionary) font, fontCache);
            if (!needsCyrillicEncoding(pdFont)) {
                continue;
            }

            try {
                // Update both the font dictionary entry and the in-memory encoding object.
                setEncoding.invoke(pdFont, CyrillicEncoding.INSTANCE.getCOSObject());
                pdFont.setFontEncoding(CyrillicEncoding.INSTANCE);
                hasChanges = true;
            } catch (Exception exc) {
                // Best effort: a failure on one font must not stop the others.
                logger.warn(String.valueOf(exc), exc);
            }
        }

        return hasChanges;
    }

    /**
     * Tells whether {@code pdFont} should be re-encoded: its base font name matches a
     * configured pattern, it is a TrueType font, and it currently uses WinAnsi encoding.
     */
    private boolean needsCyrillicEncoding(PDFont pdFont) {
        final String baseFontName = pdFont.getBaseFont();
        if (StringUtils.isEmpty(baseFontName) || !isOldCyrillicFontName(baseFontName)) {
            return false;
        }
        return pdFont instanceof PDTrueTypeFont
                && pdFont.getFontEncoding() instanceof WinAnsiEncoding;
    }

    /**
     * Fixes font encodings in the resources of every page of {@code document} and in
     * the resources of each page's ancestor page nodes.
     *
     * @param document document to process
     */
    public void fixFonts(PDDocument document) {
        try {
            final Map<String, PDFont> fontCache = new HashMap<>();

            final List<PDPage> pages = document.getDocumentCatalog().getAllPages();
            for (PDPage pdPage : pages) {
                // A page without resources of its own must still have its ancestors
                // processed, so a null here must not skip the walk below.
                PDResources pageResources = pdPage.getResources();
                if (pageResources != null && fixFonts(fontCache, pageResources)) {
                    pdPage.setResources(pageResources);
                }

                for (PDPageNode node = pdPage.getParent(); node != null; node = node.getParent()) {
                    PDResources nodeResources = node.getResources();
                    if (nodeResources != null && fixFonts(fontCache, nodeResources)) {
                        node.setResources(nodeResources);
                    }
                }
            }

        } catch (IOException exc) {
            throw ExceptionUtils.translate(exc);
        }
    }

    /**
     * @return a callback that applies {@link #fixFonts(PDDocument)} to its argument
     */
    public Function1V<PDDocument> fixFontsHandler() {
        return this::fixFonts;
    }

    /**
     * @return the configured font name patterns, or {@code null} if none were set
     */
    public List<Pattern> getCyrillicFontNames() {
        return cyrillicFontNames;
    }

    /**
     * Checks whether {@code name} fully matches any of the configured patterns.
     *
     * @param name font name to test; {@code null} never matches
     * @return {@code true} if some pattern matches the whole name; {@code false} if
     *         none does or if no patterns have been configured
     */
    public boolean isOldCyrillicFontName(String name) {
        if (name == null || cyrillicFontNames == null) {
            return false;
        }
        for (Pattern pattern : cyrillicFontNames) {
            if (pattern.matcher(name).matches()) {
                return true;
            }
        }
        return false;
    }

    /**
     * @param cyrillicFontNames patterns that identify fonts to be re-encoded
     */
    public void setCyrillicFontNames(List<Pattern> cyrillicFontNames) {
        this.cyrillicFontNames = cyrillicFontNames;
    }

}
