package ru.yandex.chemodan.app.docviewer.convert.pdf.fonts;

import java.util.EnumMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import ru.yandex.bolts.collection.Cf;
import ru.yandex.bolts.function.Function;
import ru.yandex.misc.cache.impl.LruCache;
import ru.yandex.misc.lang.StringUtils;
import ru.yandex.misc.lang.Validate;


/**
 * By given pdf font name, does its all best
 * to parse font family, css font families and styles
 *
 * @author ssytnik
 */
public class FontNameParser {

    private static final Logger logger = LoggerFactory.getLogger(FontNameParser.class);
    private static final LruCache<String, FontNameInfo> cache = new LruCache<>(1000);

    public static Optional<FontNameInfo> parse(final String pdfFontName) {
        return cache.getFromCache(pdfFontName, ()  -> new FontNameParser(pdfFontName).parse());
    }


    // [subset] fontFamily [styles] [ending]
    private static final Pattern PDF_FONT_NAME = Pattern.compile(
            "^([A-Z]{6}\\+)?([\\w\\(\\)\\*\\!\\. ]+?)((?:\\-|,).*?)?((?:~|\\+|\\*)\\w+)?$");

    private String pdfFontName;

    private FontNameParser(String pdfFontName) {
        this.pdfFontName = pdfFontName;
    }


    private String fontFamily;
    private String fontStyles;

    /**
     * since order is preserved, more specific font families should be added first,
     * while more generic ones should be added last
     */
    private LinkedHashSet<String> cssFamilies = new LinkedHashSet<>();
    /**
     * since put rewrites data, more generic styles (added by Roman, Normal, Medium etc.)
     * should be added first, while more specific styles should be added last
     */
    private EnumMap<CssFontStyleProperty, String> cssStyles = new EnumMap<>(CssFontStyleProperty.class);


    public Optional<FontNameInfo> parse() {
        Matcher matcher = PDF_FONT_NAME.matcher(pdfFontName);

        if (!matcher.matches()) {
            logger.warn("Could not parse {}", pdfFontName);
            return Optional.empty();
        }

        fontFamily = matcher.group(2);
        fontStyles = StringUtils.defaultIfEmpty(matcher.group(3), "");

        // TODO anyone please review this copy/paste mapping from obsoleted pdftohtml.xml
        check(LookupAt.FF, Cf.list("Arial"), Cf.list("arial", "sans-serif"));
        check(LookupAt.FF, Cf.list("TimesNewRoman"), Cf.list("Times New Roman", "times")); // was 'arial'
        check(LookupAt.FF, Cf.list("BatangChe"), Cf.list("Batang Che", "Un Jamo Batang", "Batang", "serif"));
        check(LookupAt.FF, Cf.list("DejaVuSerif"), Cf.list("DejaVu Serif", "serif"));
        check(LookupAt.FF, Cf.list("FreeSetC"), Cf.list("FreeSetC", "arial", "sans-serif"));
        check(LookupAt.FF, Cf.list("HelveticaNeueLTPro"), Cf.list("Helvetica Neue LT Pro", "HelveticaNeueLT Pro", "arial", "sans-serif"));
        check(LookupAt.FF, Cf.list("HelveticaNeueLTStd"), Cf.list("Helvetica Neue LT Std", "HelveticaNeueLT Std", "arial", "sans-serif"));
        check(LookupAt.FF, Cf.list("MyriadPro"), Cf.list("Myriad Pro", "Gill Sans", "Gill Sans MT", "Calibri", "sans-serif"));
        check(LookupAt.FF, Cf.list("NimbusRomNo9L"), Cf.list("NimbusRomNo9L", "times", "serif"));
        check(LookupAt.FF, Cf.list("OfficinaSerif"), Cf.list("Officina Serif", "Times New Roman", "Liberation Serif", "serif"));
        check(LookupAt.FF, Cf.list("Tahoma"), Cf.list("Times New Roman", "times", "serif"));
        check(LookupAt.FF, Cf.list("TextBookC"), Cf.list("TextBookC", "sans-serif"));

        // Hack for readme.pdf
        check(LookupAt.FF, Cf.list("TextbookNew"), Cf.list("arial"));

        if (cssFamilies.isEmpty()) {
            cssFamilies.add(fontFamily + fontStyles);
            cssFamilies.add(fontFamily);
        }

        // TODO add more font families here:
        // * serif (Times), sans-serif (Helvetica, Arial),
        //   cursive (Zapf-Chancery), fantasy (Western), monospace (Courier), Tahoma,
        // * the ones from http://cssfontstack.com/,
        // * investigate which other font-families are understood by most browsers (e.g. Geneva)

        // Generic font families - heuristics
        check(LookupAt.STYLES, Cf.list("MT", "Monotype", "Monospace"), Cf.list("monospace"));
        if (!check(LookupAt.BOTH, Cf.list("Sans", "Semisans", "Sansserif"), Cf.list("sans-serif"))) {
            check(LookupAt.BOTH, Cf.list("Serif"), Cf.list("serif"));
        }

        // Weight
        check(LookupAt.STYLES, Cf.list("Md", "Medium", "Plain", "Rg", "Regular", "Roman"), CssFontStyleProperty.WEIGHT, "400"); // "normal"

        if (!check(LookupAt.STYLES, Cf.list("XLight", "ExtraLight"), CssFontStyleProperty.WEIGHT, "100")) {
            check(LookupAt.STYLES, Cf.list("Lt", "Light"), CssFontStyleProperty.WEIGHT, "200");
        }
        check(LookupAt.STYLES, Cf.list("Tn", "Thin", "Hairline"), CssFontStyleProperty.WEIGHT, "100");
        check(LookupAt.STYLES, Cf.list("Fine"), CssFontStyleProperty.WEIGHT, "200");
        check(LookupAt.STYLES, Cf.list("Bk", "Book"), CssFontStyleProperty.WEIGHT, "300");
        check(LookupAt.STYLES, Cf.list("Dm", "Demi", "Demibold", "Semibold", "Halbfett"), CssFontStyleProperty.WEIGHT, "500");
        if (!check(LookupAt.STYLES, Cf.list("XBold", "ExtraBold"), CssFontStyleProperty.WEIGHT, "800")) {
            check(LookupAt.STYLES, Cf.list("Bold", "Bd"), CssFontStyleProperty.WEIGHT, "700"); // "bold"
        }
        check(LookupAt.STYLES, Cf.list("Hv", "Heavy", "Heavyface"), CssFontStyleProperty.WEIGHT, "800");
        if (!check(LookupAt.STYLES, Cf.list("XBlack", "ExtraBlack", "UltBlack", "UltraBlack"), CssFontStyleProperty.WEIGHT, "900")) {
            check(LookupAt.STYLES, Cf.list("Blk", "Black"), CssFontStyleProperty.WEIGHT, "800");
        }
        check(LookupAt.STYLES, Cf.list("Fat", "DS", "Display", /*"Su",*/ "Super", /*"Po",*/ "Poster" /*"Ad"*/), CssFontStyleProperty.WEIGHT, "900");

        // Style/slope
        check(LookupAt.STYLES, Cf.list(/*"Rg", "Regular",*/ "Roman"), CssFontStyleProperty.STYLE, "normal");
        check(LookupAt.STYLES, Cf.list(/*"Ic",*/ "Inclined"), CssFontStyleProperty.STYLE, "italic");
        check(LookupAt.STYLES, Cf.list("It", "Italic", /*"Up",*/ "Upright"), CssFontStyleProperty.STYLE, "italic");
        check(LookupAt.STYLES, Cf.list(/*"Ks",*/ "Kusriv"), CssFontStyleProperty.STYLE, "italic");
        check(LookupAt.STYLES, Cf.list(/*"Sl",*/ "Sloped", "Slanted"), CssFontStyleProperty.STYLE, "italic");
        check(LookupAt.STYLES, Cf.list("Obl", "Oblique"), CssFontStyleProperty.STYLE, "oblique");


        // Width/stretch
        check(LookupAt.STYLES, Cf.list("Normal", "Medium", "Regular"), CssFontStyleProperty.STRETCH, "normal");

        if (
                !check(LookupAt.STYLES, Cf.list("UltraCompressed", "UltCompressed", "UltraCondensed", "UltCondensed"), CssFontStyleProperty.STRETCH, "ultra-condensed") &&
                !check(LookupAt.STYLES, Cf.list("ExtraCompressed", "XCompressed", "ExtraCondensed", "XCondensed"), CssFontStyleProperty.STRETCH, "extra-condensed")
        ) {
            check(LookupAt.STYLES, Cf.list(/*"Cm",*/ "Compressed", /*"Ct",*/ "Compact"), CssFontStyleProperty.STRETCH, "condensed");
            check(LookupAt.STYLES, Cf.list(/*"Cn",*/ "Cnd", "Cond", "Condensed"), CssFontStyleProperty.STRETCH, "condensed");
        }
        check(LookupAt.STYLES, Cf.list(/*"Nr",*/ "Narrow", "Semicondensed"), CssFontStyleProperty.STRETCH, "semi-condensed");
        if (!check(LookupAt.STYLES, Cf.list("Semiextended", "SemiExt", "Semiexpanded"), CssFontStyleProperty.STRETCH, "semi-expanded")) {
            check(LookupAt.STYLES, Cf.list(/*"Ex",*/ "Ext", "Extended", "Elongated"), CssFontStyleProperty.STRETCH, "expanded");
        }
        if (!check(LookupAt.STYLES, Cf.list("XExpanded", "ExtraExpanded"), CssFontStyleProperty.STRETCH, "extra-expanded")) {
            check(LookupAt.STYLES, Cf.list("Expanded"), CssFontStyleProperty.STRETCH, "expanded");
        }
        check(LookupAt.STYLES, Cf.list("Wide"), CssFontStyleProperty.STRETCH, "ultra-expanded");


        FontNameInfo fontNameInfo = new FontNameInfo(fontFamily, fontStyles, Cf.list(cssFamilies), cssStyles);
        logger.debug("Parsed {} => {}", pdfFontName, fontNameInfo);
        return Optional.of(fontNameInfo);
    }

    private enum LookupAt {
        FF, STYLES, BOTH;
    };

    private boolean check(LookupAt where, List<String> probes, List<String> addCssFamilies) {
        Validate.forAll(addCssFamilies, Cf.String.notEmptyF());
        return check(where, probes, addCssFamilies, Cf.map());
    }

    private boolean check(LookupAt where, List<String> probes, CssFontStyleProperty cssFontStyle, String cssFontStyleValue) {
        Validate.notEmpty(cssFontStyleValue);
        return check(where, probes, Cf.list(), Cf.map(cssFontStyle, cssFontStyleValue));
    }

    private boolean check(
            final LookupAt where, final List<String> probes,
            final List<String> addCssFamilies, final Map<CssFontStyleProperty, String> addStyles)
    {
        Validate.forAll(probes, Cf.String.notEmptyF());

        Function<String, Boolean> f = a -> {
            for (String probe : probes) {
                if (a.contains(probe)) {
                    cssFamilies.addAll(addCssFamilies);
                    cssStyles.putAll(addStyles);
                    return true;
                }
            }
            return false;
        };

        boolean match = false;
        if (where != LookupAt.STYLES && !match) { match = f.apply(fontFamily); }
        if (where != LookupAt.FF && !match) { match = f.apply(fontStyles); }
        return match;
    }

}
