package ru.yandex.tikaite.util;

import java.io.IOException;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.tika.metadata.DublinCore;
import org.apache.tika.metadata.Geographic;
import org.apache.tika.metadata.MSOffice;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLCore;
import org.apache.tika.metadata.OfficeOpenXMLExtended;
import org.apache.tika.metadata.PagedText;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TIFF;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.XMPDM;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

import ru.yandex.parser.string.DurationParser;
import ru.yandex.search.document.Document;

public class TextExtractResult {
    /**
     * Smallest value of the parsed orientation tag for which
     * {@code takeImageInfo} reads the height from the width properties and
     * the width from the height properties.
     */
    public static final int MIN_ROTATED_ORIENTATION = 5;

    // Divisor applied by takeDate to the parsed millisecond timestamp before
    // it is stored in the document.
    private static final long MILLIS = 1000L;
    // parseTimestamp does not attempt to parse strings shorter than this.
    private static final int MIN_DATE_LENGTH = 10;
    // Radix used by takeAudioInfo when accumulating decimal digits.
    private static final int DECIMAL_MULTIPLIER = 10;
    // Coordinate syntax tried by takeGps when the value is not a plain double:
    // integer degrees, then optionally a comma followed by optional minutes
    // (which, as written, must carry a fractional part), then an optional
    // direction letter (E, W, N or S in either case).
    private static final Pattern GPS_PATTERN =
        Pattern.compile(
            "(?<deg>[0-9]+)(?:,(?<min>[0-9]+(?:[.][0-9]+))?)?"
            + "(?<dir>[eEwWnNsS])?");

    // Metadata key tables, one per output field. Each table is built by
    // populateNames(...), which flattens composite properties into the names
    // of their primary and secondary properties. The take* methods read the
    // keys in the order listed here.
    private static final String[] ALBUM = populateNames(XMPDM.ALBUM);
    private static final String[] ALTITUDE = populateNames(
        Geographic.ALTITUDE,
        Property.internalText("GPS Altitude"),
        Property.internalText("GPS Altitude Ref"),
        Property.internalText("GPS:GPS Altitude"));
    private static final String[] ARTIST = populateNames(
        XMPDM.ARTIST,
        XMPDM.ALBUM_ARTIST,
        Property.internalText("Artist"),
        Property.internalText("Exif IFD0:Artist"));
    private static final String[] AUTHOR =
        populateNames(TikaCoreProperties.CREATOR);
    private static final String[] COMMENT = populateNames(
        XMPDM.LOG_COMMENT,
        TikaCoreProperties.COMMENTS,
        OfficeOpenXMLExtended.COMMENTS,
        Property.internalText("Jpeg Comment"));
    private static final String[] COMPOSER = populateNames(XMPDM.COMPOSER);
    private static final String[] CREATED = populateNames(
        TikaCoreProperties.CREATED,
        TIFF.ORIGINAL_DATE,
        Property.internalText("created"),
        Property.internalText("Date/Time Original"),
        Property.internalText("Date/Time Digitized"),
        Property.internalText("Date/Time"));

    // The suppression is scoped to this declaration only.
    @SuppressWarnings("deprecation")
    private static final String[] DESCRIPTION = populateNames(
        DublinCore.DESCRIPTION,
        Property.internalText("Caption/Abstract"),
        Property.internalText("Image Description"),
        Property.internalText(Metadata.DESCRIPTION));

    private static final String[] DURATION = populateNames(
        XMPDM.DURATION,
        Property.internalText("Duration"));
    private static final String[] GENRE = populateNames(XMPDM.GENRE);
    private static final String[] HEIGHT = populateNames(
        Property.internalText("Image Height"),
        TIFF.IMAGE_LENGTH,
        Property.internalText("Exif Image Height"),
        Property.internalText("height"));

    // The suppression is scoped to this declaration only.
    @SuppressWarnings("deprecation")
    private static final String[] KEYWORDS = populateNames(
        Office.KEYWORDS,
        Property.internalText(MSOffice.KEYWORDS));

    private static final String[] LATITUDE = populateNames(
        Geographic.LATITUDE,
        Property.internalText("GPS Latitude"),
        Property.internalText("GPS Latitude Ref"));

    private static final String[] LONGITUDE = populateNames(
        Geographic.LONGITUDE,
        Property.internalText("GPS Longitude"),
        Property.internalText("GPS Longitude Ref"));

    private static final String[] MANUFACTURER =
        populateNames(
            TIFF.EQUIPMENT_MAKE,
            Property.internalText("Make"),
            Property.internalText("Vendor Name"));
    private static final String[] MODEL =
        populateNames(TIFF.EQUIPMENT_MODEL, Property.internalText("Model"));
    private static final String[] MODIFIED =
        populateNames(TikaCoreProperties.MODIFIED);
    private static final String[] ORIENTATION =
        populateNames(TIFF.ORIENTATION);

    // The suppression is scoped to this declaration only.
    @SuppressWarnings("deprecation")
    private static final String[] PAGES = populateNames(
        PagedText.N_PAGES,
        MSOffice.PAGE_COUNT,
        Office.PAGE_COUNT,
        Property.internalInteger("nbPage"));

    private static final String[] PRINT_DATE =
        populateNames(TikaCoreProperties.PRINT_DATE);

    // Unlike the other tables this one is a literal key, not derived from a
    // Property.
    private static final String[] PRODUCER = new String[] {"producer"};
    private static final String[] RELEASED = populateNames(XMPDM.RELEASE_DATE);
    // The suppression is scoped to this declaration only.
    @SuppressWarnings("deprecation")
    private static final String[] SUBJECT = populateNames(
        DublinCore.SUBJECT,
        OfficeOpenXMLCore.SUBJECT,
        Property.internalText(Metadata.SUBJECT));

    private static final String[] TITLE = populateNames(
        TikaCoreProperties.TITLE,
        Property.internalText("Object Name"),
        Property.internalText("Headline"));

    private static final String[] TOOL =
        populateNames(TikaCoreProperties.CREATOR_TOOL);

    private static final String[] TRACK_NUMBER =
        populateNames(XMPDM.TRACK_NUMBER);

    private static final String[] WIDTH = populateNames(
        Property.internalText("Image Width"),
        TIFF.IMAGE_WIDTH,
        Property.internalText("Exif Image Width"),
        Property.internalText("width"));

    // Fixed -12:00 offset attached to the date-only patterns below.
    // NOTE(review): presumably chosen so that a date without a time of day
    // resolves to midday UTC, as the name suggests - confirm.
    private static final DateTimeZone MIDDAY_TIME_ZONE =
        DateTimeZone.forID("-12:00");
    // Patterns ending in a literal 'Z'; their formatters are built with
    // withZoneUTC() in the static initializer.
    private static final String[] UTC_TZ_FORMATS = new String[] {
        "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'",
        "yyyy-MM-dd'T'HH:mm:ss.SS'Z'",
        "yyyy-MM-dd'T'HH:mm:ss'Z'",

        "yyyy-MM-dd' 'HH:mm:ss.SSS'Z'",
        "yyyy-MM-dd' 'HH:mm:ss.SS'Z'",
        "yyyy-MM-dd' 'HH:mm:ss'Z'"
    };
    // Date-time patterns with or without a zone offset field; their
    // formatters are built with the Europe/Moscow zone in the static
    // initializer.
    private static final String[] TZ_FORMATS = new String[] {
        "yyyy-MM-dd'T'HH:mm:ss.SSSZ",
        "yyyy-MM-dd'T'HH:mm:ss.SSZ",
        "yyyy-MM-dd'T'HH:mm:ssZ",

        "yyyy-MM-dd'T'HH:mm:ss.SSS",
        "yyyy-MM-dd'T'HH:mm:ss.SS",
        "yyyy-MM-dd'T'HH:mm:ss",

        "yyyy-MM-dd' 'HH:mm:ss.SSSZ",
        "yyyy-MM-dd' 'HH:mm:ss.SSZ",
        "yyyy-MM-dd' 'HH:mm:ssZ",

        "yyyy-MM-dd' 'HH:mm:ss.SSS",
        "yyyy-MM-dd' 'HH:mm:ss.SS",
        "yyyy-MM-dd' 'HH:mm:ss",

        "yyyy:MM:dd' 'HH:mm:ss",
        "yyyy/MM/dd' 'HH:mm:ss"
    };
    // Date-only patterns; their formatters are built with MIDDAY_TIME_ZONE.
    private static final String[] MIDDAY_TZ_FORMATS = new String[] {
        "yyyy-MM-dd",
        "yyyy:MM:dd",
        "dd.MM.YYYY",
        "dd-MM-YYYY"
    };
    // All formatters in the order parseTimestamp tries them; filled in by the
    // static initializer.
    private static final DateTimeFormatter[] DATE_TIME_FORMATS;

    static {
        List<DateTimeFormatter> formats = new ArrayList<>();
        for (String format: UTC_TZ_FORMATS) {
            formats.add(DateTimeFormat.forPattern(format).withZoneUTC());
        }
        for (String format: TZ_FORMATS) {
            formats.add(
                DateTimeFormat.forPattern(format).withZone(
                    DateTimeZone.forID("Europe/Moscow")));
        }
        for (String format: MIDDAY_TZ_FORMATS) {
            formats.add(
                DateTimeFormat.forPattern(format).withZone(MIDDAY_TIME_ZONE));
        }
        DATE_TIME_FORMATS =
            formats.toArray(new DateTimeFormatter[formats.size()]);
    }

    // Scratch set refilled by populateValues(...) on every call; keeps the
    // trimmed, non-empty candidate values in insertion order.
    private final Set<String> values = new LinkedHashSet<>();
    // Scratch buffer shared by take(...) and metadata(...); both reset it
    // before use.
    private final StringBuilder sb = new StringBuilder();
    // Metadata being consumed; entries are removed as they are extracted.
    private final Metadata metadata;
    // Value reported by truncated(); -1 suppresses the TRUNCATED field.
    private final int truncated;
    // Value reported by cause().
    private final Throwable cause;

    /**
     * Wraps extraction output.
     *
     * @param metadata metadata to read from; entries are removed from it
     *     when {@link #metadata(Document)} is called
     * @param truncated truncation marker, {@code -1} meaning "do not emit"
     * @param cause throwable returned by {@link #cause()}
     */
    public TextExtractResult(
        final Metadata metadata,
        final int truncated,
        final Throwable cause)
    {
        this.cause = cause;
        this.truncated = truncated;
        this.metadata = metadata;
    }

    /**
     * Appends the names of {@code properties} to {@code names}. A composite
     * property contributes the names of its primary property followed by its
     * secondary extract properties (recursively); any other property
     * contributes its own name. A {@code null} array and {@code null}
     * elements are skipped.
     *
     * @param names list receiving the names
     * @param properties properties to flatten, may be {@code null}
     */
    private static void populateNames(
        final List<String> names,
        final Property... properties)
    {
        if (properties == null) {
            return;
        }
        for (int i = 0; i < properties.length; ++i) {
            Property current = properties[i];
            if (current == null) {
                continue;
            }
            boolean composite =
                current.getPropertyType() == Property.PropertyType.COMPOSITE;
            if (!composite) {
                names.add(current.getName());
                continue;
            }
            populateNames(names, current.getPrimaryProperty());
            populateNames(names, current.getSecondaryExtractProperties());
        }
    }

    /**
     * Flattens {@code properties} into an array of property names, expanding
     * composite properties.
     *
     * @param properties properties to flatten
     * @return the collected names, in encounter order
     */
    private static String[] populateNames(final Property... properties) {
        List<String> collected = new ArrayList<>();
        populateNames(collected, properties);
        String[] result = new String[collected.size()];
        return collected.toArray(result);
    }

    /**
     * Parses {@code date} with each formatter in {@code DATE_TIME_FORMATS},
     * in order, and returns the result of the first one that accepts it.
     *
     * @param date text to parse; {@code null} or text shorter than
     *     {@code MIN_DATE_LENGTH} is rejected without parsing
     * @return milliseconds as returned by the matching formatter, or
     *     {@code 0L} when no formatter accepts the text
     */
    private static long parseTimestamp(final String date) {
        if (date == null || date.length() < MIN_DATE_LENGTH) {
            return 0L;
        }
        // Try several different ISO-8601 variants
        for (DateTimeFormatter format: DATE_TIME_FORMATS) {
            // CSOFF: EmptyBlock
            try {
                return format.parseMillis(date);
            } catch (RuntimeException e) {
                // Text does not fit this pattern; try the next one. Only
                // runtime exceptions are ignored so that JVM errors
                // (OutOfMemoryError and the like) still propagate.
            }
            // CSON: EmptyBlock
        }
        return 0L;
    }

    /**
     * Adds the trimmed form of {@code value} to the scratch {@code values}
     * set, ignoring {@code null} and values that are empty after trimming.
     *
     * @param value raw metadata value, may be {@code null}
     */
    private void addValue(final String value) {
        if (value == null) {
            return;
        }
        String trimmed = value.trim();
        if (trimmed.isEmpty()) {
            return;
        }
        values.add(trimmed);
    }

    /**
     * Refills the scratch {@code values} set with the trimmed, non-empty
     * metadata values stored under {@code names}, visiting the names in the
     * given order.
     *
     * @param names metadata keys to read
     */
    private void populateValues(final String... names) {
        values.clear();
        for (int i = 0; i < names.length; ++i) {
            String key = names[i];
            if (!metadata.isMultiValued(key)) {
                addValue(metadata.get(key));
                continue;
            }
            String[] stored = metadata.getValues(key);
            if (stored == null) {
                continue;
            }
            for (String item: stored) {
                addValue(item);
            }
        }
    }

    /**
     * Removes every listed key from the wrapped metadata.
     *
     * @param names metadata keys to remove
     */
    private void erase(final String... names) {
        for (int i = 0; i < names.length; ++i) {
            metadata.remove(names[i]);
        }
    }

    /**
     * Stores the distinct values found under {@code names} in
     * {@code field}, joined with {@code '\n'} when there is more than one,
     * and then removes those keys from the metadata. Does nothing when no
     * non-empty value is present.
     *
     * @param field document field to fill
     * @param document target document
     * @param names metadata keys to read, in priority order
     * @throws IOException if the document rejects the field
     */
    private void take(
        final String field,
        final Document document,
        final String... names)
        throws IOException
    {
        populateValues(names);
        if (values.isEmpty()) {
            return;
        }
        sb.setLength(0);
        for (String item: values) {
            // Collected values are never empty, so a non-empty buffer means
            // at least one value has already been written.
            if (sb.length() > 0) {
                sb.append('\n');
            }
            sb.append(item);
        }
        document.addField(field, sb.toString());
        erase(names);
    }

    /**
     * Stores in {@code field} the first value under {@code names} that
     * {@code parseTimestamp} turns into a positive timestamp, divided by
     * {@code MILLIS}.
     *
     * <p>Candidates are tried in order until one parses, so an unparseable
     * high-priority value no longer hides a parseable lower-priority one.
     * As before, the keys are removed from the metadata whenever at least
     * one non-empty value was present, whether or not any of them parsed.
     *
     * @param field document field to fill
     * @param document target document
     * @param names metadata keys to read, in priority order
     * @throws IOException if the document rejects the field
     */
    private void takeDate(
        final String field,
        final Document document,
        final String... names)
        throws IOException
    {
        populateValues(names);
        if (values.isEmpty()) {
            return;
        }
        for (String value: values) {
            long millis = parseTimestamp(value);
            if (millis > 0L) {
                document.addField(field, millis / MILLIS);
                break;
            }
        }
        erase(names);
    }

    /**
     * Stores in {@code field} the first value under {@code names} that
     * {@code DurationParser.LONG} accepts, then removes those keys from the
     * metadata. Any failure while handling a candidate (parsing, storing or
     * erasing) is ignored and the next candidate is tried.
     *
     * @param field document field to fill
     * @param document target document
     * @param names metadata keys to read, in priority order
     * @throws IOException declared for symmetry with the other take methods
     */
    private void takeDuration(
        final String field,
        final Document document,
        final String... names)
        throws IOException
    {
        populateValues(names);
        for (String candidate: values) {
            boolean stored;
            try {
                document.addField(
                    field,
                    DurationParser.LONG.apply(candidate.trim()));
                erase(names);
                stored = true;
            } catch (Throwable ignored) {
                stored = false;
            }
            if (stored) {
                return;
            }
        }
    }

    /**
     * Stores in {@code field} the first value under {@code names} that can
     * be read as a coordinate, then removes those keys from the metadata.
     *
     * <p>Each candidate is first tried as a plain double. Failing that, it
     * is matched against {@code GPS_PATTERN}: degrees plus minutes / 60,
     * negated when the direction letter is W or S (either case).
     *
     * @param field document field to fill
     * @param document target document
     * @param names metadata keys to read, in priority order
     * @throws IOException if the document rejects the field
     */
    private void takeGps(
        final String field,
        final Document document,
        final String... names)
        throws IOException
    {
        populateValues(names);
        for (String raw: values) {
            try {
                document.addField(field, Double.parseDouble(raw));
                erase(names);
                return;
            } catch (NumberFormatException ignored) {
                // Not a plain number; fall back to the pattern below.
            }
            Matcher parts = GPS_PATTERN.matcher(raw);
            if (!parts.matches()) {
                continue;
            }
            try {
                String minutes = parts.group("min");
                String direction = parts.group("dir");
                double coordinate = Double.parseDouble(parts.group("deg"));
                if (minutes != null) {
                    coordinate += Double.parseDouble(minutes) / 60d;
                }
                if (direction != null
                    && "wWsS".indexOf(direction.charAt(0)) >= 0)
                {
                    coordinate = -coordinate;
                }
                document.addField(field, coordinate);
                erase(names);
                return;
            } catch (RuntimeException ignored) {
                // Candidate could not be stored; try the next one.
            }
        }
    }

    /**
     * Stores in {@code field} the first value under {@code names} that
     * parses as an {@code int}, then removes those keys from the metadata.
     *
     * @param field document field to fill
     * @param document target document
     * @param names metadata keys to read, in priority order
     * @return the stored number, or {@code null} when no candidate parsed
     * @throws IOException if the document rejects the field
     */
    private Integer takeInt(
        final String field,
        final Document document,
        final String... names)
        throws IOException
    {
        populateValues(names);
        for (String candidate: values) {
            try {
                final int number = Integer.parseInt(candidate);
                document.addField(field, number);
                erase(names);
                return Integer.valueOf(number);
            } catch (NumberFormatException ignored) {
                // Not an integer; move on to the next candidate.
            }
        }
        return null;
    }

    /**
     * Like {@code takeInt}, but everything from the first occurrence of
     * {@code " pixels"} onwards is cut off before parsing.
     *
     * @param field document field to fill
     * @param document target document
     * @param names metadata keys to read, in priority order
     * @return the stored number, or {@code null} when no candidate parsed
     * @throws IOException if the document rejects the field
     */
    private Integer takePixels(
        final String field,
        final Document document,
        final String... names)
        throws IOException
    {
        populateValues(names);
        for (String candidate: values) {
            int suffix = candidate.indexOf(" pixels");
            String digits =
                suffix < 0 ? candidate : candidate.substring(0, suffix);
            try {
                final int number = Integer.parseInt(digits);
                document.addField(field, number);
                erase(names);
                return Integer.valueOf(number);
            } catch (NumberFormatException ignored) {
                // Not an integer; move on to the next candidate.
            }
        }
        return null;
    }

    /**
     * Stores in {@code field} the first value under {@code names} that
     * parses as a double, then removes those keys from the metadata.
     * Everything from the first occurrence of {@code " m"} onwards is cut
     * off before parsing.
     *
     * <p>Processing stops at the first parseable candidate, as in
     * {@code takeInt} and {@code takeGps}, so the field is added at most
     * once even when several source properties carry a value.
     *
     * @param field document field to fill
     * @param document target document
     * @param names metadata keys to read, in priority order
     * @throws IOException if the document rejects the field
     */
    private void takeMetres(
        final String field,
        final Document document,
        final String... names)
        throws IOException
    {
        populateValues(names);
        for (String value: values) {
            String metres;
            int idx = value.indexOf(" m");
            if (idx == -1) {
                metres = value;
            } else {
                metres = value.substring(0, idx);
            }
            try {
                double parsed = Double.parseDouble(metres);
                document.addField(field, parsed);
                erase(names);
                return;
            } catch (NumberFormatException e) {
                // Not a number (e.g. a textual reference value); try the
                // next candidate.
            }
        }
    }

    /**
     * Extracts the audio fields: album, artist, composer, genre, release
     * date, duration and track number.
     *
     * <p>The track number is the leading run of digits of the first usable
     * {@code TRACK_NUMBER} value. If a second run of digits follows after
     * some non-digit separator, it is stored as {@code ALBUM_TRACKS}. Only
     * the first usable value is consumed, so each field is added at most
     * once. Digit runs that do not fit in an {@code int} are not stored.
     *
     * @param document target document
     * @throws IOException if the document rejects a field
     */
    private void takeAudioInfo(final Document document) throws IOException {
        take(CommonFields.ALBUM, document, ALBUM);
        take(CommonFields.ARTIST, document, ARTIST);
        take(CommonFields.COMPOSER, document, COMPOSER);
        take(CommonFields.GENRE, document, GENRE);
        take(CommonFields.RELEASED, document, RELEASED);
        takeDuration(CommonFields.DURATION, document, DURATION);
        populateValues(TRACK_NUMBER);
        for (String value: values) {
            int trackEnd = scanRun(value, 0, true);
            if (trackEnd == 0) {
                // No leading digits; try the next value.
                continue;
            }
            int track;
            try {
                // Integer.parseInt understands every character accepted by
                // Character.isDigit and rejects overflow instead of
                // wrapping around.
                track = Integer.parseInt(value.substring(0, trackEnd));
            } catch (NumberFormatException e) {
                continue;
            }
            document.addField(CommonFields.TRACK_NUMBER, track);
            int totalStart = scanRun(value, trackEnd, false);
            int totalEnd = scanRun(value, totalStart, true);
            if (totalEnd > totalStart) {
                int total = -1;
                try {
                    total = Integer.parseInt(
                        value.substring(totalStart, totalEnd));
                } catch (NumberFormatException e) {
                    // Track total does not fit in an int; leave it out.
                }
                if (total >= 0) {
                    document.addField(CommonFields.ALBUM_TRACKS, total);
                }
            }
            erase(TRACK_NUMBER);
            return;
        }
    }

    // Returns the index of the first character at or after 'from' whose
    // digit-ness differs from 'digits', or the string length if none does.
    private static int scanRun(
        final String text,
        final int from,
        final boolean digits)
    {
        int pos = from;
        while (pos < text.length()
            && Character.isDigit(text.charAt(pos)) == digits)
        {
            ++pos;
        }
        return pos;
    }

    /**
     * Extracts the geographic fields: altitude via {@code takeMetres}, then
     * latitude and longitude via {@code takeGps}.
     *
     * @param document target document
     * @throws IOException if the document rejects a field
     */
    private void takeGeoInfo(final Document document) throws IOException {
        takeMetres(CommonFields.ALTITUDE, document, ALTITUDE);
        takeGps(CommonFields.LATITUDE, document, LATITUDE);
        takeGps(CommonFields.LONGITUDE, document, LONGITUDE);
    }

    /**
     * Extracts the image fields: manufacturer, model, EXIF orientation,
     * height, width and, when both dimensions are known, the
     * portrait/landscape orientation and the reduced aspect ratio.
     *
     * <p>When the EXIF orientation is at least
     * {@code MIN_ROTATED_ORIENTATION}, the height is read from the width
     * properties and the width from the height properties. The ratio is
     * always written as {@code longer:shorter}, divided by their greatest
     * common divisor, and only when both sides are positive.
     *
     * @param document target document
     * @throws IOException if the document rejects a field
     */
    private void takeImageInfo(final Document document)
        throws IOException
    {
        take(CommonFields.MANUFACTURER, document, MANUFACTURER);
        take(CommonFields.MODEL, document, MODEL);

        // see https://github.com/drewnoakes/metadata-extractor/blob/5f91062969aca66ba5bb9d7372569576eddaf78a/Source/com/drew/metadata/exif/ExifDescriptorBase.java#L244-L256
        Integer exifOrientation = takeInt(
            CommonFields.EXIF_ORIENTATION,
            document,
            ORIENTATION);
        boolean rotated = exifOrientation != null
            && exifOrientation.intValue() >= MIN_ROTATED_ORIENTATION;
        String[] heightSource = rotated ? WIDTH : HEIGHT;
        String[] widthSource = rotated ? HEIGHT : WIDTH;
        // Height is always taken before width, as the document sees the
        // fields in this order.
        Integer height =
            takePixels(CommonFields.HEIGHT, document, heightSource);
        Integer width =
            takePixels(CommonFields.WIDTH, document, widthSource);
        if (height == null || width == null) {
            return;
        }

        int rows = height.intValue();
        int columns = width.intValue();
        if (rows > columns) {
            document.addField(
                CommonFields.ORIENTATION,
                CommonFields.PORTRAIT);
        } else {
            document.addField(
                CommonFields.ORIENTATION,
                CommonFields.LANDSCAPE);
        }

        int longer = Math.max(rows, columns);
        int shorter = Math.min(rows, columns);
        if (longer <= 0 || shorter <= 0) {
            return;
        }
        int divisor = BigInteger.valueOf(longer)
            .gcd(BigInteger.valueOf(shorter))
            .intValue();
        StringBuilder ratio = new StringBuilder();
        ratio.append(longer / divisor);
        ratio.append(':');
        ratio.append(shorter / divisor);
        document.addField(CommonFields.RATIO, ratio.toString());
    }

    /**
     * Moves the recognised metadata into typed document fields and dumps
     * whatever is left into the {@code META} field.
     *
     * <p>Recognised entries are removed from the wrapped metadata as they
     * are consumed, and a fixed list of file-system and PDF-permission keys
     * is dropped outright. Each remaining entry becomes a
     * {@code name:value} line; lines are joined with {@code '\n'}. Values
     * of a multi-valued entry are trimmed, sorted and de-duplicated; empty
     * values are skipped.
     *
     * @param document target document
     * @throws IOException if the document rejects a field
     */
    public void metadata(final Document document) throws IOException {
        if (truncated != -1) {
            document.addField(CommonFields.TRUNCATED, truncated);
        }

        takeAudioInfo(document);
        takeGeoInfo(document);
        takeImageInfo(document);

        take(CommonFields.AUTHOR, document, AUTHOR);
        take(CommonFields.COMMENT, document, COMMENT);
        take(CommonFields.DESCRIPTION, document, DESCRIPTION);
        take(CommonFields.KEYWORDS, document, KEYWORDS);
        take(CommonFields.PRODUCER, document, PRODUCER);
        take(CommonFields.SUBJECT, document, SUBJECT);
        take(CommonFields.TITLE, document, TITLE);
        take(CommonFields.TOOL, document, TOOL);

        takeDate(CommonFields.CREATED, document, CREATED);
        takeDate(CommonFields.MODIFIED, document, MODIFIED);
        takeDate(CommonFields.PRINT_DATE, document, PRINT_DATE);

        takeInt(CommonFields.PAGES, document, PAGES);
        erase(
            "File Name",
            "File Modification Date/Time",
            "File Size",
            "File Type",
            "File Permissions",
            "pdf:charsPerPage",
            "pdf:unmappedUnicodeCharsPerPage",
            "access_permission:modify_annotations",
            "access_permission:can_print_degraded",
            "access_permission:extract_for_accessibility",
            "access_permission:assemble_document",
            "access_permission:extract_content",
            "access_permission:can_print",
            "access_permission:fill_in_form",
            "access_permission:can_modify");

        sb.setLength(0);
        for (String name: metadata.names()) {
            if (metadata.isMultiValued(name)) {
                String[] stored = metadata.getValues(name);
                if (stored == null) {
                    continue;
                }
                // Work on a trimmed copy: sorting the trimmed strings makes
                // equal values adjacent so the duplicate check below is
                // reliable, and the array handed out by Metadata is left
                // untouched. Null entries become empty and are skipped.
                String[] cleaned = new String[stored.length];
                for (int i = 0; i < stored.length; ++i) {
                    cleaned[i] = stored[i] == null ? "" : stored[i].trim();
                }
                Collections.sort(Arrays.asList(cleaned));
                String prev = null;
                for (String str: cleaned) {
                    if (!str.isEmpty() && !str.equals(prev)) {
                        appendMetaLine(name, str);
                        prev = str;
                    }
                }
            } else {
                String value = metadata.get(name);
                if (value != null) {
                    value = value.trim();
                    if (!value.isEmpty()) {
                        appendMetaLine(name, value);
                    }
                }
            }
        }
        int len = sb.length();
        if (len > 0) {
            // Drop the trailing line separator.
            document.addField(CommonFields.META, sb.substring(0, len - 1));
        }
    }

    // Appends one "name:value" line, terminated by '\n', to the scratch
    // buffer.
    private void appendMetaLine(final String name, final String value) {
        sb.append(name);
        sb.append(':');
        sb.append(value);
        sb.append('\n');
    }

    /**
     * Returns the truncation marker passed to the constructor.
     *
     * @return the {@code truncated} value; {@code -1} makes
     *     {@code metadata(Document)} skip the TRUNCATED field
     */
    public int truncated() {
        return truncated;
    }

    /**
     * Returns the throwable passed to the constructor.
     *
     * @return the stored cause, exactly as supplied
     */
    public Throwable cause() {
        return cause;
    }

    /**
     * Returns the current size of the wrapped metadata, as reported by
     * {@code Metadata.size()}.
     *
     * @return the metadata size at the time of the call
     */
    public int size() {
        return metadata.size();
    }

    /**
     * Returns the value stored in the wrapped metadata under
     * {@code Metadata.CONTENT_TYPE}.
     *
     * @return whatever {@code Metadata.get} returns for that key
     */
    public String mimetype() {
        return metadata.get(Metadata.CONTENT_TYPE);
    }
}

