package ru.yandex.tikaite.detect;

import java.io.ByteArrayInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipException;

import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.tika.detect.Detector;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;

import ru.yandex.io.IOStreamUtils;
import ru.yandex.tikaite.util.TemporaryDirectory;

/**
 * Detector decorator which looks inside gzip-compressed content.
 *
 * <p>The leading bytes of the input are checked for the gzip signature. If
 * it is present, those bytes are decompressed and handed to the wrapped
 * detector; the type it reports is then returned with a {@code +gzip}
 * suffix appended to its subtype (see {@link #wrapType(MediaType)}).
 * In every other case {@link MediaType#OCTET_STREAM} is returned.
 */
public class UngzippingDetector implements Detector {
    private static final long serialVersionUID = 0L;
    /** Size of the buffer which receives the leading bytes of the input. */
    private static final int BUFFER_SIZE = 65536;

    /** Detector applied to the decompressed content. */
    private final Detector detector;

    /**
     * Creates a detector which delegates detection of the decompressed
     * content to the given one.
     *
     * @param detector detector to apply to the decompressed data
     */
    public UngzippingDetector(final Detector detector) {
        this.detector = detector;
    }

    /**
     * Builds the media type reported for gzip-compressed content of the
     * given type.
     *
     * <p>Only the type and the subtype are used; parameters of the original
     * media type are not carried over.
     *
     * @param type media type of the decompressed content
     * @return media type with the same primary type and with {@code +gzip}
     *     appended to the subtype
     */
    public static MediaType wrapType(final MediaType type) {
        return new MediaType(type.getType(), type.getSubtype() + "+gzip");
    }

    /**
     * Detects the type of gzip-compressed content.
     *
     * @param in stream to inspect; it is only accessed through
     *     {@code IOStreamUtils.peek} (presumably leaving the stream position
     *     untouched -- confirm against that helper)
     * @param metadata metadata passed through to the wrapped detector
     * @return type reported by the wrapped detector for the decompressed
     *     data, wrapped with {@link #wrapType(MediaType)}, or
     *     {@link MediaType#OCTET_STREAM} if the input is not gzip, is
     *     malformed or truncated, or the wrapped detector could not
     *     recognize the content
     * @throws IOException if peeking the input fails, or the wrapped
     *     detector fails for a reason other than malformed or truncated
     *     gzip data
     */
    @Override
    public MediaType detect(final InputStream in, final Metadata metadata)
        throws IOException
    {
        byte[] buf = new byte[BUFFER_SIZE];
        int len = IOStreamUtils.peek(in, buf);
        MediaType mediaType = MediaType.OCTET_STREAM;
        if (GzipCompressorInputStream.matches(buf, len)) {
            // Only the peeked prefix is decompressed, so the gzip stream
            // seen here may well be truncated.
            try (GZIPInputStream gzis = new GZIPInputStream(
                    new ByteArrayInputStream(buf, 0, len));
                TikaInputStream tis = TikaInputStream.get(
                    gzis,
                    new TemporaryDirectory(false)))
            {
                MediaType next = detector.detect(tis, metadata);
                if (!MediaType.OCTET_STREAM.equals(next)) {
                    mediaType = wrapType(next);
                }
            } catch (ZipException | EOFException ignored) {
                // ZipException: the signature matched, but the data is not
                // a valid gzip stream.
                // EOFException: the gzip header or body ended prematurely,
                // e.g. the input is only a few bytes long or was cut off at
                // the buffer boundary.
                // Either way the content can't be detected, so fall through
                // with the current value of mediaType.
            }
        }
        return mediaType;
    }
}

