package ru.yandex.tikaite.detect;

import java.io.ByteArrayInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipInputStream;

import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.tika.detect.Detector;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;

import ru.yandex.io.IOStreamUtils;
import ru.yandex.tikaite.util.TemporaryDirectory;

/**
 * Detector decorator that looks inside zip archives.
 *
 * <p>When the leading bytes of the input carry a zip signature, the content
 * of the first archive entry is handed to the wrapped detector and the type
 * it reports is returned with {@code +zip} appended to the subtype. In all
 * other cases {@link MediaType#OCTET_STREAM} is returned.
 */
public class UnzippingDetector implements Detector {
    private static final long serialVersionUID = 0L;
    // Upper bound on the number of leading input bytes that are inspected.
    private static final int BUFFER_SIZE = 65536;

    // Detector applied to the content of the first archive entry.
    private final Detector detector;

    /**
     * Creates a detector which delegates entry content detection.
     *
     * @param detector detector applied to the first entry of a zip archive
     */
    public UnzippingDetector(final Detector detector) {
        this.detector = detector;
    }

    /**
     * Marks a media type as being packed inside a zip archive.
     *
     * @param type media type of the archived content
     * @return media type with the primary type of {@code type} and its
     *     subtype followed by {@code +zip}
     */
    public static MediaType wrapType(final MediaType type) {
        return new MediaType(type.getType(), type.getSubtype() + "+zip");
    }

    /**
     * Detects the type of the first entry of a zip archive.
     *
     * <p>Only the first {@code BUFFER_SIZE} bytes obtained through
     * {@code IOStreamUtils.peek} are examined (presumably without consuming
     * them from {@code in} -- confirm against {@code IOStreamUtils}), so
     * for larger archives the zip data seen here is truncated. Both
     * malformed and truncated zip data are treated as "nothing more can be
     * detected" instead of being reported as a failure.
     *
     * @param in stream whose leading bytes are inspected
     * @param metadata metadata passed through to the wrapped detector
     * @return the type reported by the wrapped detector for the first entry
     *     wrapped with {@link #wrapType(MediaType)}, or
     *     {@link MediaType#OCTET_STREAM} if the input is not a zip archive,
     *     has no entries, or the entry type could not be determined
     * @throws IOException if peeking the input or the wrapped detector
     *     fails for a reason other than malformed or truncated zip data
     */
    @Override
    public MediaType detect(final InputStream in, final Metadata metadata)
        throws IOException
    {
        byte[] buf = new byte[BUFFER_SIZE];
        int len = IOStreamUtils.peek(in, buf);
        MediaType mediaType = MediaType.OCTET_STREAM;
        if (ZipArchiveInputStream.matches(buf, len)) {
            try (ZipInputStream zis =
                    new ZipInputStream(new ByteArrayInputStream(buf, 0, len)))
            {
                ZipEntry entry = zis.getNextEntry();
                if (entry != null) {
                    try (TikaInputStream tis = TikaInputStream.get(
                            zis,
                            new TemporaryDirectory(false)))
                    {
                        MediaType next = detector.detect(tis, metadata);
                        if (!MediaType.OCTET_STREAM.equals(next)) {
                            mediaType = wrapType(next);
                        }
                        // The entry is deliberately not drained with
                        // closeEntry(): the buffer holds only the head of
                        // the archive, so reading the entry to its end
                        // would run off the truncated data and fail after
                        // the type has already been determined.
                    }
                }
            } catch (ZipException | EOFException e) {
                // Either it is not really a zip archive, or the archive
                // does not fit into the inspected window; keep whatever
                // has been detected so far
            }
        }
        return mediaType;
    }
}

