package ru.yandex.tikaite.parser;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

import ru.yandex.io.CountingInputStream;

/**
 * Tika parser which walks the block headers of a RAR archive and emits the
 * names of the stored files without unpacking anything.
 *
 * <p>After the common six byte signature the next byte selects the header
 * layout: zero is handled by {@link #parseV4}, anything else by
 * {@link #parseV5}. File names are written as text lines inside a single
 * {@code <p>} element; for the second layout an uncompressed {@code CMT}
 * service record is additionally stored as
 * {@link TikaCoreProperties#COMMENTS} metadata.
 *
 * <p>All sizes are taken from the (untrusted) archive, so every length is
 * validated before it is used for allocation or indexing.
 */
public enum RarParser implements Parser {
    INSTANCE;

    private static final Charset CP1251 = Charset.forName("cp1251");
    // Signature bytes shared by both supported header layouts
    private static final int[] MARK =
        new int[] {0x52, 0x61, 0x72, 0x21, 0x1a, 0x07};
    // Upper bound for any name/comment buffer allocated from archive data
    private static final int MAX_STRING_LENGTH = 262144;
    private static final int BYTE_MASK = 0xff;
    private static final int BYTE_SHIFT = 8;
    private static final int FLAGS_SHIFT = 6;
    private static final int LOW_BITS_MASK = 0x7f;
    private static final int LENGTH_FLAG = 0x80;
    private static final int FILE_HEAD_TYPE = 0x74;
    private static final int NEWSUB_TYPE = 0x7a;
    private static final int LARGE_FLAG = 0x100;
    private static final int UNICODE_FLAG = 0x200;
    private static final int SKIP = 15;
    private static final int THREE = 3;
    private static final int FOUR = 4;
    private static final int VARINT_BYTE_SHIFT = 7;
    private static final int VARINT_CONTINUATION_FLAG = 0x80;
    // ten groups of seven bits are enough to fill a 64 bit value
    private static final int MAX_VARINT_BYTES = 10;
    private static final long FILE_HEADER = 2L;
    private static final long SERVICE_HEADER = 3L;
    private static final long EXTRA_AREA_FLAG = 1L;
    private static final long DATA_FLAG = 2L;
    private static final long MTIME_FLAG = 2L;
    private static final long CRC32_FLAG = 4L;
    private static final long COMPRESSION_METHOD_MASK = 0x380L;
    private static final String P = "p";
    private static final String COMMENT_RECORD = "CMT";
    private static final Set<MediaType> TYPES = Collections.unmodifiableSet(
        new HashSet<>(Arrays.asList(
            MediaType.application("x-rar-compressed"),
            MediaType.application("x-rar"))));

    @Override
    public Set<MediaType> getSupportedTypes(final ParseContext context) {
        return TYPES;
    }

    /**
     * Checks the archive signature and emits one text line per file entry.
     *
     * @param is archive stream, read sequentially and never closed here
     * @param handler receives the XHTML events
     * @param metadata receives the archive comment, if one is found
     * @param context not used by this parser
     * @throws IOException on read failure or premature end of stream
     * @throws SAXException if the handler rejects an event
     * @throws TikaException if the signature or a stored length is invalid
     */
    // CSOFF: ParameterNumber
    @Override
    public void parse(
        final InputStream is,
        final ContentHandler handler,
        final Metadata metadata,
        final ParseContext context)
        throws IOException, SAXException, TikaException
    {
        CountingInputStream in = new CountingInputStream(is);
        for (int i = 0; i < MARK.length; ++i) {
            int read = in.read();
            if (MARK[i] != read) {
                throw new TikaException(
                    "Bad file format. Expected: " + MARK[i]
                    + ", got: " + read + ", at position " + in.pos());
            }
        }
        // the byte following the common signature selects the layout
        int version = forceRead(in);
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.startElement(P);
        if (version == 0) {
            parseV4(in, xhtml);
        } else {
            parseV5(in, xhtml, metadata);
        }
        xhtml.endElement(P);
        xhtml.endDocument();
    }
    // CSON: ParameterNumber

    /**
     * Reads fixed-layout block headers until a clean end of stream and
     * emits the name of every {@code FILE_HEAD_TYPE} block.
     *
     * @param in stream positioned right after the signature
     * @param handler receives one text line per file name
     * @throws IOException on read failure or end of stream inside a block
     * @throws SAXException if the handler rejects an event
     * @throws TikaException if a stored name length is out of range
     */
    private static void parseV4(
        final CountingInputStream in,
        final XHTMLContentHandler handler)
        throws IOException, SAXException, TikaException
    {
        while (true) {
            long pos = in.pos();
            // first byte of the header CRC; end of stream is legal only here
            if (in.read() == -1) {
                break;
            }
            // skip second byte of the header CRC
            forceRead(in);
            int type = forceRead(in);
            int flags = readUInt16(in);
            int headerSize = readUInt16(in);
            long size = 0L;
            if (type == FILE_HEAD_TYPE || type == NEWSUB_TYPE) {
                // widen before shifting: an int shift by 24 would
                // sign-extend into the long and produce a negative size
                for (int i = 0; i < FOUR; ++i) {
                    size |= ((long) forceRead(in)) << (BYTE_SHIFT * i);
                }
            }
            if (type == FILE_HEAD_TYPE) {
                // not interesting fields like unpacked size and timestamp
                forceSkip(in, SKIP);
                int nameSize = readUInt16(in);
                // skip file attributes
                forceSkip(in, FOUR);
                if ((flags & LARGE_FLAG) != 0) {
                    // high four bytes of the packed size
                    for (int i = 0; i < FOUR; ++i) {
                        size |= ((long) forceRead(in))
                            << (BYTE_SHIFT * (i + FOUR));
                    }
                    // skip unpacked size high bytes
                    forceSkip(in, FOUR);
                }
                byte[] name = readName(in, nameSize);
                String nameString;
                if ((flags & UNICODE_FLAG) == 0) {
                    nameString = new String(name, CP1251);
                } else {
                    nameString = decodeString(name);
                }
                handler.characters(nameString);
                handler.newline();
            }
            // skip rest of header
            long totalRead = in.pos() - pos;
            if (headerSize > totalRead) {
                forceSkip(in, headerSize - totalRead);
            }
            // skip packed file body
            forceSkip(in, size);
        }
    }

    /**
     * Decodes a name stored with the unicode flag set.
     *
     * <p>A buffer without a zero byte is plain UTF-8. Otherwise the bytes
     * before the first zero are the single-byte name and the bytes after it
     * are a compact encoding of the wide name. If the encoded part yields
     * no characters (missing or truncated), the single-byte name decoded as
     * cp1251 is returned instead.
     *
     * @param buf raw name field
     * @return decoded name, never {@code null}
     */
    private static String decodeString(final byte[] buf) {
        int zero = indexOfZero(buf);
        if (zero == buf.length) {
            return new String(buf, StandardCharsets.UTF_8);
        }
        char[] cbuf = new char[buf.length];
        int decoded = decodeWideName(buf, zero + 1, cbuf);
        if (decoded == 0) {
            return new String(buf, 0, zero, CP1251);
        }
        return new String(cbuf, 0, decoded);
    }

    /**
     * Decodes the encoded wide name located at {@code buf[start..]}.
     *
     * <p>The first byte is the shared high byte. Then every flags byte
     * carries four two-bit commands, consumed from the most significant
     * bits: 0 - one byte is the character, 1 - one byte combined with the
     * high byte, 2 - two bytes, low byte first, 3 - a run copied from the
     * single-byte name at the current output position. Decoding stops, and
     * keeps what was produced so far, as soon as the input is exhausted or
     * {@code out} is full.
     *
     * @param buf raw name field
     * @param start index of the high byte, may be equal to the length
     * @param out destination, must be as long as {@code buf}
     * @return number of characters written to {@code out}
     */
    private static int decodeWideName(
        final byte[] buf,
        final int start,
        final char[] out)
    {
        if (start >= buf.length) {
            return 0;
        }
        int pos = start;
        int outpos = 0;
        int high = (buf[pos++] & BYTE_MASK) << BYTE_SHIFT;
        int flags = 0;
        int bits = 0;
        while (pos < buf.length && outpos < out.length) {
            if (bits == 0) {
                flags = buf[pos++] & BYTE_MASK;
                bits = BYTE_SHIFT;
                if (pos == buf.length) {
                    // flags byte without any payload
                    break;
                }
            }
            int mode = (flags >> FLAGS_SHIFT) & THREE;
            // commands 2 and 3-with-correction consume two bytes
            int needed = 1;
            if (mode == 2
                || (mode == THREE && (buf[pos] & LENGTH_FLAG) != 0))
            {
                needed = 2;
            }
            if (buf.length - pos < needed) {
                break;
            }
            int len;
            switch (mode) {
                case 0:
                    out[outpos++] = (char) (buf[pos++] & BYTE_MASK);
                    break;

                case 1:
                    out[outpos++] =
                        (char) ((buf[pos++] & BYTE_MASK) + high);
                    break;

                case 2:
                    out[outpos++] =
                        (char) ((buf[pos] & BYTE_MASK)
                        + ((buf[pos + 1] & BYTE_MASK) << BYTE_SHIFT));
                    pos += 2;
                    break;

                case THREE:
                    len = buf[pos++] & BYTE_MASK;
                    if ((len & LENGTH_FLAG) == 0) {
                        len += 2;
                        // out is as long as buf, so buf[outpos] is safe
                        while (len > 0 && outpos < out.length) {
                            out[outpos] = (char) (buf[outpos] & BYTE_MASK);
                            --len;
                            ++outpos;
                        }
                    } else {
                        int correction = buf[pos++] & BYTE_MASK;
                        len = (len & LOW_BITS_MASK) + 2;
                        while (len > 0 && outpos < out.length) {
                            out[outpos] =
                                (char) ((((buf[outpos] & BYTE_MASK)
                                    + correction) & BYTE_MASK) + high);
                            --len;
                            ++outpos;
                        }
                    }
                    break;

                default:
                    break;
            }
            flags <<= 2;
            bits -= 2;
        }
        return outpos;
    }

    /**
     * Reads varint based block headers until a clean end of stream, emits
     * the name of every file header and stores an uncompressed {@code CMT}
     * service record as comment metadata.
     *
     * @param in stream positioned right after the version byte
     * @param handler receives one text line per file name
     * @param metadata receives the archive comment
     * @throws IOException on read failure or end of stream inside a block
     * @throws SAXException if the handler rejects an event
     * @throws TikaException if a stored length or varint is invalid
     */
    private static void parseV5(
        final CountingInputStream in,
        final XHTMLContentHandler handler,
        final Metadata metadata)
        throws IOException, SAXException, TikaException
    {
        // the RAR 5 signature is eight bytes long: skip its trailing zero
        forceRead(in);
        while (true) {
            // first byte of the header CRC32; end of stream is legal here
            if (in.read() == -1) {
                break;
            }
            // skip rest of CRC32
            forceSkip(in, THREE);
            long headerSize = read(in);
            // header size is counted from the header type field
            long pos = in.pos();
            long type = read(in);
            long flags = read(in);
            if ((flags & EXTRA_AREA_FLAG) != 0) {
                // skip extra area size
                read(in);
            }
            long size = 0L;
            if ((flags & DATA_FLAG) != 0) {
                size = read(in);
            }
            if (type != FILE_HEADER && type != SERVICE_HEADER) {
                // skip rest of header
                forceSkip(in, headerSize - (in.pos() - pos));
                // skip data
                forceSkip(in, size);
                continue;
            }
            long fileFlags = read(in);
            // skip unpacked size
            read(in);
            // skip attributes
            read(in);
            if ((fileFlags & MTIME_FLAG) != 0) {
                // skip mtime
                forceSkip(in, FOUR);
            }
            if ((fileFlags & CRC32_FLAG) != 0) {
                // skip data CRC32
                forceSkip(in, FOUR);
            }
            long compressionInfo = read(in);
            // skip host OS info
            read(in);
            // validate before narrowing: a plain (int) cast could wrap a
            // huge varint into a small or negative length
            int nameSize = checkLength(read(in));
            String nameString = utf8UntilZero(readName(in, nameSize));
            // skip rest of header
            forceSkip(in, headerSize - (in.pos() - pos));
            if (type == FILE_HEADER) {
                handler.characters(nameString);
                handler.newline();
                // skip packed file body
                forceSkip(in, size);
            } else if (COMMENT_RECORD.equals(nameString)
                && (compressionInfo & COMPRESSION_METHOD_MASK) == 0
                && size > 0)
            {
                // comment is stored uncompressed, read it as is
                byte[] comment = readName(in, checkLength(size));
                String commentString = utf8UntilZero(comment).trim();
                if (!commentString.isEmpty()) {
                    metadata.add(
                        TikaCoreProperties.COMMENTS,
                        commentString);
                }
            } else {
                forceSkip(in, size);
            }
        }
    }

    /**
     * Returns the index of the first zero byte or the array length if
     * there is none.
     */
    private static int indexOfZero(final byte[] buf) {
        int zero = 0;
        while (zero < buf.length && buf[zero] != 0) {
            ++zero;
        }
        return zero;
    }

    /**
     * Decodes the bytes preceding the first zero byte (or the whole array
     * if there is none) as UTF-8.
     */
    private static String utf8UntilZero(final byte[] buf) {
        return new String(buf, 0, indexOfZero(buf), StandardCharsets.UTF_8);
    }

    /**
     * Narrows a length taken from the archive to {@code int}.
     *
     * @throws TikaException if the length is negative or exceeds
     *     {@code MAX_STRING_LENGTH}
     */
    private static int checkLength(final long length) throws TikaException {
        if (length < 0L || length > MAX_STRING_LENGTH) {
            throw new TikaException(
                "Requested length is " + length
                + ", looks like broken archive");
        }
        return (int) length;
    }

    /**
     * Reads a two byte unsigned value, low byte first.
     */
    private static int readUInt16(final CountingInputStream in)
        throws IOException
    {
        int low = forceRead(in);
        return low | (forceRead(in) << BYTE_SHIFT);
    }

    /**
     * Reads one byte, treating end of stream as an error.
     *
     * @return the byte as a value in the range 0-255
     * @throws EOFException if the stream is exhausted
     */
    private static int forceRead(final CountingInputStream in)
        throws IOException
    {
        int read = in.read();
        if (read == -1) {
            throw new EOFException("Unexpected eof at position " + in.pos());
        }
        return read;
    }

    /**
     * Skips exactly {@code n} bytes; does nothing for {@code n <= 0}.
     *
     * <p>{@code skip} is allowed to return zero before the end of stream,
     * so a zero result is verified with a single byte read instead of being
     * reported as EOF right away.
     *
     * @throws EOFException if the stream ends before {@code n} bytes
     */
    // CSOFF: FinalParameters
    private static void forceSkip(final CountingInputStream in, long n)
        throws IOException
    {
        while (n > 0) {
            long skipped = in.skip(n);
            if (skipped > 0) {
                n -= skipped;
            } else if (in.read() == -1) {
                throw new EOFException(
                    "Unexpected eof while skip at position " + in.pos());
            } else {
                // the probing read consumed one byte
                --n;
            }
        }
    }
    // CSON: FinalParameters

    /**
     * Reads a variable length integer: seven payload bits per byte, least
     * significant group first, high bit set on every byte but the last.
     *
     * @return decoded value; may be negative if all 64 bits are used
     * @throws EOFException if the stream ends inside the value
     * @throws TikaException if the value is longer than
     *     {@code MAX_VARINT_BYTES} bytes and cannot fit into a long
     */
    private static long read(final CountingInputStream in)
        throws IOException, TikaException
    {
        long value = 0L;
        for (int i = 0; i < MAX_VARINT_BYTES; ++i) {
            int read = in.read();
            if (read == -1) {
                throw new EOFException(
                    "Unexpected eof while reading varint at position "
                    + in.pos());
            }
            long payload = read & LOW_BITS_MASK;
            value |= payload << (i * VARINT_BYTE_SHIFT);
            if ((read & VARINT_CONTINUATION_FLAG) == 0) {
                return value;
            }
        }
        throw new TikaException(
            "Varint is longer than " + MAX_VARINT_BYTES
            + " bytes, at position " + in.pos());
    }

    /**
     * Reads exactly {@code nameSize} bytes.
     *
     * @param in source stream
     * @param nameSize number of bytes to read
     * @return freshly allocated array of {@code nameSize} bytes
     * @throws EOFException if the stream ends before the array is filled
     * @throws TikaException if {@code nameSize} is negative or exceeds
     *     {@code MAX_STRING_LENGTH}
     */
    public static byte[] readName(
        final CountingInputStream in,
        final int nameSize)
        throws IOException, TikaException
    {
        byte[] name = new byte[checkLength(nameSize)];
        int pos = 0;
        while (pos < nameSize) {
            int read = in.read(name, pos, nameSize - pos);
            if (read == -1) {
                throw new EOFException(
                    "Read only " + pos
                    + " bytes from name, at position " + in.pos());
            }
            pos += read;
        }
        return name;
    }
}

