package ru.yandex.tikaite.parser;

import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.Set;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.rtf.RTFEmbObjHandler;
import org.apache.tika.parser.rtf.TextExtractor;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

import ru.yandex.io.BufferedPushbackInputStream;
import ru.yandex.io.TaggedInputStream;

/**
 * Tika {@link Parser} for {@code application/rtf} documents, exposed as a
 * single-constant enum ({@link #INSTANCE}).
 */
public enum RtfParser implements Parser {
    INSTANCE;

    // Handed to RTFEmbObjHandler as its last constructor argument;
    // presumably the size cap for embedded objects, in kilobytes
    // (TODO confirm against RTFEmbObjHandler).
    private static final int MAX_OBJECT_KBYTES = 1024;
    private static final Set<MediaType> SUPPORTED_TYPES =
        Collections.singleton(MediaType.application("rtf"));

    /**
     * Returns the media types handled by this parser.
     *
     * @param context parse context, not consulted
     * @return singleton set containing {@code application/rtf}
     */
    @Override
    public Set<MediaType> getSupportedTypes(final ParseContext context) {
        return SUPPORTED_TYPES;
    }

    /**
     * Extracts text from an RTF stream and emits it through
     * {@code handler}.
     *
     * <p>The content type in {@code metadata} is set to
     * {@code application/rtf} before any parsing starts. This method does
     * not close {@code is} itself.
     *
     * @param is RTF input
     * @param handler receiver of the XHTML SAX events
     * @param metadata document metadata, updated with the content type
     * @param context parse context forwarded to the embedded object handler
     * @throws IOException when {@code throwIfCauseOf} on the tagged wrapper
     *     decides to rethrow (presumably for failures originating in
     *     {@code is}; confirm against TaggedInputStream)
     * @throws SAXException propagated unchanged from the extraction
     * @throws TikaException on any other {@link IOException} or on any
     *     {@link RuntimeException} raised during extraction
     */
    @Override
    public void parse(
        final InputStream is,
        final ContentHandler handler,
        final Metadata metadata,
        final ParseContext context)
        throws IOException, SAXException, TikaException
    {
        metadata.set(Metadata.CONTENT_TYPE, "application/rtf");
        final TaggedInputStream source = new TaggedInputStream(is);
        final BufferedPushbackInputStream pushback =
            new BufferedPushbackInputStream(source);
        try {
            extractText(pushback, handler, metadata, context);
        } catch (IOException ioe) {
            // Give the tagged wrapper the first chance to rethrow the
            // exception as is; anything left over is reported as a
            // parse failure.
            source.throwIfCauseOf(ioe);
            throw new TikaException("Failed to parse rtf", ioe);
        } catch (RuntimeException re) {
            throw new TikaException("Malformed input", re);
        }
    }

    // Wires up the XHTML handler, the embedded object handler and the text
    // extractor, then runs the extraction over the given stream.
    private static void extractText(
        final BufferedPushbackInputStream in,
        final ContentHandler handler,
        final Metadata metadata,
        final ParseContext context)
        throws IOException, SAXException, TikaException
    {
        final XHTMLContentHandler xhtml =
            new XHTMLContentHandler(handler, metadata);
        final RTFEmbObjHandler embedded = new RTFEmbObjHandler(
            xhtml,
            metadata,
            context,
            MAX_OBJECT_KBYTES);
        final TextExtractor extractor =
            new TextExtractor(xhtml, metadata, embedded);
        // List markup is explicitly not ignored.
        extractor.setIgnoreListMarkup(false);
        extractor.extract(in);
    }
}

