package ru.yandex.tikaite.srw;

import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.logging.Logger;

import cocaine.hpack.HeaderField;
import org.apache.http.Header;
import org.apache.http.HttpException;
import org.apache.http.HttpRequest;
import org.apache.http.HttpStatus;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.message.BasicHeader;
import org.apache.james.mime4j.stream.Field;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.xml.sax.ContentHandler;
import org.xml.sax.helpers.DefaultHandler;

import ru.yandex.base64.Base64Encoder;
import ru.yandex.client.cocaine.logging.CocaineLoggingService;
import ru.yandex.client.cocaine.unistorage.UnistorageService;
import ru.yandex.client.cocaine.worker.CocaineWorkerSession;
import ru.yandex.client.cocaine.worker.http.AbstractCocaineHttpRequestHandler;
import ru.yandex.client.cocaine.worker.http.CocaineDummyHttpRequestHandler;
import ru.yandex.http.util.CharsetUtils;
import ru.yandex.http.util.HeaderUtils;
import ru.yandex.http.util.UnsupportedMediaTypeException;
import ru.yandex.io.ByteArrayInputStreamFactory;
import ru.yandex.io.LimitedIOException;
import ru.yandex.io.LimitedWriter;
import ru.yandex.io.NullWriter;
import ru.yandex.io.StringBuilderWriter;
import ru.yandex.io.TrimmingWriter;
import ru.yandex.parser.html.BodyContentHandler;
import ru.yandex.parser.html.TextContentHandler;
import ru.yandex.parser.rfc2047.DefaultRfc2047DecodersProvider;
import ru.yandex.parser.rfc2047.Rfc2047Parser;
import ru.yandex.parser.string.NonEmptyValidator;
import ru.yandex.parser.uri.CgiParams;
import ru.yandex.tikaite.parser.HtmlParser;
import ru.yandex.tikaite.parser.TxtParser;
import ru.yandex.tikaite.server.DiskHandler;
import ru.yandex.tikaite.util.QuotesSeparatingContentHandler;
import ru.yandex.tikaite.util.QuotesSeparatingWriter;
import ru.yandex.tikaite.util.TextExtractOptions;
import ru.yandex.util.storage.DataExtractorConfigBuilder;
import ru.yandex.util.storage.ImmutableDataExtractorConfig;
import ru.yandex.util.storage.MailTextExtractor;
import ru.yandex.util.storage.StorageData;
import ru.yandex.util.storage.cocaine.CocaineStorageClient;

/**
 * Cocaine HTTP handler that loads the data stored under the {@code stid}
 * request parameter, runs it through a text or HTML parser and replies with
 * the extracted text as {@code text/plain}.
 *
 * <p>The reply may carry up to two extra headers: {@code Subject-Base64},
 * holding the Base64 form of the UTF-8 encoded subject when a non-empty
 * subject was seen, and {@code Truncated: true} when parsing ended with an
 * exception recognised by {@link LimitedIOException#isLimitedIOException}.
 */
public class SrwTextHandler extends AbstractCocaineHttpRequestHandler {
    /** SAX handler that ignores every event it receives. */
    private static final DefaultHandler NULL_CONTENT_HANDLER =
        new DefaultHandler();
    /** Header added to the reply when the extracted text was cut short. */
    private static final Header TRUNCATED =
        HeaderUtils.createHeader("Truncated", "true");
    /** Extractor configuration built with all builder defaults. */
    private static final ImmutableDataExtractorConfig DATA_EXTRACTOR_CONFIG =
        new DataExtractorConfigBuilder().build();

    private final TikaiteSrw tikaiteSrw;
    private final UnistorageService unistorage;

    /**
     * Creates the handler.
     *
     * @param logging logging service handed to the base handler
     * @param unistorage storage service used to fetch data by stid
     * @param tikaiteSrw passed to {@code DiskHandler.extractOptions} when
     *     the text extraction options are built for a request
     */
    public SrwTextHandler(
        final CocaineLoggingService logging,
        final UnistorageService unistorage,
        final TikaiteSrw tikaiteSrw)
    {
        super(logging, 0L);
        this.tikaiteSrw = tikaiteSrw;
        this.unistorage = unistorage;
    }

    /**
     * Wraps a subject into a {@code Subject-Base64} header.
     *
     * @param subject subject text, encoded as UTF-8 before Base64 encoding
     * @return header whose value is the encoder's string form
     */
    private static Header createSubjectHeader(final String subject) {
        byte[] utf8 = subject.getBytes(StandardCharsets.UTF_8);
        Base64Encoder base64 = new Base64Encoder();
        base64.process(utf8);
        return new BasicHeader("Subject-Base64", base64.toString());
    }

    /**
     * Picks the extra reply headers for the given subject/truncation state.
     *
     * @param subject decoded subject, may be {@code null} or empty
     * @param truncated whether the text output was cut short
     * @return subject header (if the subject is non-empty) followed by the
     *     truncation header (if truncated); empty list when neither applies
     */
    private static List<Header> extraHeaders(
        final String subject,
        final boolean truncated)
    {
        boolean hasSubject = subject != null && !subject.isEmpty();
        if (hasSubject && truncated) {
            List<Header> both = new ArrayList<>(2);
            both.add(createSubjectHeader(subject));
            both.add(TRUNCATED);
            return both;
        }
        if (hasSubject) {
            return Collections.singletonList(createSubjectHeader(subject));
        }
        if (truncated) {
            return Collections.singletonList(TRUNCATED);
        }
        return Collections.emptyList();
    }

    /**
     * Serves one request: fetches the data for {@code stid}, extracts its
     * text and sends it back with status 200.
     *
     * @param request incoming request; supplies the accepted charset and
     *     the CGI parameters
     * @param headers forwarded unchanged to the reply handler
     * @param session worker session used for the storage request and reply
     * @param logger logger passed to the storage client
     * @throws HttpException propagated from the calls made here; a parse
     *     failure that is not a limit overflow is reported as
     *     {@link UnsupportedMediaTypeException}
     */
    @Override
    protected void handle(
        final HttpRequest request,
        final List<HeaderField> headers,
        final CocaineWorkerSession session,
        final Logger logger)
        throws HttpException
    {
        Charset responseCharset = CharsetUtils.acceptedCharset(request);
        CgiParams cgi = new CgiParams(request);
        String stid = cgi.get("stid", NonEmptyValidator.INSTANCE);

        SubjectExtractor extractor = new SubjectExtractor();
        CocaineStorageClient storage =
            new CocaineStorageClient(
                unistorage,
                extractor,
                DATA_EXTRACTOR_CONFIG);
        StorageData data = storage.sendStorageRequest(stid, session, logger);
        String metainfo = data.metainfo();
        TextExtractOptions options =
            DiskHandler.extractOptions(cgi, -1L, tikaiteSrw);
        ContentType storedType = data.contentType();
        String mimetype = storedType.getMimeType();

        // Extracted text accumulates here; the wrappers trim it and cap
        // its size at the configured write limit.
        StringBuilderWriter text = new StringBuilderWriter();
        LimitedWriter capped = new LimitedWriter(text, options.writeLimit());
        Writer sink = new TrimmingWriter(capped);

        // NOTE(review): the null writer/handler presumably receives the
        // quoted part of the text, which is then dropped -- confirm against
        // the QuotesSeparating* classes.
        Parser parser;
        ContentHandler textHandler;
        if (!"text/plain".equals(mimetype)) {
            parser = HtmlParser.INSTANCE;
            TextContentHandler plain = new TextContentHandler(sink);
            textHandler = new QuotesSeparatingContentHandler(
                NULL_CONTENT_HANDLER,
                plain);
        } else {
            parser = TxtParser.INSTANCE;
            QuotesSeparatingWriter unquoted =
                new QuotesSeparatingWriter(NullWriter.INSTANCE, sink);
            textHandler = new TextContentHandler(unquoted);
        }

        Metadata metadata = new Metadata();
        metadata.set(Metadata.CONTENT_TYPE, metainfo);

        boolean truncated = false;
        try {
            parser.parse(
                data.processWith(ByteArrayInputStreamFactory.INSTANCE),
                new BodyContentHandler(textHandler),
                metadata,
                new ParseContext());
        } catch (Exception e) {
            // A limit overflow is not an error: whatever was written so
            // far is still returned, flagged as truncated.
            if (!LimitedIOException.isLimitedIOException(e)) {
                throw new UnsupportedMediaTypeException("Parse failed", e);
            }
            truncated = true;
        }

        List<Header> additionalHeaders =
            extraHeaders(extractor.subject, truncated);
        StringEntity body =
            new StringEntity(
                text.toString(),
                ContentType.TEXT_PLAIN.withCharset(responseCharset));
        CocaineDummyHttpRequestHandler reply =
            new CocaineDummyHttpRequestHandler(
                HttpStatus.SC_OK,
                body,
                additionalHeaders);
        reply.handle(request, headers, session);
    }

    /**
     * Mail text extractor that additionally remembers the first
     * {@code subject} field it is given, RFC 2047 decoded and trimmed.
     */
    private static class SubjectExtractor extends MailTextExtractor {
        /** First decoded subject seen, or {@code null} if none yet. */
        private String subject;

        @Override
        protected void processField(final String name, final Field field) {
            // Only the first subject field counts; later ones are ignored.
            if (subject != null || !"subject".equals(name)) {
                return;
            }
            String decoded =
                Rfc2047Parser.decode(
                    field.getBody(),
                    DefaultRfc2047DecodersProvider.INSTANCE);
            subject = decoded.trim();
        }

        @Override
        public boolean findFirstHtml() {
            return true;
        }
    }
}

