package ru.yandex.tikaite.server;

import java.io.IOException;
import java.nio.charset.Charset;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.http.HttpConnection;
import org.apache.http.HttpEntity;
import org.apache.http.HttpException;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.MethodNotSupportedException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.EntityTemplate;
import org.apache.http.protocol.HttpContext;
import org.apache.http.protocol.HttpRequestHandler;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.Parser;

import ru.yandex.function.GenericAutoCloseable;
import ru.yandex.function.GenericAutoCloseableHolder;
import ru.yandex.http.server.sync.BaseHttpServer;
import ru.yandex.http.server.sync.JsonContentProducerWriter;
import ru.yandex.http.util.CharsetUtils;
import ru.yandex.http.util.SynchronizedHttpContext;
import ru.yandex.http.util.UnsupportedMediaTypeException;
import ru.yandex.http.util.server.HttpServerConfigBuilder;
import ru.yandex.io.CountingWriter;
import ru.yandex.io.GenericCloseableAdapter;
import ru.yandex.json.writer.JsonTypeExtractor;
import ru.yandex.json.writer.JsonValue;
import ru.yandex.json.writer.JsonWriterBase;
import ru.yandex.parser.string.IntegerMemorySizeParser;
import ru.yandex.parser.uri.CgiParams;
import ru.yandex.parser.uri.UriParser;
import ru.yandex.search.document.Document;
import ru.yandex.search.document.JsonDocument;
import ru.yandex.tikaite.config.ImmutableTextExtractorConfig;
import ru.yandex.tikaite.util.CommonFields;
import ru.yandex.tikaite.util.DetectionResult;
import ru.yandex.tikaite.util.ExtractionFinalizer;
import ru.yandex.tikaite.util.TextExtractOptions;
import ru.yandex.tikaite.util.TextExtractResult;
import ru.yandex.tikaite.util.TextExtractor;

/**
 * Request handler which fetches a raw document from the storage, detects
 * its media type and responds with a JSON document produced by the text
 * extractor.
 *
 * <p>Only {@code GET} requests are accepted. The response body is
 * generated lazily by {@link TextProducer} while the response entity is
 * being written.
 */
public class DiskHandler implements HttpRequestHandler {
    private final Server server;

    /**
     * Creates handler bound to the given server.
     *
     * @param server server used for sending storage requests and as the
     *     source of extraction settings
     */
    public DiskHandler(final Server server) {
        this.server = server;
    }

    // CSOFF: ParameterNumber
    /**
     * Applies request parameters shared by all handlers to the options.
     *
     * <p>Recognized parameters: {@code parse-dmarc}, the CGI flags of
     * {@link TextExtractOptions.Mode#ULTRA_FAST} and
     * {@link TextExtractOptions.Mode#FAST} (the former takes precedence),
     * {@code sanitize-html}, {@code filter-fields} and
     * {@code filter-fields-options}.
     *
     * @param options options to be updated in place
     * @param params request CGI parameters
     * @param context source of the sanitizer and the fields filter
     * @return the same {@code options} instance
     * @throws HttpException if a parameter value can't be parsed
     */
    public static TextExtractOptions extractCommonOptions(
        final TextExtractOptions options,
        final CgiParams params,
        final TikaiteContext context)
        throws HttpException
    {
        options.parseDmarc(params.getBoolean("parse-dmarc", false));
        if (params.getBoolean(
                TextExtractOptions.Mode.ULTRA_FAST.cgiFlag(),
                false))
        {
            options.mode(TextExtractOptions.Mode.ULTRA_FAST);
        } else if (params.getBoolean(
            TextExtractOptions.Mode.FAST.cgiFlag(),
            false))
        {
            options.mode(TextExtractOptions.Mode.FAST);
        }
        if (params.getBoolean("sanitize-html", false)) {
            options.sanitizer(context.sanitizer());
        }
        if (params.getBoolean("filter-fields", false)) {
            options.fieldsFilter(context.fieldsFilter());
        }
        options.fieldsFilterOptions(
            params.getString("filter-fields-options", null));
        return options;
    }

    /**
     * Builds extraction options for a single request.
     *
     * <p>The write limit is taken from the {@code limit} parameter and
     * defaults to the configured text length limit. The parsing type is
     * selected by the content length: stream parsing at or above the
     * temporary file limit, in-memory parsing below the memory limit.
     * For a negative content length, or for a length between these two
     * limits, the parsing type is left untouched.
     *
     * @param params request CGI parameters
     * @param contentLength length of the document body, negative values
     *     are skipped by the parsing type selection
     * @param context source of the extractor config, sanitizer and fields
     *     filter
     * @return newly created options
     * @throws HttpException if a parameter value can't be parsed
     */
    public static TextExtractOptions extractOptions(
        final CgiParams params,
        final long contentLength,
        final TikaiteContext context)
        throws HttpException
    {
        TextExtractOptions options = new TextExtractOptions();
        ImmutableTextExtractorConfig config = context.textExtractorConfig();
        options.writeLimit(
            params.get(
                "limit",
                config.textLengthLimit(),
                IntegerMemorySizeParser.INSTANCE));
        options.mimetypeHint(params.getString(CommonFields.MIMETYPE, null));
        if (contentLength >= 0L) {
            if (contentLength >= config.tmpFileLimit()) {
                options.parsingType(TextExtractOptions.ParsingType.STREAM);
            } else if (contentLength < config.memoryLimit()) {
                options.parsingType(TextExtractOptions.ParsingType.MEMORY);
            }
        }
        return extractCommonOptions(options, params, context);
    }
    // CSON: ParameterNumber

    /**
     * Selects the object to be closed once the extraction is over.
     *
     * @param dr stream type detection result
     * @param options extraction options, the parsing type is consulted
     * @param context client context of the storage request
     * @return a {@code ConnectionTerminator} for the context connection
     *     if there is one and the stream parsing type is selected,
     *     otherwise {@code dr} itself
     */
    public static GenericAutoCloseable<IOException> selectConnectionTerminator(
        final DetectionResult dr,
        final TextExtractOptions options,
        final HttpClientContext context)
    {
        HttpConnection conn = context.getConnection();
        if (conn != null
            && options.parsingType() == TextExtractOptions.ParsingType.STREAM)
        {
            return new ConnectionTerminator(conn);
        } else {
            return dr;
        }
    }

    /**
     * Requests the raw document from the storage, detects its type and
     * installs a chunked JSON entity which performs the extraction when
     * the response is written.
     *
     * @param request incoming request, only {@code GET} is supported
     * @param response response to be filled
     * @param context request context, expected to carry the logger under
     *     the {@code BaseHttpServer.LOGGER} attribute
     * @throws HttpException on unsupported method, on undetected media
     *     type, or for any other failure converted by
     *     {@code Server.toHttpException}
     * @throws IOException declared by the handler interface
     */
    @Override
    public void handle(
        final HttpRequest request,
        final HttpResponse response,
        final HttpContext context)
        throws HttpException, IOException
    {
        String method = request.getRequestLine().getMethod();
        if (!"GET".equals(method)) {
            throw new MethodNotSupportedException(
                method + " method not supported");
        }
        UriParser uri = new UriParser(request.getRequestLine().getUri());
        CgiParams params = new CgiParams(uri.queryParser());
        Logger logger = (Logger) context.getAttribute(BaseHttpServer.LOGGER);
        HttpClientContext clientContext =
            new HttpClientContext(new SynchronizedHttpContext());
        try (GenericAutoCloseableHolder<
                IOException,
                GenericCloseableAdapter<CloseableHttpResponse>>
                storageResponse =
                    new GenericAutoCloseableHolder<>(
                        new GenericCloseableAdapter<>(
                            server.sendStorageRequest(
                                uri.rawPath() + "?raw",
                                logger,
                                clientContext))))
        {
            HttpEntity entity = storageResponse.get().get().getEntity();
            long contentLength = entity.getContentLength();
            TextExtractOptions options =
                extractOptions(params, contentLength, server);
            try (GenericAutoCloseableHolder<
                    IOException,
                    GenericCloseableAdapter<TikaInputStream>> in =
                        new GenericAutoCloseableHolder<>(
                            new GenericCloseableAdapter<>(
                                TikaInputStream.get(
                                    entity.getContent(),
                                    options.createTemporaryResources(),
                                    contentLength))))
            {
                // From now on the storage response is no longer closed
                // by its own holder, the input stream holder is in charge
                storageResponse.release();
                DetectionResult dr = TextExtractor.INSTANCE.detectStreamType(
                    in.get().get(),
                    options);
                GenericAutoCloseable<IOException> connectionTerminator =
                    selectConnectionTerminator(dr, options, clientContext);
                try {
                    if (dr.mediaType().equals(MediaType.OCTET_STREAM)) {
                        throw new UnsupportedMediaTypeException();
                    }
                    logger.info(
                        "Mimetype detected: " + dr.mediaType()
                        + ", parser: " + dr.parser());
                    Charset charset = CharsetUtils.acceptedCharset(request);
                    EntityTemplate responseEntity = new EntityTemplate(
                        new JsonContentProducerWriter(
                            new TextProducer(
                                new ExtractionFinalizer(
                                    dr,
                                    connectionTerminator,
                                    logger),
                                options),
                            JsonTypeExtractor.NORMAL.extract(params),
                            charset));
                    responseEntity.setChunked(true);
                    responseEntity.setContentType(
                        ContentType.APPLICATION_JSON
                            .withCharset(charset).toString());
                    response.setStatusCode(HttpStatus.SC_OK);
                    response.setEntity(responseEntity);
                } catch (Throwable t) {
                    // The response entity will never be written, so
                    // nobody else is going to close the terminator.
                    // Close it before the input stream holder closes the
                    // stream, the same order the unsupported media type
                    // case has always used. A failure of the close
                    // itself must not hide the original error.
                    try {
                        connectionTerminator.close();
                    } catch (Exception e) {
                        t.addSuppressed(e);
                    }
                    throw t;
                }
                // Ownership is handed over to the response entity
                in.release();
            }
        } catch (Throwable t) {
            throw Server.toHttpException(t);
        }
    }

    /**
     * JSON value which runs the text extraction and writes the resulting
     * document fields when asked to write itself.
     */
    public static class TextProducer implements JsonValue {
        private final ExtractionFinalizer extractionFinalizer;
        private final TextExtractOptions options;

        /**
         * Creates producer.
         *
         * @param extractionFinalizer holder of the detection result and
         *     the logger, closed at the end of {@link #writeValue}
         * @param options extraction options
         */
        public TextProducer(
            final ExtractionFinalizer extractionFinalizer,
            final TextExtractOptions options)
        {
            this.extractionFinalizer = extractionFinalizer;
            this.options = options;
        }

        /**
         * Writes build date, extracted body text, metadata, parsing
         * status and mimetype to the output. The extraction finalizer is
         * closed on exit, no matter whether the extraction succeeded.
         *
         * @param out JSON writer receiving the document
         * @throws IOException if extraction or writing fails
         */
        @Override
        public void writeValue(final JsonWriterBase out) throws IOException {
            try (ExtractionFinalizer extractionFinalizer =
                    this.extractionFinalizer)
            {
                DetectionResult dr = extractionFinalizer.dr();
                Logger logger = extractionFinalizer.logger();
                Document document = new JsonDocument(
                    out,
                    options.fieldsFilter(),
                    options.fieldsFilterOptions());
                document.addField(
                    CommonFields.BUILT_DATE,
                    HttpServerConfigBuilder.BUILT_DATE);
                MediaType mediaType = dr.mediaType();
                Parser parser = dr.parser();
                if (parser == null
                    || !TextExtractor.INSTANCE.checkConstraints(
                        options,
                        parser))
                {
                    logger.info("No suitable parser found");
                    document.addField(CommonFields.PARSED, false);
                } else {
                    TextExtractResult result;
                    boolean empty;
                    try (CountingWriter bodyTextWriter =
                            new CountingWriter(
                                document.addField(CommonFields.BODY_TEXT)))
                    {
                        result = TextExtractor.INSTANCE.extractText(
                            dr,
                            bodyTextWriter,
                            options);
                        // Position must be read before the writer is
                        // closed by the enclosing try
                        empty = bodyTextWriter.pos() == 0L;
                    }
                    int truncated = result.truncated();
                    if (truncated != -1) {
                        logger.warning(
                            "Data exceeded length limit, truncated to: "
                            + truncated);
                    }
                    // Document counts as parsed if any body text was
                    // written, if the result size exceeds one, or if
                    // no error was reported
                    document.addField(
                        CommonFields.PARSED,
                        !empty || result.size() > 1 || result.cause() == null);
                    result.metadata(document);
                    if (result.cause() != null) {
                        logger.log(
                            Level.INFO,
                            "Error occured during parsing",
                            result.cause());
                        document.addField(CommonFields.ERROR, result.cause());
                    }
                    mediaType = TextExtractor.INSTANCE.improveMimetype(
                        mediaType,
                        result.mimetype());
                }
                document.addField(
                    CommonFields.MIMETYPE,
                    mediaType.getBaseType().toString());
                document.close();
            }
        }
    }

    /**
     * Returns the link to the wiki page describing this handler.
     *
     * @return documentation URL
     */
    @Override
    public String toString() {
        return "https://wiki.yandex-team.ru/ps/tikaite#"
            + "izvlechenietekstovizfajjlovjandeks.diska";
    }
}

