package ru.yandex.tikaite.server;

import java.io.IOException;
import java.io.InputStream;
import java.util.logging.Logger;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.http.HttpEntity;
import org.apache.http.HttpException;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.protocol.HttpContext;
import org.apache.http.protocol.HttpRequestHandler;
import org.apache.james.mime4j.util.MimeUtil;
import org.apache.tika.io.TikaInputStream;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import ru.yandex.function.GenericAutoCloseable;
import ru.yandex.http.server.sync.BaseHttpServer;
import ru.yandex.http.util.CharsetUtils;
import ru.yandex.http.util.HeadersParser;
import ru.yandex.http.util.SynchronizedHttpContext;
import ru.yandex.http.util.UnsupportedMediaTypeException;
import ru.yandex.io.LimitedInputStream;
import ru.yandex.io.StringBuilderWriter;
import ru.yandex.mail.mime.BodyDecoder;
import ru.yandex.parser.string.NonEmptyValidator;
import ru.yandex.parser.string.PositiveLongValidator;
import ru.yandex.parser.uri.CgiParams;
import ru.yandex.tikaite.util.DetectionResult;
import ru.yandex.tikaite.util.TextExtractOptions;
import ru.yandex.tikaite.util.TextExtractResult;
import ru.yandex.tikaite.util.TextExtractor;

/**
 * HTTP request handler which fetches a stored object (or one of its parts)
 * from the storage, extracts its text with {@link TextExtractor} and returns
 * the text as a {@code text/plain} response.
 *
 * <p>Recognized CGI parameters (as read by {@link #handle}):
 * <ul>
 * <li>{@code stid} &mdash; required, non-empty; appended to the
 *     {@code /get/} storage query;</li>
 * <li>{@code encoding} &mdash; optional, defaults to
 *     {@link MimeUtil#ENC_BINARY}; the special value {@code auto} makes the
 *     handler look the transfer encoding up in the object meta;</li>
 * <li>{@code hid} &mdash; part id; required when {@code encoding=auto},
 *     optional otherwise.</li>
 * </ul>
 */
public class TextHandler implements HttpRequestHandler {
    private static final String HID = "hid";

    private final Server server;

    public TextHandler(final Server server) {
        this.server = server;
    }

    /**
     * Extracts text from the requested object and stores it in the response
     * entity.
     *
     * <p>If the extracted text was truncated, the {@code Truncated} response
     * header is set to the value reported by the extractor.
     *
     * @param request incoming request, the source of CGI parameters and of
     *     the accepted response charset
     * @param response response which receives the extracted text
     * @param context request context, expected to carry the request logger
     *     under {@link BaseHttpServer#LOGGER}
     * @throws HttpException if the part id is not found in meta, no parser
     *     was detected for the data, or text extraction failed; any other
     *     failure raised while talking to the storage is converted with
     *     {@code Server.toHttpException}
     * @throws IOException declared by the handler interface
     */
    @Override
    // "try": connectionTerminator below is never referenced inside its try
    // body, it only has to be closed when the body completes
    @SuppressWarnings("try")
    public void handle(
        final HttpRequest request,
        final HttpResponse response,
        final HttpContext context)
        throws HttpException, IOException
    {
        Logger logger = (Logger) context.getAttribute(BaseHttpServer.LOGGER);
        CgiParams params = new CgiParams(request);
        String stid = params.get("stid", NonEmptyValidator.INSTANCE);
        String query = "/get/" + stid + '?';
        String encoding = params.getString("encoding", MimeUtil.ENC_BINARY);
        String hid;
        if (encoding.equals("auto")) {
            // Encoding lookup is performed per part, so hid is mandatory here
            hid = params.get(HID, NonEmptyValidator.INSTANCE);
            encoding = detectEncoding(query, hid, logger);
            if (encoding == null) {
                throw new UnsupportedMediaTypeException(
                    "Failed to find hid " + hid + " in meta");
            }
        } else {
            hid = params.get(HID, (String) null, NonEmptyValidator.INSTANCE);
        }
        // Without hid the raw object is requested, otherwise a single part
        if (hid == null) {
            query = query + "raw";
        } else {
            query = query + "gettype=part&part=" + hid;
        }
        StringBuilderWriter sw = new StringBuilderWriter();
        HttpClientContext clientContext =
            new HttpClientContext(new SynchronizedHttpContext());
        try (CloseableHttpResponse storageResponse =
            server.sendStorageRequest(query, logger, clientContext))
        {
            HttpEntity entity = storageResponse.getEntity();
            long contentLength = entity.getContentLength();
            TextExtractOptions options =
                DiskHandler.extractOptions(params, contentLength, server);
            // Body is decoded according to the transfer encoding before the
            // type detection is run on it
            DetectionResult dr =
                TextExtractor.INSTANCE.detectStreamType(
                    TikaInputStream.get(
                        BodyDecoder.INSTANCE.apply(
                            entity.getContent(),
                            encoding),
                        options.createTemporaryResources(),
                        contentLength),
                    options);
            try (GenericAutoCloseable<IOException> connectionTerminator =
                    DiskHandler.selectConnectionTerminator(
                        dr,
                        options,
                        clientContext))
            {
                if (dr.parser() == null) {
                    throw new UnsupportedMediaTypeException();
                }
                logger.info("Mimetype detected: " + dr.mediaType());
                TextExtractResult result =
                    TextExtractor.INSTANCE.extractText(dr, sw, options);
                if (result.cause() != null) {
                    throw new UnsupportedMediaTypeException(
                        "Failed to extract text",
                        result.cause());
                }
                int truncated = result.truncated();
                // -1 is treated as "nothing was truncated"
                if (truncated != -1) {
                    logger.warning(
                        "Data exceeded length limit, truncated to: "
                        + truncated);
                    response.addHeader(
                        "Truncated",
                        Integer.toString(truncated));
                }
            }
        } catch (Throwable t) {
            throw Server.toHttpException(t);
        }
        response.setEntity(
            new StringEntity(
                sw.toString(),
                ContentType.TEXT_PLAIN.withCharset(
                    CharsetUtils.acceptedCharset(request))));
    }

    /**
     * Looks for a {@code part} element with the given {@code id} attribute
     * among all descendants of {@code element} and returns its transfer
     * encoding.
     *
     * <p>{@code getElementsByTagName} already returns every descendant
     * {@code part} element in document order, so a single linear scan finds
     * the first matching part. No recursion into children is required;
     * recursing would rescan each subtree once per ancestor, which is
     * exponential in the nesting depth.
     *
     * @param element element whose descendants are searched
     * @param hid part id to look for
     * @return value of the {@code content_transfer_encoding} attribute of the
     *     first matching part, {@link MimeUtil#ENC_BINARY} if that attribute
     *     is absent or empty, or {@code null} if no part matches
     */
    private static String findHid(final Element element, final String hid) {
        NodeList parts = element.getElementsByTagName("part");
        int count = parts.getLength();
        for (int i = 0; i < count; ++i) {
            Node node = parts.item(i);
            if (node.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element part = (Element) node;
            if (hid.equals(part.getAttribute("id"))) {
                String encoding =
                    part.getAttribute("content_transfer_encoding");
                if (encoding.isEmpty()) {
                    return MimeUtil.ENC_BINARY;
                }
                return encoding;
            }
        }
        return null;
    }

    /**
     * Creates a {@link DocumentBuilderFactory} hardened against XXE: secure
     * processing is enabled, external general and parameter entities are not
     * resolved and external DTDs are not loaded.
     *
     * @return newly created factory; a fresh instance is returned on every
     *     call
     * @throws ParserConfigurationException if the underlying parser
     *     implementation does not support one of the requested features
     */
    private static DocumentBuilderFactory newSecureDocumentBuilderFactory()
        throws ParserConfigurationException
    {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setFeature(
            javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING,
            true);
        factory.setFeature(
            "http://xml.org/sax/features/external-general-entities",
            false);
        factory.setFeature(
            "http://xml.org/sax/features/external-parameter-entities",
            false);
        factory.setFeature(
            "http://apache.org/xml/features/nonvalidating/load-external-dtd",
            false);
        return factory;
    }

    /**
     * Requests object meta from the storage and looks up the transfer
     * encoding of the given part in it.
     *
     * <p>Only the first {@code X-Mulca-Server-Xml-Header-Size} bytes of the
     * response body (the value is taken from the response header of that
     * name) are fed to the XML parser.
     *
     * @param query storage query prefix, {@code gettype=meta} is appended
     * @param hid part id to look for
     * @param logger logger passed to the storage request
     * @return transfer encoding of the part, or {@code null} if the part is
     *     not present in meta
     * @throws HttpException {@link UnsupportedMediaTypeException} if the meta
     *     cannot be parsed, otherwise whatever
     *     {@code Server.toHttpException} produces for the failure
     */
    private String detectEncoding(
        final String query,
        final String hid,
        final Logger logger)
        throws HttpException
    {
        try (CloseableHttpResponse response =
                server.sendStorageRequest(query + "gettype=meta", logger);
            InputStream meta = new LimitedInputStream(
                response.getEntity().getContent(),
                new HeadersParser(response).get(
                    "X-Mulca-Server-Xml-Header-Size",
                    PositiveLongValidator.INSTANCE)))
        {
            // Meta is not trusted input here, so the parser must not resolve
            // external entities or DTDs
            Element root = newSecureDocumentBuilderFactory()
                .newDocumentBuilder()
                .parse(meta)
                .getDocumentElement();
            root.normalize();
            return findHid(root, hid);
        } catch (ParserConfigurationException | SAXException e) {
            throw new UnsupportedMediaTypeException("Failed to parse meta", e);
        } catch (Throwable t) {
            throw Server.toHttpException(t);
        }
    }

    /**
     * Returns a link to an internal wiki page section (presumably the
     * documentation of this handler).
     */
    @Override
    public String toString() {
        return "https://wiki.yandex-team.ru/ps/tikaite#"
            + "izvlechenietekstaizattachejjifajjlov";
    }
}

