package ru.yandex.tikaite.srw;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.CodingErrorAction;
import java.util.List;
import java.util.logging.Logger;

import cocaine.hpack.HeaderField;
import org.apache.http.HttpException;
import org.apache.http.HttpRequest;
import org.apache.http.HttpStatus;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.entity.ContentType;

import ru.yandex.charset.Decoder;
import ru.yandex.charset.Encoder;
import ru.yandex.client.cocaine.logging.CocaineLoggingService;
import ru.yandex.client.cocaine.unistorage.UnistorageService;
import ru.yandex.client.cocaine.worker.CocaineWorkerSession;
import ru.yandex.client.cocaine.worker.http.AbstractCocaineHttpRequestHandler;
import ru.yandex.client.cocaine.worker.http.CocaineDummyHttpRequestHandler;
import ru.yandex.http.util.ByteArrayEntityFactory;
import ru.yandex.http.util.CharsetUtils;
import ru.yandex.http.util.ServiceUnavailableException;
import ru.yandex.parser.string.NonEmptyValidator;
import ru.yandex.parser.uri.ScanningCgiParams;
import ru.yandex.sanitizer2.ByteArrayHtmlCollector;
import ru.yandex.sanitizer2.HtmlNode;
import ru.yandex.sanitizer2.HtmlPrinter;
import ru.yandex.sanitizer2.IdentityAttrPostProcessor;
import ru.yandex.sanitizer2.IdentityCssPostProcessor;
import ru.yandex.sanitizer2.NullUrlCollector;
import ru.yandex.sanitizer2.SanitizingHandler;
import ru.yandex.tikaite.util.TextExtractor;
import ru.yandex.util.storage.DataExtractorConfigBuilder;
import ru.yandex.util.storage.MailTextExtractor;
import ru.yandex.util.storage.StorageData;
import ru.yandex.util.storage.cocaine.CocaineStorageClient;

/**
 * Cocaine HTTP request handler which loads the data identified by the
 * {@code stid} request parameter from storage and sends it back to the
 * caller. Data whose content type is {@code text/html} is decoded, run
 * through the configured {@link SanitizingHandler} and re-encoded in the
 * charset accepted by the client; any other data is returned as is with its
 * stored content type.
 */
public class SrwSanitizerHandler extends AbstractCocaineHttpRequestHandler {
    private final CocaineStorageClient storageClient;
    private final SanitizingHandler sanitizer;

    /**
     * Creates a handler backed by the given storage service and sanitizer.
     *
     * @param logging logging service passed to the base handler
     * @param unistorage storage service used to fetch data by stid
     * @param sanitizer sanitizer applied to HTML data
     */
    public SrwSanitizerHandler(
        final CocaineLoggingService logging,
        final UnistorageService unistorage,
        final SanitizingHandler sanitizer)
    {
        super(logging, 0L);
        storageClient = new CocaineStorageClient(
            unistorage,
            MailTextExtractor.INSTANCE,
            new DataExtractorConfigBuilder().build());
        this.sanitizer = sanitizer;
    }

    /**
     * Fetches the data referenced by the {@code stid} CGI parameter and
     * responds with {@link HttpStatus#SC_OK} and either the sanitized HTML
     * or the untouched stored data.
     *
     * @param request incoming request; must carry a non-empty {@code stid}
     *     parameter
     * @param headers headers forwarded to the response handler
     * @param session worker session used for the storage request and for
     *     the response
     * @param logger logger passed to the storage client
     * @throws HttpException if the request cannot be served; an
     *     {@link IOException} raised while sanitizing is rethrown as
     *     {@link ServiceUnavailableException}
     */
    @Override
    protected void handle(
        final HttpRequest request,
        final List<HeaderField> headers,
        final CocaineWorkerSession session,
        final Logger logger)
        throws HttpException
    {
        Charset acceptedCharset = CharsetUtils.acceptedCharset(request);
        String stid = new ScanningCgiParams(request).get(
            "stid",
            NonEmptyValidator.INSTANCE);
        StorageData storageData =
            storageClient.sendStorageRequest(stid, session, logger);
        ContentType contentType = storageData.contentType();
        String mimetype = contentType.getMimeType();
        ByteArrayEntity entity;
        // Media types are case-insensitive, so compare ignoring case:
        // otherwise data labelled e.g. "Text/HTML" would skip sanitizing
        // and be returned verbatim by the pass-through branch below.
        if (ContentType.TEXT_HTML.getMimeType().equalsIgnoreCase(mimetype)) {
            Charset charset = contentType.getCharset();
            if (charset == null) {
                // No charset declared for the stored data, use the default
                charset = TextExtractor.DEFAULT_EMAIL_CHARSET;
            }
            try {
                // Undecodable input is replaced rather than reported
                Decoder decoder =
                    new Decoder(
                        charset.newDecoder()
                            .onMalformedInput(CodingErrorAction.REPLACE)
                            .onUnmappableCharacter(CodingErrorAction.REPLACE));
                storageData.processWith(decoder);
                // Reference is no longer needed; presumably dropped early
                // so the raw data can be reclaimed while sanitizing runs
                storageData = null;
                // Taken before sanitizing, while the decoder is still
                // referenced; passed to the output collector below
                // (presumably as an initial size hint)
                int len = decoder.length();
                HtmlNode root = sanitizer.sanitize(decoder, false);
                decoder = null;
                // Characters the accepted charset cannot represent are
                // replaced rather than reported
                ByteArrayHtmlCollector htmlCollector =
                    new ByteArrayHtmlCollector(
                        new Encoder(
                            acceptedCharset.newEncoder()
                                .onMalformedInput(
                                    CodingErrorAction.REPLACE)
                                .onUnmappableCharacter(
                                    CodingErrorAction.REPLACE)),
                        len);
                root.accept(
                    new HtmlPrinter<>(
                        sanitizer.config(),
                        htmlCollector,
                        NullUrlCollector.INSTANCE,
                        IdentityAttrPostProcessor.INSTANCE,
                        IdentityCssPostProcessor.INSTANCE));
                root = null;
                entity = htmlCollector.data().processWith(
                    ByteArrayEntityFactory.INSTANCE);
                entity.setContentType(
                    ContentType.TEXT_HTML
                        .withCharset(acceptedCharset).toString());
            } catch (IOException e) {
                // Impossible: not expected to happen here, but if it does
                // the cause is preserved instead of being swallowed
                throw new ServiceUnavailableException(e);
            }
        } else {
            // Not HTML: return the stored data with its stored content type
            entity = storageData.processWith(ByteArrayEntityFactory.INSTANCE);
            entity.setContentType(contentType.toString());
        }
        new CocaineDummyHttpRequestHandler(HttpStatus.SC_OK, entity)
            .handle(request, headers, session);
    }
}

