package ru.yandex.mail.so.factors.extractors;

import java.io.IOException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.function.Predicate;

import org.apache.http.concurrent.FutureCallback;
import org.apache.james.mime4j.MimeException;
import org.apache.james.mime4j.parser.MimeStreamParser;

import ru.yandex.function.ByteArrayProcessable;
import ru.yandex.function.EmptyRunnable;
import ru.yandex.function.NullConsumer;
import ru.yandex.json.dom.JsonList;
import ru.yandex.json.dom.JsonMap;
import ru.yandex.json.dom.PositionSavingContainerFactory;
import ru.yandex.json.parser.JsonException;
import ru.yandex.mail.so.factors.SoFactor;
import ru.yandex.mail.so.factors.SoFunctionInputs;
import ru.yandex.mail.so.factors.types.BinarySoFactorType;
import ru.yandex.mail.so.factors.types.MailMetaSoFactorType;
import ru.yandex.mail.so.factors.types.SoFactorType;
import ru.yandex.mail.so.factors.types.TikaiteDocsSoFactorType;
import ru.yandex.parser.config.ConfigException;
import ru.yandex.parser.config.IniConfig;
import ru.yandex.parser.mail.errors.ErrorInfo;
import ru.yandex.parser.mail.received.ReceivedChainParser;
import ru.yandex.parser.string.NonNegativeIntegerValidator;
import ru.yandex.sanitizer2.PageHeaderException;
import ru.yandex.sanitizer2.SanitizingHandler;
import ru.yandex.sanitizer2.config.ImmutableSanitizingConfig;
import ru.yandex.search.document.mail.MailMetaInfo;
import ru.yandex.search.document.mail.SafeMimeTokenStream;
import ru.yandex.tikaite.config.ImmutableTextExtractorConfig;
import ru.yandex.tikaite.config.TextExtractorConfigBuilder;
import ru.yandex.tikaite.mimeparser.HandlerManager;
import ru.yandex.tikaite.mimeparser.MailDocumentCollectorFactory;
import ru.yandex.tikaite.mimeparser.ManagedHandler;
import ru.yandex.tikaite.mimeparser.RootMessageHandler;
import ru.yandex.tikaite.util.TextExtractOptions;

/**
 * Factors extractor that takes a single {@code RAW_MAIL} binary input, parses
 * it as a MIME message and produces two outputs, in this order:
 * {@code MAIL_META} (message meta information) and {@code TIKAITE_DOCS}
 * (list of documents collected from the message parts).
 */
public class TikaiteExtractor implements SoFactorsExtractor {
    private static final List<SoFactorType<?>> INPUTS =
        Collections.singletonList(BinarySoFactorType.RAW_MAIL);
    // Shared between all callers of outputs(), so it is exposed as a
    // read-only view: Arrays.asList alone still allows set().
    private static final List<SoFactorType<?>> OUTPUTS =
        Collections.unmodifiableList(
            Arrays.asList(
                MailMetaSoFactorType.MAIL_META,
                TikaiteDocsSoFactorType.TIKAITE_DOCS));
    // Result passed to the callback when there is no input mail: one null
    // per declared output. Read-only, because the same instance is handed to
    // every callback.
    private static final List<SoFactor<?>> NULL_RESULT =
        Collections.unmodifiableList(Arrays.asList(null, null));

    private final ImmutableTextExtractorConfig textExtractorConfig;
    private final SanitizingHandler sanitizer;
    private final Predicate<? super InetAddress> yandexNets;
    private final int maxAttachmentsToParse;

    /**
     * Creates the extractor from the given configuration section.
     *
     * @param config configuration to read text extractor settings and the
     *     {@code max-attachments-to-parse} value from
     * @throws ConfigException if the configuration is invalid or the
     *     sanitizer cannot be created from it
     * @throws IOException declared by the underlying configuration builders
     */
    public TikaiteExtractor(final IniConfig config)
        throws ConfigException, IOException
    {
        // NOTE(review): the config is built, then wrapped into a second
        // builder and built again; kept as is, the reason is not visible
        // from this file.
        textExtractorConfig =
            new TextExtractorConfigBuilder(
                new TextExtractorConfigBuilder(config).build())
                .build();
        yandexNets = textExtractorConfig
            .receivedChainParserConfig()
            .yandexNetsConfig()
            .createIpChecker();
        // Read every plain config value before the sanitizer is created:
        // if this lookup threw after the sanitizer had been constructed,
        // nobody would be able to close it.
        maxAttachmentsToParse = config.get(
            "max-attachments-to-parse",
            NonNegativeIntegerValidator.INSTANCE);
        ImmutableSanitizingConfig sanitizingConfig =
            textExtractorConfig.sanitizingConfig();
        if (sanitizingConfig == null) {
            sanitizer = null;
        } else {
            try {
                sanitizer = new SanitizingHandler(
                    sanitizingConfig,
                    NullConsumer.instance(),
                    EmptyRunnable.INSTANCE);
            } catch (PageHeaderException e) {
                throw new ConfigException(e);
            }
        }
    }

    /**
     * Closes the sanitizer, if one was configured.
     */
    @Override
    public void close() {
        if (sanitizer != null) {
            sanitizer.close();
        }
    }

    /**
     * @return read-only single-element list containing {@code RAW_MAIL}
     */
    @Override
    public List<SoFactorType<?>> inputs() {
        return INPUTS;
    }

    /**
     * @return read-only list containing {@code MAIL_META} and
     *     {@code TIKAITE_DOCS}, in this order
     */
    @Override
    public List<SoFactorType<?>> outputs() {
        return OUTPUTS;
    }

    /**
     * Parses the raw mail and completes the callback with a two-element
     * list: the {@code MAIL_META} factor followed by the
     * {@code TIKAITE_DOCS} factor.
     *
     * <p>If there is no raw mail in the inputs, the callback is completed
     * with a read-only list of two {@code null}s. If parsing fails with
     * {@link IOException}, {@code JsonException} or {@code MimeException},
     * the failure is reported to the context errors consumer and the
     * callback is still completed with whatever was collected before the
     * failure; the docs element is {@code null} when no docs list is
     * available.
     *
     * @param context extraction context, used here to report errors
     * @param inputs function inputs; element 0 is read as {@code RAW_MAIL}
     * @param callback receives the resulting factors list
     */
    @Override
    public void extract(
        final SoFactorsExtractorContext context,
        final SoFunctionInputs inputs,
        final FutureCallback<? super List<SoFactor<?>>> callback)
    {
        ByteArrayProcessable rawMail =
            inputs.get(0, BinarySoFactorType.RAW_MAIL);
        if (rawMail == null) {
            callback.completed(NULL_RESULT);
            return;
        }
        // Root JSON object handed to the document collector; the docs list
        // is read from it under MailMetaInfo.DOCS after parsing.
        JsonMap mimeRoot =
            new JsonMap(PositionSavingContainerFactory.INSTANCE);
        MailMetaInfo meta = new MailMetaInfo(
            textExtractorConfig.headersLengthLimit(),
            textExtractorConfig.headerLengthLimit(),
            yandexNets);
        JsonList docs = null;
        try {
            SafeMimeTokenStream mimeTokenStream =
                SafeMimeTokenStream.parseMeta(rawMail.content(), meta);
            ManagedHandler handler = new RootMessageHandler(
                meta,
                new TextExtractOptions()
                    .mode(TextExtractOptions.Mode.SO2)
                    .sanitizer(sanitizer)
                    .writeLimit(textExtractorConfig.mailLengthLimit())
                    .headersLimit(textExtractorConfig.headersLengthLimit())
                    .headerLimit(textExtractorConfig.headerLengthLimit())
                    .maxHidDepth(textExtractorConfig.maxHidDepth())
                    .maxParts(textExtractorConfig.maxParts())
                    .maxPartLength(textExtractorConfig.maxPartLength())
                    .mimeTokenStream(mimeTokenStream),
                new MailDocumentCollectorFactory(
                    new DomDocumentCollectorFactory(
                        mimeRoot,
                        maxAttachmentsToParse)));
            HandlerManager manager = new HandlerManager();
            manager.push(handler);
            MimeStreamParser parser = new MimeStreamParser(mimeTokenStream);
            parser.setContentHandler(manager);
            // The token stream was already created from the mail content
            // above, so no input stream is passed here.
            parser.parse(null);
            docs = mimeRoot.getListOrNull(MailMetaInfo.DOCS);
            ReceivedChainParser receivedChainParser =
                meta.receivedChainParser();
            if (receivedChainParser != null) {
                ErrorInfo errorInfo = receivedChainParser.errorInfo();
                if (errorInfo != null) {
                    context.errorsConsumer().accept(errorInfo);
                }
            }
        } catch (IOException | JsonException | MimeException e) {
            // Malformed mail is not fatal: report it and fall through so
            // that the meta gathered so far is still returned.
            context.errorsConsumer().accept(
                new ErrorInfo(
                    ErrorInfo.Scope.MALFORMED_INPUT,
                    ErrorInfo.Type.SYNTAX_ERROR,
                    "RAW_MAIL parse failed",
                    e));
        }
        // Element order must match OUTPUTS: meta first, then docs.
        List<SoFactor<?>> factors = new ArrayList<>(2);
        factors.add(MailMetaSoFactorType.MAIL_META.createFactor(meta));
        if (docs == null) {
            factors.add(null);
        } else {
            factors.add(
                TikaiteDocsSoFactorType.TIKAITE_DOCS.createFactor(docs));
        }
        callback.completed(factors);
    }

    /**
     * Delegates registration of internals to
     * {@code TikaiteExtractorFactory.INSTANCE}.
     *
     * @param registry registry to register internals in
     * @throws ConfigException if the delegate fails to register
     */
    @Override
    public void registerInternals(final SoFactorsExtractorsRegistry registry)
        throws ConfigException
    {
        TikaiteExtractorFactory.INSTANCE.registerInternals(registry);
    }
}

