package ru.yandex.mail.so.factors.extractors;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.http.concurrent.FutureCallback;

import ru.yandex.json.dom.JsonBadCastException;
import ru.yandex.json.dom.JsonList;
import ru.yandex.json.dom.JsonMap;
import ru.yandex.json.dom.JsonObject;
import ru.yandex.json.parser.JsonException;
import ru.yandex.mail.so.factors.SoFactor;
import ru.yandex.mail.so.factors.SoFunctionInputs;
import ru.yandex.mail.so.factors.types.SoFactorType;
import ru.yandex.mail.so.factors.types.TikaiteDocSoFactorType;
import ru.yandex.mail.so.factors.types.TikaiteDocsSoFactorType;
import ru.yandex.parser.config.ConfigException;
import ru.yandex.search.document.mail.MailMetaInfo;
import ru.yandex.util.string.StringUtils;

/**
 * Extractor that takes the list of Tikaite documents of a message and
 * produces the single document selected as the text part of that message.
 *
 * <p>Consumes {@link TikaiteDocsSoFactorType#TIKAITE_DOCS} and emits
 * {@link TikaiteDocSoFactorType#TIKAITE_DOC}.
 */
public enum TextPartExtractor implements SoFactorsExtractor {
    INSTANCE;

    private static final List<SoFactorType<?>> INPUTS =
        Collections.singletonList(TikaiteDocsSoFactorType.TIKAITE_DOCS);
    private static final List<SoFactorType<?>> OUTPUTS =
        Collections.singletonList(TikaiteDocSoFactorType.TIKAITE_DOC);

    /**
     * Does nothing: this extractor has no fields of its own to release.
     */
    @Override
    public void close() {
    }

    /**
     * @return single-element list with the Tikaite documents list type
     */
    @Override
    public List<SoFactorType<?>> inputs() {
        return INPUTS;
    }

    /**
     * @return single-element list with the Tikaite document type
     */
    @Override
    public List<SoFactorType<?>> outputs() {
        return OUTPUTS;
    }

    /**
     * Tells whether the content type is acceptable for a text part:
     * absent, empty, {@code text/html} or {@code text/plain}.
     */
    private static boolean isTextContentType(final String contentType) {
        return contentType == null
            || contentType.isEmpty()
            || "text/html".equals(contentType)
            || "text/plain".equals(contentType);
    }

    /**
     * Tells whether {@code hid} starts with at least one of the prefixes.
     */
    private static boolean startsWithAny(
        final String hid,
        final List<String> prefixes)
    {
        for (String prefix : prefixes) {
            if (hid.startsWith(prefix)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Selects the text part among the given documents.
     *
     * <p>Documents are scanned in list order. Documents without a hid are
     * ignored. Every {@code message/rfc822} document contributes a prefix
     * built from its hid and {@code '.'}; a later document whose hid
     * starts with such a prefix is treated as belonging to a nested message
     * and is not selected. Among the remaining documents, the ones
     * without an attach size and with an absent, empty, {@code text/html}
     * or {@code text/plain} content type qualify, and the last qualifying
     * document wins.
     *
     * @param docs documents to scan
     * @return the last qualifying document, or {@link JsonMap#EMPTY} if
     *     no document qualifies
     * @throws JsonBadCastException if a list element cannot be treated as
     *     a map or a field cannot be read
     */
    public static JsonMap extractTextPart(final JsonList docs)
        throws JsonBadCastException
    {
        List<String> rfc822Prefixes = new ArrayList<>();
        JsonMap selected = JsonMap.EMPTY;
        for (JsonObject item : docs) {
            JsonMap candidate = item.asMap();
            String hid = candidate.getOrNull(MailMetaInfo.HID);
            if (hid == null) {
                continue;
            }
            String contentType =
                candidate.getOrNull(MailMetaInfo.CONTENT_TYPE);
            if ("message/rfc822".equals(contentType)) {
                // Remember the nested message so that parts listed after
                // it with a matching hid prefix are skipped
                rfc822Prefixes.add(StringUtils.concat(hid, '.'));
                continue;
            }
            // Attach size is read before the content type is inspected,
            // for every non-rfc822 document that has a hid
            if (candidate.getOrNull(MailMetaInfo.ATTACHSIZE) != null) {
                continue;
            }
            if (!isTextContentType(contentType)) {
                continue;
            }
            if (startsWithAny(hid, rfc822Prefixes)) {
                continue;
            }
            // No break here: a later qualifying document replaces this one
            selected = candidate;
        }

        return selected;
    }

    /**
     * Reads the documents list from the first input and completes the
     * callback with the selected text part.
     *
     * <p>The callback is completed with {@code NULL_RESULT} when the input
     * is absent or empty, or when no text part was selected. It is failed
     * with the caught {@link JsonException} if the selection throws one.
     *
     * @param context extraction context, not used by this extractor
     * @param inputs function inputs, documents list is taken at index 0
     * @param callback receiver of the result or of the failure
     */
    @Override
    public void extract(
        final SoFactorsExtractorContext context,
        final SoFunctionInputs inputs,
        final FutureCallback<? super List<SoFactor<?>>> callback)
    {
        JsonList docs = inputs.get(0, TikaiteDocsSoFactorType.TIKAITE_DOCS);
        if (docs == null || docs.isEmpty()) {
            callback.completed(NULL_RESULT);
            return;
        }
        try {
            JsonMap textPart = extractTextPart(docs);
            if (!textPart.isEmpty()) {
                callback.completed(
                    Collections.singletonList(
                        TikaiteDocSoFactorType.TIKAITE_DOC.createFactor(
                            textPart)));
                return;
            }
            callback.completed(NULL_RESULT);
        } catch (JsonException e) {
            callback.failed(e);
        }
    }

    /**
     * Registers the input and the output factor types of this extractor
     * in the types registry of the given extractors registry.
     *
     * @param registry extractors registry providing the types registry
     * @throws ConfigException declared by the interface contract;
     *     presumably raised by the types registry on a failed registration
     */
    @Override
    public void registerInternals(final SoFactorsExtractorsRegistry registry)
        throws ConfigException
    {
        registry.typesRegistry().registerSoFactorType(
            TikaiteDocsSoFactorType.TIKAITE_DOCS);
        registry.typesRegistry().registerSoFactorType(
            TikaiteDocSoFactorType.TIKAITE_DOC);
    }
}

