package ru.yandex.search.mail.tupita;

import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import ru.yandex.dbfields.MailIndexFields;
import ru.yandex.json.writer.JsonWriter;
import ru.yandex.json.xpath.JsonUnexpectedTokenException;
import ru.yandex.json.xpath.ValueUtils;
import ru.yandex.search.mail.kamaji.KamajiIndexer;
import ru.yandex.util.string.StringUtils;

/**
 * Merges the message meta and every parsed part into a single set of
 * fields and writes that same set of fields out as two documents
 * (hid {@code "0"} and hid {@code "1"}).
 *
 * <p>Values that collide on the same field name are de-duplicated, kept
 * in first-seen order and joined with a newline when written.
 */
public class AllDataInTwoPartsDocumentProvider {
    protected static final String DOCS = "docs";
    protected static final String MIMETYPE = "mimetype";
    protected static final String SUBMESSAGE = "message/rfc822";

    private static final String HEADERS = "headers";
    private static final String PREFIX = "prefix";
    // Number of identical documents emitted for every message
    private static final int PARTS_COUNT = 2;

    /**
     * Writes the indexation request for a single message.
     *
     * <p>The {@code docs} entry is removed from {@code tikaiteResult} and
     * every part map found in it is mutated and finally cleared, so the
     * passed object must not be reused by the caller afterwards.
     *
     * @param tikaiteResult parsed json, expected to be a map with a
     *     {@code docs} list of per-part maps, each one carrying a hid
     * @param writer destination for the resulting request
     * @param context supplies meta, prefix, logger and document urls
     * @return urls of the written documents, in the order they were written
     * @throws IOException if the writer fails
     * @throws JsonUnexpectedTokenException if {@code tikaiteResult} does not
     *     have the expected structure
     */
    public List<String> apply(
        final Object tikaiteResult,
        final JsonWriter writer,
        final TupitaIndexationContext context)
        throws IOException, JsonUnexpectedTokenException
    {
        Map<?, ?> json = ValueUtils.asMap(tikaiteResult);
        List<?> docs = ValueUtils.asList(json.remove(DOCS));

        Map<String, Set<String>> luceneDoc = new LinkedHashMap<>();
        // join all parts, first do with fast part
        addAll(luceneDoc, context.meta().toFastDocMap());

        // slow part of the meta, merged in below
        Map<String, String> fields = context.meta().toMap();

        Map<String, Map<?, ?>> docsMap = new LinkedHashMap<>(docs.size());
        Set<String> subMessagesPrefixes = new LinkedHashSet<>();
        for (Object docObject : docs) {
            Map<?, ?> doc = ValueUtils.asMap(docObject);
            String hid = ValueUtils.asString(doc.get(MailIndexFields.HID));
            String mimetype = ValueUtils.asStringOrNull(doc.get(MIMETYPE));
            if (SUBMESSAGE.equalsIgnoreCase(mimetype)) {
                subMessagesPrefixes.add(hid);
            }

            docsMap.put(hid, doc);
        }

        if (!subMessagesPrefixes.isEmpty()) {
            context.logger().info(
                "SubMessages found " + subMessagesPrefixes.size());
            dropSubMessagesHeaders(docsMap, subMessagesPrefixes);
        }

        // The meta fields are the same for every part, so add them once.
        // They are still added only when there is at least one part, which
        // keeps the output for an empty docs list unchanged.
        if (!docsMap.isEmpty()) {
            addAll(luceneDoc, fields);
        }

        for (Map<?, ?> doc : docsMap.values()) {
            // Mid will be already taken from meta
            // We will write our own url
            doc.remove(MailIndexFields.URL);

            for (Map.Entry<?, ?> entry : doc.entrySet()) {
                String key = ValueUtils.asString(entry.getKey());
                Object value = entry.getValue();
                // Fields supplied by the meta win over the parsed ones;
                // json nulls carry no text, so they are skipped instead of
                // failing the whole message on toString()
                if (value != null && !fields.containsKey(key)) {
                    add(luceneDoc, key, value.toString());
                }
            }
            doc.clear();
        }

        // Both are written explicitly for every output document
        luceneDoc.remove(MailIndexFields.HID);
        luceneDoc.remove(MailIndexFields.URL);

        writer.startObject();

        writer.key(PREFIX);
        writer.value(context.prefix());

        // Nothing should be left in the parsed parts by now
        // Let's write preserve fields and other stuff
        KamajiIndexer.writePreserveFields(writer, context);

        writer.key(DOCS);
        writer.startArray();

        List<String> urls = new ArrayList<>(PARTS_COUNT);
        for (int i = 0; i < PARTS_COUNT; i++) {
            String hid = String.valueOf(i);
            String url = context.url(hid);
            urls.add(url);

            writer.startObject();
            writer.key(MailIndexFields.URL);
            writer.value(url);
            writer.key(MailIndexFields.HID);
            writer.value(hid);

            for (Map.Entry<String, Set<String>> entry: luceneDoc.entrySet()) {
                writer.key(entry.getKey());
                writer.value(StringUtils.join(entry.getValue(), '\n'));
            }

            writer.endObject();
        }

        writer.endArray();
        writer.endObject();

        return urls;
    }

    // Removes headers from every part whose hid starts with a sub message hid
    private static void dropSubMessagesHeaders(
        final Map<String, Map<?, ?>> docsMap,
        final Set<String> subMessagesPrefixes)
    {
        for (Map.Entry<String, Map<?, ?>> docEntry: docsMap.entrySet()) {
            String hid = docEntry.getKey();
            for (String prefix: subMessagesPrefixes) {
                // NOTE(review): plain prefix match, so a sub message hid
                // "1.1" would also match part "1.10" -- confirm this is
                // intended for the hid format in use
                if (hid.startsWith(prefix)) {
                    docEntry.getValue().remove(HEADERS);
                    break;
                }
            }
        }
    }

    // Adds every entry of the source map to the merged document
    private static void addAll(
        final Map<String, Set<String>> luceneDoc,
        final Map<String, String> source)
    {
        for (Map.Entry<String, String> entry : source.entrySet()) {
            add(luceneDoc, entry.getKey(), entry.getValue());
        }
    }

    // Adds a single value to the field, creating the value set on first use
    private static void add(
        final Map<String, Set<String>> luceneDoc,
        final String key,
        final String value)
    {
        luceneDoc.computeIfAbsent(key, (k) -> new LinkedHashSet<>())
            .add(value);
    }
}
