package ru.yandex.msearch.jobs;

import java.io.File;
import java.io.PrintStream;
import java.io.IOException;

import java.text.ParseException;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

import org.json.JSONObject;
import org.json.JSONArray;
import org.json.JSONException;

import ru.yandex.msearch.Config;
import ru.yandex.msearch.FieldConfig;
import ru.yandex.msearch.Index;
import ru.yandex.msearch.PrimaryKey;
import ru.yandex.msearch.Searcher;
import ru.yandex.msearch.Shard;
import ru.yandex.msearch.parallel.ParallelExec;
import ru.yandex.msearch.parallel.ParaWork;

import ru.yandex.search.prefix.Prefix;
import ru.yandex.search.prefix.PrefixParser;

public class ShardsRemoverJob extends PerShardJobBase {
    private static final int MAGIC_MOD = 65534;

    /**
     * Creates the job by forwarding every argument unchanged to the
     * {@link PerShardJobBase} constructor; no additional state is set up
     * here.
     *
     * @param file passed through to the base class
     * @param obj JSON object passed through to the base class
     * @param index index passed through to the base class
     * @throws IOException can only originate from the base-class constructor
     */
    public ShardsRemoverJob(
        final File file,
        final JSONObject obj,
        final Index index)
        throws IOException
    {
        super(file, obj, index);
    }

    /**
     * Intentionally empty: this job loads no values of its own.
     * NOTE(review): presumably a hook called by {@link PerShardJobBase}
     * while parsing the job description -- confirm against the base class.
     */
    protected void doLoadValues() throws JSONException {
    }

    /**
     * Builds the worker that processes a single shard for this job.
     *
     * @param shardNo number of the shard the worker is created for
     * @return a new {@code RemoveWork} bound to this job, its index, its
     *     user shard range and its sharding fields
     */
    protected SingleShardJobBase createJobForShard(final int shardNo) {
        // Value handed to the worker as its retryCount constructor argument.
        final int retryCount = 1;
        final RemoveWork work = new RemoveWork(
            index,
            shardNo,
            userShardStart,
            userShardEnd,
            shardingFields,
            retryCount,
            this);
        return work;
    }

    /**
     * Returns a name of the form {@code ShardsRemover_<start>-<end>} built
     * from the user shard range of this job.
     *
     * @return the textual name of this job
     */
    @Override
    public String toString() {
        return "ShardsRemover_" + userShardStart + "-" + userShardEnd;
    }

    /**
     * Two removal jobs are equal when they cover the same user shard range.
     *
     * @param o object to compare with
     * @return {@code true} if {@code o} is a {@code ShardsRemoverJob} with
     *     the same {@code userShardStart} and {@code userShardEnd}
     */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof ShardsRemoverJob)) {
            return false;
        }
        final ShardsRemoverJob that = (ShardsRemoverJob) o;
        return that.userShardStart == userShardStart
            && that.userShardEnd == userShardEnd;
    }

    /**
     * Combines the two range bounds into one hash: the end bound is shifted
     * left by 16 bits and OR-ed with the start bound.
     *
     * @return hash code derived from the same fields {@code equals} compares
     */
    @Override
    public int hashCode() {
        final int shiftedEnd = userShardEnd << 16;
        return shiftedEnd | userShardStart;
    }

    private static class RemoveWork extends SingleShardJobBase {
        private final PrefixParser prefixParser;

        /**
         * Creates the worker. All arguments are forwarded unchanged to the
         * {@link SingleShardJobBase} constructor; in addition the prefix
         * parser is taken from the configuration of {@code index}.
         *
         * @param index index to operate on; also the source of the prefix
         *     parser
         * @param shardNo passed through to the base class
         * @param userShardStart passed through to the base class
         * @param userShardEnd passed through to the base class
         * @param shardingFields passed through to the base class
         * @param retryCount passed through to the base class
         * @param parent passed through to the base class
         */
        public RemoveWork(
            final Index index,
            final int shardNo,
            final int userShardStart,
            final int userShardEnd,
            final Set<String> shardingFields,
            final int retryCount,
            final ShardsRemoverJob parent)
        {
            super(
                index,
                shardNo,
                userShardStart,
                userShardEnd,
                shardingFields,
                retryCount,
                parent);
            // Cached once; used by extractPrefix for every term.
            prefixParser = index.config().prefixParser();
        }

        /**
         * Flushes the shard this worker is bound to by calling
         * {@code flush(false, null)} followed by {@code doFlush(true)} on it.
         *
         * @throws IOException if either call fails for any reason; the
         *     original exception is attached as the cause so its stack trace
         *     is not lost
         */
        void tryFlush() throws IOException {
            try {
                index.getShard(shardNo).flush(false, null);
                index.getShard(shardNo).doFlush(true);
            } catch (Exception e) {
                // Message text is unchanged; the cause is now preserved.
                throw new IOException(
                    "Failed to flush shard <" + shardNo + ">: "
                        + e.toString(),
                    e);
            }
        }

        /**
         * Dispatches to the single-field or the multi-field removal
         * strategy depending on how many sharding fields are configured.
         *
         * @param reader reader over the shard being processed
         * @return the count reported by the chosen strategy
         * @throws IOException propagated from the chosen strategy
         */
        private int processReader(final IndexReader reader) throws IOException {
            final boolean singleField = shardingFields.size() == 1;
            return singleField
                ? handleSimpleSharding(reader)
                : handleMultiFieldSharding(reader);
        }

        /**
         * Removal strategy for exactly one sharding field: walks every term
         * of that field, extracts its prefix, and deletes the whole term via
         * {@code index.deleteTerm} when {@code prefix.hash() % MAGIC_MOD}
         * lies in {@code [userShardStart, userShardEnd]} (both inclusive).
         *
         * <p>Failures on a single term (prefix parsing or deletion) are
         * logged at SEVERE and the term is skipped; the scan continues.
         *
         * @param reader reader over the shard being processed
         * @return the sum of {@code docFreq()} over all successfully deleted
         *     terms, or 0 if the reader has no fields or no such field
         * @throws IOException if the sharding field is not configured, or
         *     if term enumeration fails
         */
        private int handleSimpleSharding(final IndexReader reader)
            throws IOException
        {
            // Shared prefix of every log/exception message of this method.
            final String logPrefix = "ShardsRemover: (luceneShard=" + shardNo
                + ", fromShard=" + userShardStart
                + ", toShard=" + userShardEnd
                + "): ";

            final Fields fields = MultiFields.getFields(reader);
            if (fields == null) {
                if (logger.isLoggable(Level.INFO)) {
                    logger.info(
                        logPrefix + "No fields available (empty index?)");
                }
                return 0;
            }

            final String field = shardingFields.iterator().next();

            final FieldConfig fieldConfig = config.fieldConfig(field);
            if (fieldConfig == null) {
                throw new IOException(logPrefix + "Unknown field: " + field);
            }
            final Terms terms = fields.terms(field);
            if (terms == null) {
                if (logger.isLoggable(Level.INFO)) {
                    logger.info(logPrefix + "No field named: " + field);
                }
                return 0;
            }

            int deletedCount = 0;
            final TermsEnum te = terms.iterator();
            for (BytesRef term = te.next(); term != null; term = te.next()) {
                final String termText = term.utf8ToString();

                final Prefix prefix;
                final long userShard;
                try {
                    prefix = extractPrefix(termText, fieldConfig);
                    userShard = prefix.hash() % MAGIC_MOD;
                } catch (Exception e) {
                    if (logger.isLoggable(Level.SEVERE)) {
                        logger.log(
                            Level.SEVERE,
                            logPrefix
                                + "Can't extract shard number from term="
                                + termText + ", field=" +  field,
                            e);
                    }
                    continue;
                }

                // Terms outside the requested user shard range are kept.
                if (userShard < userShardStart || userShard > userShardEnd) {
                    continue;
                }

                try {
                    index.deleteTerm(prefix, new Term(field, termText));
                    deletedCount += te.docFreq();
                } catch (Exception e) {
                    if (logger.isLoggable(Level.SEVERE)) {
                        logger.log(
                            Level.SEVERE,
                            logPrefix
                                + "Can't delete document with term="
                                + termText + ", field=" +  field,
                            e);
                    }
                }
            }
            return deletedCount;
        }

        /**
         * Removal strategy used when the number of sharding fields is not
         * exactly one. Works in two passes over the terms of every sharding
         * field:
         * <ol>
         * <li>All documents start out marked for deletion; every live
         *     document that has at least one term whose
         *     {@code prefix.hash() % MAGIC_MOD} lies outside
         *     {@code [userShardStart, userShardEnd]} is unmarked.</li>
         * <li>For every term whose value lies inside the range, each
         *     still-marked document is loaded (primary key fields only) and
         *     deleted by primary key.</li>
         * </ol>
         *
         * <p>Per-term and per-document failures are logged at SEVERE and
         * skipped. A failure while deleting aborts the remaining documents
         * of the current term only.
         *
         * <p>NOTE(review): a document reachable through several in-range
         * terms is deleted and counted once per such term. The original
         * code allocated an unused {@code alreadyDeleted} bit vector, which
         * suggests de-duplication was intended -- confirm before relying on
         * the returned count.
         *
         * @param reader reader over the shard being processed
         * @return number of {@code deleteDocument} calls that completed
         * @throws IOException if a sharding field is not configured, or if
         *     term enumeration fails
         */
        private int handleMultiFieldSharding(final IndexReader reader)
            throws IOException
        {
            // Shared prefix of every log/exception message of this method.
            final String logPrefix = "ShardsRemover: (luceneShard=" + shardNo
                + ", fromShard=" + userShardStart
                + ", toShard=" + userShardEnd
                + "): ";

            final Bits skipDocs = MultiFields.getDeletedDocs(reader);
            final Fields fields = MultiFields.getFields(reader);

            if (fields == null) {
                if (logger.isLoggable(Level.INFO)) {
                    logger.info(
                        logPrefix + "No fields available (empty index?)");
                }
                return 0;
            }

            // Start with every document marked; the first pass unmarks the
            // ones that must survive.
            final int maxDoc = reader.maxDoc();
            final BitVector deleteDocs = new BitVector(maxDoc);
            for (int i = 0; i < maxDoc; i++) {
                deleteDocs.set(i);
            }

            //first pass: collect documents to delete
            for (String field : shardingFields) {
                final FieldConfig fieldConfig = config.fieldConfig(field);
                if (fieldConfig == null) {
                    throw new IOException(
                        logPrefix + "Unknown field: " + field);
                }
                final Terms terms = fields.terms(field);
                if (terms == null) {
                    continue;
                }
                final TermsEnum te = terms.iterator();
                DocsEnum td = null;
                for (BytesRef term = te.next(); term != null; term = te.next()) {
                    final String termText = term.utf8ToString();
                    try {
                        final Prefix prefix =
                            extractPrefix(termText, fieldConfig);
                        final long userShard = prefix.hash() % MAGIC_MOD;
                        if (userShard < userShardStart
                            || userShard > userShardEnd)
                        {
                            // Documents carrying an out-of-range term must
                            // not be deleted.
                            td = te.docs(skipDocs, td);
                            int docId;
                            while ((docId = td.nextDoc())
                                != DocIdSetIterator.NO_MORE_DOCS)
                            {
                                deleteDocs.clear(docId);
                            }
                        }
                    } catch (Exception e) {
                        if (logger.isLoggable(Level.SEVERE)) {
                            logger.log(
                                Level.SEVERE,
                                logPrefix
                                    + "Can't extract shard number from term="
                                    + termText + ", field=" +  field,
                                e);
                        }
                    }
                }
            }

            // Only the primary key fields are loaded from stored documents;
            // the map is reused for every document to avoid reallocation.
            final Set<String> primaryKeyFields = config.primaryKey();
            final HashMap<String, String> reusableFieldMap = new HashMap<>();
            for (String field : primaryKeyFields) {
                reusableFieldMap.put(field, null);
            }
            final FieldSelector fs = new SetBasedFieldSelector(
                primaryKeyFields,
                Collections.emptySet());

            int deletedCount = 0;

            //second pass: delete marked documents
            for (String field : shardingFields) {
                final FieldConfig fieldConfig = config.fieldConfig(field);
                if (fieldConfig == null) {
                    throw new IOException(
                        logPrefix + "Unknown field: " + field);
                }
                final Terms terms = fields.terms(field);
                if (terms == null) {
                    continue;
                }
                final TermsEnum te = terms.iterator();
                DocsEnum td = null;
                for (BytesRef term = te.next(); term != null; term = te.next()) {
                    final String termText = term.utf8ToString();
                    final Prefix prefix;
                    try {
                        prefix = extractPrefix(termText, fieldConfig);
                    } catch (Exception e) {
                        if (logger.isLoggable(Level.SEVERE)) {
                            logger.log(
                                Level.SEVERE,
                                logPrefix
                                    + "Can't extract shard number from term="
                                    + termText + ", field=" +  field,
                                e);
                        }
                        continue;
                    }
                    final long userShard = prefix.hash() % MAGIC_MOD;
                    if (userShard < userShardStart
                        || userShard > userShardEnd)
                    {
                        continue;
                    }
                    try {
                        td = te.docs(skipDocs, td);
                        int docId;
                        while ((docId = td.nextDoc())
                            != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            if (!deleteDocs.get(docId)) {
                                continue;
                            }
                            final Document doc;
                            try {
                                doc = reader.document(docId, fs);
                            } catch (Exception e) {
                                if (logger.isLoggable(Level.SEVERE)) {
                                    logger.log(
                                        Level.SEVERE,
                                        logPrefix
                                            + "Can't load document "
                                            + "num=" + docId
                                            + ", tempText=" + termText
                                            + ", field="
                                            +  field,
                                        e);
                                }
                                continue;
                            }
                            deleteDocument(doc, prefix, reusableFieldMap);
                            deletedCount++;
                        }
                    } catch (Exception e) {
                        if (logger.isLoggable(Level.SEVERE)) {
                            logger.log(
                                Level.SEVERE,
                                logPrefix
                                    + "Can't iterate documents for for "
                                    + "term=" + termText + ", field="
                                    +  field,
                                e);
                        }
                    }
                }
            }
            return deletedCount;
        }

        /**
         * Deletes one document from the index by its primary key.
         *
         * @param doc stored document the primary key fields are read from
         * @param prefix prefix used when building the primary key
         * @param fieldMap reusable map keyed by primary key field name; its
         *     values are overwritten
         * @throws IOException if a primary key field is missing in
         *     {@code doc}, or the deletion fails
         * @throws ParseException declared for the calls made here
         */
        private void deleteDocument(
            final Document doc,
            final Prefix prefix,
            final HashMap<String, String> fieldMap)
            throws IOException, ParseException
        {
            index.deleteDocument(extractPrimaryKey(doc, prefix, fieldMap));
        }

        /**
         * Fills {@code fieldMap} with the values stored in {@code doc} for
         * every key already present in the map, then builds a primary key
         * from the map, the prefix and the configuration.
         *
         * @param doc stored document to read field values from
         * @param prefix prefix handed to {@code PrimaryKey.create}
         * @param fieldMap map whose keys name the fields to read; values are
         *     overwritten in place
         * @return the primary key created from the filled map
         * @throws IOException if {@code doc} has no value for one of the
         *     fields
         */
        private PrimaryKey extractPrimaryKey(
            final Document doc,
            final Prefix prefix,
            final HashMap<String, String> fieldMap)
            throws IOException
        {
            for (final Map.Entry<String, String> slot : fieldMap.entrySet()) {
                final String value = doc.get(slot.getKey());
                if (value != null) {
                    slot.setValue(value);
                    continue;
                }
                throw new IOException("Can't load value for primary key"
                    + " field <" + slot.getKey() + ">");
            }
            return PrimaryKey.create(fieldMap, prefix, config);
        }

        /**
         * Parses the prefix of an index term. The prefix is the part of the
         * term before the first {@code '#'}; a term without {@code '#'} is
         * treated as a prefix as a whole, unless the field is configured as
         * prefixed.
         *
         * @param term term text
         * @param config configuration of the field the term belongs to
         * @return the parsed prefix
         * @throws ParseException if the field is prefixed but the term has
         *     no {@code '#'}, or if the prefix parser rejects the text
         */
        private Prefix extractPrefix(
            final String term,
            final FieldConfig config)
            throws ParseException
        {
            final int hashPos = term.indexOf('#');
            if (hashPos >= 0) {
                return prefixParser.parse(term.substring(0, hashPos));
            }
            if (config.prefixed()) {
                throw new ParseException("Can't parse prefixed term: "
                    + term + ": no '#' separator", 0);
            }
            return prefixParser.parse(term);
        }

        /**
         * Runs the removal for this worker's shard: flushes the shard,
         * scans it through a multi searcher to delete the matching
         * documents, and, if anything was deleted, expunges the shard.
         *
         * <p>The searcher is released exactly once, as soon as the scan has
         * finished (successfully or not) and before the expunge starts.
         *
         * @throws IOException if flushing, scanning or expunging fails
         */
        @Override
        public void doJob() throws IOException {
            tryFlush();

            final int deletedDocs;
            final Searcher searcher =
                index.getShard(shardNo).getMultiSearcher();
            try {
                deletedDocs = processReader(searcher.reader());
            } finally {
                // Single release point: avoids a second free() when the
                // first one fails.
                searcher.free();
            }

            if (logger.isLoggable(Level.INFO)) {
                logger.info( "Marked <"+ deletedDocs +"> in shard <"
                    + shardNo + "> for deletion. Flushing...");
            }

            if (deletedDocs > 0) {
                index.getShard(shardNo).expunge( false, null, 0, false );
                if (logger.isLoggable(Level.INFO)) {
                    logger.info("Expunged <"+ deletedDocs +"> in shard <"
                        + shardNo + ">.");
                }
            }
        }

        /**
         * Intentionally empty: this worker performs no action of its own on
         * failure.
         * NOTE(review): presumably invoked by {@link SingleShardJobBase}
         * when {@code doJob} fails -- confirm against the base class.
         */
        @Override
        public void onFail() {
        }
    }

}
