package ru.yandex.msearch;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.text.ParseException;
import java.util.Set;

import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.SingleSegmentFieldsReader;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.search.DocIdSetIterator;

import ru.yandex.msearch.config.DatabaseConfig;
import ru.yandex.search.prefix.PrefixParser;

/**
 * A {@link FilterIndexReader} that presents only the documents belonging to
 * the requested shards as live; every other document of the wrapped reader
 * is reported as deleted.
 *
 * <p>The set of skipped documents is computed once, in the constructor, by
 * walking the terms of each sharding field, extracting the prefix hash from
 * every term and un-marking the documents of the terms whose hash falls into
 * the requested shards.
 */
public class SuidFilterIndexReader extends FilterIndexReader
{
    // Modulus applied to the prefix hash before it is compared against the
    // [userShardStart, userShardEnd] range.
    private static final int MAGIC_MOD = 65534;

    // Bit i is set when document i must NOT be copied. Always non-null.
    private final BitVector dontCopyDocs;
    private final Set<String> shardingFields;
    private final DatabaseConfig config;
    private final PrefixParser prefixParser;
    private final Logger logger;

    /**
     * Wraps {@code in} and computes which of its documents are to be skipped.
     *
     * @param in reader to wrap
     * @param userShardStart lower bound (inclusive) for {@code hash % 65534}
     * @param userShardEnd upper bound (inclusive) for {@code hash % 65534}
     * @param outShards divisor used for the output shard test
     * @param outShard a term matches only if {@code hash % outShards} equals
     *     this value
     * @param shardingFields names of the fields whose terms carry the prefix
     * @param config source of the per-field configuration and prefix parser
     * @param logger destination for progress and error messages
     * @throws IOException if a sharding field is not known to {@code config}
     *     or if reading the wrapped reader's terms or postings fails
     */
    public SuidFilterIndexReader(
        final IndexReader in,
        final int userShardStart,
        final int userShardEnd,
        final int outShards,
        final int outShard,
        final Set<String> shardingFields,
        final DatabaseConfig config,
        final Logger logger)
        throws IOException
    {
        super(in);
        this.shardingFields = shardingFields;
        this.config = config;
        this.logger = logger;
        prefixParser = config.prefixParser();
        dontCopyDocs = createDeletedDocsBitSet(
            userShardStart,
            userShardEnd,
            outShards,
            outShard);
    }

    /**
     * Builds the common, not yet closed, prefix of every log and error
     * message emitted while dumping: {@code IndexDump: (luceneShard=..,
     * fromShard=.., toShard=..}.
     */
    private static String dumpTag(
        final int outShard,
        final int userShardStart,
        final int userShardEnd)
    {
        return "IndexDump: (luceneShard=" + outShard
            + ", fromShard=" + userShardStart
            + ", toShard=" + userShardEnd;
    }

    /**
     * Computes the bit set of documents that must not be copied.
     *
     * <p>Documents are selected for copying by "deleting" all the others.
     * A document is skipped only if none of its sharding-field terms maps
     * to the requested shards. To achieve this every document is marked
     * first, and then the documents of each matching term are un-marked.
     *
     * <p>If the wrapped reader exposes no fields at all, no term can match,
     * so every document stays marked.
     *
     * @return a bit set of size {@code in.maxDoc()}, never {@code null}
     * @throws IOException if a sharding field has no configuration or if
     *     the terms or postings cannot be read
     */
    private BitVector createDeletedDocsBitSet( int userShardStart, int userShardEnd, int outShards, int outShard ) throws IOException
    {
        if (logger.isLoggable(Level.INFO)) {
            logger.info(dumpTag(outShard, userShardStart, userShardEnd)
                + ", shardingFields=" + shardingFields.toString() + "): "
                + " dumping segment: " + in);
        }

        final int maxDoc = in.maxDoc();
        final BitVector skipped = new BitVector(maxDoc);
        for (int i = 0; i < maxDoc; i++) {
            skipped.set(i);
        }

        final IndexReader[] subReaders = in.getSequentialSubReaders();
        final Fields fields;
        if (subReaders != null && subReaders.length > 0) {
            fields = MultiFields.getFields(in);
        } else {
            fields = in.fields();
        }

        if (fields != null) {
            // Passed to TermsEnum.docs() below; documents already deleted in
            // the wrapped reader are expected not to be enumerated and
            // therefore stay marked as skipped.
            final Bits allreadyDeleted = in.getDeletedDocs();
            for (String field : shardingFields) {
                final FieldConfig fieldConfig = config.fieldConfig(field);
                if (fieldConfig == null) {
                    throw new IOException(
                        dumpTag(outShard, userShardStart, userShardEnd)
                        + "): Unknown field: " + field);
                }
                final Terms terms = fields.terms(field);
                if (terms == null) {
                    continue;
                }
                final TermsEnum te = terms.iterator(false);
                DocsEnum td = null;
                for (BytesRef term = te.next(); term != null; term = te.next()) {
                    final String termText = term.utf8ToString();
                    final boolean matches;
                    try {
                        // NOTE(review): Java's % keeps the sign of the
                        // dividend, so a negative hash can only match when
                        // the requested shard numbers are negative too.
                        // Confirm prefix hashes are never negative.
                        final long prefix = extractPrefix(termText, fieldConfig);
                        matches = (prefix % outShards == outShard)
                            && (prefix % MAGIC_MOD >= userShardStart)
                            && (prefix % MAGIC_MOD <= userShardEnd);
                    } catch (Exception e) {
                        // Best effort: a term whose prefix cannot be parsed
                        // is reported and ignored. Only prefix handling is
                        // guarded here, so I/O failures while reading the
                        // postings below are not mistaken for parse errors.
                        if (logger.isLoggable(Level.SEVERE)) {
                            logger.log(
                                Level.SEVERE,
                                dumpTag(outShard, userShardStart, userShardEnd)
                                    + "): Can't extract shard number from term="
                                    + termText + ", field=" +  field,
                                e);
                        }
                        continue;
                    }
                    if (!matches) {
                        continue;
                    }
                    // Documents of a matching term must be copied.
                    td = te.docs(allreadyDeleted, td);
                    int docId;
                    while ((docId = td.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                        skipped.clear(docId);
                    }
                }
            }
        }

        if (logger.isLoggable(Level.INFO)) {
            final int skippedCount = skipped.count();
            logger.info(dumpTag(outShard, userShardStart, userShardEnd)
                + "): totalDocs=" + maxDoc
                + ", docsToCopy=" + (maxDoc - skippedCount)
                + ", skippedDocs=" + skippedCount);
        }
        return skipped;
    }

    /**
     * Extracts the prefix hash from an index term.
     *
     * <p>The prefix is the part of the term before the first {@code '#'}.
     * A term without a separator is used as the prefix in its entirety,
     * unless the field is configured as prefixed.
     *
     * @param term term text
     * @param config configuration of the field the term belongs to
     * @return hash of the parsed prefix
     * @throws ParseException if a prefixed field's term has no {@code '#'},
     *     or if the prefix parser rejects the prefix
     */
    private long extractPrefix(final String term, final FieldConfig config)
        throws ParseException
    {
        final String prefixString;
        int sep = term.indexOf('#');
        if (sep == -1) {
            if (config.prefixed()) {
                throw new ParseException("Can't parse prefixed term: " + term
                    + ": no '#' separator", 0);
            }
            prefixString = term;
        } else {
            prefixString = term.substring(0, sep);
        }
        return prefixParser.parse(prefixString).hash();
    }

    /** Returns the number of documents that are not marked as skipped. */
    @Override
    public int numDocs() {
        return in.maxDoc() - dontCopyDocs.count();
    }

    /** Returns the bit set of skipped documents as the deleted-docs view. */
    @Override
    public Bits getDeletedDocs()
    {
        return dontCopyDocs;
    }

    /** Returns the number of documents marked as skipped. */
    @Override
    public int numDeletedDocs()
    {
        return dontCopyDocs.count();
    }

    /** Returns {@code true} if at least one document is marked as skipped. */
    @Override
    public boolean hasDeletions()
    {
        return dontCopyDocs.count() > 0;
    }

    /**
     * Does nothing; in particular it does not delegate to the superclass
     * implementation.
     * NOTE(review): presumably the wrapped reader is owned and closed by the
     * caller - confirm.
     */
    @Override
    protected void doClose() throws IOException
    {
    }
}
