package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.MergePolicy.MergeAbortedException;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.MergeState;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.MultiBits;
//import org.apache.lucene.util.packed_native.*;
import org.apache.lucene.util.packed.*;

import ru.yandex.collection.ChunkedIntList;
import ru.yandex.collection.IntList;

import ru.yandex.msearch.util.JavaAllocator;

import ru.yandex.util.unicode.UnicodeUtil;

/**
 * The SegmentMerger class combines two or more Segments, each represented by an IndexReader
 * (see {@link #add}), into a single Segment.  After adding the appropriate readers, call the
 * {@link #merge} method to combine the segments.
 * 
 * @see #merge
 * @see #add
 */
final class SegmentMerger {
    /**
     * Allocator registered under the name "SegmentMergerDocIdStreams". In this class it
     * backs the byte blocks of {@code DocIdStream} and the temporary packed index built
     * while merging stored fields with grouping.
     */
    private static final JavaAllocator allocator =
        JavaAllocator.get("SegmentMergerDocIdStreams");
//    private static final NativeMemoryAllocator docMapAllocator =
//        NativeMemoryAllocator.get("SegmentMergerDocMaps");

  /** norms header placeholder */
  static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1}; 
  
  /** Directory the merged segment is written to (constructor argument). */
  private Directory directory;
  /** Name of the segment being produced; used as the file-name prefix (e.g. segment + ".fnm"). */
  private String segment;
  /** Term index interval handed to the SegmentWriteState; overwritten by the constructor argument. */
  private int termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;

  /** Readers to merge, as gathered by {@link #add}. */
  private List<IndexReader> readers = new ArrayList<IndexReader>();
  /** Field infos of the merged segment; supplied by the caller and extended in mergeFields(). */
  private final FieldInfos fieldInfos;
  
  /** Document count returned by mergeFields() during the last {@link #merge} call. */
  private int mergedDocs;

  /** Abort checker; a no-op implementation is installed when no OneMerge is supplied. */
  private final MergeState.CheckAbort checkAbort;

  /** Maximum number of contiguous documents to bulk-copy
      when merging stored fields */
  private final static int MAX_RAW_MERGE_DOCS = 4192;
  
  /** Codec provider used to build the SegmentCodecs and to look up the default field codec. */
  private final CodecProvider codecs;
  // NOTE(review): not assigned or read in the portion of the file visible here.
  private Codec codec;
  /** Write state created at the end of mergeFields(). */
  private SegmentWriteState segmentWriteState;

  /** Supplied by the constructor; not used in the portion of the file visible here. */
  private final PayloadProcessorProvider payloadProcessorProvider;

  /** Supplied by the constructor; only referenced from commented-out code in the visible portion. */
  private final int fieldsWriterBufferSize;
  /** Names of the stored fields that are loaded/copied when documents are merged. */
  private final Set<String> storedFields;
  /** Supplied by the constructor; not used in the portion of the file visible here. */
  private final Set<String> indexedFields;

  /**
   * Creates a merger that writes the segment {@code name} into {@code dir}.
   *
   * @param dir                      directory the merged segment is written to
   * @param termIndexInterval        term index interval for the merged segment
   * @param name                     name of the segment to produce
   * @param merge                    the merge being executed, or {@code null}; with
   *                                 {@code null} the abort check becomes a no-op
   * @param codecs                   codec provider
   * @param payloadProcessorProvider payload processor provider (stored as given)
   * @param fieldInfos               field infos instance that receives the merged fields
   * @param fieldsWriterBufferSize   buffer size setting (stored as given)
   * @param storedFields             names of the stored fields to copy
   * @param indexedFields            names of the indexed fields (stored as given)
   */
  SegmentMerger(
      Directory dir,
      int termIndexInterval,
      String name,
      MergePolicy.OneMerge merge,
      CodecProvider codecs,
      PayloadProcessorProvider payloadProcessorProvider,
      FieldInfos fieldInfos,
      int fieldsWriterBufferSize,
      final Set<String> storedFields,
      final Set<String> indexedFields) {
    this.directory = dir;
    this.segment = name;
    this.termIndexInterval = termIndexInterval;
    this.codecs = codecs;
    this.payloadProcessorProvider = payloadProcessorProvider;
    this.fieldInfos = fieldInfos;
    this.fieldsWriterBufferSize = fieldsWriterBufferSize;
    this.storedFields = storedFields;
    this.indexedFields = indexedFields;

    if (merge == null) {
      // No merge to abort: install a checker whose work() never throws.
      this.checkAbort = new MergeState.CheckAbort(null, null) {
        @Override
        public void work(double units) throws MergeAbortedException {
          // do nothing
        }
      };
    } else {
      this.checkAbort = new MergeState.CheckAbort(merge, this.directory);
    }
  }

  /**
   * Returns the {@link FieldInfos} instance this merger was constructed with.
   *
   * @return the field infos of the merged segment
   */
  public FieldInfos fieldInfos() {
    return this.fieldInfos;
  }

  /**
   * Registers a reader whose contents are to be merged. The reader is handed to
   * {@code ReaderUtil.gatherSubReaders}, which collects it (or its sub-readers)
   * into the internal reader list.
   *
   * @param reader the reader to include in the merge
   */
  final void add(IndexReader reader) {
    ReaderUtil.gatherSubReaders(this.readers, reader);
  }

  /**
   * Merges the readers specified by the {@link #add} method into the directory passed to the constructor.
   * Stored fields are merged first, then terms, then norms, and finally term vectors
   * (only when the merged field infos contain vectors). A one-line description of the
   * merge is printed to {@code System.err} before any work starts.
   *
   * @return The number of documents that were merged
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  final int merge() throws CorruptIndexException, IOException {
    // NOTE: it's important to add calls to
    // checkAbort.work(...) if you make any changes to this
    // method that will spend a lot of time.  The frequency
    // of this check impacts how long
    // IndexWriter.close(false) takes to actually stop the
    // threads.

    // Build the debug line in a StringBuilder instead of repeated String
    // concatenation inside the loop; the emitted text is unchanged.
    final StringBuilder dbg = new StringBuilder("Merger<");
    dbg.append(directory.toString()).append(">: merging ");
    for (IndexReader reader : readers) {
      dbg.append(reader.toString()).append(" ");
    }
    dbg.append(" to ").append(segment);
    System.err.println(dbg.toString());

    mergedDocs = mergeFields();
    mergeTerms();
    mergeNorms();

    if (fieldInfos.hasVectors()) {
      mergeVectors();
    }

    return mergedDocs;
  }

  /**
   * Packs every file of the given segment into a single compound file.
   *
   * @param fileName name of the compound file to create in the merger's directory
   * @param info     segment whose files are added
   * @return the collection of file names that were added (as returned by {@code info.files()})
   * @throws IOException if there is a low-level IO error
   */
  final Collection<String> createCompoundFile(String fileName, final SegmentInfo info)
          throws IOException {
    final Collection<String> segmentFiles = info.files();
    final CompoundFileWriter compoundWriter =
        new CompoundFileWriter(directory, fileName, checkAbort);

    // Register all segment files with the writer ...
    for (String segmentFile : segmentFiles) {
      compoundWriter.addFile(segmentFile);
    }

    // ... closing the writer is what performs the actual merge.
    compoundWriter.close();

    return segmentFiles;
  }

  /**
   * Adds every name in {@code names} to {@code fInfos} as an indexed field with the
   * given term-vector / payload / term-frequency flags. Norms are marked as omitted
   * for a field exactly when {@code reader.hasNorms(field)} is false.
   *
   * @param reader reader that is asked whether each field has norms
   * @param fInfos field infos that receive the fields
   * @param names  names of the fields to add
   * @throws IOException if there is a low-level IO error
   */
  private static void addIndexed(IndexReader reader, FieldInfos fInfos,
      Collection<String> names, boolean storeTermVectors,
      boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
      boolean storePayloads, boolean omitTFAndPositions)
      throws IOException {
    for (String fieldName : names) {
      final boolean omitNorms = !reader.hasNorms(fieldName);
      fInfos.add(
          fieldName,
          true,
          storeTermVectors,
          storePositionWithTermVector,
          storeOffsetWithTermVector,
          omitNorms,
          storePayloads,
          omitTFAndPositions);
    }
  }

  /**
   * Parallel to the reader list: position i holds the i'th reader when it is a
   * SegmentReader whose field names line up with the merged FieldInfos, else null.
   * Filled by setMatchingSegmentReaders().
   */
  private SegmentReader[] matchingSegmentReaders;
  /** Scratch buffer of MAX_RAW_MERGE_DOCS document lengths used when bulk-copying raw stored fields. */
  private int[] rawDocLengths;
  // Second scratch buffer of the same size; not read in the portion of the file visible here.
  private int[] rawDocLengths2;
  /** Number of non-null entries recorded in matchingSegmentReaders by setMatchingSegmentReaders(). */
  private int matchedCount;

  /**
   * Returns the counter maintained by {@code setMatchingSegmentReaders()}.
   *
   * @return the number of readers recorded as matching the merged field infos
   */
  public int getMatchedSubReaderCount() {
    return this.matchedCount;
  }

  /**
   * Determines which readers are SegmentReaders whose field name -> number mapping
   * agrees with the merged FieldInfos, records them in {@code matchingSegmentReaders}
   * and counts them in {@code matchedCount}. Also (re)allocates the scratch buffers
   * used for bulk-reading raw stored fields.
   *
   * <p>The method is safe to call repeatedly: all results, including the counter,
   * are recomputed from scratch on every call.
   */
  private void setMatchingSegmentReaders() {
    // If the i'th reader is a SegmentReader and has
    // identical fieldName -> number mapping, then this
    // array will be non-null at position i:
    int numReaders = readers.size();
    matchingSegmentReaders = new SegmentReader[numReaders];

    // Start counting from zero; otherwise every additional call would add the
    // same matches again and getMatchedSubReaderCount() would over-report.
    matchedCount = 0;

    // If this reader is a SegmentReader, and all of its
    // field name -> number mappings match the "merged"
    // FieldInfos, then we can do a bulk copy of the
    // stored fields:
    for (int i = 0; i < numReaders; i++) {
      IndexReader reader = readers.get(i);
      if (reader instanceof SegmentReader) {
        SegmentReader segmentReader = (SegmentReader) reader;
        boolean same = true;
        FieldInfos segmentFieldInfos = segmentReader.fieldInfos();
        int numFieldInfos = segmentFieldInfos.size();
        for (int j = 0; same && j < numFieldInfos; j++) {
          same = fieldInfos.fieldName(j).equals(segmentFieldInfos.fieldName(j));
        }
        if (same) {
          matchingSegmentReaders[i] = segmentReader;
          matchedCount++;
        }
      }
    }

    // Used for bulk-reading raw bytes for stored fields
    rawDocLengths = new int[MAX_RAW_MERGE_DOCS];
    rawDocLengths2 = new int[MAX_RAW_MERGE_DOCS];
  }

  /**
   * Merges the field infos of all readers into the shared {@code fieldInfos}, writes
   * them to the segment's ".fnm" file and then copies the stored fields of every reader
   * through the default field codec's FieldsWriter. When the codec asks for grouping,
   * documents are written grouped via {@code mergeFieldsWithGrouping}; otherwise each
   * reader is copied in order. Finally the {@code segmentWriteState} is created.
   *
   * @return The number of documents written to the merged segment
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  private int mergeFields() throws CorruptIndexException, IOException {

    for (IndexReader reader : readers) {
      if (reader instanceof SegmentReader) {
        SegmentReader segmentReader = (SegmentReader) reader;
        FieldInfos readerFieldInfos = segmentReader.fieldInfos();
        int numReaderFieldInfos = readerFieldInfos.size();
        for (int j = 0; j < numReaderFieldInfos; j++) {
          fieldInfos.add(readerFieldInfos.fieldInfo(j));
        }
      } else if (reader instanceof PerFieldMergingIndexReader) {
        FieldInfos readerFieldInfos = ((PerFieldMergingIndexReader)reader).getFieldsWriter().getFieldInfos();
        int numReaderFieldInfos = readerFieldInfos.size();
        for (int j = 0; j < numReaderFieldInfos; j++) {
          fieldInfos.add(readerFieldInfos.fieldInfo(j));
        }
      } else {
        // Generic reader: reconstruct the field flags from the per-option field name sets.
        addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
        addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
        addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
        addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false);
        addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
        addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
        addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.INDEXED), false, false, false, false, false);
        fieldInfos.add(reader.getFieldNames(FieldOption.UNINDEXED), false);
      }
    }
    final SegmentCodecs codecInfo = SegmentCodecs.build(fieldInfos, this.codecs);
    fieldInfos.write(directory, segment + ".fnm");

    int docCount = 0;

    // Computed exactly once, after all reader fields were added above. Calling it
    // again per branch only re-allocated the scratch buffers and counted the
    // matching readers a second time.
    setMatchingSegmentReaders();

    Codec defCodec = this.codecs.lookup(this.codecs.getDefaultFieldCodec());

    final boolean groupDocs = defCodec.groupFieldsWriter();
    final FieldsWriter fieldsWriter =
        defCodec.fieldsWriter(
            true,
            directory,
            segment,
            fieldInfos);

    try {
      if (groupDocs) {
        docCount = mergeFieldsWithGrouping(
            fieldsWriter,
            readers,
            defCodec.getGroupFields());
      } else {
        for (IndexReader reader : readers) {
          // NOTE(review): groupFieldsWriter() already returned false when this branch
          // was chosen; the re-check is kept exactly as in the original.
          if (reader.hasDeletions() || defCodec.groupFieldsWriter()) {
            docCount += copyFieldsWithDeletions(fieldsWriter, reader);
          } else {
            docCount += copyFieldsNoDeletions(fieldsWriter, reader);
          }
        }
      }
    } finally {
      // The writer is closed on success and on failure alike.
      fieldsWriter.close();
    }

    segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, docCount, termIndexInterval, codecInfo, null);
    return docCount;
  }

    /**
     * Map key identifying one (group hash, reader index) pair. Instances order by
     * group hash first and reader index second.
     *
     * <p>The key is mutable: {@link #rehash} replaces the group hash so that a single
     * instance can be reused for lookups. An instance must not be rehashed while it is
     * stored in a hash-based collection; store a {@link #clone()} instead.
     */
    static class FieldGroupWithReader
        implements Comparable<FieldGroupWithReader>
    {
        /** Mask applied to group hashes (20 bits); shared by all instances instead of stored per key. */
        private static final int maxGroups = (1 << 20) - 1;

        private final int readerIdx;
        /** Cached combination of {@code groupHash} and {@code readerIdx}; see {@link #hash()}. */
        private int hashCode;
        private int groupHash;

        /**
         * @param groupHash hash of the group value; stored as given (not masked here)
         * @param readerIdx index of the reader the group belongs to
         */
        public FieldGroupWithReader(final int groupHash, final int readerIdx) {
            this.readerIdx = readerIdx;
            this.groupHash = groupHash;
            hash();
        }

        /** Recomputes the cached hash code from the current group hash and the reader index. */
        public void hash() {
            hashCode = groupHash + (readerIdx << 20);
        }

        /**
         * Points this key at another group: the group's hash code is reduced to its low
         * 20 bits and the cached hash code is refreshed.
         *
         * @param group bytes of the group value
         */
        public void rehash(final BytesRef group) {
            //Save memory (use 1kk hash code instead of full strings)
            groupHash = group.hashCode() & maxGroups;
            hash();
        }

        @Override
        public int hashCode() {
            return hashCode;
        }

        /**
         * Two keys are equal when both the group hash and the reader index agree.
         * {@code null} and objects of other types are never equal to a key.
         */
        @Override
        public boolean equals(final Object o) {
            if (this == o) {
                return true;
            }
            if (!(o instanceof FieldGroupWithReader)) {
                return false;
            }
            FieldGroupWithReader other = (FieldGroupWithReader)o;
            return other.groupHash == groupHash && other.readerIdx == readerIdx;
        }

        @Override
        public int compareTo(final FieldGroupWithReader other) {
            int groupCmp = Integer.compare(groupHash, other.groupHash);
            if (groupCmp == 0) {
                return Integer.compare(readerIdx, other.readerIdx);
            } else {
                return groupCmp;
            }
        }

        /** Returns an independent copy carrying the current group hash and reader index. */
        @Override
        public FieldGroupWithReader clone() {
            return new FieldGroupWithReader(
                groupHash,
                readerIdx);
        }

        @Override
        public String toString() {
            return groupHash + ":" + readerIdx;
        }
    }

    /**
     * Per-reader state used while merging stored fields with grouping: the reader, its
     * optional low-level FieldsReader, the document base of the reader inside the merged
     * segment and the selector used to load stored fields through the regular
     * {@code IndexReader.document} API.
     */
    private class ReaderContext {
        private IndexReader reader;
        /** Non-null only when the reader is a SingleSegmentFieldsReader that handed out a FieldsReader. */
        private FieldsReader fieldsReader;
        private final Set<String> storedFields;
        private final FieldSelector selector;
        /** Offset added to per-reader document ids to obtain ids in the merged segment. */
        private final int docBase;

        /**
         * @param storedFields names of the stored fields to copy
         * @param reader       reader whose documents are copied
         * @param docBase      id of this reader's first document within the merged segment
         */
        public ReaderContext(
            final Set<String> storedFields,
            final IndexReader reader,
            final int docBase)
        {
            this.storedFields = storedFields;
            this.reader = reader;
            this.docBase = docBase;
            selector = new SetBasedFieldSelector(
                storedFields,
                Collections.<String>emptySet());
            if (reader instanceof SingleSegmentFieldsReader) {
                fieldsReader =
                    ((SingleSegmentFieldsReader)reader).getFieldsReader();
                if (fieldsReader != null) {
                    // Switched back off in close().
                    fieldsReader.directIO(true);
                }
            }
        }

        /**
         * Copies all documents listed in {@code docIdReader} to {@code writer}, using the
         * FieldsReader when one is available and the document API otherwise.
         *
         * @param docIdReader stream of (new doc id, delta to old doc id) pairs
         * @param writer      destination of the stored fields
         * @param index       receives, per merged doc id, the writer position at which
         *                    the document starts
         * @throws IOException if there is a low-level IO error
         */
        public void copyDocs(
            final DocIdStreamReader docIdReader,
            final FieldsWriter writer,
            final PackedInts.Mutable index)
            throws IOException
        {
            if (fieldsReader != null) {
                copyFields(docIdReader, writer, index);
            } else {
                copyLuceneDocs(docIdReader, writer, index);
            }
        }

        /**
         * Copies documents through the FieldsReader. Each entry of the stream consists of
         * the reader-local new doc id followed by the distance to the old doc id.
         *
         * @throws IOException if there is a low-level IO error
         */
        public void copyFields(
            final DocIdStreamReader docIdReader,
            final FieldsWriter writer,
            final PackedInts.Mutable index)
            throws IOException
        {
            while (!docIdReader.eof()) {
                final int docId = docIdReader.readVInt();
                final int oldDocId = docIdReader.readVInt() + docId;
                // Position must be taken before the document is written.
                final long dataPos = writer.getDataStreamPosition();
                final int newDocId = docBase + docId;
                fieldsReader.writeDocument(oldDocId, newDocId, writer, storedFields, true);
                index.set(newDocId, dataPos);
                checkAbort.work(300);
            }
        }

        /**
         * Copies documents by loading them with {@code reader.document} and re-adding
         * them to the writer; same stream layout as {@link #copyFields}.
         *
         * @throws IOException if there is a low-level IO error
         */
        public void copyLuceneDocs(
            final DocIdStreamReader docIdReader,
            final FieldsWriter writer,
            final PackedInts.Mutable index)
            throws IOException
        {
            while (!docIdReader.eof()) {
                final int docId = docIdReader.readVInt();
                final int oldDocId = docIdReader.readVInt() + docId;
                // Position must be taken before the document is written.
                final long dataPos = writer.getDataStreamPosition();
                Document doc = reader.document(oldDocId, selector);
                final int newDocId = docBase + docId;
                writer.addDocumentNoIndex(newDocId, doc);
                index.set(newDocId, dataPos);
                checkAbort.work(300);
            }
        }

        /**
         * Turns direct I/O off again and hands the FieldsReader back to its owner.
         * Does nothing when no FieldsReader is held; calling it again is harmless.
         */
        public void close() {
            if (fieldsReader != null && reader != null) {
                fieldsReader.directIO(false);
                ((SingleSegmentFieldsReader)reader)
                    .freeFieldsReader(fieldsReader);
                fieldsReader = null;
                reader = null;
            }
        }

        /**
         * Assigns every live document of this reader to its group and records it in the
         * doc-id stream of that (group, reader) key.
         *
         * <p>When a FieldsReader is available the work is delegated to it. Otherwise each
         * non-deleted document is loaded, its size is estimated, and the values of the
         * group fields (each followed by '#') are concatenated to form the group value.
         * For every document two numbers are appended to the stream: its rank among the
         * live documents of this reader and the distance from that rank to its original id.
         *
         * @param groupFields      names of the fields that define a group
         * @param readerIdx        index of this reader in the merge
         * @param groupDocFieldMap map from (group, reader) to the stream of its documents
         * @param writer           writer whose field infos are used for size calculation
         * @return total number of bytes the stored fields of the listed documents are
         *         expected to occupy
         * @throws IOException if there is a low-level IO error
         */
        private long fillGroupDocFieldMap(
            final Set<String> groupFields,
            final int readerIdx,
            final Map<FieldGroupWithReader, DocIdStream> groupDocFieldMap,
            final FieldsWriter writer)
            throws IOException
        {
            final FieldGroupWithReader reusableKey =
                new FieldGroupWithReader(0, readerIdx);
            final Bits deletedDocs = reader.getDeletedDocs();
            if (fieldsReader != null) {
                return fieldsReader.fillGroupDocFieldMap(
                    groupFields,
                    reusableKey,
                    groupDocFieldMap,
                    deletedDocs,
                    storedFields,
                    writer.getFieldInfos(),
                    checkAbort);
            }

            final BytesRef groupRef = new BytesRef();
            long totalFieldsSize = 0;
            final FieldInfos writerFieldInfos = writer.getFieldInfos();
            final int maxDoc = reader.maxDoc();
            // Rank of the current document among the live documents seen so far.
            int docId = 0;
            StringBuilder groupConcat = new StringBuilder();
            String[] groupFieldsArray = groupFields.toArray(new String[0]);
            for (int i = 0; i < maxDoc; i++) {
                checkAbort.work(300);
                if (deletedDocs != null && deletedDocs.get(i)) {
                    continue;
                }
                // The context's selector is built from the same stored-field set.
                Document doc = reader.document(i, selector);
                long docSize = calcLuceneDocumentSize(doc, writerFieldInfos);
                totalFieldsSize += docSize;
                groupConcat.setLength(0);
                for (final String groupField: groupFieldsArray) {
                    final String groupFieldValue = doc.get(groupField);
                    if (groupFieldValue != null) {
                        groupConcat.append(groupFieldValue);
                    }
                    groupConcat.append('#');
                }
                groupRef.copy(groupConcat);
                reusableKey.rehash(groupRef);
                DocIdStream docIdStream = groupDocFieldMap.get(reusableKey);
                if (docIdStream == null) {
                    docIdStream = new DocIdStream();
                    // The lookup key keeps mutating, so the map gets its own copy.
                    groupDocFieldMap.put(reusableKey.clone(), docIdStream);
                }
                docIdStream.addDocId(docId);
                docIdStream.addDocId(i - docId);
                docId++;
            }
            return totalFieldsSize;
        }
    }

    /**
     * Sequential reader over the byte blocks of a {@code DocIdStream}. A single
     * instance can be pointed at another stream with {@link #reset}.
     */
    private static final class DocIdStreamReader {
        private DocIdStream docIdStream;
        /** Block currently being read. */
        private BytesRef stream;
        /** Index of the block that will be read after the current one. */
        private int currentStream;
        /** Read position inside the current block. */
        private int offset;
        private int streamCount;

        /** Positions the reader at the first byte of the first block of {@code docIdStream}. */
        public void reset(final DocIdStream docIdStream) {
            this.docIdStream = docIdStream;
            this.offset = 0;
            this.currentStream = 0;
            this.stream = docIdStream.streams.get(this.currentStream++);
            this.streamCount = docIdStream.streams.size();
        }

        /** Returns true once the last block has been consumed completely. */
        public boolean eof() {
            return offset == stream.length && currentStream == streamCount;
        }

        /**
         * Reads the next byte, moving on to the next block when the current one is exhausted.
         *
         * @throws IOException when no byte is left in any block
         */
        public byte readByte() throws IOException {
            final boolean blockExhausted = offset == stream.length;
            if (blockExhausted) {
                if (currentStream == streamCount) {
                    throw new IOException("Read past EOF");
                }
                stream = docIdStream.streams.get(currentStream++);
                offset = 0;
            }
            final byte next = stream.bytes[offset];
            offset++;
            return next;
        }

        /** Reads an int stored as 7-bit groups, low group first, high bit marking continuation. */
        public int readVInt() throws IOException {
            byte current = readByte();
            int result = current & 0x7F;
            int shift = 7;
            while (current < 0) {
                current = readByte();
                result |= (current & 0x7F) << shift;
                shift += 7;
            }
            return result;
        }

        /** Reads a long stored as 7-bit groups, low group first, high bit marking continuation. */
        public long readVLong() throws IOException {
            byte current = readByte();
            long result = current & 0x7fL;
            int shift = 7;
            while (current < 0) {
                current = readByte();
                result |= (current & 0x7fL) << shift;
                shift += 7;
            }
            return result;
        }
    }

    /**
     * Write once, read once stream of variable-length encoded ints, stored as a list
     * of byte blocks. A block starts at {@code INITIAL_BLOCK_SIZE} bytes and is doubled
     * when full; once a full block has reached {@code MAX_BLOCK_SIZE} bytes a new block
     * is started instead. {@link #close()} returns all blocks to the allocator.
     */
    static class DocIdStream {
        private static final int INITIAL_BLOCK_SIZE = 16;
        private static final int MAX_BLOCK_SIZE = 1024;
        protected ArrayList<BytesRef> streams;
        /** Block that currently receives the written bytes. */
        private BytesRef currentStream;

        public DocIdStream() {
            streams = new ArrayList<BytesRef>();
            // Take the first block from the allocator as well, exactly like the
            // follow-up blocks in writeByte(): close() passes every block to
            // allocator.free(), so all of them should originate from the allocator.
            // NOTE(review): assumes new BytesRef(int) allocates outside of the
            // allocator - confirm against JavaAllocator/BytesRef.
            currentStream =
                new BytesRef(allocator.alloc(INITIAL_BLOCK_SIZE), 0, 0);
            streams.add(currentStream);
        }

        /** Appends one value to the stream in variable-length encoding. */
        public void addDocId(final int docId) {
            writeVInt(docId);
        }

        /** Writes {@code value} as 7-bit groups, low group first; the high bit marks continuation. */
        private void writeVInt(int value) {
            while ((value & ~0x7f) != 0) {
                writeByte(((byte)((value & 0x7f) | 0x80)));
                value >>>= 7;
            }
            writeByte((byte)value);
        }

        /** Appends a single byte, growing the current block or starting a new one when it is full. */
        private void writeByte(final byte b) {
            if (currentStream.length == currentStream.bytes.length) {
                if (currentStream.bytes.length >= MAX_BLOCK_SIZE) {
                    // Block reached its maximum size: continue in a fresh block.
                    currentStream =
                        new BytesRef(allocator.alloc(INITIAL_BLOCK_SIZE), 0, 0);
                    streams.add(currentStream);

                } else {
                    // Double the capacity of the current block.
                    currentStream.bytes = allocator.realloc(
                        currentStream.bytes,
                        currentStream.length << 1);
                }
            }
            currentStream.bytes[currentStream.length++] = b;
        }

        /** Hands the byte array of every block back to the allocator. */
        public void close() {
            for (BytesRef ref : streams) {
                allocator.free(ref.bytes);
            }
        }
    }

    /**
     * Merges the stored fields of all readers so that documents of the same group end
     * up next to each other in the output.
     *
     * <p>Phase one lets every reader sort its live documents into per-(group, reader)
     * doc-id streams and sums up the expected size of the stored fields. Phase two walks
     * the keys in sorted order, copies the documents of each stream to the writer and
     * records every document's start position in a temporary packed index, which is
     * written out at the end.
     *
     * <p>All doc-id streams, reader contexts and the temporary index are released in a
     * single {@code finally} block, whichever step fails.
     *
     * @param fieldsWriter destination of the stored fields
     * @param readers      readers to merge, in merge order
     * @param groupFields  names of the fields that define a group
     * @return the number of documents written (sum of {@code numDocs()} of all readers)
     * @throws CorruptIndexException when the number of bytes written differs from the
     *         size calculated in phase one
     * @throws IOException if there is a low-level IO error
     */
    private int mergeFieldsWithGrouping(
        final FieldsWriter fieldsWriter,
        final List<IndexReader> readers,
        final Set<String> groupFields)
        throws IOException
    {
        System.err.println("SegmentMerger.mergeFieldsWithGrouping: "
            + groupFields);
        final HashMap<FieldGroupWithReader, DocIdStream> groupDocFieldMap =
            new HashMap<>();
        final ReaderContext[] readerContext = new ReaderContext[readers.size()];
        int docCount = 0;
        PackedInts.Mutable tempIndex = null;
        try {
            // Phase 1: group the documents of every reader.
            long totalFieldsSize = 0;
            int idx = 0;
            for (IndexReader reader : readers) {
                // docCount so far is the doc base of this reader in the merged segment.
                final ReaderContext ctx = new ReaderContext(
                    storedFields,
                    reader,
                    docCount);
                readerContext[idx] = ctx;
                totalFieldsSize +=
                    ctx.fillGroupDocFieldMap(
                        groupFields,
                        idx,
                        groupDocFieldMap,
                        fieldsWriter);
                System.err.println("reader=" + reader + ", numDocs=" + reader.numDocs());
                docCount += reader.numDocs();
                idx++;
            }

            // Phase 2: copy the documents group by group.
            ArrayList<FieldGroupWithReader> groups =
                new ArrayList<>(groupDocFieldMap.keySet());
            Collections.sort(groups);
            final DocIdStreamReader streamReader = new DocIdStreamReader();
            tempIndex =
                PackedInts.getMutable(
                    docCount,
                    PackedInts.bitsRequired(totalFieldsSize),
                    allocator);
            for (FieldGroupWithReader gwr : groups) {
                final DocIdStream docIdStream = groupDocFieldMap.get(gwr);
                streamReader.reset(docIdStream);
                final int readerIdx = gwr.readerIdx;
                final ReaderContext ctx = readerContext[readerIdx];
                ctx.copyDocs(streamReader, fieldsWriter, tempIndex);
            }
            System.err.println("TotalFieldSize=" + totalFieldsSize
                + ", writed=" + fieldsWriter.getDataStreamPosition());
            if (totalFieldsSize != fieldsWriter.getDataStreamPosition()) {
                throw new CorruptIndexException(
                    "Actual field data size != calculated size: "
                    + fieldsWriter.getDataStreamPosition()
                    + " != " + totalFieldsSize);
            }
            fieldsWriter.writeIndex(tempIndex);
        } finally {
            for (DocIdStream docIdStream : groupDocFieldMap.values()) {
                docIdStream.close();
            }
            for (int i = 0; i < readerContext.length; i++) {
                // Entries stay null for readers that were never reached.
                if (readerContext[i] != null) {
                    readerContext[i].close();
                }
            }
            // Null when the failure happened before the index was allocated.
            if (tempIndex != null) {
                tempIndex.close();
            }
        }
        return docCount;
    }

    /**
     * Calculates the number of bytes the stored fields of {@code doc} are expected to
     * take: the size of every stored field plus a variable-length int holding the
     * number of stored fields.
     *
     * @param doc        document to measure
     * @param fieldInfos used to look up the field info of each stored field by name
     * @return the calculated size in bytes
     */
    private int calcLuceneDocumentSize(
        final Document doc,
        final FieldInfos fieldInfos)
    {
        int totalBytes = 0;
        int storedFieldCount = 0;
        for (Fieldable candidate : doc.getFields()) {
            if (!candidate.isStored()) {
                continue;
            }
            totalBytes += fieldSize(fieldInfos.fieldInfo(candidate.name()), candidate);
            storedFieldCount++;
        }
        return totalBytes + vIntSize(storedFieldCount);
    }

    /**
     * Calculates the size of one stored field: the field number as a variable-length
     * int, one byte of flag bits, the value length as a variable-length int and the
     * value itself (raw bytes for binary fields, UTF-8 bytes for string fields).
     *
     * @param fi    field info supplying the field number
     * @param field field whose value is measured
     * @return the calculated size in bytes
     */
    private int fieldSize(FieldInfo fi, Fieldable field) {
        // Field number plus one byte for the bits.
        final int headerBytes = vIntSize(fi.number) + 1;
        final int valueBytes;
        if (field.isBinary()) {
            valueBytes = field.getBinaryLength();
        } else {
            valueBytes = UnicodeUtil.utf8Length(field.stringValue());
        }
        return headerBytes + vIntSize(valueBytes) + valueBytes;
    }

    /**
     * Returns the number of bytes needed to store {@code value} as a variable-length
     * int with 7 payload bits per byte. The value is treated as unsigned, so negative
     * values take 5 bytes; zero takes 1 byte.
     *
     * @param value value to measure
     * @return number of bytes, between 1 and 5
     */
    private int vIntSize(int value) {
        // Number of significant bits; "| 1" makes zero count as one bit.
        final int significantBits = 32 - Integer.numberOfLeadingZeros(value | 1);
        // Round up to whole 7-bit groups.
        return (significantBits + 6) / 7;
    }



  /**
   * Copies the stored fields of all non-deleted documents of {@code reader} to
   * {@code fieldsWriter}.
   *
   * <p>For a PerFieldMergingIndexReader whose field names line up with the merged
   * field infos, runs of live documents are copied as raw bytes, at most
   * MAX_RAW_MERGE_DOCS documents at a time. Every other case is delegated to
   * {@code copyThruDocuments}. A PerFieldMergingIndexReader without a fields writer
   * contributes no documents.
   *
   * @param fieldsWriter destination of the stored fields
   * @param reader       source reader
   * @return the number of documents copied
   */
  private int copyFieldsWithDeletions(final FieldsWriter fieldsWriter, final IndexReader reader)
    throws IOException, MergeAbortedException, CorruptIndexException {
    final int maxDoc = reader.maxDoc();
    Bits delDocs = reader.getDeletedDocs();
    if (delDocs == null) {
      // No deletions recorded: use a bit set that matches nothing.
      delDocs = new Bits.MatchNoBits( 0 );
    }

    if (!(reader instanceof PerFieldMergingIndexReader)) {
      return copyThruDocuments(reader, fieldsWriter, maxDoc, delDocs);
    }

    final StandardFieldsWriter sourceWriter =
        (StandardFieldsWriter) (((PerFieldMergingIndexReader)reader).getFieldsWriter());
    if (sourceWriter == null) {
      return 0;
    }

    // Raw copying is only possible when field numbers mean the same on both sides.
    final FieldInfos sourceFieldInfos = sourceWriter.getFieldInfos();
    final int sourceFieldCount = sourceFieldInfos.size();
    boolean sameNumbering = true;
    for (int f = 0; sameNumbering && f < sourceFieldCount; f++) {
      sameNumbering = fieldInfos.fieldName(f).equals(sourceFieldInfos.fieldName(f));
    }
    if (!sameNumbering) {
      return copyThruDocuments(reader, fieldsWriter, maxDoc, delDocs);
    }

    int copied = 0;
    int doc = 0;
    while (doc < maxDoc) {
      if (delDocs.get(doc)) {
        // skip deleted docs
        ++doc;
        continue;
      }
      // Collect a run of live documents starting here; the run ends at the end of
      // the reader, at the next deleted document (which is stepped over) or when
      // the maximum run length is reached.
      final int runStart = doc;
      int runLength = 0;
      do {
        doc++;
        runLength++;
        if (doc >= maxDoc) break;
        if (delDocs.get(doc)) {
          doc++;
          break;
        }
      } while(runLength < MAX_RAW_MERGE_DOCS);

      final IndexInput rawStream = sourceWriter.rawDocs(rawDocLengths, runStart, runLength);
      fieldsWriter.addRawDocuments(rawStream, rawDocLengths, runLength);
      copied += runLength;
      checkAbort.work(300 * runLength);
    }
    return copied;
  }

  /**
   * Copies the stored fields of all non-deleted documents one by one. When the reader
   * is a SingleSegmentFieldsReader that provides a FieldsReader, documents are written
   * through {@code FieldsReader.writeDocument}; otherwise they are loaded and re-added
   * via {@code copyThruLuceneDocuments}.
   *
   * <p>NOTE(review): the FieldsReader obtained here is not handed back through
   * {@code freeFieldsReader}; the original carried that call commented out. Confirm
   * whether this is intentional.
   *
   * @param reader       source reader
   * @param fieldsWriter destination of the stored fields
   * @param maxDoc       number of document slots to scan in {@code reader}
   * @param delDocs      deleted documents; must not be null
   * @return the number of documents copied
   */
  private final int copyThruDocuments(final IndexReader reader,
    final FieldsWriter fieldsWriter, final int maxDoc, final Bits delDocs)
    throws IOException, MergeAbortedException, CorruptIndexException
  {
    if (!(reader instanceof SingleSegmentFieldsReader)) {
        return copyThruLuceneDocuments(
            reader,
            fieldsWriter,
            maxDoc,
            delDocs);
    }

    final FieldsReader fieldsReader =
        ((SingleSegmentFieldsReader)reader).getFieldsReader();
    if (fieldsReader == null) {
        return copyThruLuceneDocuments(
            reader,
            fieldsWriter,
            maxDoc,
            delDocs);
    }

    int copied = 0;
    for (int docId = 0; docId < maxDoc; docId++) {
        if (delDocs.get(docId)) {
            // skip deleted docs
            continue;
        }
        try {
            // The running count is the id of the document in the output.
            fieldsReader.writeDocument(
                docId,
                copied,
                fieldsWriter,
                storedFields);
        } catch (Throwable t) {
            // Print the failure right here, then let it propagate unchanged.
            t.printStackTrace();
            throw t;
        }
        copied++;
        checkAbort.work(300);
    }
    return copied;
  }

    /**
     * Copies stored fields by loading each non-deleted document of
     * {@code reader} as a {@link Document} and adding it to
     * {@code fieldsWriter}. A diagnostic line naming the reader class is
     * printed to {@code System.err} on every call.
     *
     * @param reader source of the documents
     * @param fieldsWriter destination for the stored fields
     * @param maxDoc number of document slots to scan in {@code reader}
     * @param delDocs deleted-document bits; set bits are skipped
     * @return the number of documents written
     */
    private final int copyThruLuceneDocuments(
        final IndexReader reader,
        final FieldsWriter fieldsWriter,
        final int maxDoc,
        final Bits delDocs)
        throws CorruptIndexException, IOException, MergeAbortedException
    {
        System.err.println("MERGE: CopyThru LUCENE Documents: " + reader.getClass().getName());
        // Selector built from storedFields and an empty second set
        // (presumably "load" vs. "lazy load" -- confirm against SetBasedFieldSelector).
        final SetBasedFieldSelector storedOnly =
            new SetBasedFieldSelector(storedFields, Collections.<String>emptySet());

        int written = 0;
        int docId = 0;
        while (docId < maxDoc) {
            if (!delDocs.get(docId)) {
                // NOTE: keep the document in a local before handing it to
                // fieldsWriter.addDocument; see LUCENE-1282
                final Document loaded = reader.document(docId, storedOnly);
                fieldsWriter.addDocument(written, loaded);
                written++;
                checkAbort.work(300);
            }
            docId++;
        }
        return written;
    }

    /**
     * Copies stored fields of all documents {@code 0 .. reader.maxDoc()-1} by
     * loading each one as a {@link Document} and adding it to
     * {@code fieldsWriter} under the same document number.
     *
     * @param reader source of the documents; no deletion check is performed
     * @param fieldsWriter destination for the stored fields
     * @return the number of documents written
     */
    private final int copyThruLuceneDocumentsNoDeletions(
        final IndexReader reader,
        final FieldsWriter fieldsWriter)
        throws CorruptIndexException, IOException, MergeAbortedException
    {
        int docId = 0;
        final int limit = reader.maxDoc();
        final SetBasedFieldSelector storedOnly =
            new SetBasedFieldSelector(storedFields, Collections.<String>emptySet());

        while (docId < limit) {
            // NOTE: keep the document in a local before handing it to
            // fieldsWriter.addDocument; see LUCENE-1282
            final Document loaded = reader.document(docId, storedOnly);
            fieldsWriter.addDocument(docId, loaded);
            checkAbort.work(300);
            docId++;
        }
        // docId doubles as the count of copied documents.
        return docId;
    }

  /**
   * Copies the stored fields of all documents of {@code reader} (which is
   * treated as having no deletions) into {@code fieldsWriter}.
   *
   * <p>Uses {@code FieldsReader.writeDocument} when {@code reader} is a
   * {@code SingleSegmentFieldsReader} with a non-null fields reader, and
   * {@link #copyThruLuceneDocumentsNoDeletions} otherwise.
   *
   * @param fieldsWriter destination for the stored fields
   * @param reader source segment
   * @return the number of documents written
   */
  private int copyFieldsNoDeletions(final FieldsWriter fieldsWriter, final IndexReader reader)
    throws IOException, MergeAbortedException, CorruptIndexException {
    final FieldsReader segmentFieldsReader =
        (reader instanceof SingleSegmentFieldsReader)
            ? ((SingleSegmentFieldsReader) reader).getFieldsReader()
            : null;

    if (segmentFieldsReader == null) {
      return copyThruLuceneDocumentsNoDeletions(reader, fieldsWriter);
    }

    // NOTE(review): a call to SingleSegmentFieldsReader.freeFieldsReader(...)
    // used to be disabled here; confirm the fields reader needs no release.
    final int limit = reader.maxDoc();
    int written = 0;
    while (written < limit) {
      try {
        // Without deletions the source and target doc numbers coincide.
        segmentFieldsReader.writeDocument(written, written, fieldsWriter, storedFields);
      } catch (Throwable failure) {
        // Dump the failure before letting it propagate unchanged.
        failure.printStackTrace();
        throw failure;
      }
      written++;
      checkAbort.work(300);
    }
    return written;
  }

  /**
   * Merges the term vectors of every reader into the new segment and then
   * verifies that the written vectors index file has the size expected for
   * {@code mergedDocs} documents.
   *
   * @throws IOException if reading or writing the vectors fails
   * @throws RuntimeException if the vectors index size does not match
   */
  private final void mergeVectors() throws IOException {
    final TermVectorsWriter vectorsOut =
      new TermVectorsWriter(directory, segment, fieldInfos);

    try {
      int readerIndex = 0;
      for (final IndexReader reader : readers) {
        final SegmentReader congruentReader = matchingSegmentReaders[readerIndex];
        readerIndex++;

        final TermVectorsReader candidate =
          (congruentReader == null) ? null : congruentReader.getTermVectorsReader();

        // Older TV* file formats cannot serve raw docs, so only readers that
        // can are eligible for the bulk-copy path.
        final TermVectorsReader bulkSource =
          (candidate != null && candidate.canReadRawDocs()) ? candidate : null;

        if (!reader.hasDeletions()) {
          copyVectorsNoDeletions(vectorsOut, bulkSource, reader);
        } else {
          copyVectorsWithDeletions(vectorsOut, bulkSource, reader);
        }
      }
    } finally {
      vectorsOut.close();
    }

    final String fileName = IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
    final long tvxSize = directory.fileLength(fileName);
    final long expectedTvxSize = 4 + ((long) mergedDocs) * 16;

    if (expectedTvxSize != tvxSize) {
      // Most likely the Sun JRE 1.6.0_04/_05 bug described in LUCENE-1282:
      // fail the merge here rather than let the corruption enter the index.
      throw new RuntimeException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + " file=" + fileName + " file exists?=" + directory.fileExists(fileName) + "; now aborting this merge to prevent index corruption");
    }
  }

  /**
   * Copies the term vectors of every non-deleted document of {@code reader}
   * into {@code termVectorsWriter}.
   *
   * @param termVectorsWriter destination for the copied vectors
   * @param matchingVectorsReader reader used for the raw bulk-copy path, or
   *        {@code null} to copy document by document through {@code reader}
   * @param reader segment being merged; documents flagged in its deleted-docs
   *        bits are skipped
   * @throws IOException propagated from the readers or the writer
   * @throws MergeAbortedException presumably raised by {@code checkAbort.work}
   *         when the merge is aborted -- confirm against CheckAbort
   */
  private void copyVectorsWithDeletions(final TermVectorsWriter termVectorsWriter,
                                        final TermVectorsReader matchingVectorsReader,
                                        final IndexReader reader)
    throws IOException, MergeAbortedException {
    final int maxDoc = reader.maxDoc();
    final Bits delDocs = reader.getDeletedDocs();
    if (matchingVectorsReader != null) {
      // We can bulk-copy because the fieldInfos are "congruent"
      // docNum is advanced inside the loop body, hence no increment clause.
      for (int docNum = 0; docNum < maxDoc;) {
        if (delDocs.get(docNum)) {
          // skip deleted docs
          ++docNum;
          continue;
        }
        // We can optimize this case (doing a bulk byte copy) since the field 
        // numbers are identical
        // Collect a run of consecutive live docs beginning at 'start'; the run
        // ends at maxDoc, at the next deleted doc, or once it holds
        // MAX_RAW_MERGE_DOCS documents.
        int start = docNum, numDocs = 0;
        do {
          docNum++;
          numDocs++;
          if (docNum >= maxDoc) break;
          if (delDocs.get(docNum)) {
            // Step over the deleted doc that terminated the run so the outer
            // loop does not have to test it again.
            docNum++;
            break;
          }
        } while(numDocs < MAX_RAW_MERGE_DOCS);
        
        // Fetch the raw run [start, start + numDocs) and append it unchanged;
        // rawDocLengths/rawDocLengths2 presumably receive the per-document
        // lengths needed by the writer -- confirm against TermVectorsReader.
        matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
        termVectorsWriter.addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
        checkAbort.work(300 * numDocs);
      }
    } else {
      // Slow path: no raw-capable vectors reader, copy one document at a time.
      for (int docNum = 0; docNum < maxDoc; docNum++) {
        if (delDocs.get(docNum)) {
          // skip deleted docs
          continue;
        }
        
        // NOTE: it's very important to first assign to vectors then pass it to
        // termVectorsWriter.addAllDocVectors; see LUCENE-1282
        TermFreqVector[] vectors = reader.getTermFreqVectors(docNum);
        termVectorsWriter.addAllDocVectors(vectors);
        checkAbort.work(300);
      }
    }
  }
  
  /**
   * Copies the term vectors of all documents of {@code reader} into
   * {@code termVectorsWriter}; no deletion check is performed.
   *
   * @param termVectorsWriter destination for the copied vectors
   * @param matchingVectorsReader reader used for the raw bulk-copy path, or
   *        {@code null} to copy document by document through {@code reader}
   * @param reader segment being merged
   */
  private void copyVectorsNoDeletions(final TermVectorsWriter termVectorsWriter,
                                      final TermVectorsReader matchingVectorsReader,
                                      final IndexReader reader)
      throws IOException, MergeAbortedException {
    final int maxDoc = reader.maxDoc();

    if (matchingVectorsReader == null) {
      // Slow path: materialize the vectors of each document in turn.
      int docNum = 0;
      while (docNum < maxDoc) {
        // NOTE: keep the vectors in a local before handing them to
        // termVectorsWriter.addAllDocVectors; see LUCENE-1282
        final TermFreqVector[] docVectors = reader.getTermFreqVectors(docNum);
        termVectorsWriter.addAllDocVectors(docVectors);
        checkAbort.work(300);
        docNum++;
      }
      return;
    }

    // Bulk path (fieldInfos are "congruent"): move the documents in batches
    // of at most MAX_RAW_MERGE_DOCS.
    for (int copied = 0; copied < maxDoc; ) {
      final int batch = Math.min(MAX_RAW_MERGE_DOCS, maxDoc - copied);
      matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, copied, batch);
      termVectorsWriter.addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, batch);
      copied += batch;
      checkAbort.work(300 * batch);
    }
  }

  /**
   * Returns the {@code segmentCodecs} held by the current segment write state.
   * With assertions enabled, fails if the write state has not been set.
   */
  SegmentCodecs getSegmentCodecs() {
    assert this.segmentWriteState != null;
    return this.segmentWriteState.segmentCodecs;
  }

  /**
   * Merges the postings of all readers into the new segment.
   *
   * <p>The method collects the {@link Fields} of every reader (restricted to
   * {@code indexedFields} through {@code IndexedFields}), populates a fresh
   * {@code MergeState} with per-reader deletion counts, doc bases and, for
   * readers with deletions, old-to-new docID maps, and finally lets the
   * codec's {@code FieldsConsumer} perform the merge.
   *
   * @throws CorruptIndexException propagated from the readers or the consumer
   * @throws IOException propagated from the readers or the consumer
   */
  private final void mergeTerms() throws CorruptIndexException, IOException {

    int docBase = 0;

    final List<Fields> fields = new ArrayList<Fields>();
    final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();
    final List<Bits> bits = new ArrayList<Bits>();
    final IntList bitsStarts = new IntList();

    // Gather one slice per reader that actually has postings; docBase here
    // advances by maxDoc, i.e. it counts deleted docs as well.
    for(IndexReader r : readers) {
      final Fields f = r.fields();
      final int maxDoc = r.maxDoc();
      if (f != null) {
        slices.add(new ReaderUtil.Slice(docBase, maxDoc, fields.size()));
        fields.add(new IndexedFields(f, indexedFields));
        bits.add(r.getDeletedDocs());
        bitsStarts.addInt(docBase);
      }
      docBase += maxDoc;
    }

    // Closing entry: one past the last collected reader's range.
    bitsStarts.addInt(docBase);

    // we may gather more readers than mergeState.readerCount
    mergeState = new MergeState();
    mergeState.readers = readers;
    mergeState.readerCount = readers.size();
    mergeState.fieldInfos = fieldInfos;
    mergeState.mergedDocCount = mergedDocs;
    
    // Remap docIDs
    mergeState.delCounts = new int[mergeState.readerCount];
    mergeState.docMaps = new ChunkedIntList[mergeState.readerCount];
    mergeState.docBase = new int[mergeState.readerCount];
    mergeState.hasPayloadProcessorProvider = payloadProcessorProvider != null;
    mergeState.dirPayloadProcessor = new PayloadProcessorProvider.DirPayloadProcessor[mergeState.readerCount];
    mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[mergeState.readerCount];
    mergeState.checkAbort = checkAbort;

    // From here on docBase advances by numDocs, i.e. it is the base in the
    // merged (deletion-free) docID space.
    docBase = 0;

    for(int i=0;i<mergeState.readerCount;i++) {

      final IndexReader reader = readers.get(i);

      mergeState.delCounts[i] = reader.numDeletedDocs();
      mergeState.docBase[i] = docBase;
      docBase += reader.numDocs();
      if (mergeState.delCounts[i] != 0) {
        // Only readers with deletions get a doc map; for the others
        // mergeState.docMaps[i] stays null.
        int delCount = 0;
        final Bits delDocs = reader.getDeletedDocs();
        assert delDocs != null;
        final int maxDoc = reader.maxDoc();
        final ChunkedIntList docMap = mergeState.docMaps[i] =
            new ChunkedIntList(maxDoc, true);
        int newDocID = 0;
        for(int j=0;j<maxDoc;j++) {
          if (delDocs.get(j)) {
            // Deleted docs map to -1.
            docMap.setInt(j, -1);
            delCount++;  // only for assert
          } else {
            docMap.setInt(j, newDocID++);
          }
        }
        assert delCount == mergeState.delCounts[i]: "reader delCount=" + mergeState.delCounts[i] + " vs recomputed delCount=" + delCount;
      }
      
      if (payloadProcessorProvider != null) {
        mergeState.dirPayloadProcessor[i] = payloadProcessorProvider.getDirProcessor(reader.directory());
      }
    }

    // The codec used to write the new segment comes from the segment write
    // state's segmentCodecs.
    codec = segmentWriteState.segmentCodecs.codec();
    final FieldsConsumer consumer = codec.fieldsConsumer(segmentWriteState);

    // NOTE: this is silly, yet, necessary -- we create a
    // MultiBits as our skip docs only to have it broken
    // apart when we step through the docs enums in
    // MultiDocsEnum.
    mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts);
    
    try {
      consumer.merge(mergeState,
                     new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
                                     slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY)));
    } finally {
      consumer.close();
    }
  }

  // Bookkeeping for the postings merge; a new instance is assigned in
  // mergeTerms() and its delCounts are exposed through getDelCounts().
  private MergeState mergeState;

//  int[][] getDocMaps() {
//    return mergeState.docMaps;
//  }

  /**
   * Returns the per-reader deletion counts stored in {@code mergeState}
   * (the array itself, not a copy). {@code mergeState} must already be set,
   * which {@code mergeTerms()} does.
   */
  int[] getDelCounts() {
    final int[] perReaderDelCounts = mergeState.delCounts;
    return perReaderDelCounts;
  }
  
  /**
   * Writes the merged norms file for the new segment.
   *
   * <p>For every field that is indexed and does not omit norms, the norm
   * bytes of each reader are appended in reader order, leaving out the bytes
   * of deleted documents. The output file (with its {@code NORMS_HEADER}) is
   * created lazily, so nothing is written when no field carries norms.
   *
   * @throws IOException if reading norms or writing the output fails
   */
  private void mergeNorms() throws IOException {
    IndexOutput output = null;
    try {
      for (int i = 0, numFieldInfos = fieldInfos.size(); i < numFieldInfos; i++) {
        final FieldInfo fi = fieldInfos.fieldInfo(i);
        if (fi.isIndexed && !fi.omitNorms) {
          if (output == null) { 
            // First field with norms: create the file and write the header.
            output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION));
            output.writeBytes(NORMS_HEADER,NORMS_HEADER.length);
          }
          for (IndexReader reader : readers) {
            final int maxDoc = reader.maxDoc();
            byte[] normBuffer = reader.norms(fi.name);
            if (normBuffer == null) {
              // Can be null if this segment doesn't have
              // any docs with this field.  A freshly allocated
              // byte[] is already all zeros, so no explicit
              // fill is needed.
              normBuffer = new byte[maxDoc];
            }
            if (!reader.hasDeletions()) {
              //optimized case for segments without deleted docs
              output.writeBytes(normBuffer, maxDoc);
            } else {
              // this segment has deleted docs, so we have to
              // check for every doc if it is deleted or not
              final Bits delDocs = reader.getDeletedDocs();
              for (int k = 0; k < maxDoc; k++) {
                if (!delDocs.get(k)) {
                  output.writeByte(normBuffer[k]);
                }
              }
            }
            checkAbort.work(maxDoc);
          }
        }
      }
    } finally {
      if (output != null) { 
        output.close();
      }
    }
  }

  /**
   * {@link Fields} view over another {@code Fields} instance that only
   * exposes the fields whose names are contained in a given set.
   */
  private static class IndexedFields extends Fields {
    /** Wrapped fields that all lookups are forwarded to. */
    private final Fields delegate;
    /** Names of the fields that remain visible through this view. */
    private final Set<String> visibleFields;

    public IndexedFields(
      final Fields fields,
      final Set<String> indexedFields)
    {
      this.delegate = fields;
      this.visibleFields = indexedFields;
    }

    /**
     * Returns the wrapped instance's terms for {@code field}, or
     * {@code null} when the field is not in the visible set.
     */
    @Override
    public Terms terms(final String field) throws IOException {
      return visibleFields.contains(field) ? delegate.terms(field) : null;
    }

    /** Returns an enum over the wrapped fields that skips non-visible names. */
    @Override
    public FieldsEnum iterator() throws IOException {
      final FieldsEnum unfiltered = delegate.iterator();
      return new IndexedFieldsEnum(unfiltered, visibleFields);
    }
  }

  /**
   * {@link FieldsEnum} wrapper whose {@link #next()} skips every field name
   * that is not contained in a given set; all other calls are forwarded to
   * the wrapped enum unchanged.
   */
  private static class IndexedFieldsEnum extends FieldsEnum {
    /** Wrapped enum that supplies the unfiltered field sequence. */
    private final FieldsEnum delegate;
    /** Names of the fields that {@link #next()} is allowed to return. */
    private final Set<String> visibleFields;

    public IndexedFieldsEnum(
      final FieldsEnum fieldsEnum,
      final Set<String> indexedFields)
    {
      this.delegate = fieldsEnum;
      this.visibleFields = indexedFields;
    }

    @Override
    public AttributeSource attributes() {
      return delegate.attributes();
    }

    /**
     * Advances the wrapped enum until it yields a visible field name or is
     * exhausted.
     *
     * @return the next visible field name, or {@code null} at the end
     */
    @Override
    public String next() throws IOException {
      String candidate = delegate.next();
      while (candidate != null && !visibleFields.contains(candidate)) {
        candidate = delegate.next();
      }
      return candidate;
    }

    /** Terms of the field the wrapped enum is currently positioned on. */
    @Override
    public TermsEnum terms() throws IOException {
      return delegate.terms();
    }

    @Override
    public TermsEnum terms(final boolean buffered) throws IOException {
      return delegate.terms(buffered);
    }
  }
}

