package org.apache.lucene.index.codecs;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.codecs.bloom.FuzzySet;
import org.apache.lucene.index.codecs.bloom.BloomFilterFactory;
import org.apache.lucene.index.codecs.bloom.DefaultBloomFilterFactory;
import org.apache.lucene.index.codecs.yandex.YandexPostingsWriter;
import org.apache.lucene.store.Compressor;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.DeflateOutputStream;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.RamUsageEstimator;

// TODO: currently we encode all terms between two indexed
// terms as a block; but, we could decouple the two, ie
// allow several blocks in between two indexed terms

/**
 * Writes terms dict, block-encoding (column stride) each
 * term's metadata for each set of terms between two
 * index terms.
 *
 * @lucene.experimental
 */

public class YandexTermsWriter extends FieldsConsumer {
  /** Codec name written into the header of the Bloom filter file. */
  public static final String BLOOM_CODEC_NAME = "BloomFilter";
  /** Version written into the header of the Bloom filter file. */
  public static final int BLOOM_CODEC_VERSION = 1;
  
  /** Extension of Bloom Filters file */
  static final String BLOOM_EXTENSION = "blm";

  /**
   * Base codec name for the terms file header; {@code codecName()} appends
   * the compressor id to it when that id is non-null.
   */
  final static String CODEC_NAME = "YANDEX_TERMS_DICT";
  /**
   * Creates the per-field Bloom filters and, at close time, decides whether
   * a filter is saturated and how it is downsized before being saved.
   */
  BloomFilterFactory bloomFilterFactory = new DefaultBloomFilterFactory();

  // Initial format
  public static final int VERSION_START = 0;

  /** Format version written into the terms file header. */
  public static final int VERSION_CURRENT = VERSION_START;

  /** Extension of terms file */
  static final String TERMS_EXTENSION = "tib";
  
  /**
   * Threshold compared against the length of the per-field block buffer:
   * once the buffer reaches it, the pending block is flushed before the
   * next term is added.
   */
  private final int blockSize;

  /** Terms file output; also handed to the postings writer at construction. */
  protected final IndexOutput out;
  /** Postings writer shared by all fields; closed together with this writer. */
  final PostingsWriterBase postingsWriter;
  /** Field infos taken from the segment write state. */
  final FieldInfos fieldInfos;
  /** Field most recently passed to {@code addField}, or {@code null} before the first one. */
  FieldInfo currentField;
  /** Terms index writer; one field writer is requested from it per added field. */
  private final TermsIndexWriterBase termsIndexWriter;
  /** One terms writer per added field, in the order the fields were added. */
  private final List<TermsWriter> fields = new ArrayList<TermsWriter>();
  /** Names of the fields for which a Bloom filter is requested. */
  private final Set<String> bloomSet;
  /** Bloom filters created so far, keyed by field, kept in insertion order. */
  private Map<FieldInfo,FuzzySet> bloomFilters =
    new LinkedHashMap<FieldInfo,FuzzySet>();
  /** Segment write state; supplies the directory, segment name and codec id used for file names. */
  private final SegmentWriteState state;
  /** Compressor passed to each field's block buffer; its id becomes part of the codec name. */
  private final Compressor compressor;

  //private final String segment;

  /**
   * Creates the terms file for the segment, writes its header and lets the
   * postings writer emit its own header into the same output.
   *
   * <p>If writing either header fails, the freshly created terms output is
   * closed before the exception is propagated so that the file handle does
   * not leak.
   *
   * @param compressor compressor handed to every per-field block buffer; its
   *        id is part of the codec name written into the header
   * @param termsIndexWriter terms index writer that receives index terms
   * @param state segment write state (directory, segment name, codec id, field infos)
   * @param postingsWriter postings writer shared by all fields
   * @param blockSize block buffer length at which a pending block is flushed
   * @param bloomSet names of the fields that should get a Bloom filter
   * @throws IOException if the terms file cannot be created or a header
   *         cannot be written
   */
  public YandexTermsWriter(
      Compressor compressor,
      TermsIndexWriterBase termsIndexWriter,
      SegmentWriteState state,
      PostingsWriterBase postingsWriter,
      int blockSize,
      Set<String> bloomSet)
    throws IOException
  {
    // The compressor must be set before writeHeader(): the codec name is derived from it.
    this.compressor = compressor;
    this.blockSize = blockSize;
    this.termsIndexWriter = termsIndexWriter;
    this.state = state;
    this.postingsWriter = postingsWriter;
    this.bloomSet = bloomSet;
    this.fieldInfos = state.fieldInfos;
    this.currentField = null;

    final String termsFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_EXTENSION);
    out = state.directory.createOutput(termsFileName);
    try {
      writeHeader(out);
      postingsWriter.start(out);                          // have consumer write its format/header
    } catch (IOException | RuntimeException e) {
      // Nobody else holds a reference to the output yet, so release it here.
      try {
        out.close();
      } catch (IOException closeFailure) {
        e.addSuppressed(closeFailure);
      }
      throw e;
    }
  }

  /**
   * Returns the codec name used in the terms file header: {@link #CODEC_NAME}
   * alone when the compressor reports no id, otherwise {@link #CODEC_NAME}
   * followed by an underscore and the compressor id.
   */
  private String codecName() {
    return compressor.id() == null
        ? CODEC_NAME
        : CODEC_NAME + "_" + compressor.id();
  }

  /**
   * Writes the codec header followed by an eight-byte placeholder that
   * {@code writeTrailer} later overwrites with the directory start position.
   *
   * @param out output to write the header to
   * @throws IOException if writing fails
   */
  protected void writeHeader(IndexOutput out) throws IOException {
    final String headerCodec = codecName();
    CodecUtil.writeHeader(out, headerCodec, VERSION_CURRENT);

    // leave space for end index pointer
    out.writeLong(0L);
  }

  /**
   * Starts a new field and returns the consumer that receives its terms.
   *
   * <p>A Bloom filter is requested from the factory only for fields named in
   * the Bloom set; a {@code null} answer from the factory means the field
   * gets no filter. Fields are expected in strictly increasing name order
   * (checked by an assertion).
   *
   * @param field the field to start
   * @return the terms consumer for {@code field}
   * @throws IOException if the terms index writer fails
   */
  @Override
  public TermsConsumer addField(FieldInfo field) throws IOException {
    final FuzzySet filter =
        bloomSet.contains(field.name) ? bloomFilterFactory.getSetForField(state, field) : null;
    if (filter != null) {
      bloomFilters.put(field, filter);
    }

    assert currentField == null || currentField.name.compareTo(field.name) < 0;
    currentField = field;

    // The index writer is told where in the terms file this field begins.
    final long fieldStart = out.getFilePointer();
    final TermsIndexWriterBase.FieldWriter indexFieldWriter = termsIndexWriter.addField(field, fieldStart);
    final TermsWriter fieldTerms = new TermsWriter(indexFieldWriter, field, postingsWriter, filter, state);
    fields.add(fieldTerms);
    return fieldTerms;
  }

  /**
   * Finishes the terms file and writes the Bloom filter file, then closes the
   * terms output, the postings writer and the terms index writer (in that
   * order, each one even if an earlier step failed).
   *
   * @throws IOException if writing or closing fails
   */
  @Override
  public void close() throws IOException {
    try {
      final long dirStart = writeFieldDirectory();
      writeTrailer(dirStart);
      writeBloomFilters();
      //We are done with large bitsets so no need to keep them hanging around
      bloomFilters.clear();
    } finally {
      try {
        out.close();
      } finally {
        try {
          postingsWriter.close();
        } finally {
          termsIndexWriter.close();
        }
      }
    }
  }

  /**
   * Writes the per-field directory (fields with at least one term only) at
   * the current position of the terms file.
   *
   * @return the file position at which the directory starts
   */
  private long writeFieldDirectory() throws IOException {
    final List<TermsWriter> nonEmpty = new ArrayList<TermsWriter>();
    for (TermsWriter candidate : fields) {
      if (candidate.numTerms > 0) {
        nonEmpty.add(candidate);
      }
    }

    final long dirStart = out.getFilePointer();
    out.writeVInt(nonEmpty.size());
    for (TermsWriter writer : nonEmpty) {
      final FieldInfo info = writer.fieldInfo;
      out.writeVInt(info.number);
      out.writeVLong(writer.numTerms);
      out.writeVLong(writer.termsStartPointer);
      // The total term frequency is only stored when frequencies are indexed.
      if (!info.omitTermFreqAndPositions) {
        out.writeVLong(writer.sumTotalTermFreq);
      }
    }
    return dirStart;
  }

  /**
   * Writes the Bloom filter file: a header, the number of saved filters, and
   * for each non-saturated filter the field number followed by the filter.
   */
  private void writeBloomFilters() throws IOException {
    final List<Entry<FieldInfo,FuzzySet>> toSave = new ArrayList<Map.Entry<FieldInfo,FuzzySet>>();
    for (Entry<FieldInfo,FuzzySet> candidate : bloomFilters.entrySet()) {
      final boolean saturated = bloomFilterFactory.isSaturated(candidate.getValue(), candidate.getKey());
      if (!saturated) {
        toSave.add(candidate);
      }
    }

    final String bloomFileName = IndexFileNames.segmentFileName(
        state.segmentName, state.codecId, BLOOM_EXTENSION);
    try (IndexOutput bloomOutput = state.directory.createOutput(bloomFileName)) {
      CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION);

      // First field in the output file is the number of fields+blooms saved
      bloomOutput.writeInt(toSave.size());
      for (Entry<FieldInfo,FuzzySet> saved : toSave) {
        final FieldInfo info = saved.getKey();
        bloomOutput.writeInt(info.number);
        saveAppropriatelySizedBloomFilter(bloomOutput, saved.getValue(), info);
      }
    }
  }

  /**
   * Serializes a Bloom filter, preferring the downsized variant offered by
   * the factory and falling back to the filter itself when the factory
   * returns {@code null}.
   *
   * @param bloomOutput output of the Bloom filter file
   * @param bloomFilter the filter collected for the field
   * @param fieldInfo the field the filter belongs to
   * @throws IOException if serialization fails
   */
  private void saveAppropriatelySizedBloomFilter(IndexOutput bloomOutput,
    FuzzySet bloomFilter, FieldInfo fieldInfo) throws IOException {

    final FuzzySet downsized = bloomFilterFactory.downsize(fieldInfo, bloomFilter);
    final FuzzySet toWrite = (downsized != null) ? downsized : bloomFilter;
    toWrite.serialize(bloomOutput);
  }

  /**
   * Seeks back to the position right after the codec header and stores the
   * directory start there, replacing the placeholder written with the header.
   *
   * @param dirStart file position at which the field directory starts
   * @throws IOException if seeking or writing fails
   */
  protected void writeTrailer(long dirStart) throws IOException {
    final long pointerSlot = CodecUtil.headerLength(codecName());
    out.seek(pointerSlot);
    out.writeLong(dirStart);
  }

  class TermsWriter extends TermsConsumer {
    /** Field whose terms this writer receives. */
    private final FieldInfo fieldInfo;
    /** Postings writer set to this field at construction and returned from {@code startTerm}. */
    private final PostingsWriterBase postingsWriter;
    /** Position of the terms file at the time this writer was created. */
    private final long termsStartPointer;
    /** Number of terms finished so far for this field. */
    private long numTerms;
    /** Terms index writer for this field; receives index terms and block start positions. */
    private final TermsIndexWriterBase.FieldWriter fieldIndexWriter;
    /** Value handed to {@code finish}; read by the enclosing writer when it writes the field directory. */
    long sumTotalTermFreq;
    /** Bloom filter collecting this field's terms, or {@code null} when the field has none. */
    private final FuzzySet bloomFilter;
    /** Segment write state passed to the constructor; not read anywhere in this class. */
    private final SegmentWriteState writerState;
    /** Buffer for the term entries of the current block; deflated into {@code tmpWriter} on flush. */
    private final DeflateOutputStream bytesWriter;//= new DeflateOutputStream( 2040 );
    /** Scratch buffer that holds the deflated block before it is copied to the terms file. */
    private final RAMOutputStream tmpWriter = new RAMOutputStream();

    /** Number of terms added to {@code bytesWriter} since the last block flush. */
    private int pendingCount;

    /**
     * Creates the writer for one field: remembers where the field starts in
     * the terms file, points the postings writer at the field and allocates
     * the block buffer with the enclosing writer's compressor.
     *
     * @param fieldIndexWriter terms index writer for this field
     * @param fieldInfo the field being written
     * @param postingsWriter postings writer shared by all fields
     * @param bloomFilter Bloom filter for the field, or {@code null}
     * @param writerState segment write state
     */
    TermsWriter(
        TermsIndexWriterBase.FieldWriter fieldIndexWriter,
        FieldInfo fieldInfo,
        PostingsWriterBase postingsWriter,
        FuzzySet bloomFilter,
        SegmentWriteState writerState)
    {
      this.fieldIndexWriter = fieldIndexWriter;
      this.fieldInfo = fieldInfo;
      this.postingsWriter = postingsWriter;
      this.bloomFilter = bloomFilter;
      this.writerState = writerState;

      this.termsStartPointer = out.getFilePointer();
      postingsWriter.setField(fieldInfo);
      // NOTE(review): 2040 is presumably the initial buffer size - confirm against DeflateOutputStream.
      this.bytesWriter = new DeflateOutputStream(2040, compressor);
    }
    
    /**
     * Returns the comparator obtained from
     * {@code BytesRef.getUTF8SortedAsUnicodeComparator()}.
     */
    @Override
    public Comparator<BytesRef> getComparator() {
      final Comparator<BytesRef> termOrder = BytesRef.getUTF8SortedAsUnicodeComparator();
      return termOrder;
    }

    /**
     * Notifies the postings writer that a new term begins and returns it as
     * the consumer for that term's postings.
     *
     * @param text the term being started (not inspected here)
     * @return the postings writer
     * @throws IOException if the postings writer fails
     */
    @Override
    public PostingsConsumer startTerm(BytesRef text) throws IOException {
      final PostingsWriterBase consumer = postingsWriter;
      consumer.startTerm();
      return consumer;
    }

    /** Copy of the term most recently passed to {@code finishTerm}; empty before the first term. */
    private final BytesRef lastPrevTerm = new BytesRef();

    /**
     * Records a finished term in the current block buffer.
     *
     * <p>Steps, in order: add the term to the Bloom filter (if any); ask the
     * index writer whether this is an index term; if the block buffer has
     * reached the block size, flush the pending block and register the term
     * in the index at the new file position; otherwise, if it is an index
     * term, register it at the current file position. Then append the
     * prefix-coded term, its doc frequency and (unless omitted) the
     * difference between total term frequency and doc frequency to the
     * buffer, and let the postings writer add its per-term data.
     *
     * @param text the finished term
     * @param stats statistics of the finished term
     * @throws IOException if writing fails, or if the index writer selects an
     *         index term while terms are still pending in the block
     */
    @Override
    public void finishTerm(BytesRef text, TermStats stats) throws IOException {

      assert stats.docFreq > 0;
      if (bloomFilter != null && stats.docFreq > 0) {
        bloomFilter.addValue(text);
      }

      final boolean indexTerm = fieldIndexWriter.checkIndexTerm(text, stats);
      final long fpBeforeFlush = out.getFilePointer();
      final boolean blockFull = bytesWriter.length() >= blockSize;

      if (blockFull) {
        // Instead of writing each term, live, we gather terms
        // in RAM in a pending buffer, and then write the
        // entire block in between index terms:
        if (pendingCount > 0) {
          flushBlock();
        }
        // The term opens the next block, so it is indexed at the position after the flush.
        fieldIndexWriter.add(text, stats, out.getFilePointer());
      } else if (indexTerm) {
        if (pendingCount != 0) {
          throw new IOException( "isIndexTerm must be set only in first term" );
        }
        fieldIndexWriter.add(text, stats, fpBeforeFlush);
      }

      // Term bytes are prefix-coded against the previously finished term.
      final int prefixLength = sharedPrefix(lastPrevTerm, text);
      final int suffixLength = text.length - prefixLength;
      bytesWriter.writeVInt(prefixLength);
      bytesWriter.writeVInt(suffixLength);
      bytesWriter.writeBytes(text.bytes, text.offset + prefixLength, suffixLength);

      bytesWriter.writeVInt(stats.docFreq);
      if (!fieldInfo.omitTermFreqAndPositions) {
        bytesWriter.writeVLong(stats.totalTermFreq - stats.docFreq);
      }

      // The postings writer is handed the block buffer before it finishes the term.
      final YandexPostingsWriter yandexPostings = (YandexPostingsWriter) postingsWriter;
      yandexPostings.setWriter(bytesWriter);
      postingsWriter.finishTerm(stats);

      pendingCount++;
      numTerms++;
      lastPrevTerm.copy(text);
    }

    /**
     * Finishes all terms in this field: flushes the pending block, registers
     * a sentinel term in the terms index, writes the end-of-field marker and
     * closes the block buffer.
     *
     * <p>The sentinel is the last finished term with the ASCII bytes
     * {@code _FAKE_NON_EXISTS} appended. It is assembled at byte level so the
     * bytes of the last term are carried over unchanged even when they are
     * not valid UTF-8 (a String round trip could alter such bytes). For
     * valid UTF-8 terms the result is the same as decoding, concatenating
     * and re-encoding.
     *
     * <p>The block buffer is closed even when an earlier step throws.
     *
     * @param sumTotalTermFreq total term frequency of the field, stored for
     *        the field directory
     * @throws IOException if writing fails
     */
    @Override
    public void finish(long sumTotalTermFreq) throws IOException {
      try {
        if (pendingCount > 0) {
          flushBlock();
        }

        final BytesRef marker = new BytesRef("_FAKE_NON_EXISTS");
        final byte[] sentinel = new byte[lastPrevTerm.length + marker.length];
        System.arraycopy(lastPrevTerm.bytes, lastPrevTerm.offset, sentinel, 0, lastPrevTerm.length);
        System.arraycopy(marker.bytes, marker.offset, sentinel, lastPrevTerm.length, marker.length);
        fieldIndexWriter.add( new BytesRef(sentinel, 0, sentinel.length), null, 0 );

        // EOF marker:
        out.writeVInt(0);

        this.sumTotalTermFreq = sumTotalTermFreq;
        fieldIndexWriter.finish(out.getFilePointer());
      } finally {
        bytesWriter.close();
      }
    }

    /**
     * Returns the number of leading bytes the two terms have in common.
     *
     * <p>Each term is read from its own {@code offset}, so terms that do not
     * start at index 0 of their backing array are compared correctly.
     *
     * @param term1 first term
     * @param term2 second term
     * @return length of the common prefix, between 0 and the shorter term's length
     */
    private int sharedPrefix(BytesRef term1, BytesRef term2) {
      final int firstDifference =
          Arrays.mismatch(
            term1.bytes,
            term1.offset,
            term1.offset + term1.length,
            term2.bytes,
            term2.offset,
            term2.offset + term2.length);
      // -1 means both ranges have the same length and content: everything is shared.
      return firstDifference >= 0 ? firstDifference : term1.length;
    }

    /**
     * Writes the buffered block to the terms file: the value returned by
     * deflating the buffer, the buffer's length, then the deflated bytes.
     * Afterwards both buffers are reset, the postings writer is told that the
     * block ended and the pending-term counter is cleared.
     *
     * @throws IOException if writing fails
     */
    private void flushBlock() throws IOException {
      final int deflatedSize = bytesWriter.deflateTo(tmpWriter);
      out.writeVInt(deflatedSize);
      // Plain narrowing cast: equivalent to masking with the int literal 0xFFFFFFFF before casting.
      out.writeVInt((int) bytesWriter.length());
      tmpWriter.writeTo(out);

      tmpWriter.reset();
      bytesWriter.reset();
      postingsWriter.flushTermsBlock();
      pendingCount = 0;
    }
  }
}
