package ru.yandex.msearch;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.document.Document;
import org.apache.lucene.document.MapFieldSelector;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.CoreCodecProvider;
import org.apache.lucene.index.codecs.fast_commit.FastCommitCodec;
import org.apache.lucene.index.codecs.yandex.YandexCodec;
import org.apache.lucene.index.codecs.yandex2.Yandex2Codec;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.store.AesflateCompressor;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.ZstdCompressor;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Bits;
import org.apache.lucene.search.DocIdSetIterator;

import java.io.File;

import java.util.Collections;
import java.util.HashSet;

import ru.yandex.msearch.collector.PruningCollector;

public class PrintKeys {

  /** Use the norms from one field for all fields.  Norms are read into memory,
   * using a byte of memory per document per searched field.  This can cause
   * search of large collections with a large number of fields to run out of
   * memory.  If all of the fields contain only a single token, then the norms
   * are all identical, then single norm vector may be shared. */
  private PrintKeys() {}

  /** Simple command-line based search demo. */
  public static void main(String[] args) throws Exception {
    String usage =
      "Usage: PrintKeys -i index_dir -f field -p prefix [-r (reverse)] [-d index_divisor] [-l line_limit] [-pd (print docs)] [-pp (print positions)] [-ff (print freqs) [-pf field1,field2 (print fields)]";
    if ( (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) || args.length < 6 ) {
      System.out.println(usage);
      System.exit(0);
    }

    String index = "index";
    String field = "contents";
    String prefix = "";
    String[] printFields = null;
    int	   limit = 0;
    int	   divisor = 1;
    boolean printFreqs = false;
    boolean printDocs = false;
    boolean printPositions = false;
    boolean reverse = false;
    boolean seekExact = false;

    for (int i = 0; i < args.length; i++) {
      if ("-i".equals(args[i])) {
        index = args[i+1];
        i++;
      } else if ("-f".equals(args[i])) {
        field = args[i+1];
        i++;
      } else if ("-p".equals(args[i])) {
        prefix = args[i+1];
        i++;
      } else if ("-d".equals(args[i])) {
        divisor = Integer.parseInt(args[i+1]);
        i++;
      } else if ("-pf".equals(args[i])) {
        printFields = args[i+1].split(",");
        i++;
      } else if ("-r".equals(args[i])) {
        reverse = true;
      } else if ("-ff".equals(args[i])) {
        printFreqs = true;
      } else if ("-pd".equals(args[i])) {
        printDocs = true;
      } else if ("-pp".equals(args[i])) {
        printPositions = true;
      } else if ("-pe".equals(args[i])) {
        seekExact = true;
      } else if ("-l".equals(args[i])) {
        limit = Integer.parseInt(args[i+1]);
        i++;
      }
    }

    FieldSelector fs = null;
    if (printFields != null) {
        fs = new MapFieldSelector(printFields);
    }

    System.err.println( "Limit: " + limit + "\nDivisor: " + divisor + "\n" + "Field: " + field + "\n" );
    CodecProvider cp = new CoreCodecProvider();
    cp.register( new FastCommitCodec(new HashSet()) );
    cp.register( new YandexCodec() );
    cp.register( new YandexCodec(new AesflateCompressor(1)) );
    cp.register( new Yandex2Codec() );
    cp.register( new Yandex2Codec(new AesflateCompressor(1)) );
    cp.register( new Yandex2Codec(new ZstdCompressor(1)) );

    IndexReader oreader = IndexReader.open( FSDirectory.open(new File(index)), null, true, divisor, cp );
//    IndexReader reader = oreader;
    for (IndexReader.AtomicReaderContext ctx : oreader.getTopReaderContext().leaves()) {
    IndexReader reader = ctx.reader;
    System.err.println("Reader: " + reader);
    TermsEnum terms;
    BytesRef term = null;
    BytesRef prefixRef = null;
    if (reverse) {
        terms = MultiFields.getFields(reader).terms( field ).reverseIterator();
    } else {
        terms = MultiFields.getFields(reader).terms( field ).iterator();
    }
	if( prefix.length() > 0 )
	{
            prefixRef = new BytesRef(prefix);
            BytesRef seekRef;
            if (reverse) {
                seekRef = new BytesRef(prefix);
                seekRef.append(PruningCollector.REVERSE_SEEK_SUFFIX);
            } else {
                seekRef = prefixRef;
            }
            if (seekExact) {
                boolean found = terms.seekExact(prefixRef, false);
                System.err.println("SeekExact: " + found);
            } else {
                terms.seek( prefixRef, true );
                TermState state = terms.termState();
                terms.seek( new BytesRef("QWEQWE"), true );
                terms.seek( prefixRef, true );
//	    terms.seek(prefixRef, state);
            }
            if (reverse) {
                prefixRef = new BytesRef(prefix);
	    }
	    term = terms.term();
	}
    DocsEnum td = null;
    DocsAndPositionsEnum pd = null;
    int  lines = 0;
    Bits skipDocs = new Bits.MatchNoBits( 0 );
    long time = System.currentTimeMillis();
    if( term == null ) term = terms.next();
    while( term != null )
    {
	if( term != null )
	{
	    if( prefixRef != null )
		if( !term.startsWith(prefixRef) ) break;
            if (printFreqs) {
		System.out.println(term.utf8ToString() + " " + terms.docFreq());
	    } else if(printFields != null) {
	        System.out.print(term.utf8ToString() + " ");
		td = terms.docs( skipDocs, td );
		int docId;
		while( (docId = td.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS )
                {
                    String sep = "";
                    Document doc = reader.document(docId, fs);
	            for (String f : printFields) {
	                System.out.print(sep);
	                sep = ",";
	                System.out.print(doc.get(f));
	            }
		    System.out.print(" ");
		}
		System.out.println("");
            } else if(printDocs) {
		System.out.print( term.utf8ToString() + " ( " + terms.docFreq() + " ) : " );
		td = terms.docs( skipDocs, td );
		int docId;
		while( (docId = td.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS )
		{
		    System.out.print( docId + " " );
		}
		System.out.println("");
	    } else if(printPositions) {
		System.out.print( term.utf8ToString() + " ( " + terms.docFreq() + " ) : " );
		pd = terms.docsAndPositions( skipDocs, pd );
		int docId;
		while( (docId = pd.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS )
		{
		    int freq = pd.freq();
		    System.out.print( "\n\t" + docId + " < " + freq + " > : \t" );
		    int pos;
		    for( int i = 0; i < freq; i++ )
		    {
			System.out.print( pd.nextPosition() + " " );
		    }
		}
		System.out.println("");
	    } else {
		System.out.println( term.utf8ToString() );
	    }
	    lines++;
	    if( limit > 0 && lines == limit ) break;
	}
	term = terms.next();
    }
    }
//    for (int i = 0; i < 100; i++) {
//        term = terms.next();
//    }
//    System.err.println( "Exec time: " + (System.currentTimeMillis() - time) );
//    reader.close();
  }
}
