package ru.yandex.msearch;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.fast_commit.FastCommitCodec;
import org.apache.lucene.index.codecs.yandex.YandexCodec;
import org.apache.lucene.document.MapFieldSelector;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.util.Bits;

import java.io.File;
import java.util.HashSet;
import java.util.Arrays;

/**
 * Command-line utility that prints selected stored fields of every
 * non-deleted document in a Lucene index, one document per output line.
 *
 * <p>Options:
 * <ul>
 *   <li>{@code -index dir} - index directory to read (default {@code index});</li>
 *   <li>{@code -f f1,f2,...} - comma-separated list of fields to print
 *       (default {@code mid,suid}). Listing {@code _docid} (case-insensitive)
 *       additionally prefixes every line with the internal document number.</li>
 * </ul>
 */
public class ListDocs {

  /** Usage line printed for {@code -h}/{@code -help} and on malformed options. */
  private static final String USAGE =
      "Usage: java ru.yandex.msearch.ListDocs [-index dir] [-f field1,field2,field3.....]";

  /** Field whose value is a {@code ;}-separated URL list that gets special formatting. */
  private static final String URLS_FIELD = "x_urls";

  /** Not instantiable: this class only hosts the {@link #main(String[])} entry point. */
  private ListDocs() {}

  /**
   * Opens the index and writes one line per live document to standard output.
   *
   * <p>Each line consists of the requested field values in the requested
   * order, each followed by a single space. A field that has no value in a
   * document is printed as {@code NULL}. Documents that cannot be loaded are
   * reported on standard error and skipped.
   *
   * @param args command-line options, see the class description
   * @throws Exception if the index cannot be opened or read
   */
  public static void main(String[] args) throws Exception {
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
      System.out.println(USAGE);
      System.exit(0);
    }

    String index = "index";
    String fieldsReq = "mid,suid";

    for (int i = 0; i < args.length; i++) {
      if ("-index".equals(args[i])) {
        index = optionValue(args, i);
        i++;
      } else if ("-f".equals(args[i])) {
        fieldsReq = optionValue(args, i);
        i++;
      }
    }

    String[] fields = fieldsReq.split(",");
    boolean printDocId = false;
    for (String f : fields) {
      if (f.equalsIgnoreCase("_docid")) {
        printDocId = true;
        break;
      }
    }

    // Codecs must be registered before the index is opened.
    // The raw HashSet is kept as in the original call: the element type
    // expected by FastCommitCodec is not visible from this file.
    CodecProvider cp = CodecProvider.getDefault();
    cp.register(new YandexCodec());
    cp.register(new FastCommitCodec(new HashSet()));
    cp.setDefaultFieldCodec("Yandex");

    IndexReader reader = IndexReader.open(NIOFSDirectory.get(new File(index)));
    try {
      // Load only the requested fields instead of whole documents.
      FieldSelector fs = new MapFieldSelector(fields);
      // null means the index has no deletions at all.
      Bits deleted = MultiFields.getDeletedDocs(reader);
      int maxDoc = reader.maxDoc();

      for (int i = 0; i < maxDoc; i++) {
        if (deleted != null && deleted.get(i)) {
          continue;
        }

        Document doc;
        try {
          doc = reader.document(i, fs);
        } catch (Exception e) {
          // Best effort: one unreadable document must not abort the dump.
          System.err.println("Failed to load document " + i);
          e.printStackTrace();
          continue;
        }

        StringBuilder line = new StringBuilder();
        if (printDocId) {
          line.append(i).append(' ');
        }
        for (String field : fields) {
          String value = doc.get(field);
          if (value == null) {
            value = "NULL";
          } else if (URLS_FIELD.equals(field)) {
            value = formatUrls(value);
          }
          line.append(value).append(' ');
        }
        System.out.println(line.toString());
      }
    } finally {
      // Release the reader even when iteration fails half-way.
      reader.close();
    }
  }

  /**
   * Returns the value that follows the option at {@code flagIndex}; prints
   * the usage message and exits with status 1 when the value is missing.
   */
  private static String optionValue(String[] args, int flagIndex) {
    if (flagIndex + 1 >= args.length) {
      System.err.println("Missing value for option " + args[flagIndex]);
      System.err.println(USAGE);
      System.exit(1);
    }
    return args[flagIndex + 1];
  }

  /**
   * Formats a {@code ;}-separated URL list as a leading newline followed by
   * the distinct URLs, one per line. The URL order is the iteration order of
   * a {@link HashSet}, i.e. unspecified.
   *
   * <p>A value consisting only of separators (for example {@code ";"}) splits
   * into an empty array; in that case just the leading newline is returned.
   */
  private static String formatUrls(String value) {
    HashSet<String> unique = new HashSet<String>(Arrays.asList(value.split(";")));
    StringBuilder sb = new StringBuilder("\n");
    boolean first = true;
    for (String url : unique) {
      if (!first) {
        sb.append('\n');
      }
      sb.append(url);
      first = false;
    }
    return sb.toString();
  }
}
