package ru.yandex.tours.indexer.wizard

import java.io.{BufferedOutputStream, FileOutputStream, File, OutputStream}

import org.apache.commons.io.IOUtils
import ru.yandex.tours.index.{ShardedIndexWriter, WizardIndexing, IndexWriter}
import ru.yandex.tours.index.composite.CompositeIndex
import ru.yandex.tours.index.shard.IndexShard
import ru.yandex.tours.util.{IO, Logging}

/**
 * Merges several [[IndexShard]]s into one index and writes the result through an
 * [[IndexWriter]], either to a stream, to a single file, or to a set of files via
 * [[ShardedIndexWriter]].
 *
 * Author: Vladislav Dolbilov (darl@yandex-team.ru)
 * Created: 16.03.15
 */
object IndexMerger extends ResourceMerger[IndexShard] with Logging {

  /** Number of written records between two progress log lines. */
  private val ProgressLogInterval = 100000

  /**
   * Runs `body` and then closes `resource`.
   *
   * If `body` completes normally the resource is closed with a plain `close()`, so an
   * error raised while closing (for example a failed final flush) reaches the caller
   * instead of being silently dropped. If `body` throws, the resource is closed quietly
   * so that the original exception is the one that propagates.
   *
   * @param resource resource to close after `body`
   * @param body     work to perform while the resource is open
   */
  private def closingAfter(resource: java.io.Closeable)(body: => Unit): Unit = {
    var succeeded = false
    try {
      body
      succeeded = true
    } finally {
      if (succeeded) resource.close()
      else IOUtils.closeQuietly(resource)
    }
  }

  /**
   * Merges `shards` and writes the result to `os` through a new [[IndexWriter]].
   *
   * The writer is closed when merging finishes.
   * NOTE(review): whether closing the writer also closes `os` depends on
   * IndexWriter, which is not visible here - confirm.
   *
   * @param shards shards to merge; must be non-empty
   * @param os     destination stream
   * @throws IllegalArgumentException if `shards` is empty
   */
  def merge(shards: Seq[IndexShard], os: OutputStream): Unit =
    merge(shards, new IndexWriter(os))

  /**
   * Merges `shards` into `writer`: writes a header followed by every item of the
   * composite index built from the shards, logging progress along the way.
   *
   * The writer is always closed once writing has started. On success, close errors are
   * propagated; on failure the writer is closed quietly and the original error is
   * rethrown. If `shards` is empty the precondition fails before the writer is touched,
   * so the writer is left open for the caller.
   *
   * @param shards shards to merge; must be non-empty
   * @param writer destination writer; closed by this method
   * @throws IllegalArgumentException if `shards` is empty
   */
  def merge(shards: Seq[IndexShard], writer: IndexWriter): Unit = {
    require(shards.nonEmpty, "shard must be non empty")
    log.info(s"Merging ${shards.size} shards")
    var written = 0
    closingAfter(writer) {
      val composite = CompositeIndex.optimized(shards.toVector)

      // The merged index carries the smallest freshness value among its shards.
      val freshness = shards.map(_.freshness).min
      // NOTE(review): the meaning of the trailing -1 is defined by IndexWriter.writeHeader - confirm.
      writer.writeHeader(WizardIndexing.formatVersion, freshness, -1)

      for (item <- composite.iterator) {
        writer.writeItem(item)
        written += 1
        if (written % ProgressLogInterval == 0) {
          log.info(s"Collected $written records")
        }
      }
    }
    log.info(s"Collected $written records")
  }

  /**
   * Merges `shards` into a single `file` through a buffered file stream.
   *
   * The stream is closed strictly after a successful merge so that a failed flush is
   * reported; it is closed quietly if the merge itself failed.
   *
   * @param shards shards to merge; must be non-empty
   * @param file   destination file
   */
  def mergeTo(shards: Seq[IndexShard], file: File): Unit = {
    val os = new BufferedOutputStream(new FileOutputStream(file))
    closingAfter(os)(merge(shards, os))
  }

  /**
   * Merges `shards` into several `files` by wrapping one [[IndexWriter]] per file in a
   * [[ShardedIndexWriter]].
   *
   * If opening any of the per-file writers (or building the sharded writer) fails, the
   * writers opened so far are closed quietly before the error propagates.
   *
   * @param shards shards to merge; must be non-empty
   * @param files  destination files, one writer is opened per file
   */
  def mergeTo(shards: Seq[IndexShard], files: IndexedSeq[File]): Unit = {
    // Writers opened so far, kept so they can be released if setup fails midway.
    var opened = List.empty[IndexWriter]
    var handedOver = false
    val sharded =
      try {
        val writers = files.map { file =>
          val shardWriter = new IndexWriter(file)
          opened ::= shardWriter
          shardWriter
        }
        val combined = new ShardedIndexWriter(writers)
        handedOver = true
        combined
      } finally {
        if (!handedOver) opened.foreach(w => IOUtils.closeQuietly(w))
      }
    IO.using(sharded) { writer =>
      merge(shards, writer)
    }
  }
}
