package ru.yandex.tours.extdataloader.verba.parsers

import java.io.{FileInputStream, OutputStream}

import ru.yandex.extdata.loader.engine.DataPersistenceManager
import ru.yandex.tours.extdata.DataTypes
import ru.yandex.tours.hotels.HotelsIndex
import ru.yandex.tours.indexer.task.{PeriodicUpdatable, TaskWeight}
import ru.yandex.tours.model.util.Util
import ru.yandex.tours.util.IO
import ru.yandex.tours.util.naming.{HotelNameId, HotelNameUtils}
import ru.yandex.tours.util.text.StringNormalizer

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.concurrent.duration._

/**
 * Periodic task that builds the "hotel name parts" data set: for every phrase
 * produced by `HotelNameUtils.generateSynonyms` from a hotel's names, synonyms
 * and partner aliases it records which (hotel, name) pairs the phrase came from
 * and how many words that name consisted of. The result is serialized as a
 * sequence of `Util.HotelNamePart` messages and handed to the
 * `DataPersistenceManager` under `DataTypes.hotelNameParts`.
 *
 * Author: Vladislav Dolbilov (darl@yandex-team.ru)
 * Created: 29.04.15
 *
 * @param getAliases             supplier of additional names, looked up by the `travelId` of a hotel's partner ids
 * @param hotelsIndex            source of hotels to index
 * @param updateTime             update period passed to [[PeriodicUpdatable]]
 * @param dataPersistenceManager sink that receives the serialized data
 */
class WizardHotelNamePartsIndexer(getAliases: () => collection.Map[Int, Seq[String]],
                                  hotelsIndex: HotelsIndex,
                                  updateTime: FiniteDuration,
                                  dataPersistenceManager: DataPersistenceManager)
  extends PeriodicUpdatable(updateTime, "hotel_name_parts") with TaskWeight.Unique {

  /**
   * Builds the phrase index in memory and writes it to `stream`, one
   * `Util.HotelNamePart` message per phrase (via `writeDelimitedTo`).
   *
   * The stream is neither flushed nor closed here; that is left to the caller.
   *
   * @param stream destination for the serialized messages
   */
  protected def convert(stream: OutputStream): Unit = {
    // Fetched once per run so that every hotel sees the same alias snapshot.
    val aliases: collection.Map[Int, Seq[String]] = getAliases()

    //phrase -> (hotel_id + name_id, count(words in name))
    val phrases = new mutable.HashMap[String, ArrayBuffer[(HotelNameId, Int)]]()

    for (hotel ← hotelsIndex.hotels) {
      val names = hotel.name.allValues.flatMap(HotelNameUtils.splitNameToNames)
      val synonyms = hotel.synonyms.flatMap(HotelNameUtils.splitNameToNames)
      // Aliases are used as-is, i.e. they are not passed through splitNameToNames.
      val allAliases = hotel.partnerIds.map(_.travelId).flatMap(aliases.getOrElse(_, Seq.empty))
      // The position of a name in this de-duplicated list becomes its name id.
      val allNames = (names ++ synonyms ++ allAliases).distinct

      for {
        (name, nameId) ← allNames.zipWithIndex
        normalized = StringNormalizer.normalizeString(name)
        words0 = HotelNameUtils.splitNameToWords(normalized)
        // Single-word names get the word "отель" prepended; the stored word
        // count below therefore includes that extra word.
        words = if (words0.length == 1) "отель" +: words0 else words0
        id = HotelNameId(hotel.id, nameId)
        wordCount = words.length
        variant ← HotelNameUtils.generateSynonyms(words)
      } {
        val buffer = phrases.getOrElseUpdate(variant, mutable.ArrayBuffer.empty)
        buffer += (id → wordCount)
      }
    }

    for ((phrase, data) ← phrases) {
      val builder = Util.HotelNamePart.newBuilder()
        .setPhrase(phrase)

      for ((hotelNameId, wordCount) ← data) {
        builder.addHotelBuilder()
          .setHotelId(hotelNameId.hotelId)
          .setNameId(hotelNameId.nameIndex)
          .setWords(wordCount)
      }

      builder.build().writeDelimitedTo(stream)
    }
  }


  /**
   * Serializes the index into a temporary file, passes its content to the
   * persistence manager and removes the temporary file afterwards.
   */
  override def run(): Unit = {
    val file = IO.usingTmp("name_parts") { stream ⇒ convert(stream) }
    try {
      val input = new FileInputStream(file)
      try {
        dataPersistenceManager.checkAndStore(DataTypes.hotelNameParts, input)
      } finally {
        // Release the file descriptor before the file is deleted; previously the
        // stream was never closed by this class.
        // NOTE(review): assumes checkAndStore consumes the stream synchronously — confirm.
        // A second close() (if checkAndStore already closed it) is harmless.
        input.close()
      }
    } finally {
      IO.deleteFile(file)
    }
  }
}
