package ru.yandex.tours.indexer.wizard.parser

import com.google.common.hash.{BloomFilter, Funnels}
import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap
import ru.yandex.tours.query._
import ru.yandex.tours.query.parser.{ParserResources, ParsingStates, ParsingTrie}
import ru.yandex.tours.query.parser.ParsingTrie.Payload
import ru.yandex.tours.util.Logging
import ru.yandex.tours.util.lang.EnumExtractor
import ru.yandex.tours.util.naming.HotelNameId
import ru.yandex.tours.util.parsing.IntValue
import ru.yandex.tours.util.text.StringNormalizer.normalizeString
import ru.yandex.tours.util.trie.{BordBuilder, TrieBuilder}

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

/**
 * Builds `ParsingTrie` instances either from the query-parser resources or from an
 * explicit stream of pragmatic rules.
 *
 * Author: Vladislav Dolbilov (darl@yandex-team.ru)
 * Created: 06.10.15
 */
object ParsingTrieBuilder extends Logging {

  /** Largest hotel map that is still stored exactly; bigger maps are replaced by a Bloom filter. */
  private val MaxHotelsInMap = 7000

  /** Marker names of the form `hotel_<suffix>` / `hotels_<suffix>`; the group captures the suffix. */
  private val TypedHotelPattern = "hotels?_(.+)".r

  /** Marker names of the form `stars_N` with N in 1..5; the group captures the digit. */
  private val StarsPattern = "stars_([1-5])".r

  /** Extractor applied to the suffix captured by `TypedHotelPattern` to obtain a `QueryHotelType`. */
  private val HotelTypeByName = new EnumExtractor[QueryHotelType]

  /**
   * Builds the trie from everything `parserResources` provides.
   *
   * Rules are concatenated in this order: ignore words, markers, hotel name parts,
   * tour operators, regions, date intervals.
   *
   * @param parserResources source of the words and synonyms to index
   * @return trie containing one payload per (normalized pattern, pragmatic) pair
   */
  def build(parserResources: ParserResources): ParsingTrie = {
    // Each group is a local `def` handed to `Iterator.++`, which takes its argument by name,
    // so a group is only read from `parserResources` once iteration actually reaches it.
    def ignoredRules = Seq(PragmaticRule(Ignored, parserResources.getIgnoreWords))

    def markerRules =
      for ((name, patterns) <- parserResources.getMarkers)
        yield PragmaticRule(getMarker(name), patterns)

    def hotelRules =
      for ((namePart, hotels) <- parserResources.getHotelParts)
        yield PragmaticRule(convertMap(hotels), Seq(namePart))

    def operatorRules =
      for ((id, names) <- parserResources.getOperators)
        yield PragmaticRule(TourOperatorMarker(id), names)

    def regionRules =
      for ((id, names) <- parserResources.getRegions)
        yield PragmaticRule(SomeRegion(id), names)

    def dateRules =
      for (((start, end), names) <- parserResources.getDateIntervals)
        yield PragmaticRule(DateInterval(start, end), names)

    val noRules: Iterator[PragmaticRule] = Iterator.empty
    val allRules =
      noRules ++
        ignoredRules ++
        markerRules ++
        hotelRules ++
        operatorRules ++
        regionRules ++
        dateRules

    build(allRules)
  }

  /**
   * Builds the trie from `(pattern, pragmatic)` pairs; every pair becomes a rule
   * with exactly one pattern.
   *
   * @param data pairs of pattern text and the pragmatic it should produce
   * @return trie built from those single-pattern rules
   */
  def buildReqAns(data: Iterator[(String, Pragmatic)]): ParsingTrie = {
    val singlePatternRules = data.map { entry =>
      PragmaticRule(entry._2, Seq(entry._1))
    }
    build(singlePatternRules)
  }

  /**
   * Builds the trie from arbitrary pragmatic rules.
   *
   * Every pattern of every rule is normalized with `normalizeString` and added to a
   * `TrieBuilder`; the state returned for it receives a `Payload` holding the length of
   * the normalized pattern and the rule's pragmatic. `BordBuilder.buildBord` is then run
   * over the builder, and finally the collected payload buffers are converted to arrays.
   *
   * @param pragmaticRules rules to index; traversed exactly once
   * @return the finished trie together with its per-state payloads
   */
  def build(pragmaticRules: TraversableOnce[PragmaticRule]): ParsingTrie = {
    log.info("Building trie")
    val trieBuilder = new TrieBuilder
    // Sparse array: trie state id -> payloads attached to that state
    val payloadsByState = new mutable.HashMap[Int, ArrayBuffer[Payload]]

    pragmaticRules.foreach { rule =>
      rule.patterns.foreach { pattern =>
        val key = normalizeString(pattern)
        val state = trieBuilder.addString(key)
        payloadsByState.getOrElseUpdate(state, ArrayBuffer.empty) += Payload(key.length, rule.pragmatic)
      }
    }
    log.info("Filling trie complete")

    val gluingListener = new BordBuilder.StateGluingListener {
      // Whenever `fromState` is glued to `toState`, `fromState` additionally receives
      // every payload currently recorded for `toState`.
      def statesAreGlued(fromState: Int, toState: Int): Unit = {
        // Looked up before the `fromState` buffer is created, matching the original order
        val inherited = payloadsByState.getOrElse(toState, ArrayBuffer.empty)
        payloadsByState.getOrElseUpdate(fromState, ArrayBuffer.empty) ++= inherited
      }
    }
    BordBuilder.buildBord(trieBuilder, gluingListener)
    log.info("Building bord complete")

    val trie = trieBuilder.build
    // Freeze the growable buffers into plain arrays for ParsingStates
    val frozenPayloads = payloadsByState.map { case (state, payloads) => (state, payloads.toArray) }

    val finalTrie = new ParsingTrie(trie, ParsingStates(frozenPayloads))
    log.info(s"Building trie complete: $finalTrie")
    finalTrie
  }

  /**
   * Translates a marker name from the resources into its pragmatic.
   *
   * Cases are tried top to bottom. A name that matches none of them (including a
   * `hotel_<suffix>` whose suffix the hotel-type extractor rejects) yields `Ignored`.
   *
   * @param marker marker name as it appears in the resources
   * @return the matching pragmatic, or `Ignored` for unknown names
   */
  private def getMarker(marker: String): Pragmatic = marker match {
    case "hot"                                      => HotMarker
    case "tours"                                    => TourMarker
    case "buy"                                      => BuyMarker
    case "search"                                   => SearchMarker
    case "hotel" | "hotels"                         => HotelMarker(QueryHotelType.HOTEL)
    case TypedHotelPattern(HotelTypeByName(hotelType)) => HotelMarker(hotelType)
    case "official_site"                            => OfficialSiteMarker
    case "price"                                    => PriceMarker
    case "reviews"                                  => ReviewsMarker
    case "photos"                                   => PhotosMarker
    case "no_visa"                                  => NoVisa
    case StarsPattern(IntValue(count))              => Stars(count)
    case "cheap"                                    => CheapMarker
    case "ski_resorts"                              => SkiResorts
    case _                                          => Ignored
  }

  /**
   * Chooses a representation for the hotels behind one name part.
   *
   * Maps with at most `MaxHotelsInMap` entries are copied into a `Long2IntOpenHashMap`
   * keyed by `HotelNameId.id`. Larger maps keep only their keys, stored in a Bloom filter
   * whose false-positive probability is 0.4 above 100000 entries and 0.05 otherwise;
   * the map values are dropped in that case.
   *
   * @param map hotel name ids with their associated integer values
   * @return exact map for small inputs, Bloom filter over the ids for large ones
   */
  private def convertMap(map: Map[HotelNameId, Int]): HotelNamePart = {
    val hotelCount = map.size
    if (hotelCount > MaxHotelsInMap) {
      val falsePositiveRate = if (hotelCount > 100000) 0.4 else 0.05
      val bloom = BloomFilter.create[java.lang.Long](Funnels.longFunnel(), hotelCount, falsePositiveRate)
      for (nameId <- map.keys) bloom.put(nameId.id)
      HotelNamePartBloom(bloom)
    } else {
      val exact = new Long2IntOpenHashMap()
      for ((nameId, value) <- map) exact.put(nameId.id, value)
      HotelNamePartMap(exact)
    }
  }
}
