package ru.yandex.tours.geo.matching

import com.typesafe.config.Config
import ru.yandex.tours.geo.base.{Region, region}
import ru.yandex.tours.geo.partners.{PartnerRegionHeader, PartnerTree}
import ru.yandex.tours.model.Languages
import ru.yandex.tours.util.Collections._

/**
  * Matches partner regions to regions of the geo base by comparing normalized names.
  *
  * For every region of `regionTree` a set of lookup keys is built from its Russian name,
  * its English name, all of its name values, its synonyms and the extra `geoSynonyms`.
  * Partner regions are normalized the same way, and every key shared by both sides yields a
  * hypothesis whose confidence is the product of the partner-side and region-side confidences.
  *
  * Created by asoboll on 21.02.17.
  *
  * @param config      provides the doubles `ru-name-confidence`, `en-name-confidence`,
  *                    `synonym-confidence` and `cut-islands-coeff`
  * @param regionTree  regions to match partner regions against
  * @param geoSynonyms additional synonyms, looked up by region id
  */
class NameMatcher(config: Config,
                  regionTree: region.Tree,
                  geoSynonyms: Map[region.Id, Iterable[String]]) extends RegionMatcher {
  private val ruNameConfidence = config.getDouble("ru-name-confidence")
  private val enNameConfidence = config.getDouble("en-name-confidence")
  private val synonymConfidence = config.getDouble("synonym-confidence")
  private val cutIslandsCoeff = config.getDouble("cut-islands-coeff")

  /** A candidate region for a lookup key together with the confidence of that key. */
  private case class KeyEntry(region: Region, confidence: Double)

  // Lookup key -> candidate regions, keeping only the best-scoring entry per region.
  // A strict `map` is used instead of `mapValues`: `mapValues` returns a lazy view, which would
  // re-run the grouping below on every single lookup made by `createHypotheses`.
  private val key2regions = (for {
    geoRegion <- regionTree.regions
    (key, confidence) <- weightedKeys(Seq(
      (geoRegion.name.values.get(Languages.ru).toIterable, ruNameConfidence),
      (geoRegion.name.values.get(Languages.en).toIterable, enNameConfidence),
      (geoRegion.name.allValues.toIterable, synonymConfidence),
      (geoRegion.synonyms.toIterable, synonymConfidence),
      (geoSynonyms.getOrElse(geoRegion.id, Iterable.empty), synonymConfidence)
    ))
  } yield (key, KeyEntry(geoRegion, confidence))).toMultiMap.map { case (key, entries) =>
    key -> entries.groupBy(_.region).values.map(_.maxBy(_.confidence)).toVector
  }

  /**
    * Expands groups of names into normalized, non-empty lookup keys with their confidences.
    *
    * Every name is normalized twice: with island/peninsula words stripped and without.
    *
    * @param sources pairs of (names, base confidence of these names)
    * @return pairs of (key, confidence); the same key may occur more than once
    */
  private def weightedKeys(sources: Seq[(Iterable[String], Double)]): Seq[(String, Double)] =
    for {
      cutIslands <- Seq(true, false)
      (names, conf) <- sources
      name <- names
      key = prepareNameKey(name, cutIslands)
      if key.nonEmpty
      // NOTE(review): the un-stripped key is the one scaled by `cut-islands-coeff`, while the
      // stripped key keeps the full confidence - confirm that this direction is intended.
    } yield (key, conf * (if (cutIslands) 1.0 else cutIslandsCoeff))

  /**
    * Normalizes a name into a lookup key.
    *
    * Lower-cases the name, drops the "п-ов" abbreviation and underscores, and turns "-" and "/"
    * into spaces. With `cutIslands` the island/peninsula markers ("о.", "полуостров ",
    * "остров ", " island") are removed as well.
    *
    * @param name       raw region name
    * @param cutIslands whether to strip island/peninsula markers
    * @return trimmed key, possibly empty
    */
  private def prepareNameKey(name: String, cutIslands: Boolean = true): String = {
    // Locale.ROOT keeps the result independent of the JVM default locale.
    val key = name.toLowerCase(java.util.Locale.ROOT)
      .replace("п-ов", "").replace("-", " ").replace("/", " ").replace("_", "")
    if (cutIslands)
      // "полуостров " has to go before "остров ": the shorter token is a suffix of the longer
      // one, so stripping it first would leave a dangling "полу" glued to the name.
      key.replace("о.", "").replace("полуостров ", "").replace("остров ", "").replace(" island", "").trim
    else
      key.trim
  }

  /**
    * Builds match hypotheses for a single partner region.
    *
    * @param partnerRegion partner region whose names and synonyms are matched
    * @return one hypothesis per (shared key, candidate region) pair; empty if no key matches
    */
  def createHypotheses(partnerRegion: PartnerRegionHeader): Iterable[Hypothesis] = {
    // Best confidence per key on the partner side.
    val key2confidence = weightedKeys(Seq(
      (partnerRegion.name.values.get(Languages.ru).toIterable, ruNameConfidence),
      (partnerRegion.name.values.get(Languages.en).toIterable, enNameConfidence),
      (partnerRegion.name.allValues.toIterable, synonymConfidence),
      (partnerRegion.synonyms.toIterable, synonymConfidence)
    )).toMultiMap.map { case (key, confidences) => key -> confidences.max }

    for {
      (key, confidence) <- key2confidence
      keyEntry <- key2regions.getOrElse(key, Vector.empty)
    } yield Hypothesis(keyEntry.region, partnerRegion, confidence * keyEntry.confidence)
  }

  /** Builds match hypotheses for every region header of the partner tree. */
  override def createHypotheses(partnerTree: PartnerTree): Iterable[Hypothesis] =
    partnerTree.regionHeaders.flatMap(createHypotheses)

}
