package ru.yandex.tours.hotels.clustering.features

import ru.yandex.tours.hotels.clustering.ClusteringContext
import ru.yandex.tours.ml.FeatureExtractor

/**
 * Feature measuring how dissimilar the word sets of two hotels' names are.
 *
 * Every non-empty cleaned name of the first context is compared with every
 * non-empty cleaned name of the second context, and the smallest Jaccard
 * distance among those pairs is reported.
 */
object NameSetFeature extends FeatureExtractor[ClusteringContext] {

  /** Feature value returned when there is no pair of non-empty names to compare. */
  private val NoComparablePair = -1d

  /**
   * Computes the minimal Jaccard distance between the word sets of the names
   * of the two compared contexts.
   *
   * @param ctx pair of contexts whose `cleanedNameWords` are compared
   * @return a value in `[0, 1]` (0 means some pair of names has identical word
   *         sets, 1 means every pair is disjoint), or `-1` when at least one
   *         side has no non-empty name
   */
  override def apply(ctx: ClusteringContext): Double = {
    val distances = for {
      aW <- ctx.context1.cleanedNameWords
      if aW.nonEmpty
      // Built once per outer element instead of once per (aW, bW) pair.
      aSet = aW.toSet
      bW <- ctx.context2.cleanedNameWords
      if bW.nonEmpty
    } yield jaccardDistance(aSet, bW.toSet)

    if (distances.isEmpty) NoComparablePair else distances.min
  }

  /**
   * Jaccard distance between two sets: `1 - |a & b| / |a | b|`.
   *
   * Two empty sets are treated as being at distance 0, which also avoids a
   * division by zero.
   */
  private def jaccardDistance(a: Set[String], b: Set[String]): Double = {
    if (a.isEmpty && b.isEmpty) {
      0d
    } else {
      val common = (a & b).size
      val total = (a | b).size
      1 - common.toDouble / total
    }
  }

  /** Identifier under which this feature is exported. */
  override def name: String = "name_word_jaccard"
}
