package ru.yandex.tours.hotels.clustering.features

import org.apache.commons.lang.StringUtils
import ru.yandex.tours.hotels.clustering.ClusteringContext
import ru.yandex.tours.ml.FeatureExtractor

/**
 * Feature based on the edit distance between the cleaned names found on the
 * two sides of a [[ClusteringContext]].
 *
 * Every name from `context1.cleanedNames` is compared with every name from
 * `context2.cleanedNames`; the smallest normalized distance is the feature value.
 */
object LevenshteinFeature extends FeatureExtractor[ClusteringContext] {

  /**
   * Identifier of this feature.
   *
   * NOTE(review): the spelling is kept exactly as is because it is a runtime
   * string; presumably other code or stored data refers to it — confirm before renaming.
   */
  override def name: String = "levinshtein"

  /**
   * Computes the minimal normalized Levenshtein distance over all name pairs.
   *
   * For a pair the value is `0` when the names are identical, otherwise the
   * edit distance divided by the sum of both name lengths.
   *
   * @param ctx context holding the two sets of cleaned names to compare
   * @return the smallest pair value, or `-1` when there is no pair to compare
   *         (at least one side has no cleaned names)
   */
  override def apply(ctx: ClusteringContext): Double = {
    val scores = ctx.context1.cleanedNames.flatMap { left =>
      ctx.context2.cleanedNames.map { right =>
        StringUtils.getLevenshteinDistance(left, right) match {
          // Identical names: short-circuit, which also avoids 0 / 0 for two empty strings.
          case 0 => 0.0
          // Normalize by the combined length so long and short names are comparable.
          case edits => 1.0 * edits / (left.length + right.length)
        }
      }
    }

    // -1 is the sentinel for "nothing to compare".
    if (scores.nonEmpty) scores.min else -1
  }
}
