package ru.yandex.tours.hotels.clustering.features

import ru.yandex.tours.hotels.clustering.ClusteringContext
import ru.yandex.tours.ml.FeatureExtractor
import ru.yandex.tours.util.Vectors.cosine
import ru.yandex.tours.util.naming.{HotelNameUtils, TfIdfModel}

import scala.collection.JavaConverters._

/**
 * Author: Vladislav Dolbilov (darl@yandex-team.ru)
 * Created: 20.04.16
 */
class NameTfIdfCosFeature(model: TfIdfModel) extends FeatureExtractor[ClusteringContext] {

  /** Identifier under which this feature is reported. */
  override def name: String = "tf_idf_cos"

  /**
   * Compares the names of the two hotels held by `ctx` using their TF-IDF representations.
   *
   * The flattened cleaned name words of `ctx.context1` and `ctx.context2` are each passed
   * through `model.tfIdf`; the feature value is the `cosine` of the two results.
   *
   * @param ctx clustering context holding the pair of hotels to compare
   * @return the cosine of the two TF-IDF results, or `-1` when the results differ in length
   */
  override def apply(ctx: ClusteringContext): Double = {
    val firstWeights = model.tfIdf(ctx.context1.cleanedNameWordsFlatten)
    val secondWeights = model.tfIdf(ctx.context2.cleanedNameWordsFlatten)

    // Results of different length are not passed to `cosine`; -1 is emitted instead.
    val sameLength = firstWeights.length == secondWeights.length
    if (sameLength) cosine(firstWeights, secondWeights) else -1d
  }
}
