package ru.yandex.tours.indexer.clusterization

import java.util.concurrent.atomic.AtomicInteger

import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.{Sink, Source}
import com.google.common.cache.{CacheBuilder, CacheLoader}
import ru.yandex.tours.clustering.Clustering.LinkWithConfidence
import ru.yandex.tours.indexer.clusterization.similarity.{CombinedSimilarity, DistanceThresholdSimilarity, HotelSimilarity, ShingleSimilarity}
import ru.yandex.tours.model.hotels.HotelsHolder.PartnerHotel
import ru.yandex.tours.util.Logging
import ru.yandex.tours.util.akka.Streams

import scala.concurrent.{ExecutionContext, Future}

/**
 * [[Clusterizer]] that links each incoming ("slave") hotel to every master hotel found in the
 * neighbouring grid cells whose `thresholdSimilarity` score is below a tiny epsilon.
 *
 * @param masters             grid of master hotels that slaves are matched against
 * @param parallelism         passed to `mapAsync`: how many slaves may be processed at once
 * @param thresholdSimilarity scoring function; a score below 1e-8 is treated as a match
 * @param ec                  execution context on which each slave is processed
 * @param as                  actor system used to create the stream materializer
 */
class ThresholdClusterizer(masters: Grid[PartnerHotel],
                           parallelism: Int,
                           thresholdSimilarity: HotelSimilarity)(implicit ec: ExecutionContext, as: ActorSystem)
  extends Clusterizer with Logging {

  // NOTE(review): a materializer is created per instance and nothing in this class shuts it
  // down - confirm that whoever owns the actor system takes care of its lifecycle.
  private implicit val materializer: ActorMaterializer = ActorMaterializer()

  // Progress counters; they are updated from the futures started in `processSlave`.
  private val processed = new AtomicInteger()
  private val merged = new AtomicInteger()
  private val notMerged = new AtomicInteger()

  /**
   * Runs every slave through [[processSlave]] and collects all produced links.
   *
   * @param slaves hotels to be attached to masters; the iterator is consumed by the stream
   * @return future completed with the links gathered by `Sink.seq`
   */
  def getMergeResult(slaves: Iterator[PartnerHotel]): Future[Seq[LinkWithConfidence]] = {
    Source.fromIterator(() => slaves)
      .mapAsync(parallelism)(processSlave)
      // presumably unrolls each Iterable into single links - see Streams.flatten
      .via(Streams.flatten)
      .runWith(Sink.seq)
  }

  /**
   * Finds the masters similar to `slave`, updates the progress counters and builds the links.
   *
   * @return links for the slave, or an empty collection when no similar master was found
   */
  private def processSlave(slave: PartnerHotel): Future[Iterable[LinkWithConfidence]] = Future {
    val similar = getSimilarHotels(slave)
    // Record the outcome before reporting progress so the log line already includes this hotel.
    val links: Iterable[LinkWithConfidence] =
      if (similar.isEmpty) {
        notMerged.incrementAndGet()
        Iterable.empty
      } else {
        merged.incrementAndGet()
        buildLink(slave, similar)
      }
    // Use the value returned by the increment: re-reading the counter could observe updates
    // made by other futures and print a total that is not the multiple of 1000 that fired.
    val done = processed.incrementAndGet()
    if (done % 1000 == 0) {
      log.info(s"Clusterization done for $done. Merged: ${merged.get}, not merged: ${notMerged.get()}")
    }
    links
  }

  /**
   * Builds one link per matched master, all sharing the same confidence.
   *
   * @param slave   hotel used as the `child` of every link
   * @param matched masters found similar to the slave; each becomes the `parent` of one link
   * @return links with confidence 0.3 when the match looks suspicious, 0.7 otherwise
   */
  private def buildLink(slave: PartnerHotel, matched: Iterable[PartnerHotel]): Iterable[LinkWithConfidence] = {
    // A slave that matches three or more hotels of a single partner is considered suspicious.
    val suspicious = matched.groupBy(_.getRawHotel.getPartner).exists(_._2.size > 2)
    val confidence = if (suspicious) 0.3 else 0.7
    matched.map(m => LinkWithConfidence(parent = m.getId, child = slave.getId, confidence))
  }

  /**
   * Looks up the master candidates around the slave's grid cell and keeps the similar ones.
   */
  private def getSimilarHotels(slave: PartnerHotel): Iterable[PartnerHotel] = {
    val (lonIndex, latIndex) = Grid.getIndex(slave.getRawHotel.getPoint)
    val candidates = cache.get((lonIndex, latIndex))
    candidates.filter(isSimilar(slave))
  }

  /**
   * A master matches when the similarity score is (practically) zero.
   * NOTE(review): this implies a distance-like score where lower means closer - confirm
   * against the HotelSimilarity contract.
   */
  private def isSimilar(slave: PartnerHotel)(master: PartnerHotel): Boolean = {
    thresholdSimilarity.similarity(slave, master) < 1e-8
  }

  // Candidate masters per grid cell, limited to 3 cached cells. An entry holds the masters of
  // the 3x3 block of cells centred on the key, so a lookup also covers the adjacent cells.
  private val cache = CacheBuilder.newBuilder().maximumSize(3).build(new CacheLoader[(Int, Int), Seq[PartnerHotel]] {
    override def load(key: (Int, Int)): Seq[PartnerHotel] = {
      val (lonIndex, latIndex) = key
      val result = for {
        lon <- lonIndex - 1 to lonIndex + 1
        lat <- latIndex - 1 to latIndex + 1
      } yield masters.get(lon, lat).toSeq
      result.flatten
    }
  })

}

/**
 * [[ClusterizerFactory]] that creates [[ThresholdClusterizer]] instances sharing one combined
 * similarity (distance threshold plus shingles) and running on a dedicated dispatcher.
 *
 * @param akkaSystem actor system used to look up the dispatcher and handed to the clusterizer
 */
class ThresholdClusterizerFactory(implicit akkaSystem: ActorSystem) extends ClusterizerFactory {

  // Argument of DistanceThresholdSimilarity; the original name indicates kilometres.
  private val MaxDistanceKm = 3
  // Forwarded to ThresholdClusterizer as its `parallelism`.
  private val Parallelism = 4
  // Id of the dispatcher on which the clusterizer does its work.
  private val DispatcherId = "akka.actor.hotels-clustering-dispatcher"

  // Built once and reused by every clusterizer this factory creates.
  private val combinedSimilarity =
    CombinedSimilarity(DistanceThresholdSimilarity(MaxDistanceKm), ShingleSimilarity)

  /**
   * Creates a clusterizer that matches hotels against `master`.
   *
   * @param master grid of master hotels handed to the clusterizer
   * @param slave  not used by this implementation
   */
  override def apply(master: Grid[PartnerHotel], slave: Grid[PartnerHotel]): Clusterizer = {
    val clusteringDispatcher = akkaSystem.dispatchers.lookup(DispatcherId)
    new ThresholdClusterizer(master, Parallelism, combinedSimilarity)(clusteringDispatcher, akkaSystem)
  }
}