package ru.yandex.tours.indexer.clusterization

import akka.actor.ActorSystem
import ru.yandex.tours.clustering.Clustering.LinkWithConfidence
import ru.yandex.tours.db.{DBWrapper, Transaction}
import ru.yandex.tours.db.dao.HotelsDao
import ru.yandex.tours.db.dao.HotelsDao.{SkipDeleted, WithIds}
import ru.yandex.tours.db.tables.Clusterization
import ru.yandex.tours.hotels.clustering.{ClusteringModel, LocalContext}
import ru.yandex.tours.model.hotels.HotelsHolder.PartnerHotel
import ru.yandex.tours.model.hotels.Partners
import ru.yandex.tours.model.hotels.Partners.Partner
import ru.yandex.tours.util.collections.RafBasedMap

import scala.concurrent.{ExecutionContext, Future}
import scala.concurrent.duration._

import ru.yandex.tours.util.collections.RafBasedMap

import scala.collection.JavaConverters._
import scala.concurrent.duration._
import scala.concurrent.{ExecutionContext, Future}
/**
  * Clusterizer for `Partners.ostrovokv3` hotels that links a new hotel to an
  * existing `Partners.ostrovok` hotel by "old id": the id embedded in the new
  * hotel's partner URL is compared with the partner id of the candidates.
  *
  * Created by alexcrush on 08.12.17.
  */
class OstrovokV3ClusterizerByOldId(dbWrapper: DBWrapper,
                                   hotelsDao: HotelsDao,
                                   clusteringModel: ClusteringModel)
                                  (implicit akka: ActorSystem, ec: ExecutionContext)
  extends NewHotelClusterizer(dbWrapper, hotelsDao, clusteringModel, 1.hour) {

  override val name: String = "cluster_ostrovokv3_by_oldid"

  override protected val partnerFilter: Option[Partner] = Some(Partners.ostrovokv3)

  /**
    * Extracts the old hotel id from a partner URL of the form
    * `https://ostrovok.ru/rooms/<oldId>/`.
    *
    * @param url partner URL of a hotel
    * @return the text between the fixed prefix and the trailing slash, or `None`
    *         when the URL does not have that shape or the id part is empty
    */
  private def extractOldIdFromUrl(url: String): Option[String] = {
    val prefix = "https://ostrovok.ru/rooms/"
    val suffix = "/"
    if (url.startsWith(prefix) && url.endsWith(suffix)) {
      val idEnd = url.length - suffix.length
      // The prefix itself ends with '/', so a URL equal to the prefix passes both
      // checks above; without this guard substring(begin, end) would be called
      // with end < begin and throw. An empty id is rejected as well.
      if (idEnd > prefix.length) Some(url.substring(prefix.length, idEnd)) else None
    } else {
      None
    }
  }

  /**
    * Returns the old id encoded in the partner URL of `hotel`.
    *
    * @param hotel     hotel reference to look up
    * @param hotelsMap loaded hotels keyed by hotel id
    * @return the old id, or `None` if the hotel is not in `hotelsMap` or its
    *         partner URL does not contain one
    */
  private def suggestedOldId(hotel: HotelRef,
                             hotelsMap: collection.Map[Int, PartnerHotel]): Option[String] = {
    hotelsMap.get(hotel.id).flatMap { partnerHotel =>
      extractOldIdFromUrl(partnerHotel.getRawHotel.getPartnerUrl)
    }
  }

  /**
    * Returns the partner id of `hotel` when it belongs to `Partners.ostrovok`.
    *
    * @param hotel     hotel reference to look up
    * @param hotelsMap loaded hotels keyed by hotel id
    * @return the partner id, or `None` if the hotel is not in `hotelsMap` or
    *         belongs to another partner
    */
  private def getPartnerInfo(hotel: HotelRef,
                             hotelsMap: collection.Map[Int, PartnerHotel]): Option[String] = {
    hotelsMap.get(hotel.id)
      .filter(_.getRawHotel.getPartner == Partners.ostrovok.id)
      .map(_.getRawHotelOrBuilder.getPartnerId)
  }

  /**
    * Generates links between each new hotel and the candidate whose partner id
    * equals the old id taken from the new hotel's URL, then cleans the links and
    * saves/publishes them.
    *
    * @param hotelsToCluster new hotels that have to be clustered
    * @param candidates      old hotels that may be linked to the new ones
    * @param cleaner         removes excess links and sets confidence
    * @param metrics         metrics collector, logged once the links are saved
    * @param transaction     transaction passed to `saveLinksAndPublish`
    */
  override protected def doClustering(hotelsToCluster: Seq[HotelRef],
                                      candidates: Seq[HotelRef],
                                      cleaner: HotelLinkCleaner,
                                      metrics: ClusteringMetrics,
                                      transaction: Transaction) = {

    // Every hotel involved: candidates are the old hotels, hotelsToCluster the new ones.
    val hotelsMapFuture = getHotelMap(candidates.map(_.id).toSet ++ hotelsToCluster.map(_.id).toSet)

    val linksFuture: Future[Seq[LinkWithConfidence]] = {
      for (hotelsMap <- hotelsMapFuture) yield {
        val localContext = new LocalContext(hotelsMap.valuesIterator)

        // new hotel -> old id extracted from its partner URL
        val newHotel2OldId = (for {
          ref <- hotelsToCluster
          oldId <- suggestedOldId(ref, hotelsMap)
        } yield ref -> oldId).toMap

        // old id -> old hotel
        val oldId2OldHotel = (for {
          candidate <- candidates
          partnerInfo <- getPartnerInfo(candidate, hotelsMap)
        } yield partnerInfo -> candidate).toMap

        (for {
          (hotelToCluster, oldId) <- newHotel2OldId
          singleHotel = Seq(hotelToCluster)
          contexts = buildHotelContexts(singleHotel, localContext, hotelsMap)
          // At most one candidate: the old hotel whose partner id equals the old id.
          groupedCandidates = oldId2OldHotel.get(oldId).toSeq
          candidateContexts = buildHotelContexts(groupedCandidates, localContext, hotelsMap)
          link <- generateLinks(singleHotel, groupedCandidates, candidateContexts ++ contexts, metrics).seq
        } yield link).toVector
      }
    }

    // Once link generation has completed (successfully or not), close the hotel
    // map if it was loaded and is a RafBasedMap.
    linksFuture.onComplete { _ =>
      hotelsMapFuture.foreach {
        case m: RafBasedMap[_, _] => m.close()
        case _ =>
      }
    }

    for {
      links <- linksFuture
      cleaned = cleaner.removeExcessLinksAndSetConfidence(links)
      _ <- saveLinksAndPublish(hotelsToCluster, cleaned, transaction, metrics)
    } yield metrics.doLogging()
  }

  /**
    * A hotel is published when at least one of its link confidences reaches
    * `Clusterization.defaultMinConfidence`.
    *
    * @param id       hotel id
    * @param linkConf link confidences keyed by hotel id
    */
  override protected def shouldPublish(id: Int, linkConf: Map[Int, List[Double]]): Boolean = {
    linkConf.get(id).exists(_.exists(_ >= Clusterization.defaultMinConfidence))
  }

}
