package ru.yandex.tours.tools.merging

import ru.yandex.tours.app.{AkkaSupport, Application, DefaultEnvironment}
import ru.yandex.tours.db._
import ru.yandex.tours.db.dao.HotelsDao
import ru.yandex.tours.db.dao.HotelsDao.OnlyPartner
import ru.yandex.tours.db.model.DbPartnerHotel
import ru.yandex.tours.db.tables.{ClusterLink, Clusterization, LinkType}
import ru.yandex.tours.indexer.clusterization.similarity._
import ru.yandex.tours.model.hotels.HotelsHolder.PartnerHotel
import ru.yandex.tours.model.hotels.Partners
import ru.yandex.tours.parsers.common.DBStableIds
import ru.yandex.tours.tools.TestDb
import ru.yandex.tours.util.collections.RafBasedMap
import ru.yandex.tours.util.parsing.{DoubleValue, IntValue, Tabbed}
import ru.yandex.tours.util.{IO, Statistics}
import slick.driver.MySQLDriver.api._

import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.concurrent.Future

/**
  * Created by asoboll on 11.03.16.
  */
object ClusterizationByPartnerMapping extends Application with DefaultEnvironment with AkkaSupport with TestDb {
  // Execution context used by the Future combinators in this tool; taken from the Akka system's dispatcher.
  implicit val ec = akkaSystem.dispatcher

  // Partner whose hotels are traversed; their add-info entries are searched for `infoName`.
  val partnerWithMappings = Partners.hotelsCombined
  // Partner whose hotels and stable ids serve as the targets of the mapping.
  val partnerToMap = Partners.booking
  // Name of the add-info entry whose value is looked up among the stable ids of `partnerToMap`.
  val infoName = "BookingID"
  // Directory the report files (*.tsv) are written to and later read from.
  val folder = "local_temp"
  // Alternative configuration, currently disabled: map onto oktogo instead of booking.
  //val partnerToMap = Partners.oktogo
  //val infoName = "OktogoID"
  //val folder = "local_temp_2"
  // Database handle; presumably supplied by the TestDb mixin -- TODO confirm.
  val db = testDb
  // Similarity implementation used to score a hotel against its mapped target.
  val hotelsSimilarity = ShingleSimilarity

  // DAO over `db`; must be initialised after `db`.
  val dao = new HotelsDao(db)

  // Entry point: run the clusterization, then print how many hotel ids ended
  // up in the approved report and in the two "not found" reports.
  clusterByMapping.map { _ =>
    println(allApproved.size)
    println(allNotFound.size)
    //dbWriteNoCheck
  }.failed.foreach { error =>
    // The resulting Future is otherwise discarded, so a failure anywhere in
    // the pipeline (or while reading the reports back) would vanish silently.
    System.err.println(s"Clusterization by partner mapping failed: $error")
    error.printStackTrace()
  }

  /**
    * Retrieves the hotels and the stable ids of `partnerToMap`, then builds
    * the link report files under `folder` via `updateLinks`.
    *
    * The retrieved hotels map is closed once the remaining asynchronous steps
    * complete, whether they succeed or fail, so it is not leaked when the
    * stable-id lookup or the link building fails.
    *
    * @return a future that completes when the links have been built
    */
  def clusterByMapping = {
    Statistics.asyncLogTime(
      s"Retrieving hotels of $partnerToMap",
      dao.retrieveRafMap(OnlyPartner(partnerToMap))
    ).flatMap { targetHotels =>
      val linksBuilt = for {
        stableIds <- Statistics.asyncLogTime(s"Retrieving hotel ids of $partnerToMap", new DBStableIds(db).getIds(partnerToMap))
        _ <- Statistics.asyncLogTime("Building links", updateLinks(targetHotels, stableIds.toMap, folder, infoName))
      } yield ()
      // andThen keeps the original outcome and runs the cleanup on both paths.
      linksBuilt.andThen { case _ => targetHotels.close() }
    }
  }

/*  def clusterByExpedia = {
    for {
      expediaIndex <- Statistics.asyncLogTime(s"lodaing expedia index", Try(loadExpediaIndex).toFuture)
      targetHotels <- Statistics.asyncLogTime(s"Retrieving hotels of $partnerWithExpediaId", dao.retrieveRafMap(OnlyPartner(partnerWithExpediaId)))
      _ <- Statistics.asyncLogTime("updating links", updateLinks(targetHotels, expediaIndex, s"$folder/byExpedia", expediaName))
    } yield {
      targetHotels.close()
    }
  }
*/
  /**
    * Traverses the new, non-deleted hotels of `partnerWithMappings` and sorts
    * each hotel id into one of five buckets, which are written as TSV files
    * under `root` once the traversal has finished:
    *
    *  - `withoutMapping.tsv`  -- the hotel has no add-info entry named `paramName`
    *  - `withNoStableId.tsv`  -- the entry's value is not a key of `stableIds`
    *  - `withNoTarget.tsv`    -- the resolved target id is absent from `hotels`
    *  - `notApproved.tsv`     -- confidence is not above `approvalThreshold`
    *  - `approved.tsv`        -- confidence is above `approvalThreshold`
    *
    * Confidence is computed as `1 - hotelsSimilarity.similarity(hotel, target)`.
    *
    * @param hotels            target hotels keyed by id
    * @param stableIds         add-info value -> target hotel id
    * @param root              directory the report files are written to
    * @param paramName         name of the add-info entry that carries the mapping
    * @param approvalThreshold a link is approved only if its confidence is
    *                          strictly greater than this value
    * @return a future that completes after the report files have been written
    */
  def updateLinks(hotels: RafBasedMap[Int, PartnerHotel],
                  stableIds: Map[String, Int],
                  root: String,
                  paramName: String,
                  approvalThreshold: Double = 0.8): Future[_] = {
    var counter = 0
    var totalpartner = 0
    // The buffers are mutated in place from the traversal callback.
    // NOTE(review): assumes dao.traverse invokes the callback sequentially -- confirm.
    val withoutMapping = mutable.ArrayBuffer.empty[Int]
    val withNoStableId = mutable.ArrayBuffer.empty[Int]
    val withNoTarget = mutable.ArrayBuffer.empty[(Int, Int)]
    val notApproved = mutable.ArrayBuffer.empty[(Int, Int, Double)]
    val approved = mutable.ArrayBuffer.empty[(Int, Int, Double)]

    def printCounts(): Unit =
      println(s"withoutMapping: ${withoutMapping.size}, withNoStableId: ${withNoStableId.size}, " +
        s"withNoTarget: ${withNoTarget.size}, notApproved: ${notApproved.size}, approved: ${approved.size}")

    // Classifies a single hotel into exactly one of the buckets above.
    def clusterize(dbHotel: DbPartnerHotel): Unit = {
      counter += 1
      if (counter % 10000 == 0) println(s"hotels in db processed: $counter")
      val hotel = dbHotel.hotel
      if (hotel.getRawHotel.getPartner == partnerWithMappings.id) {
        totalpartner += 1
        if (totalpartner % 1000 == 0) printCounts()
        hotel.getRawHotel.getAddInfoList.asScala.find(_.getName == paramName) match {
          case None => withoutMapping += hotel.getId
          case Some(addInfo) =>
            stableIds.get(addInfo.getValue) match {
              case None => withNoStableId += hotel.getId
              case Some(targetId) =>
                hotels.get(targetId) match {
                  case None => withNoTarget += ((hotel.getId, targetId))
                  case Some(targetHotel) =>
                    val confidence = 1d - hotelsSimilarity.similarity(hotel, targetHotel)
                    val bucket = if (confidence > approvalThreshold) approved else notApproved
                    bucket += ((hotel.getId, targetId, confidence))
                }
            }
        }
      }
    }

    println("starting to count")
    dao.traverse(
      HotelsDao.IsNew(true),
      HotelsDao.OnlyPartner(partnerWithMappings),
      HotelsDao.SkipDeleted
    )(clusterize).map { _ =>
      printCounts()
      IO.printFile(s"$root/withoutMapping.tsv") { pw => withoutMapping.foreach(pw.println) }
      IO.printFile(s"$root/withNoStableId.tsv") { pw => withNoStableId.foreach(pw.println) }
      IO.printFile(s"$root/withNoTarget.tsv") { pw => withNoTarget.foreach { case (q, w) => pw.println(Tabbed(q, w)) }}
      IO.printFile(s"$root/notApproved.tsv") { pw => notApproved.foreach { case (q, w, d) => pw.println(Tabbed(q, w, d)) }}
      IO.printFile(s"$root/approved.tsv") { pw => approved.foreach { case (q, w, d) => pw.println(Tabbed(q, w, d)) }}
    }
  }

  /**
    * Reads the approved links from `approved.tsv` under `folder` and inserts
    * them into the clusterization table as MERGE links, inside a transaction.
    * No further validation of the links is performed.
    */
  def dbWriteNoCheck = {
    // Rounds to two decimals. The previous `(100*d).toInt/100.0` truncated,
    // so e.g. 0.29 (100*0.29 == 28.999999999999996) was stored as 0.28.
    def round(d: Double): Double = math.round(d * 100) / 100.0

    val approved = loadApproved(s"$folder/approved.tsv")
    Transactions.withTransaction(db) { tx =>
      // Materialise strictly: the links are traversed twice below (size and
      // insert), which a one-shot iterator or lazy view would not support well.
      val forClusterization = approved.map { case (q, w, conf) =>
        ClusterLink(-1, q, w, tx.id, round(conf), LinkType.MERGE)
      }.toVector
      println(s"${forClusterization.size} links to add")
      val clusterizationQuery = Clusterization.table ++= forClusterization
      db.run(clusterizationQuery)
    }
  }

/*  def getExpediaIndex: Future[Map[String, Int]] = {
    def query = Hotels.table.filter(_.isDeleted === false).filter(_.isNew === false)
    val index = mutable.Map.empty[String, Int]
    var counter = 0

    println("building expedia index")
    db.stream(query.result).mapResult(_.hotel).foreach { hotel =>
      counter += 1
      if (counter % 10000 == 0) println(s"expedia indexing counter: $counter")
      if (hotel.getRawHotel.getPartner == partnerWithExpediaId.id) {
        for {
          addInfo <- hotel.getRawHotel.getAddInfoList.asScala.find(_.getName == expediaParam)
        } index += addInfo.getValue -> hotel.getId
      }
    }.map {_ =>
      println(s"index size: ${index.size}")
      index.toMap
    }
  }

  def saveExpediaIndex: Future[_] = {
    for {
      expediaIndex <- getExpediaIndex
    } yield IO.printFile(s"$folder/expedia_index.tsv") { pw =>
      expediaIndex.foreach { case (q, w) => pw.println(Tabbed(q, w))}
    }
  }

  def loadExpediaIndex: Map[String, Int] = {
    IO.readLines(s"$folder/expedia_index.tsv").map {
      case Tabbed(q, w) => q -> w.toInt
    }.toMap
  }
*/
  /**
    * Parses a report of approved links.
    *
    * Each line is expected to hold three tab-separated fields: two ints and a
    * double. Lines that do not match this shape are skipped.
    *
    * @param file path of the report to read
    * @return the parsed triples, in file order
    */
  def loadApproved(file: String): Iterator[(Int, Int, Double)] =
    IO.readLines(file).collect {
      case Tabbed(IntValue(sourceId), IntValue(targetId), DoubleValue(confidence)) =>
        (sourceId, targetId, confidence)
    }

  /** Distinct first-column ids of every entry in `approved.tsv` under `folder`. */
  def allApproved: Set[Int] = {
    val approvedLinks = loadApproved(s"$folder/approved.tsv")
    approvedLinks.map { case (hotelId, _, _) => hotelId }.toSet
  }

  /**
    * Reads a report that holds one int per line.
    * Lines that do not parse as an int are skipped.
    *
    * @param file path of the report to read
    */
  def loadNotFound(file: String) =
    IO.readLines(file).collect { case IntValue(hotelId) => hotelId }

  /**
    * Union of the ids listed in `withoutMapping.tsv` and `withNoStableId.tsv`
    * under `folder`.
    */
  def allNotFound: Set[Int] = {
    val missingMapping = loadNotFound(s"$folder/withoutMapping.tsv").toSet
    val missingStableId = loadNotFound(s"$folder/withNoStableId.tsv").toSet
    missingMapping union missingStableId
  }
}