package ru.yandex.tours.tools.merging

import java.io.File

import ru.yandex.tours.app.{AkkaSupport, Application, DefaultEnvironment}
import ru.yandex.tours.db.Transactions
import ru.yandex.tours.db.dao.HotelsDao
import ru.yandex.tours.db.tables.{ClusterLink, Clusterization, Hotels, LinkType}
import ru.yandex.tours.indexer.hotels.PartnerHotelParser
import ru.yandex.tours.indexer.hotels.feed.StaticPartnerFeedRetriever
import ru.yandex.tours.indexer.hotels.parsers.CommonFormatParser
import ru.yandex.tours.model.hotels.HotelsHolder.RawPartnerHotel
import ru.yandex.tours.model.hotels.Partners
import ru.yandex.tours.parsers.common.DBStableIds
import ru.yandex.tours.tools.TestDb
import ru.yandex.tours.util.{IO, ProtoIO, Statistics}

import scala.collection.JavaConversions._
import scala.concurrent.Future
import slick.driver.MySQLDriver.api._


/**
  * Created by asoboll on 07.07.16.
  *
  * Command-line tool that links hotels of one partner (`partnerWithMappings`) to hotels of
  * another partner (`partnerToMap`) using the cross-partner id that the first partner's feed
  * carries in an add-info field named `infoName`.
  *
  * As written, constructing the object only runs [[getUnclustered]]; [[clusterFast]] is
  * available but is not invoked from here.
  */
object VashOtelClusterization extends Application with DefaultEnvironment with AkkaSupport with TestDb {
  // Execution context for every Future combinator in this object.
  implicit val ec = akkaSystem.dispatcher

  /** Region parameter name handed to the feed parser. */
  val regionParam = Some("PartnerRegionID")
  /** Local feed file that is parsed to obtain the mappings. */
  val feedFile = new File("vashotel.hotels.xml")
  /** Partner whose feed contains the cross-partner ids. */
  val partnerWithMappings = Partners.vashotel
  /** Partner the cross-partner ids point to. */
  val partnerToMap = Partners.ostrovok
  /** Name of the add-info entry whose value is the id of the hotel at `partnerToMap`. */
  val infoName = "OstrovokID"
  // NOTE(review): `prodDb` is provided by a mixin; presumably the production database — confirm before running.
  val db = prodDb
  /** Directory that receives the files written by this tool. */
  val folder = "local_temp_vashotel"

  lazy val dao = new HotelsDao(db)
  lazy val parser: PartnerHotelParser = new CommonFormatParser(partnerWithMappings, regionParam = regionParam)
  lazy val feedRetriever = new StaticPartnerFeedRetriever(feedFile)

  // Entry action: dump the unclustered hotels; a failure is only printed, not rethrown.
  getUnclustered.onFailure {
    case e => println(e)
  }

  /**
    * Full pipeline: read the mappings from the feed, resolve both sides to database hotel ids,
    * and write the resulting links. The steps run sequentially and the size of every
    * intermediate result is printed.
    *
    * @return a future that completes with whatever [[dbWrite]] yields
    */
  def clusterFast: Future[_] = {
    for {
      mappings <- Statistics.asyncLogTime(s"reading mapping from $partnerWithMappings to $partnerToMap", getMappings)
      _ = println(mappings.size)
      stableIdsFrom <- Statistics.asyncLogTime(s"Retrieving hotel ids of $partnerWithMappings", new DBStableIds(db).getIds(partnerWithMappings))
      _ = println(stableIdsFrom.size)
      stableIdsTo <- Statistics.asyncLogTime(s"Retrieving hotel ids of $partnerToMap", new DBStableIds(db).getIds(partnerToMap))
      _ = println(stableIdsTo.size)
      links = generateLinks(mappings, stableIdsFrom.toMap, stableIdsTo.toMap)
      _ = println(links.size)
      cluster <- Statistics.asyncLogTime("writing links", dbWrite(links))
    } yield {
      cluster
    }
  }

  /**
    * Retrieves and parses the feed, then extracts one pair per add-info entry named `infoName`.
    *
    * @return pairs of (partner id of the hotel in the feed, value of the matching add-info entry);
    *         a hotel without such an entry contributes nothing
    */
  def getMappings: Future[Iterable[(String, String)]] = {
    for {
      feed <- feedRetriever.retrieve
      parsed <- parser.parse(feed)
    } yield {
      for {
        hotel <- ProtoIO.loadFromFile(parsed, RawPartnerHotel.PARSER)
        // NOTE(review): getAddInfoList is presumably a Java list adapted by the JavaConversions import — confirm.
        value <- hotel.getAddInfoList.filter(_.getName == infoName).map(_.getValue)
      } yield hotel.getPartnerId -> value
    }.toIterable // applies to the inner comprehension's result, not to the Future
  }

  /**
    * Translates partner-id pairs into database-id pairs.
    *
    * @param mappings      pairs of (partner id on the "from" side, partner id on the "to" side)
    * @param stableIdsFrom partner id -> database id for the "from" partner
    * @param stableIdsTo   partner id -> database id for the "to" partner
    * @return pairs of (from database id, to database id); a mapping is dropped when either
    *         side is missing from its lookup map
    */
  def generateLinks(mappings: Iterable[(String, String)],
                    stableIdsFrom: Map[String, Int],
                    stableIdsTo: Map[String, Int]): Iterable[(Int, Int)] = {
    for {
      (from, to) <- mappings
      fromId <- stableIdsFrom.get(from)
      toId <- stableIdsTo.get(to)
    } yield fromId -> toId
  }

  /**
    * Stores MERGE links for the approved pairs whose first id still has `isNew` set in the
    * hotels table, then publishes those first ids through the DAO.
    *
    * @param approved pairs of (id of the hotel to link, id of the hotel it is linked to)
    * @return a future completing with the result of `dao.publish`
    */
  def dbWrite(approved: Iterable[(Int, Int)]): Future[_] = {
    Transactions.withTransaction(db) { tx =>
      val ostrovokQuery = Hotels.table.filter(_.id.inSet(approved.map(_._2))).map(_.id)
      val vashotelQuery = Hotels.table.filter(_.id.inSet(approved.map(_._1))).filter(_.isNew === true).map(_.id)

      for {
        ostrovokIds <- db.run(ostrovokQuery.result)
        vashotelNew <- db.run(vashotelQuery.result)
        action <- {
          println("osrovoks " + ostrovokIds.size)
          println("vashotel new " + vashotelNew.size)
          // Build the lookup set once: a linear `contains` on the query result for every
          // candidate pair would make this filter quadratic.
          val vashotelNewIds = vashotelNew.toSet
          // Only the first id is checked; the target-side existence check is deliberately
          // left disabled, so `ostrovokIds` is used for the printed count only.
          val approvedFinal = approved.filter { case (fromId, _) => vashotelNewIds.contains(fromId) }
          println(approvedFinal.size)
          // NOTE(review): -1 is presumably a placeholder id and 1.0 a confidence value —
          // confirm against the ClusterLink definition.
          val forClusterization = approvedFinal.map { case (q, w) =>
            ClusterLink(-1, q, w, tx.id, 1.0, LinkType.MERGE)
          }
          println(s"${forClusterization.size} links to add")
          val clusterizationQuery = Clusterization.table ++= forClusterization
          db.run(clusterizationQuery).flatMap { _ =>
            dao.publish(approvedFinal.map(_._1))
          }
        }
      } yield action
    }
  }

  /**
    * Writes the partner ids of all `partnerWithMappings` hotels that still have `isNew` set
    * to `$folder/unclustered.tsv`, one id per line, and prints the counts before and after
    * the id translation.
    *
    * @return a future that completes once the file has been written
    */
  def getUnclustered: Future[_] = {
    new DBStableIds(db).getIds(partnerWithMappings).flatMap { stableIds =>
      val vashotelQuery = Hotels.table.filter(_.id.inSet(stableIds.map(_._2))).filter(_.isNew === true).map(_.id)
      db.run(vashotelQuery.result).map { unclustered =>
        println(unclustered.size)
        // Invert (partner id -> database id) so database ids can be turned back into partner ids.
        val reverseMap = stableIds.map { case (q, w) => w -> q }.toMap
        val unclustered2 = unclustered.flatMap(reverseMap.get)
        IO.printFile(s"$folder/unclustered.tsv") { pw => unclustered2.foreach(pw.println) }
        println(unclustered2.size)
      }
    }
  }
}
