package ru.yandex.tours.tools

import java.net.URL

import org.joda.time.format.DateTimeFormat
import org.joda.time.{DateTime, LocalDate}
import ru.yandex.tours.model.search.HotelSearchRequest
import spray.http.Uri
import spray.http.Uri.Query

import scala.collection.mutable
import scala.concurrent.duration._
import scala.io.Source
import scala.util.Try

/**
 * Offline analysis of a sletat.ru search log.
 *
 * Reads `util-heavy/data/metrika/sletat_one_day_log.txt`, converts every log line into a
 * [[ru.yandex.tours.model.search.HotelSearchRequest]] and prints:
 *  - the number of unique and total requests;
 *  - for several cache TTLs, how many requests a request-keyed cache would have served.
 *
 * Author: Vladislav Dolbilov (darl@yandex-team.ru)
 * Created: 19.01.15
 */
object SletatRuAnalytics extends App {

  // Shared formatters, built once instead of once per parsed line/date.
  // They are lazy so that parseLine/parseUrl remain usable even when they are called
  // before the App body has been executed.
  private lazy val dayFormat = DateTimeFormat.forPattern("dd.MM.yyyy")
  private lazy val timestampFormat = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss")

  // Literal city names that may occur in logged URLs, with the numeric id each one is
  // replaced by. Applied in this order to the whole URL string.
  // NOTE(review): presumably this exists so that the `city` query parameter parses as an
  // integer - confirm against the log format.
  private lazy val cityIds: Seq[(String, String)] = Seq(
    "Москва" -> "832",
    "Санкт-Петербург" -> "1264",
    "Екатеринбург" -> "1265",
    "Казань" -> "1266",
    "Ростов-на-Дону" -> "1269",
    "Самара" -> "1271",
    "Тюмень" -> "1273",
    "Уфа" -> "1274",
    "Омск" -> "1278",
    "Барнаул" -> "1305",
    "Новосибирск" -> "1267",
    "Иркутск" -> "1285"
  )

  /** Sentinel date used when a date parameter is missing or empty; such lines are dropped from `lines`. */
  val defaultDate = LocalDate.parse("01.01.1970", dayFormat)

  /** One parsed log record: timestamp, the `yuid` and `regionId` fields, and the parsed search request. */
  case class Line(date: DateTime, yuid: String, regionId: Long, request: HotelSearchRequest)

  /**
   * Parses a single log line made of five whitespace-separated fields:
   * date (`yyyy-MM-dd`), time (`HH:mm:ss`), yuid, region id and URL.
   *
   * Throws if the line does not have exactly five fields, if the timestamp or region id
   * cannot be parsed, or if [[parseUrl]] throws. The resulting `request` is `null` when
   * [[parseUrl]] returns `null`.
   */
  def parseLine(line: String): Line = {
    val Array(date, time, yuid, regionId, url) = line.split("\\s+")
    Line(
      DateTime.parse(s"$date $time", timestampFormat),
      yuid,
      regionId.toLong,
      parseUrl(url)
    )
  }

  /**
   * Builds a search request from the query string of a logged URL.
   *
   * @param urlStr URL as it appears in the log; known city names are replaced by their ids first
   * @return the parsed request, or `null` when the URL has no query string or the query
   *         string cannot be parsed
   * @note throws when `urlStr` is not a valid URL or a non-empty date parameter is not in
   *       `dd.MM.yyyy` format
   */
  def parseUrl(urlStr: String): HotelSearchRequest = {
    val normalized = cityIds.foldLeft(urlStr) { case (acc, (name, id)) => acc.replace(name, id) }
    val url = new URL(normalized)

    // Option pipeline instead of `return null` from inside a closure; the null result is
    // kept because callers rely on it.
    Option(url.getQuery)
      .flatMap(raw => Try(Query.apply(raw, mode = Uri.ParsingMode.Relaxed)).toOption)
      .map(buildRequest)
      .orNull
  }

  /** Converts an already parsed query string into a request, applying defaults for absent parameters. */
  private def buildRequest(query: Query): HotelSearchRequest = {
    // Integer parameter; missing, empty or non-numeric values yield `default`.
    def int(name: String, default: Int = -1): Int = query.getOrElse(name, default.toString) match {
      case "" => default
      case raw => Try(raw.toInt).getOrElse(default)
    }
    // Date parameter in dd.MM.yyyy; missing or empty values yield `defaultDate`.
    def date(name: String): LocalDate = query.getOrElse(name, "") match {
      case "" => defaultDate
      case raw => LocalDate.parse(raw, dayFormat)
    }

    // The first `kids` of the kid1..kid3 ages, followed by one entry of 30 per adult.
    val kidAges = Seq(int("kid1"), int("kid2"), int("kid3")).take(int("kids"))
    val adultAges = Seq.fill(int("adults"))(30)

    // Read each parameter once; the values are reused below.
    val dateFrom = date("datefrom")
    val dateTo = date("dateto")
    val nightsMin = int("nightsmin")
    val nightsMax = int("nightsmax")

    HotelSearchRequest(
      from = int("city", 832), //832 == Moscow
      to = int("country"),
      nights = (nightsMin + nightsMax) / 2,
      when = dateFrom,
      ages = (kidAges ++ adultAges).sorted,
      flexWhen = !dateFrom.isEqual(dateTo),
      flexNights = nightsMin != nightsMax
    )
  }

  /**
   * All log lines that could be parsed, have a non-null request and a departure date
   * other than `defaultDate`. Unparseable lines are skipped silently.
   */
  val lines: Vector[Line] = {
    // NOTE(review): the file is read with the platform default charset - confirm this
    // matches the encoding of the log.
    val source = Source.fromFile("util-heavy/data/metrika/sletat_one_day_log.txt")
    try {
      source
        .getLines()
        .flatMap(raw => Try(parseLine(raw)).toOption)
        .filter(_.request ne null)
        .filter(_.request.when != defaultDate)
        .toVector // materialise before the source is closed
    } finally {
      source.close()
    }
  }

  val requests = lines.map(_.request)

  // Number of occurrences per distinct request (strict map rather than a mapValues view).
  val uniq = requests.groupBy(identity).map { case (request, same) => request -> same.size }

  println(s"Uniq: ${uniq.size}\nTotal: ${requests.size}")


  //top frequent requests
//  uniq.toVector.sortBy(_._2).takeRight(100).reverse.foreach(p => println(s"${p._2}\t${p._1}"))

  /**
   * Replays `lines` in timestamp order against a simulated cache keyed by request and
   * prints hit statistics for the given TTL.
   *
   * A line is a hit when an entry for the same request is cached and that entry's
   * timestamp plus `cacheTTL` is after the line's timestamp. Otherwise the line itself
   * is stored as the entry for its request. Hits do not refresh the stored entry.
   *
   * @param cacheTTL lifetime of a cached entry
   */
  def cacheHit(cacheTTL: FiniteDuration): Unit = {
    // Kept as Long: narrowing to Int would overflow for TTLs longer than ~24.8 days.
    val ttlMillis = cacheTTL.toMillis
    val cache = mutable.Map.empty[HotelSearchRequest, Line]
    val reqHits = mutable.Map.empty[HotelSearchRequest, Int]
    var hits = 0

    lines.sortBy(_.date.getMillis).foreach { line =>
      val request = line.request
      cache.get(request) match {
        case Some(cached) if cached.date.plus(ttlMillis).isAfter(line.date) =>
          reqHits.update(request, reqHits.getOrElse(request, 0) + 1)
          hits += 1
        case _ =>
          cache.update(request, line)
      }
    }

    val total = lines.size
    val topHits = reqHits.toVector.sortBy(_._2).reverse.take(10)

    println(s"TTL = $cacheTTL")
    println(s"$hits requests out of $total got from cache (${(hits.toDouble * 100 / total).round}%)")
    println(s"${reqHits.size} out of ${uniq.size} unique requests got from cache  (${(reqHits.size.toDouble * 100 / uniq.size).round}%)")
    println(s"Top hits:\n${topHits.mkString("\n")}")
  }


  cacheHit(1.day)
  cacheHit(1.hour)
  cacheHit(20.minutes)
  cacheHit(5.minutes)

}
