package ru.yandex.tours.tools

import java.io.{FilenameFilter, File, FileInputStream}
import java.util.zip.GZIPInputStream

import spray.http.Uri

import scala.collection.mutable
import scala.util.Try

/**
 * One-off tool that scans tours-api HTTP access logs and, for every maximum
 * `request_index` a user reached on the tour-search endpoint, prints the share
 * of those users that also requested a hotel tour page.
 *
 * The log directory is taken from the first command-line argument and falls
 * back to `/Users/berkut/tmp` when no argument is given.
 */
object BounceRateCheckTool extends App {
  /** Directory scanned when no command-line argument is supplied. */
  private val DefaultLogDir = "/Users/berkut/tmp"

  /** Only files whose name starts with this prefix are parsed. */
  private val LogFilePrefix = "tours-api-http-access"

  /** Users (by `yuid`) that requested at least one hotel tour page. */
  val hotelPage = mutable.Set.empty[String]

  /** Highest `request_index` seen per user (by `yuid`) on the search endpoint. */
  val user2max = mutable.Map.empty[String, Int].withDefault(_ => 0)

  // Compiled once instead of on every log line; the dot in "1.x" is escaped so
  // that it only matches a literal dot.
  private val hotelTourPath = """^/api/1\.x/hotel/\d+/tour/\d\d-[0-9a-f]{32}$""".r

  /**
   * Reads one access-log file (gzip-compressed when the name ends with `.gz`)
   * and accumulates its requests into [[hotelPage]] and [[user2max]].
   *
   * @param filename path of the log file to read
   */
  def parse(filename: String): Unit = {
    val raw = new FileInputStream(filename)
    try {
      val is = if (filename.endsWith(".gz")) new GZIPInputStream(raw) else raw
      try {
        scala.io.Source.fromInputStream(is).getLines().foreach(record)
      } finally {
        is.close()
      }
    } finally {
      // Also covers the case where the GZIP header could not be read;
      // closing an already closed FileInputStream is a no-op.
      raw.close()
    }
  }

  /** Extracts the request URL from a single log line and updates the counters. */
  private def record(line: String): Unit = {
    // Best effort: lines that are too short, carry an unparsable URL or a
    // non-numeric request_index are skipped on purpose.
    Try {
      // The request URL is the fourth space-separated field of the line.
      val url = Uri(line.split(" ")(3))
      val path = url.path.toString()
      if (path.endsWith("api/1.x/search/tours")) {
        for {
          uid <- url.query.get("yuid")
          ri <- url.query.get("request_index")
        } user2max.update(uid, Math.max(ri.toInt, user2max(uid)))
      }
      if (hotelTourPath.pattern.matcher(path).matches()) {
        url.query.get("yuid").foreach(uid => hotelPage += uid)
      }
    }
    ()
  }

  private val logDir = new File(args.headOption.getOrElse(DefaultLogDir))

  // File.list returns null when the directory is missing or cannot be read.
  Option(logDir.list(new FilenameFilter() {
    override def accept(dir: File, name: String): Boolean = name.startsWith(LogFilePrefix)
  })) match {
    case Some(names) => names.foreach(name => parse(new File(logDir, name).getPath))
    case None => System.err.println(s"Cannot list log directory $logDir")
  }

  // Group users by the highest request_index they reached and report, per group,
  // the percentage of users that opened a hotel page together with the group size.
  // NOTE(review): the value printed as "Bounce" is the share of users that DID
  // open a hotel page - confirm that this is the intended metric.
  user2max.groupBy(_._2).map({ case (ri, uidsAndRi) =>
    val uids = uidsAndRi.keys
    ri -> (uids.count(hotelPage.contains) * 100 / uids.size, uids.size)
  }).toSeq.sortBy(_._1).foreach { case (ri, (bounce, total)) =>
    println(s"Second ${ri.toDouble * 1.5}. Bounce $bounce%. Total uids: $total")
  }

}
