package ru.yandex.tours.tools.tophotels

import java.io.FileOutputStream

import ru.yandex.tours.model.BaseModel.{LangToVal, Point}
import ru.yandex.tours.model.hotels.HotelsHolder.Hotel
import ru.yandex.tours.model.hotels.Partners
import ru.yandex.tours.util.parsing.Tabbed

/**
 * One-off tool that converts the parsed tophotels.ru TSV dump into a file of
 * length-delimited `Hotel` protobuf messages (`tophotels.proto` in the working directory).
 *
 * Every input line is decomposed by the `Tabbed` extractor into 13 fields: url, full name,
 * longitude, latitude, country, locality, previous name, rating, review count, mail,
 * hotel url, phone and address. Only the url, the names, the star count and the coordinates
 * are copied into the resulting message; the remaining fields are currently ignored.
 *
 * A line that does not match this layout, or whose url does not have the expected shape,
 * aborts the run with an exception. Bad coordinates or a malformed star suffix are only
 * reported on stdout and the corresponding value is left out / set to 0.
 */
object TopHotelsParser extends App {
  import scala.util.Try

  val urlPrefix = "http://tophotels.ru/main/hotel/"

  /**
   * Splits a trailing "N*" star marker off the hotel name.
   *
   * @return (name without the marker, star count); (fullName, 0) when there is no usable marker
   */
  private def splitStars(fullName: String, localId: String): (String, Int) = {
    if (!fullName.endsWith("*")) {
      (fullName, 0)
    } else {
      // The character right before '*' must be a decimal digit; -1 marks "no digit there"
      // (this also covers the one-character name "*").
      val digit =
        if (fullName.length >= 2) Character.digit(fullName.charAt(fullName.length - 2), 10) else -1
      if (digit < 0) {
        println(s"Invalid stars suffix in name: $fullName. Local id is $localId")
        (fullName, 0)
      } else {
        (fullName.take(fullName.length - 2).trim, digit)
      }
    }
  }

  /**
   * Builds a `Point` from textual coordinates.
   *
   * @return None when either coordinate is empty, is not a number or is out of range
   */
  private def parsePoint(lon: String, lat: String, localId: String): Option[Point] = {
    if (lon.isEmpty || lat.isEmpty) {
      None
    } else {
      Try((lat.toDouble, lon.toDouble)).toOption match {
        // Positive range check on purpose: NaN fails every comparison and is rejected too.
        case Some((latDouble, lonDouble))
          if lonDouble >= -180 && lonDouble <= 180 && latDouble >= -90 && latDouble <= 90 =>
          Some(Point.newBuilder().setLatitude(latDouble).setLongitude(lonDouble).build())
        case Some((latDouble, lonDouble)) =>
          println(s"Invalid coords: $latDouble, $lonDouble. Local id is $localId")
          None
        case None =>
          println(s"Invalid coords: $lat, $lon. Local id is $localId")
          None
      }
    }
  }

  /**
   * Converts one TSV line into a `Hotel` message.
   *
   * Throws an `Exception` when the line does not match the expected layout or the url
   * is not of the form `urlPrefix + localId + "/"`.
   */
  private def parseHotel(line: String): Hotel = line match {
    case Tabbed(url, fullName, lon, lat, country, locality, prevName, rating, reviewCount, mail, hotelUrl, phone, address) =>
      if (!url.startsWith(urlPrefix) || !url.endsWith("/")) throw new Exception(s"invalid url: $url")
      // The local id is the url part between the prefix and the trailing slash.
      val localId = url.substring(urlPrefix.length, url.length - 1)
      val (name, stars) = splitStars(fullName, localId)
      val point = parsePoint(lon, lat, localId)

      val ans = Hotel.newBuilder()
        .setPartnerUrl(url)
        .setPartnerId(Partners.topHotels.id)
        .setLocalId(localId)
        .setStars(stars)
        .setId(0)
        .setGeoId(0)
        .addName(LangToVal.newBuilder().setLang("ru").setValue(name))
      // The previous name is stored as an additional "ru" name.
      if (prevName.nonEmpty) {
        ans.addName(LangToVal.newBuilder().setLang("ru").setValue(prevName))
      }
      point.foreach(ans.setPoint)
      ans.build()
    case _ =>
      val fieldCount = line.split("\t", -1).length
      throw new Exception(s"unexpected line format ($fieldCount tab-separated fields): $line")
  }

  // NOTE(review): the file is read with the platform default charset; the data presumably
  // contains non-ASCII hotel names, so an explicit codec may be needed - confirm.
  private val source =
    scala.io.Source.fromFile("util-heavy/python/webmining/tophotels/data/tophotels.parsed.data.tsv")

  // getLines() is lazy; toSet forces the whole file to be read before the source is closed.
  val hotels = try {
    source.getLines().map(parseHotel).toSet
  } finally {
    source.close()
  }

  val os = new FileOutputStream("tophotels.proto")
  try {
    println("total:" + hotels.size)
    println("with coords: " + hotels.count(_.hasPoint))
    hotels.foreach(_.writeDelimitedTo(os))
  } finally {
    // Always release the output file, even if writing fails half-way.
    os.close()
  }
}
