package ru.yandex.tours.util.naming

import java.net.URLDecoder
import java.util.regex.Pattern

import com.google.common.base.Charsets
import ru.yandex.tours.util.Collections._

/**
 * Author: Vladislav Dolbilov (darl@yandex-team.ru)
 * Created: 17.03.16
 */
trait HotelNameUtils {
  // Unanchored pattern for Russian accommodation-type words (hotel, boarding house, inn,
  // guest house, hostel, sanatorium). Case is ignored for non-ASCII letters as well.
  private val hotelTypePattern = {
    val caseInsensitiveUnicode = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE
    Pattern.compile("(отель|пансион|гостиница|гостевой дом|хостел|санаторий)", caseInsensitiveUnicode)
  }

  // "<name> (Ex.<former names>)" or "<name> (Бывш.<former names>)", matched case-insensitively.
  // Group 1 is the text before " (", group 2 is everything between the marker's dot and ")".
  private val ExPattern = raw"(?i)(?u)(.*) \((?:Ex|Бывш)\.(.*)\)".r
  // "<name> - all inclusive" (ASCII case-insensitive); group 1 is the name without the suffix.
  private val AllInclusive = "(?i)(.*) - all inclusive$".r

  // One or more leading "Ex." / "Бывш." markers, in any letter case, each optionally
  // followed by whitespace.
  private val ExPrefixPattern = Pattern.compile(
    "^(?:(?:Ex|Бывш)\\.\\s*)+",
    Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE
  )

  /** Trims `name` and removes any leading "Ex." / "Бывш." markers.
    *
    * The marker is matched case-insensitively and without requiring a space after the dot,
    * which is what `ExPattern` accepts; a case-sensitive `"Ex. "` prefix check would leave
    * entries such as "ex. Old Name" or "Ex.Old Name" untouched.
    *
    * @param name a single former-name fragment, possibly padded with whitespace
    * @return the fragment without surrounding whitespace and without leading markers
    */
  private def cleanExPrefix(name: String): String =
    ExPrefixPattern.matcher(name.trim).replaceFirst("")

  /** Tells whether `name` contains one of the accommodation-type words of `hotelTypePattern`.
    *
    * Despite the method name, the word may occur anywhere in `name`, not only at its start:
    * the check is an unanchored `find()`.
    */
  def hasHotelPrefix(name: String): Boolean = {
    val typeWordMatcher = hotelTypePattern.matcher(name)
    typeWordMatcher.find()
  }

  /** Inserts a space at every position where an alphabetic character is directly followed
    * by an upper-case character, e.g. "GrandHotel" becomes "Grand Hotel".
    *
    * Runs of capitals are split as well ("ABC" becomes "A B C"). An empty string is returned
    * unchanged.
    */
  def splitCamelCase(name: String): String = {
    val spaced = new StringBuilder(name.length)
    var index = 0
    while (index < name.length) {
      val current = name.charAt(index)
      spaced.append(current)
      val nextIndex = index + 1
      // A boundary needs a following character, so the last one never gets a space after it.
      if (nextIndex < name.length && Character.isAlphabetic(current) && name.charAt(nextIndex).isUpper) {
        spaced.append(' ')
      }
      index = nextIndex
    }
    spaced.toString()
  }

  /** Splits a name that carries former names or an "all inclusive" suffix into plain names.
    *
    *  - "`<name>` (Ex. `<a>`, `<b>`)" yields the current name followed by each former name
    *  - "`<name>` - all inclusive" yields the name without the suffix
    *  - anything else yields the input unchanged
    *
    * Former-name segments that are empty after cleaning (e.g. "Hotel (Ex.)" or a trailing
    * comma) are dropped so the result does not contain blank names.
    *
    * @param name raw hotel name
    * @return the current name first, followed by any former names
    */
  def extractExNames(name: String): List[String] = name match {
    case ExPattern(originalName, exNames) =>
      val formerNames = exNames.split(",").map(cleanExPrefix).filter(_.nonEmpty).toList
      originalName :: formerNames
    case AllInclusive(n) => List(n)
    case _ => List(name)
  }

  /** Expands a single raw name into every name variant derived from it.
    *
    * Each name returned by `extractExNames` is emitted as is, followed by its
    * `splitCamelCase` form when that differs from the name itself.
    */
  def splitNameToNames(name: String): List[String] =
    extractExNames(name).flatMap { candidate =>
      List(candidate, splitCamelCase(candidate)).distinct
    }

  /** Splits a name on single spaces and glues some adjacent words into one entry:
    *  - a one-letter word is joined with the word that follows it
    *  - two equal consecutive words are joined together
    *
    * A word consumed as the second half of a glued pair is not emitted again and does not
    * start a new pair.
    */
  def splitNameToWords(normalized: String): Array[String] = {
    val tokens = normalized.split(" ")
    val glued = Array.newBuilder[String]
    var index = 0
    while (index < tokens.length) {
      val current = tokens(index)
      val hasNext = index + 1 < tokens.length
      if (hasNext && (current.length == 1 || current == tokens(index + 1))) {
        // Emit the pair as one entry and step over the word it consumed.
        glued += current + " " + tokens(index + 1)
        index += 2
      } else {
        glued += current
        index += 1
      }
    }
    glued.result()
  }

  // Synonym table as (word or two-word phrase) -> (equivalent spelling) pairs.
  // The same left-hand word may appear several times with different right-hand values.
  // NOTE(review): `toMultiMap` comes from ru.yandex.tours.util.Collections and presumably
  // groups the pairs by their left-hand side — confirm against that helper.
  private val synonyms = Seq(
    "олимпия" -> "olympia",
    "de luxe" -> "deluxe",
    "hotel" -> "hotels",
    "resort" -> "resorts",
    "seher sun" -> "sehersun",
    "nil bahir" -> "nilbahir",
    "holiday village" -> "hv",
    "garden" -> "gardens",
    "suite" -> "suites",
    "butik" -> "boutique",
    "apartments" -> "apts",
    "apartments" -> "apartment",
    "apartments" -> "apt",
    "apartment" -> "apt",
    "apartment" -> "apts",
    "larissa" -> "larisa",
    "m c" -> "mc",
    "отель" -> "гостиница"
  ).toMultiMap
  // Reverse direction of the table above, used to look a word up by its right-hand spelling.
  // NOTE(review): `inverse2` is a project helper not visible here — presumably value -> keys.
  private val inverseSynonyms = synonyms.inverse2

  /** Looks up the synonyms of a word or two-word phrase in both directions of the table.
    *
    * Both directions are always consulted and merged. Falling back to the inverse table only
    * when the word is not a key hides inverse entries for words that occur on both sides:
    * "apartment" is a key (-> "apt", "apts") and also a value of "apartments", so a
    * forward-first lookup never maps it back to "apartments".
    *
    * @param word a single word or a space-joined pair of words
    * @return forward synonyms followed by inverse ones, without duplicates; empty if unknown
    */
  private def getSynonyms(word: String): Seq[String] = {
    val forward: Seq[String] = synonyms.getOrElse(word, Seq.empty)
    val backward: Seq[String] = inverseSynonyms.getOrElse(word, Seq.empty)
    (forward ++ backward).distinct
  }

  /** Extends `words` with the synonyms of every word and of every pair of adjacent words.
    *
    * The result is the original words, then the synonyms of single words (each synonym is
    * itself broken up with `splitNameToWords`), then the synonyms of adjacent pairs joined
    * by a space (kept whole). For a one-word input the only "pair" window is that word
    * itself, so its synonyms are looked up a second time and appended unsplit.
    */
  def generateSynonyms(words: Array[String]): Array[String] = {
    val singleWordSynonyms = words.flatMap(getSynonyms).flatMap(splitNameToWords)
    val phraseSynonyms = words
      .sliding(2)
      .flatMap(window => getSynonyms(window.mkString(" ")))
      .toArray
    words ++ singleWordSynonyms ++ phraseSynonyms
  }

  // Hotel page URL patterns; each must match the whole URL when used in a pattern match.
  // The hotel-name groups exclude '?' and '#' so that a query string or fragment following
  // the name without a trailing slash is not captured as part of the name.

  /** booking.com hotel page; groups: path segment after "/hotel/", page name without ".html". */
  val Booking = "(?:https?://)?www\\.booking\\.com/hotel/([^/]+)/([^/?#]+)\\.html(?:\\?.*)?(?:\\#.*)?".r
  /** ostrovok.ru room page; group: path segment after "/rooms/". */
  val Ostrovok = "(?:https?://)?ostrovok\\.ru/rooms/([^/?#]+)/?(?:\\?.*)?(?:\\#.*)?".r
  /** oktogo.ru page; group: first path segment. */
  val Oktogo = "(?:https?://)?oktogo\\.ru/([^/?#]+)/?(?:\\?.*)?(?:\\#.*)?".r
  /** level.travel hotel page; groups: numeric id, remainder of the segment after the dash. */
  val LevelTravel = "(?:https?://)?level\\.travel/hotels/([0-9]+)-([^/?#]+)/?(?:\\?.*)?(?:\\#.*)?".r
  /** 101hotels.ru hotel page; groups: segment after "/cities/", page name without ".html".
    * The dot before "html" is escaped (it used to match any character) and a trailing
    * "#fragment" is accepted, as in the other patterns.
    */
  val Hotels101 = "(?:https?://)?(?:www\\.)?101hotels\\.ru/main/cities/([^/?#]+)/([^/?#]+)\\.html(?:\\?.*)?(?:\\#.*)?".r
  /** yandex.ru/maps organisation page; groups: segment after "/org/", numeric segment. */
  val YandexMaps = "(?:https?://)?yandex\\.ru/maps/org/([^/]+)/([0-9]+)".r

  /** Extracts the hotel-name part from a URL of one of the supported sites.
    *
    * @param url hotel page URL (Booking, Ostrovok, Oktogo, LevelTravel or 101hotels)
    * @return the name segment of the URL, or an empty string when no pattern matches
    */
  def extractNameFromUrl(url: String): String = url match {
    // Keep the part of the page name before its first dot (e.g. "cosmos" from "cosmos.ru").
    // `takeWhile` is used because `split("\\.").head` throws when the name consists only
    // of dots: `split` drops trailing empty strings and returns an empty array.
    case Booking(_, name) => name.takeWhile(_ != '.')
    case Ostrovok(name) => name
    case Oktogo(name) => name
    case LevelTravel(_, name) =>
      // A malformed percent-escape makes `decode` throw; fall back to the raw segment so
      // this method does not throw on such a URL.
      try URLDecoder.decode(name, Charsets.UTF_8.name())
      catch { case _: IllegalArgumentException => name }
    case Hotels101(_, name) => name
    case _ => ""
  }
}

/** Object form of [[HotelNameUtils]], so its members can be used without mixing in the trait. */
object HotelNameUtils extends HotelNameUtils
