package controllers.suggest

import java.util
import java.util.concurrent.ConcurrentHashMap

import ru.yandex.extdata.common.meta.DataType
import ru.yandex.tours.direction.DirectionsStats
import ru.yandex.tours.extdata.CompositeDataDef
import ru.yandex.tours.geo.base.region.Tree
import ru.yandex.tours.geo.base.{RegionBase, TreeBase}
import ru.yandex.tours.util.Collections._
import ru.yandex.tours.util.collections.MappedArray
import ru.yandex.tours.util.io.ByteBuffers
import ru.yandex.tours.util.parsing.{DoubleValue, IntValue, Tabbed}
import ru.yandex.tours.util.text.StringNormalizer
import ru.yandex.tours.util.{IO, Logging}
import shapeless._

import scala.annotation.tailrec
import scala.collection.mutable
import scala.reflect.ClassTag

/**
 * Shingle-based suggest over the regions of a tree.
 *
 * On construction every name of every region in `tree` is normalized and cut into shingles
 * (1- and 2-character prefixes of each word plus all 3-character windows). The resulting
 * `(shingle, region id, weight)` rows are sorted by shingle, written to a temporary file and read
 * back through `ByteBuffers.mmap` as a `MappedArray`. A query is shingled the same way; regions
 * are ranked by the sum of `queryWeight * indexWeight` over shared shingles plus one tenth of
 * `directionPriority` of the region.
 *
 * Author: Vladislav Dolbilov (darl@yandex-team.ru)
 * Created: 27.11.15
 *
 * @param tree              source of the regions to index and of the id -> region lookup
 * @param idParser          restores a region id from the text form written into the index file
 * @param directionPriority additional ranking signal; a tenth of it is added to the match score
 * @tparam ID region identifier type
 * @tparam R  region type
 * @tparam T  tree type
 */
class GeoSuggest[ID : ClassTag, R <: RegionBase[ID], T <: TreeBase[ID, R]]
  (tree: T, idParser: String => ID, directionPriority: ID => Double) extends Logging {

  /** One index entry: `shingle` occurs in some name of region `regionId` with the given `weight`. */
  private case class Row(shingle: String, regionId: ID, weight: Float)

  /**
   * Only normalized names fully matching this pattern are indexed.
   * Compiled once instead of on every `String.matches` call. It must stay declared above
   * `mapped`, whose initializer uses it while the index is being built.
   */
  private val indexableName: util.regex.Pattern = util.regex.Pattern.compile("[a-zа-яё0-9 ]*")

  /**
   * Splits `text` on single spaces and produces weighted shingles for every non-empty word:
   * its 1- and 2-character prefixes and every 3-character sliding window (a shorter word
   * yields itself as the only window).
   *
   * Shingles of the first token weigh `1`, all the others `0.8`. Empty tokens (empty text,
   * consecutive spaces) are skipped so that no empty-string shingle is produced; positions are
   * still counted over all tokens.
   *
   * @return interned shingles paired with their positional weight; duplicates are possible
   */
  private def shinglesOf(text: String): Array[(String, Float)] = {
    for {
      (word, idx) <- text.split(" ").zipWithIndex
      if word.nonEmpty
      w = if (idx == 0) 1f else 0.8f
      shingle <- word.take(1) +: word.take(2) +: word.sliding(3).toSeq
    } yield GeoSuggest.intern(shingle) -> w
  }

  /** Collapses duplicate shingles, keeping the maximum weight seen for each of them. */
  private def norm(shingles: Seq[(String, Float)]) = {
    for ((shingle, weights) <- shingles.toMultiMap)
      yield shingle -> weights.max
  }

  /**
   * Names under which a region can be found, with their base weights:
   * the Russian name (`1`), the textual id (`1.2`) and every entry of `allNames` (`0.5`).
   */
  protected def regionNames(region: R): Seq[(String, Float)] = {
    Seq(
      region.name.ruName -> 1f,
      region.id.toString -> 1.2f
    ) ++ region.allNames.toSeq.map(_ -> 0.5f)
  }

  /**
   * Weighted shingles of all indexable names of `region`.
   * The positional shingle weight is multiplied by the base weight of the name it came from.
   */
  private def regionShingles(region: R) = {
    for {
      (name, w0) <- regionNames(region)
      normalized = StringNormalizer.normalizeString(name)
      if indexableName.matcher(normalized).matches()
      (shingle, w) <- shinglesOf(normalized)
    } yield (shingle, w * w0)
  }

  /**
   * The index itself: rows sorted by shingle (natural `String` order, the same order
   * `binarySearch` relies on), serialized as tab-separated lines and decoded lazily on access.
   *
   * NOTE(review): `getBytes` encodes with the platform default charset; confirm that
   * `IO.writeString` decodes with the same one.
   */
  private val mapped: MappedArray[Row] = {
    val arr = (for {
      region <- tree.regions.par
      (shingle, w) <- norm(regionShingles(region))
    } yield Row(shingle, region.id, w)).seq.toArray
    util.Arrays.sort(arr, Ordering.by[Row, String](_.shingle))

    val file = IO.usingTmp("admin_geo_suggest") { os =>
      MappedArray.writeArray[Row](arr, None, {
        case Row(shingle, id, w) => Tabbed(shingle, id, w).getBytes
      }, os)
    }

    MappedArray.apply(ByteBuffers.mmap(file)) { bb =>
      val line = IO.writeString { out =>
        ByteBuffers.copyBufferToStream(out, bb, 0, bb.limit)
      }
      line match {
        case Tabbed(shingle, id, DoubleValue(w)) =>
          Row(shingle, idParser(id), w.toFloat)
      }
    }
  }

  /**
   * All `(region id, weight)` pairs indexed under exactly `shingle`.
   *
   * Every row is decoded once and then reused for both the comparison and the result.
   *
   * @return a lazy iterator over the matching rows; empty when the shingle is not indexed
   */
  private def pairsFor(shingle: String): Iterator[(ID, Float)] = {
    if (mapped.isEmpty) Iterator.empty
    else {
      // lower bound: equals mapped.size when every row is smaller, which makes the range empty
      val first = binarySearch(shingle, 0, mapped.size)
      Iterator.range(first, mapped.size)
        .map(i => mapped.get(i))
        .takeWhile(_.shingle == shingle)
        .map(row => row.regionId -> row.weight)
    }
  }

  /**
   * Lower-bound binary search over `mapped` within `[start, end)`.
   *
   * In contrast to `util.Arrays.binarySearch` it returns the index of the first row whose
   * shingle is not less than `shingle` instead of an arbitrary matching one. The result is never
   * negative; it equals `end` when all rows in the range are smaller.
   */
  @tailrec
  private def binarySearch(shingle: String, start: Int, end: Int): Int = {
    if (start >= end) start
    else {
      // `start + (end - start) / 2` cannot overflow, unlike `(end + start) / 2`
      val position = start + (end - start) / 2
      if (mapped.get(position).shingle.compareTo(shingle) < 0) binarySearch(shingle, position + 1, end)
      else binarySearch(shingle, start, position)
    }
  }

  /** Number of rows in the index. */
  def size: Int = mapped.size

  /**
   * Finds regions matching `query`.
   *
   * @param query free-form user input; it is normalized before shingling
   * @param count maximum number of regions to return
   * @return up to `count` regions ordered by descending score
   */
  def suggest(query: String, count: Int): Seq[R] = {
    val normalized = StringNormalizer.normalizeString(query)
    // accumulated match score per region
    val bag = new mutable.HashMap[R, Float]()
    for {
      (shingle, w) <- norm(shinglesOf(normalized))
      (regionId, w2) <- pairsFor(shingle)
      region <- tree.region(regionId)
    } bag += (region -> (bag.getOrElse(region, 0f) + w * w2))

    bag.toVector
      .map { case (r, v) => r -> (v + directionPriority(r.id) / 10) }
      .sortBy(-_._2).map(_._1)
      .take(count)
  }
}

/**
 * Data definition that builds a [[YandexGeoSuggest]] from a region tree and direction
 * statistics, plus a process-wide pool of shingle strings shared by all suggest instances.
 */
object GeoSuggest extends CompositeDataDef[YandexGeoSuggest, Tree :: DirectionsStats :: HNil] {
  // NOTE(review): only the tree data type is listed although `from` also consumes
  // DirectionsStats - confirm that this is intentional.
  override def dependsOn: Set[DataType] = Set(Tree.dataType)

  /** Creates the suggest from the resolved dependencies: the region tree and direction stats. */
  override def from(dependencies: Tree:: DirectionsStats :: HNil): YandexGeoSuggest = {
    val regionTree = dependencies.head
    val stats = dependencies.tail.head
    new YandexGeoSuggest(regionTree, IntValue.parse, stats.getPriority)
  }

  /** Pool of canonical shingle instances; every key is mapped to itself. */
  private val shingleInternMap = new ConcurrentHashMap[String, String]()

  /**
   * Returns the pooled instance equal to `shingle`, registering `shingle` itself
   * when no equal string has been pooled yet.
   */
  private def intern(shingle: String): String = {
    val pooled = shingleInternMap.get(shingle)
    if (pooled ne null) pooled
    else {
      // another thread may have registered an equal string in between; prefer its instance
      val raced = shingleInternMap.putIfAbsent(shingle, shingle)
      if (raced eq null) shingle else raced
    }
  }
}