package ru.yandex.atom.periodic

import ru.yandex.atom.db.cassandra.CassandraComponent
import java.io.{File, FileWriter, BufferedWriter}
import com.typesafe.config.Config
import scala.collection.JavaConversions._
import spray.json.{JsObject, JsString, JsArray, CompactPrinter}
import ru.yandex.atom.utils.log.AtomLogger
import ru.yandex.atom.data.{NormalizedUrl, ReqID}
import scala.io.Source
import scala.concurrent.ExecutionContext.Implicits.global
import ru.yandex.atom.zookeeper.ZookeeperActorComponent
import ru.yandex.atom.utils.TimeUtil
import org.apache.zookeeper.CreateMode
import akka.util.Timeout
import concurrent._
import duration._
import org.apache.zookeeper.KeeperException.Code
import ru.yandex.atom.periodic.clean.{Task, CleanerComponent}

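/**
 * Component that provides [[TrieBuilder]]: it dumps URL lists from Cassandra into a source file,
 * runs the external trie compiler on that dump and uses a ZooKeeper node under `/trie-build`
 * as a per-period lock for the build.
 *
 * @author avhaliullin
 */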
trait TrieBuilderComponent extends ZookeeperSyncUtils {
  component: CassandraComponent
    with ZookeeperActorComponent
    with CleanerComponent =>

  def trieBuilder: TrieBuilder

  class TrieBuilder(config: TrieBuilderConfig) {
    val log = AtomLogger[TrieBuilder]()
    val jsonPrinter = CompactPrinter

    val zkPath = "/trie-build"

    /**
     * Asynchronously tries to take the build lock for the current period and, when the lock
     * is obtained, runs the trie build.
     *
     * The lock is a persistent ZooKeeper node `<zkPath>/<ts>`, where `ts` is derived from the
     * current time and `config.taskPeriod` via `TimeUtil`; the node data is `config.hostName`.
     * The lock node is not removed here.
     *
     * The whole attempt (lock acquisition included) is guarded, so a failure at any stage is
     * logged instead of being lost inside the discarded future.
     */
    def obtainLockAndBuild() {
      import scala.util.control.NonFatal

      future {
        val now = System.currentTimeMillis()
        val id = ReqID("trie builder", now.toString)
        try {
          // ZooKeeper calls below are given half of the task period to complete.
          implicit val timeout: Timeout = Timeout(config.taskPeriod / 2)
          val ts = TimeUtil.applyPeriods(config.taskPeriod, TimeUtil.periodsFrom(config.taskPeriod, 0L, now), 0L)

          // Fire-and-forget creation of the parent node; the reply is not awaited.
          zookeeperActor ! ZookeeperRequest.CreateRequest(id, zkPath, ZK.serializeString("Trie build locks"), CreateMode.PERSISTENT)

          val path = zkPath + "/" + ts.toString
          val lock = ZK.createRecover(id, path, ZK.serializeString(config.hostName), CreateMode.PERSISTENT, () => true) {
            // Somebody else already holds the lock for this period.
            case Code.NODEEXISTS => false
            case Code.NONODE =>
              // Parent node is missing: create it, but give up the lock for this attempt.
              log.warning(id, "ZK node {} not found, creating it", zkPath)
              ZK.create(id, zkPath, Array(), CreateMode.PERSISTENT)
              false
          }

          if (lock) {
            build(id, ts)
            log.info(id, "Trie build finished with no errors")
          } else {
            log.info(id, "Failed to obtain lock")
          }
        } catch {
          case e: InterruptedException =>
            // NonFatal does not match interrupts: log it and keep the interrupt flag set.
            log.error(id, e, "Failed to build trie")
            Thread.currentThread().interrupt()
          case NonFatal(t) =>
            log.error(id, t, "Failed to build trie")
        }
      }
    }

    /**
     * Recursively deletes everything inside `file`, leaving `file` itself in place.
     * Does nothing when `listFiles()` returns `null`. Results of `delete()` are not checked.
     */
    private def removeChildren(file: File) {
      for {
        children <- Option(file.listFiles())
        child <- children
      } {
        // Empty the entry first so that a directory can be deleted afterwards.
        removeChildren(child)
        child.delete()
      }
    }

    /**
     * Builds the trie for one period.
     *
     * Steps:
     *  1. prepares an empty `<workDir>/trie` directory;
     *  2. dumps `id`, `host`, `main_mirror` and `urls` of every row of `url_lists` into
     *     `<workDir>/trieSource.<ts>` through [[TrieSourceWriter]];
     *  3. runs `config.trieCompilerPath` on that dump with `<workDir>/trie` as working directory,
     *     forwarding the process output to the log;
     *  4. on exit code 0 renames `<workDir>/trie` to `<workDir>/trie-for-upload.<currentTimeMillis>`.
     *
     * @param id request id used to tag log records
     * @param ts period timestamp used as the suffix of the source file name
     * @throws java.io.IOException if the trie directory cannot be created or renamed
     * @throws RuntimeException    if the trie compiler exits with a non-zero code
     * @throws java.util.concurrent.TimeoutException if the Cassandra read does not finish
     *                                               within `config.readAllTO`
     */
    private def build(id: ReqID, ts: Long) {
      import ru.yandex.atom.db.cassandra.querybuilder._
      import scala.util.control.NonFatal

      log.info(id, "Trie build process started")

      val sourceFile = new File(config.workDir + "/trieSource." + ts)
      val trieDir = new File(config.workDir + "/trie")
      if (trieDir.exists()) {
        removeChildren(trieDir)
      } else if (!trieDir.mkdirs() && !trieDir.isDirectory) {
        throw new java.io.IOException("Failed to create trie directory " + trieDir.getAbsolutePath)
      }

      val sourceWriter = new TrieSourceWriter(new BufferedWriter(new FileWriter(sourceFile)))
      try {
        val writerF = cassandra.select(SELECT('id, 'host, 'main_mirror, 'urls) FROM 'url_lists).flatMap {
          iter =>
            iter.foldLeft(()) {
              (_, rows) =>
                rows.foreach {
                  row =>
                    val listId = row.getLong("id")
                    val mm = row.getString("main_mirror")
                    val host = row.getString("host")
                    val urls = row.getSet("urls", classOf[String]).map(NormalizedUrl.importFromDB(host, mm, _))
                    sourceWriter.urls(listId, urls)
                }
            }
        }
        // Await.result (unlike Await.ready) rethrows a failure of the read, so a broken
        // or partial dump never reaches the trie compiler.
        Await.result(writerF, config.readAllTO)
      } finally {
        sourceWriter.close()
      }
      log.info(id, "Building trie")

      val command = Array(config.trieCompilerPath,
        "-N", "structkey",
        "-S", "atomurls",
        "-i", sourceFile.getAbsolutePath,
        "-D", "atom-urls.trie",
        "-f", "4096,65536",
        "-n", "8",
        "-j"
      )
      val trieBuildProc = Runtime.getRuntime.exec(command, null, trieDir)

      // Logs every line of the given process stream with the given prefix and closes the stream.
      def logLines(stream: java.io.InputStream, prefix: String) {
        val lines = Source.fromInputStream(stream)
        try {
          lines.getLines().foreach(line => log.info(id, prefix + line))
        } finally {
          lines.close()
        }
      }

      // stderr is drained on its own thread: reading stdout to the end first can block forever
      // once the child fills its stderr pipe buffer.
      val stderrPump = new Thread(new Runnable {
        def run() {
          try {
            logLines(trieBuildProc.getErrorStream, "querydata_indexer err: ")
          } catch {
            case NonFatal(e) => log.error(id, e, "Failed to read querydata_indexer stderr")
          }
        }
      }, "trie-build-stderr")
      stderrPump.setDaemon(true)
      stderrPump.start()

      val procRes = try {
        logLines(trieBuildProc.getInputStream, "querydata_indexer out: ")
        val exitCode = trieBuildProc.waitFor()
        stderrPump.join()
        exitCode
      } finally {
        // Kills the process if it is still running (failure path) and releases its streams.
        trieBuildProc.destroy()
      }

      if (procRes != 0) {
        // Thrown rather than only logged, so that the caller does not report a successful build.
        throw new RuntimeException("Command '" + command.mkString(" ") + "' finished with return code " + procRes)
      }

      val finalDir = new File(config.workDir + "/trie-for-upload." + System.currentTimeMillis())
      if (!trieDir.renameTo(finalDir)) {
        throw new java.io.IOException(
          "Failed to rename " + trieDir.getAbsolutePath + " to " + finalDir.getAbsolutePath)
      }
      log.info(id, "querydata_index finished trie " + finalDir.getAbsolutePath)
    }

    /** Timestamp (epoch millis) exactly 24 hours before the moment of the call. */
    def cleanBefore: Long = System.currentTimeMillis() - 24.hours.toMillis

    // Registers a cleaning task over config.workDir that selects files named "trieSource.*"
    // whose last modification time is older than cleanBefore (24 hours ago).
    scheduleCleaning(Task.cleanDir(log, "clean trie sources ", config.workDir, {
      candidate =>
        val expired = candidate.lastModified() < cleanBefore
        expired && candidate.getName.startsWith("trieSource.")
    }))

    // Registers a cleaning task that deletes lock nodes under zkPath whose name (the period
    // timestamp written by obtainLockAndBuild) is older than 24 hours.
    scheduleCleaning(Task("clean trie locks", {
      id =>
        val threshold = cleanBefore
        implicit val to: Timeout = 20.seconds
        ZK.children(id, zkPath, (_, children) => children).foreach {
          name =>
            // A child whose name is not a number must not abort the whole cleanup pass.
            scala.util.Try(name.toLong).toOption match {
              case Some(lockTs) =>
                if (lockTs < threshold) {
                  log.info(id, "Deleting ZK node {}", name)
                  ZK.delete(id, zkPath + "/" + name, -1)
                }
              case None =>
                log.warning(id, "Skipping ZK node {}: its name is not a timestamp", name)
            }
        }
    }))

    /**
     * Writes trie source records, one line per id, in the form `#query\t<id>\t<json>`,
     * where `<json>` is a compact JSON array of `{"u": original url, "nu": normalized url}`.
     *
     * URLs passed in consecutive calls with the same id are merged into one record; a record
     * is written when a different id arrives or when the writer is closed.
     *
     * @param writer destination of the records; closed by [[close]]
     */
    class TrieSourceWriter(writer: BufferedWriter) {
      /** Id of the record being accumulated, if any. */
      var lastIdOpt: Option[Long] = None
      /** URLs collected so far for `lastIdOpt`. */
      var urlsAcc = Set[NormalizedUrl]()

      /**
       * Adds URLs for the given id, writing out the previously accumulated record first
       * when the id differs from the current one.
       */
      def urls(id: Long, urls: Iterable[NormalizedUrl]) {
        lastIdOpt match {
          case Some(current) if current != id =>
            // A new id starts: emit the finished record and begin a fresh accumulator.
            flush()
            urlsAcc = urls.toSet
            lastIdOpt = Some(id)
          case _ =>
            // First call ever, or the same id again: keep accumulating.
            lastIdOpt = Some(id)
            urlsAcc ++= urls
        }
      }

      /** Writes the currently accumulated record, if there is one. The accumulator is not reset. */
      def flush() {
        for (id <- lastIdOpt) {
          val entries = urlsAcc.map {
            url =>
              JsObject("u" -> JsString(url.originalUrl.toExternalForm), "nu" -> JsString(url.normalizedUrl.toExternalForm))
          }.toList
          val urlsJson = JsArray(entries).toString(jsonPrinter)
          writer.write(s"#query\t$id\t$urlsJson\n")
        }
      }

      /** Writes the pending record and closes the underlying writer. */
      def close() {
        flush()
        writer.close()
      }
    }

  }

  /**
   * Settings of the trie builder.
   *
   * @param workDir          directory holding the `trieSource.<ts>` dumps, the `trie` build
   *                         directory and the resulting `trie-for-upload.<ts>` directories
   * @param readAllTO        how long to wait for the full read of URL lists from Cassandra
   * @param trieCompilerPath path of the external trie compiler executable
   * @param taskPeriod       period used to derive the lock timestamp; half of it is the
   *                         timeout of ZooKeeper calls made while taking the lock
   * @param hostName         value stored as the data of the lock node
   */
  case class TrieBuilderConfig(
    workDir: String,
    readAllTO: FiniteDuration,
    trieCompilerPath: String,
    taskPeriod: FiniteDuration,
    hostName: String
  )

  object TrieBuilderConfig {

    import ru.yandex.atom.utils.config._

    /**
     * Reads a [[TrieBuilderConfig]] from the given config section. Being implicit, it also
     * serves as a conversion from `Config` to `TrieBuilderConfig`.
     *
     * Keys read, in this order: `workDir`, `readAllTimeout`, `trieCompilerPath`, `period`,
     * `hostName`.
     */
    implicit def apply(config: Config): TrieBuilderConfig = {
      val workDir = config.getString("workDir")
      // getFiniteDuration is presumably supplied by the ru.yandex.atom.utils.config import above
      val readAllTimeout = config.getFiniteDuration("readAllTimeout")
      val compilerPath = config.getString("trieCompilerPath")
      val period = config.getFiniteDuration("period")
      val host = config.getString("hostName")
      new TrieBuilderConfig(workDir, readAllTimeout, compilerPath, period, host)
    }
  }

}
