/* SSHDLoginFailures.scala */
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf

import org.json4s._
import org.json4s.jackson.JsonMethods._
import com.fasterxml.jackson.core.JsonParseException
import scala.util.matching.Regex

/**
 * Spark job that counts failed SSH password logins in JSON syslog records stored on S3.
 *
 * Pipeline: read text lines, drop empty ones, parse each line with [[SyslogMessageParser]],
 * discard records the parser could not read (program `"?"`), keep sshd "Failed password"
 * messages, convert them with [[PasswordFailureConverter]], count identical
 * (user, remote, host) combinations and print them in descending order of count.
 *
 * Command line: `<aws-access-key-id> <aws-secret-access-key>`
 */
object SSHDLoginFailures {

  /** Glob selecting the log objects to analyse; credentials are supplied separately. */
  private val LogsUri = "s3n://lumberjack-forest/*/*2015-03-*"

  private val Usage = "Usage: SSHDLoginFailures <aws-access-key-id> <aws-secret-access-key>"

  /**
   * Entry point.
   *
   * @param args `args(0)` is the AWS access key id, `args(1)` the AWS secret access key;
   *             any further arguments are ignored
   */
  def main(args: Array[String]): Unit = {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    val (keyID, keySecret) = args match {
      case Array(id, secret, _*) => (id, secret)
      case _ =>
        System.err.println(Usage)
        sys.exit(1)
    }

    val conf = new SparkConf().setAppName("SSHDLoginFailures - Scala")
    val sc = new SparkContext(conf)
    try {
      // Credentials go through the Hadoop configuration rather than the URI:
      // a secret containing '/' cannot be embedded in an s3n:// URI, and URIs
      // tend to end up in logs.
      sc.hadoopConfiguration.set("fs.s3n.awsAccessKeyId", keyID)
      sc.hadoopConfiguration.set("fs.s3n.awsSecretAccessKey", keySecret)

      val lines = sc.textFile(LogsUri)
      val nonempty = lines.filter(line => line.nonEmpty)

      // Lines the parser could not read come back with program "?" and are dropped here.
      val messages = nonempty
        .map(line => SyslogMessageParser.read(line))
        .filter(m => m.syslog_program != "?")

      val loginfailures = messages
        .filter(m => isFailedPassword(m))
        .map(m => PasswordFailureConverter.convert(m))

      val counted = loginfailures.map(failure => (failure, 1L)).reduceByKey(_ + _)
      val sorted = counted.sortBy(_._2, ascending = false)

      // Debugging aid: inspect the messages the converter could not match.
      //val output = loginfailures.filter(x => x.user == "?").collect()
      val output = sorted.collect()
      println("Result size:")
      println(output.length)
      output.foreach { case (failure, count) =>
        println(s"${count},${failure.remote},${failure.user},${failure.host}")
      }
    } finally {
      // Always release the context, even when the job fails.
      sc.stop()
    }
  }

  /**
   * Tells whether a record is an sshd password failure.
   *
   * @param m parsed syslog record
   * @return `true` when the program is `sshd` and the message starts with "Failed password"
   */
  def isFailedPassword(m: SyslogMessage): Boolean =
    // `&&` short-circuits: the message is only inspected for sshd records.
    m.syslog_program == "sshd" && m.syslog_message.startsWith("Failed password")

}

/**
 * One syslog record as extracted from a JSON log line by `SyslogMessageParser`.
 * The field names are the names json4s extraction looks up, so they must not be renamed.
 *
 * @param syslog_message text of the log message
 * @param syslog_program name of the program that logged the message (e.g. "sshd")
 * @param host           host field of the record
 */
case class SyslogMessage(
  syslog_message: String,
  syslog_program: String,
  host: String
)

/**
 * A failed password login as produced by `PasswordFailureConverter`; also used as the
 * grouping key when failures are counted. `user` and `remote` are "?" when the message
 * could not be matched.
 *
 * @param user   user part captured from the message
 * @param remote IPv4 address captured from the message
 * @param host   host of the originating syslog record
 */
case class PasswordFailure(
  user: String,
  remote: String,
  host: String
)

/**
 * Converts sshd "Failed password" syslog records into [[PasswordFailure]] values.
 */
object PasswordFailureConverter {

  // Compiled once for the whole object instead of once per converted record.
  // Groups: 1 = user, including the optional "invalid user " prefix,
  //         2 = the "invalid user " prefix on its own (unused),
  //         3 = dotted-quad remote address.
  private val FailedPassword: Regex =
    """^Failed password for ((invalid user )?[\w\.\_]+) from (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).*$""".r

  /**
   * Extracts user and remote address from a failed-password message.
   *
   * @param m syslog record whose message is examined
   * @return the failure with the captured user and address, or with `"?"` for both when
   *         the message does not match the expected format; `host` is always `m.host`.
   *         For invalid accounts the user keeps its "invalid user " prefix.
   */
  def convert(m: SyslogMessage): PasswordFailure =
    m.syslog_message match {
      case FailedPassword(user, _, ip) => PasswordFailure(user, ip, m.host)
      case _                           => PasswordFailure("?", "?", m.host)
    }
}

/**
 * Parses raw JSON log lines into [[SyslogMessage]] records.
 */
object SyslogMessageParser {
  import scala.util.control.NonFatal

  // json4s formats used for extraction; created once rather than on every call.
  private implicit val formats: Formats = DefaultFormats

  /** Sentinel returned for lines that cannot be turned into a record. */
  private val Unparsable = SyslogMessage("?", "?", "?")

  /**
   * Parses one line of JSON.
   *
   * @param raw a single log line
   * @return the extracted record, or `SyslogMessage("?", "?", "?")` when the line is not
   *         valid JSON or cannot be extracted into a [[SyslogMessage]]
   */
  def read(raw: String): SyslogMessage =
    try {
      parse(raw).extractOrElse[SyslogMessage](Unparsable)
    } catch {
      // Any non-fatal parser failure maps to the sentinel, so one bad line
      // cannot abort the whole job; fatal errors still propagate.
      case NonFatal(_) => Unparsable
    }

}
