package tv.twitch.spark
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.Text
import org.apache.hadoop.dynamodb.DynamoDBItemWritable
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.{SparkConf, SparkContext}
/* Importing DynamoDBOutputFormat (this job only writes to DynamoDB, so DynamoDBInputFormat is not needed) */
import org.apache.hadoop.dynamodb.write.DynamoDBOutputFormat
import com.amazonaws.services.dynamodbv2.model.AttributeValue
import org.apache.spark.storage.StorageLevel
import org.apache.spark.sql.types._
import collection.JavaConverters._
import org.apache.spark.sql.functions._



/**
  * Created by slowenth on 12/2/16.
  *
  * Spark job that reads the `associations` table over JDBC, keeps the rows of a
  * single association kind, and writes to DynamoDB:
  *  - one item per association row,
  *  - one count item per distinct `from_id`,
  *  - one count item per distinct `to_id`.
  */
object DynamoETL {

  /** Environment variable that, when set, overrides the built-in JDBC source URL. */
  private val SourceUrlEnvVar = "DYNAMO_ETL_SOURCE_URL"

  // SECURITY: this URL embeds a database password in source control. It is kept only
  // as a fallback so existing invocations keep working; the credential should be
  // rotated and supplied through DYNAMO_ETL_SOURCE_URL (or a secret store) instead.
  private val DefaultSourceUrl = "jdbc:postgresql://cohesion-chat-staging-master.cifgffw7w2ar.us-west-2.rds.amazonaws.com/cohesion?user=chat_02&password=7mHRC0UBSzYgJM0YfJGE"

  /** Value written to `dynamodb.regionid` for every output table. */
  private val DynamoRegion = "us-west-2"

  // Path argument required by saveAsHadoopFile. Presumably ignored by
  // DynamoDBOutputFormat, which takes its target from the JobConf - TODO confirm.
  private val PlaceholderOutputPath = "something"

  /** Number of partitions used when reducing the per-id counts. */
  private val CountPartitions = 100

  /** An id paired with a relationship count. Not referenced by the code visible in this file. */
  case class FromObj (
    fromId: Int,
    rel_count: Int
                     )

  /** Wraps an attribute map in the writable type expected by the DynamoDB output format. */
  private def toWritable(attributes: java.util.Map[String, AttributeValue]): DynamoDBItemWritable = {
    val item = new DynamoDBItemWritable()
    item.setItem(attributes)
    item
  }

  /** Builds the Hadoop job configuration that targets `tableName` in [[DynamoRegion]]. */
  private def dynamoJobConf(tableName: String): JobConf = {
    val jobConf = new JobConf()
    jobConf.set("dynamodb.output.tableName", tableName)
    jobConf.set("dynamodb.regionid", DynamoRegion)
    jobConf
  }

  /** Writes every item of `items` to the DynamoDB table `tableName`. */
  private def saveToDynamo(items: org.apache.spark.rdd.RDD[(Text, DynamoDBItemWritable)],
                           tableName: String): Unit = {
    items.saveAsHadoopFile(
      PlaceholderOutputPath,
      classOf[Text],
      classOf[DynamoDBItemWritable],
      classOf[DynamoDBOutputFormat],
      dynamoJobConf(tableName))
  }

  /**
    * Converts an association row into a DynamoDB item with the attributes
    * `fromID` (number), `toID` (number) and `creationDate` (string).
    *
    * @param a the association row to convert
    * @return a writable holding the three attributes above; the data bag is not exported yet
    */
  def Assoc2Dynamo(a:Assoc): DynamoDBItemWritable = {

    // TODO - parse out databags properly
    // Very cheap parser to parse out boolean databag only
//    val dbMap = a.data_bag.stripPrefix("{").stripSuffix("}").split(",").map{  s => s.split(":") match { case Array(k,v) => k.trim.stripPrefix("'").stripSuffix("'") -> new AttributeValue().withBOOL(if ( v.trim == "True") true else false ) } }.toMap.asJava

    val attributes = new java.util.HashMap[String,AttributeValue]()
    attributes.put("fromID", new AttributeValue().withN(a.from_id.toString))
    attributes.put("toID", new AttributeValue().withN(a.to_id.toString))
    attributes.put("creationDate", new AttributeValue().withS(a.creation_date))
//    attributes.put("dataBag", new AttributeValue().withM(dbMap))

    toWritable(attributes)
  }

  /**
    * Converts an `(id, count)` pair into a DynamoDB item with the numeric
    * attributes `fromID` and `rel_count`.
    *
    * The key attribute is always named `fromID`, including when `main` passes
    * counts keyed by `to_id` for the inverse table.
    *
    * @param c the id (first element) and its relationship count (second element)
    * @return a writable holding the two attributes above
    */
  def Count2Dynamo( c: (Int,Int)): DynamoDBItemWritable = {
    val (id, count) = c
    val attributes = new java.util.HashMap[String,AttributeValue]()
    attributes.put("fromID", new AttributeValue().withN(id.toString))
    attributes.put("rel_count", new AttributeValue().withN(count.toString))

    toWritable(attributes)
  }


  /**
    * Job entry point.
    *
    * The JDBC source URL is taken from the `DYNAMO_ETL_SOURCE_URL` environment
    * variable when it is set, and from the built-in default otherwise.
    *
    * @param args command-line arguments; currently unused
    */
  def main(args: Array[String]): Unit = {

    val sourceURL = sys.env.getOrElse(SourceUrlEnvVar, DefaultSourceUrl)
    val tablePrefix = "cohesion2"
    val assocKind = "moderates"
    val inverseAssocKind = "moderated_by"

    val baseTable = s"${tablePrefix}_$assocKind"
    val baseTableCounts = s"${baseTable}_counts"
    val inverseTableCounts = s"${tablePrefix}_${inverseAssocKind}_counts"

    val sc = new SparkContext(new SparkConf())

    try {
      val sqlContext = new HiveContext(sc)

      import sqlContext.implicits._

      val associations = sqlContext.read.format("jdbc").options(Map(
        "url" -> sourceURL,
        "dbtable" -> "associations",
        "partitionColumn" -> "id",
        "lowerBound" -> "1",
        "upperBound" -> "30000000",
        "numPartitions" -> "1000"
      )).load

      // Filter on the configured kind (previously a separate hard-coded literal) and
      // cache only the filtered rows: they are the only data reused by the three
      // writes below, so the rest of the table does not need to be spilled to disk.
      val moderatesDs = associations.as[Assoc]
        .filter(col("assoc_kind") === assocKind)
        .persist(StorageLevel.DISK_ONLY)

      val moderates = moderatesDs.rdd

      // 1. One item per association row.
      saveToDynamo(moderates.map(a => (new Text(""), Assoc2Dynamo(a))), baseTable)

      // 2. Number of associations per from_id.
      val fromCount = moderates.map(f => (f.from_id, 1)).reduceByKey(_ + _, CountPartitions)
      saveToDynamo(fromCount.map(c => (new Text(""), Count2Dynamo(c))), baseTableCounts)

      // 3. Number of associations per to_id, stored in the inverse-kind table.
      val toCount = moderates.map(f => (f.to_id, 1)).reduceByKey(_ + _, CountPartitions)
      saveToDynamo(toCount.map(c => (new Text(""), Count2Dynamo(c))), inverseTableCounts)
    } finally {
      // Stop the context even when a write fails.
      sc.stop()
    }

  }



}
