################
## Note: To use this module, you will need to install and configure https://git-aws.internal.justin.tv/feeds/terraform-provider-datapipeline.
#        That is because terraform does not currently support data pipelines.
#        You need go 1.9+ then run `go get code.justin.tv/feeds/terraform-provider-datapipeline/cmd/terraform-provider-datapipeline`
#        Then modify your ~/.terraformrc according to README.md in the provider git repository.
#
## Warning:  Twitch's default account VPCs may not work with EMR pipeline jobs.  You may need to make a special VPC
##           for these jobs.  Check out https://git-aws.internal.justin.tv/feeds/terraform-core/tree/master/terraform/environment/global
##           for examples of how feeds set up these VPCs for their two accounts.
##
## How it works:  This just copies the default data pipeline setup.  It will copy your DynamoDB table to the s3 bucket
##                on a schedule.  The format is usually newline-delimited plain text rows in a gzipped file.
##
## To restore:    Use AWS's DataPipeline UI to create a restore pipeline.  Point it to the directory for the date you want to restore.
##                I recommend restoring to a table different than the one you back up from, and switching your code to read from the
##                new table name.
################

# Required input.  Used both to configure the datapipeline provider and as the
# `region` value handed to the pipeline definition template.
variable "region" {
  description = "Which region the dynamodb table is in"
}

# Required input.  Named AWS credentials profile; passed straight to the
# datapipeline provider configuration.
variable "profile" {
  description = "The AWS profile to use for aws clients to create the data pipeline"
}

# Required input.  Handed to the pipeline definition template as `subnet_id`.
variable "subnet" {
  description = "A subnet to run EMR jobs in.  Note: This needs to be more public than twitch's default private subnets"
}

# Optional input (defaults to 7).  Handed to the pipeline definition template
# as `days`.
variable "days_per_backup" {
  description = "Number of days to wait between backups"
  default     = 7
}

# Required input.  Besides being handed to the template as `table_name`, this
# value is embedded in the pipeline's name and description.
variable "dynamodb_table_name" {
  description = "Which dynamodb table to backup"
}

# Required input.  Handed to the pipeline definition template as
# `backup_s3_bucket`.
variable "s3_backup_bucket" {
  description = "The s3 bucket that contains the table backups"
}

# Required input.  Handed to the pipeline definition template as `log_bucket`.
variable "s3_log_bucket" {
  description = "The s3 bucket that contains emr job logs"
}

# Optional input (defaults to ".2").  Despite the "%" in the description, the
# documented range 0.0 < x < 1.0 means the value is written as a fraction.
# Handed to the pipeline definition template under the same name.
variable "read_throughput_ratio" {
  default     = ".2"
  description = "Value 0.0 < x < 1.0 that is the % of total table read capacity to use"
}

# Configuration for the custom datapipeline provider (see the install note in
# the header of this file).  Both settings come directly from module inputs.
provider "datapipeline" {
  profile = "${var.profile}"
  region  = "${var.region}"
}

# Renders data_pipeline.tpl (shipped alongside this module) with the module's
# inputs.  The rendered text is used as the `definition` of the backup_job
# pipeline resource.
data "template_file" "data_pipeline" {
  template = "${file("${path.module}/data_pipeline.tpl")}"

  # `vars` is a map argument, not a nested block, so it is assigned with `=`.
  # Terraform 0.11 accepts both spellings, but 0.12+ rejects the block form
  # (`vars { ... }`); the assignment form parses on both.
  vars = {
    subnet_id             = "${var.subnet}"
    days                  = "${var.days_per_backup}"
    log_bucket            = "${var.s3_log_bucket}"
    region                = "${var.region}"
    table_name            = "${var.dynamodb_table_name}"
    backup_s3_bucket      = "${var.s3_backup_bucket}"
    read_throughput_ratio = "${var.read_throughput_ratio}"
  }
}

# The backup pipeline.  Its definition is the rendered data_pipeline template;
# the name and description are derived from the table being backed up.
resource "datapipeline" "backup_job" {
  definition  = "${data.template_file.data_pipeline.rendered}"
  name        = "terraform-backup-${var.dynamodb_table_name}"
  description = "Terraform scheduled backup of ${var.dynamodb_table_name}"
}
