// Default provider in first region
provider "aws" {
  // Where and as whom Terraform talks to AWS.
  region  = "us-west-2"
  profile = "twitch-feed-aws"

  // Safety guard: the provider only accepts credentials that resolve to this account ID.
  allowed_account_ids = ["914569885343"]
}

// A higher dynamodb_splits_count will use more read capacity, so make sure you have room.
// worker_count and dynamodb_splits_count need to be in balance or you will get OOM errors when running:
// dynamodb_splits_count must be high enough relative to worker_count so there is enough data to distribute.
// worker_count is an arbitrary number based on how much parallelism you want. If the job is taking too long,
// increase worker_count and adjust dynamodb_splits_count accordingly.
//
// How to calculate dynamodb_splits_count from worker_count. Does not need to be exact, can be close.
// dynamodb_splits_count = 4 * ((worker_count - 1) * 2 - 1)
// e.g. 148 = 4 * ((20 - 1) * 2 - 1)

// any changes here should also be made in the qa job to test out
module "follows_tahoe_export" {
  source = "../modules/db-to-s3-glue"

  // Job identity. This is the non-QA instance (is_qa = false).
  name  = "graphdb_production_follows"
  team  = "feeds"
  is_qa = false

  // Read/parallelism tuning. Keep these in balance:
  // dynamodb_splits_count = 4 * ((worker_count - 1) * 2 - 1), i.e. 148 for 20 workers.
  worker_count                 = 20
  dynamodb_splits_count        = 148
  dynamodb_read_capacity_ratio = 0.5

  // Runs once a day. Data infra advised running shortly after midnight.
  // NOTE(review): AWS cron schedules are evaluated in UTC, so this is presumably 09:00 UTC
  // (01:00 PST / 02:00 PDT) rather than 09:00 PST - confirm how the module consumes this value.
  trigger_schedule = "0 9 * * ? *"

  // Table schema, exported fields, and the cleaning script, loaded from files next to this config.
  schema_version = 4
  schema         = file("./follows_table_schema.json")
  cleaning_code  = file("./follows_spark_cleaning_code.py")
  output_fields = [
    "from_user_id",
    "target_user_id",
    "action",
    "notifs_on",
    "followed_on",
    "updated_on",
  ]

  // Tahoe producer settings and the API key lookup.
  tahoe_view_name         = "follows"
  tahoe_producer_name     = "followsdbexport"
  tahoe_producer_role_arn = "arn:aws:iam::331582574546:role/producer-followsdbexport"
  api_key_parameter_name  = "followsdbexport-producer-api-key"
  api_key_kms_key_id      = "d896877f-fd04-4c4f-92c8-85baef26524c"

  // Set to 1 here and 0 in the QA module; presumably this instance is the one that
  // creates the S3 buckets - verify against the module's variables.
  create_s3_output_bucket = 1
  create_s3_script_bucket = 1
}

// qa jobs are created without triggers so no need to worry about the trigger_schedule
module "follows_tahoe_qa_export" {
  source = "../modules/db-to-s3-glue"

  // Job identity. Same name and team as the production module; is_qa = true marks the QA instance.
  name  = "graphdb_production_follows"
  team  = "feeds"
  is_qa = true

  // Read/parallelism tuning, mirrored from the production module:
  // dynamodb_splits_count = 4 * ((worker_count - 1) * 2 - 1), i.e. 148 for 20 workers.
  worker_count                 = 20
  dynamodb_splits_count        = 148
  dynamodb_read_capacity_ratio = 0.5

  // QA jobs are created without triggers, so this schedule is not expected to fire;
  // it is kept identical to the production value.
  trigger_schedule = "0 9 * * ? *"

  // Table schema, exported fields, and the cleaning script, loaded from files next to this config.
  schema_version = 4
  schema         = file("./follows_table_schema.json")
  cleaning_code  = file("./follows_spark_cleaning_code.py")
  output_fields = [
    "from_user_id",
    "target_user_id",
    "action",
    "notifs_on",
    "followed_on",
    "updated_on",
  ]

  // Tahoe producer settings and the API key lookup.
  tahoe_view_name         = "follows"
  tahoe_producer_name     = "followsdbexport"
  tahoe_producer_role_arn = "arn:aws:iam::331582574546:role/producer-followsdbexport"
  api_key_parameter_name  = "followsdbexport-producer-api-key"
  api_key_kms_key_id      = "d896877f-fd04-4c4f-92c8-85baef26524c"

  // Bucket creation is switched off (0) and an explicit output KMS key ARN is supplied;
  // presumably the QA job reuses buckets created by the production module - verify against the module.
  create_s3_output_bucket = 0
  create_s3_script_bucket = 0
  s3_output_kms_key_arn   = "arn:aws:kms:us-west-2:914569885343:key/7d718b25-6a9a-49bc-a312-a197a595d1be"
}
