# Alarm on the httpserver's database-operation error rate.
# Evaluates a single 300-second window and notifies the ARN exported by the
# pagerduty module as pd_low_urgency_arn when the computed error percentage
# is greater than or equal to 5.
resource "aws_cloudwatch_metric_alarm" "httpserver_db_error_rate" {
  alarm_name        = "httpserver-db-error-rate"
  alarm_description = "httpserver database operation error rate exceeds 5%"

  # Trigger condition. The comparison is inclusive, so a value of exactly 5
  # also puts the alarm into ALARM state.
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = "5"
  evaluation_periods  = "1"

  # The same target is notified on ALARM and on recovery (OK); nothing is
  # notified on INSUFFICIENT_DATA.
  alarm_actions             = ["${module.pagerduty.pd_low_urgency_arn}"]
  ok_actions                = ["${module.pagerduty.pd_low_urgency_arn}"]
  insufficient_data_actions = []

  # Raw series: 5-minute average of QueryError. return_data is not set here;
  # this query only feeds the expression below.
  metric_query {
    id = "httpserverErrorRate"

    metric {
      # QueryError is fired on every DB operation, so its average is treated
      # as the error fraction.
      # NOTE(review): presumably emitted as 1 on failure and 0 on success --
      # confirm against the emitting service.
      namespace   = "eventbus-httpserver"
      metric_name = "QueryError"
      stat        = "Average"
      period      = "300"
      unit        = "Count"

      dimensions = {
        Stage    = "${var.environment}"
        Substage = "primary"
        Region   = "us-west-2"
        Service  = "eventbus-httpserver"
      }
    }
  }

  # Value the alarm evaluates: the average scaled by 100 to give a percentage
  # that is compared against the threshold.
  metric_query {
    id          = "httpserverErrorPercent"
    label       = "Percent Errors"
    expression  = "httpserverErrorRate*100"
    return_data = true
  }
}

# Alarm on the converger's database-operation error rate.
# Evaluates a single 300-second window and notifies the ARN exported by the
# pagerduty module as pd_low_urgency_arn when the computed error percentage
# is greater than or equal to 5.
resource "aws_cloudwatch_metric_alarm" "converger_db_error_rate" {
  alarm_name        = "converger-db-error-rate"
  alarm_description = "converger database operation error rate exceeds 5%"

  # Trigger condition. The comparison is inclusive, so a value of exactly 5
  # also puts the alarm into ALARM state.
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = "5"
  evaluation_periods  = "1"

  # The same target is notified on ALARM and on recovery (OK); nothing is
  # notified on INSUFFICIENT_DATA.
  alarm_actions             = ["${module.pagerduty.pd_low_urgency_arn}"]
  ok_actions                = ["${module.pagerduty.pd_low_urgency_arn}"]
  insufficient_data_actions = []

  # Raw series: 5-minute average of QueryError. return_data is not set here;
  # this query only feeds the expression below.
  metric_query {
    id = "convergerErrorRate"

    metric {
      # QueryError is fired on every DB operation, so its average is treated
      # as the error fraction.
      # NOTE(review): presumably emitted as 1 on failure and 0 on success --
      # confirm against the emitting service.
      namespace   = "eventbus-converger"
      metric_name = "QueryError"
      stat        = "Average"
      period      = "300"
      unit        = "Count"

      dimensions = {
        Stage    = "${var.environment}"
        Substage = "primary"
        Region   = "us-west-2"
        Service  = "eventbus-converger"
      }
    }
  }

  # Value the alarm evaluates: the average scaled by 100 to give a percentage
  # that is compared against the threshold.
  metric_query {
    id          = "convergerErrorPercent"
    label       = "Percent Errors"
    expression  = "convergerErrorRate*100"
    return_data = true
  }
}

# Alarm on CPU utilization of the RDS cluster named "eventbus-<environment>".
# Requires two consecutive 120-second periods with average CPUUtilization at
# or above 60 before notifying the ARN exported by the pagerduty module as
# pd_low_urgency_arn.
resource "aws_cloudwatch_metric_alarm" "db_cpu_utilization" {
  alarm_name        = "db-cpu-utilization"
  alarm_description = "controlplane database CPUUtilization greater than 60%"

  # Trigger condition. The comparison is inclusive, so a value of exactly 60
  # counts as breaching.
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = "60"
  evaluation_periods  = "2"

  # The same target is notified on ALARM and on recovery (OK); nothing is
  # notified on INSUFFICIENT_DATA.
  alarm_actions             = ["${module.pagerduty.pd_low_urgency_arn}"]
  ok_actions                = ["${module.pagerduty.pd_low_urgency_arn}"]
  insufficient_data_actions = []

  # Single query, evaluated directly by the alarm (return_data = true); no
  # metric-math expression is involved.
  metric_query {
    id          = "cpuUtil"
    return_data = true

    metric {
      namespace   = "AWS/RDS"
      metric_name = "CPUUtilization"
      stat        = "Average"
      period      = "120"
      unit        = "Percent"

      # Selects the cluster by identifier, which is derived from the
      # environment variable.
      dimensions = {
        DBClusterIdentifier = "eventbus-${var.environment}"
      }
    }
  }
}
