// Pages when the validator ECS service's CPU utilization stays at or above
// 80% for two consecutive 120-second periods. Both the ALARM and the OK
// transitions are sent to the high-urgency PagerDuty target.
resource "aws_cloudwatch_metric_alarm" "cpu_utilization_validator" {
  alarm_name          = "cpu-utilization-validator"
  alarm_description   = "validator CPUUtilization has exceeded 80%"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  evaluation_periods  = 2
  threshold           = 80

  alarm_actions             = [module.pagerduty.pd_high_urgency_arn]
  ok_actions                = [module.pagerduty.pd_high_urgency_arn]
  insufficient_data_actions = []

  metric_query {
    id          = "m1"
    return_data = true

    metric {
      metric_name = "CPUUtilization"
      namespace   = "AWS/ECS"
      period      = 120
      // Average keeps the value on the same 0-100 percent scale as the
      // threshold. Sum would add together every datapoint reported in the
      // period, producing a number that is not a percentage.
      stat = "Average"
      unit = "Percent"

      dimensions = {
        ClusterName = "eventbus-${var.environment}"
        ServiceName = "controlplane-validator-${var.environment}"
      }
    }
  }
}

// Use the application metrics the validator publishes to CloudWatch to make
// sure it does not stop performing validations. The alarm fires when the
// number of ValidationOk events summed over a 5-minute period is at or below
// the threshold for three consecutive periods.
resource "aws_cloudwatch_metric_alarm" "validation_throughput" {
  alarm_name          = "validation-throughput"
  alarm_description   = "validator is hung, not validating fast enough, or encountering too many non-OK outcomes"
  comparison_operator = "LessThanOrEqualToThreshold"
  evaluation_periods  = 3
  threshold           = 10 // (2/13/2020, nherson) there are >500 things to validate, so <10 is a serious problem

  // Every state transition pages, including INSUFFICIENT_DATA -- presumably
  // because a hung validator would stop emitting datapoints altogether.
  alarm_actions             = [module.pagerduty.pd_high_urgency_arn]
  ok_actions                = [module.pagerduty.pd_high_urgency_arn]
  insufficient_data_actions = [module.pagerduty.pd_high_urgency_arn]

  metric_query {
    id          = "m1"
    return_data = true

    metric {
      metric_name = "ValidationOk"
      namespace   = "eventbus-validator"
      period      = 300
      stat        = "Sum"
      unit        = "Count"

      // NOTE(review): CloudWatch only matches a metric whose dimension set is
      // exactly the one listed here -- confirm the validator publishes
      // ValidationOk with all six dimensions. Region is hard-coded.
      dimensions = {
        ClusterName = "eventbus-${var.environment}"
        ServiceName = "controlplane-validator-${var.environment}"

        Service  = "eventbus-validator"
        Region   = "us-west-2"
        Stage    = var.environment
        Substage = "primary"
      }
    }
  }
}