# Cache CPU alarm: enters ALARM when the one-minute average CPUUtilization
# is at or above 80 for 20 consecutive one-minute periods (20 minutes).
resource "aws_cloudwatch_metric_alarm" "cache_avg_cpu" {
  # Identity
  alarm_name        = "${var.team_code}-${var.name}-${var.environment}-cache-cpu"
  alarm_description = "Monitors the average CPU of the ${var.name} redis cache"

  # Metric under watch
  namespace   = "AWS/ElastiCache"
  metric_name = "CPUUtilization"
  statistic   = "Average"

  dimensions = {
    # NOTE(review): the cluster id is pinned to "<team_code>-perm-prd" while the
    # alarm name varies with var.name / var.environment — confirm this is the
    # intended target for every instantiation of this configuration.
    CacheClusterId = "${var.team_code}-perm-prd"
  }

  # Evaluation: 20 periods of 60 seconds, all of which must meet the threshold.
  period              = 60
  evaluation_periods  = 20
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 80

  # Periods with no datapoints count as healthy, so gaps never trigger the alarm.
  treat_missing_data = "notBreaching"

  # Notify the pagerduty_alerts SNS topic both on entering ALARM and on recovery.
  alarm_actions = [aws_sns_topic.pagerduty_alerts.arn]
  ok_actions    = [aws_sns_topic.pagerduty_alerts.arn]
}

# Cache engine CPU alarm: enters ALARM when the one-minute average
# EngineCPUUtilization is at or above 80 for 20 consecutive one-minute
# periods (20 minutes).
resource "aws_cloudwatch_metric_alarm" "cache_engine_avg_cpu" {
  # Identity
  alarm_name        = "${var.team_code}-${var.name}-${var.environment}-cache-engine-cpu"
  alarm_description = "Monitors the average engine CPU of the ${var.name} redis cache"

  # Metric under watch
  namespace   = "AWS/ElastiCache"
  metric_name = "EngineCPUUtilization"
  statistic   = "Average"

  dimensions = {
    # NOTE(review): the cluster id is pinned to "<team_code>-perm-prd" while the
    # alarm name varies with var.name / var.environment — confirm this is the
    # intended target for every instantiation of this configuration.
    CacheClusterId = "${var.team_code}-perm-prd"
  }

  # Evaluation: 20 periods of 60 seconds, all of which must meet the threshold.
  period              = 60
  evaluation_periods  = 20
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 80

  # Periods with no datapoints count as healthy, so gaps never trigger the alarm.
  treat_missing_data = "notBreaching"

  # Notify the pagerduty_alerts SNS topic both on entering ALARM and on recovery.
  alarm_actions = [aws_sns_topic.pagerduty_alerts.arn]
  ok_actions    = [aws_sns_topic.pagerduty_alerts.arn]
}

# Cache memory-usage alarm: enters ALARM when the one-minute average
# BytesUsedForCache is at or above 300,000,000 for 5 consecutive one-minute
# periods (5 minutes).
resource "aws_cloudwatch_metric_alarm" "avg_bytes_used" {
  # Identity
  # NOTE(review): the "bytes_used" suffix uses an underscore where the sibling
  # alarms use hyphens; left as-is because renaming would replace the alarm.
  alarm_name        = "${var.team_code}-${var.name}-${var.environment}-cache-bytes_used"
  alarm_description = "Monitors the average bytes used in the ${var.name} redis cache"

  # Metric under watch
  namespace   = "AWS/ElastiCache"
  metric_name = "BytesUsedForCache"
  statistic   = "Average"

  dimensions = {
    # NOTE(review): the cluster id is pinned to "<team_code>-perm-prd" while the
    # alarm name varies with var.name / var.environment — confirm this is the
    # intended target for every instantiation of this configuration.
    CacheClusterId = "${var.team_code}-perm-prd"
  }

  # Evaluation: 5 periods of 60 seconds, all of which must meet the threshold.
  period              = 60
  evaluation_periods  = 5
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 300000000

  # Periods with no datapoints count as healthy, so gaps never trigger the alarm.
  treat_missing_data = "notBreaching"

  # Notify the pagerduty_alerts SNS topic both on entering ALARM and on recovery.
  alarm_actions = [aws_sns_topic.pagerduty_alerts.arn]
  ok_actions    = [aws_sns_topic.pagerduty_alerts.arn]
}

# Cache connection-count alarm: enters ALARM when the one-minute average
# CurrConnections is at or above 2000 for 5 consecutive one-minute periods
# (5 minutes).
resource "aws_cloudwatch_metric_alarm" "avg_conns" {
  # Identity
  alarm_name        = "${var.team_code}-${var.name}-${var.environment}-cache-conns"
  alarm_description = "Monitors the average number of connections to the ${var.name} redis cache"

  # Metric under watch
  namespace   = "AWS/ElastiCache"
  metric_name = "CurrConnections"
  statistic   = "Average"

  dimensions = {
    # NOTE(review): the cluster id is pinned to "<team_code>-perm-prd" while the
    # alarm name varies with var.name / var.environment — confirm this is the
    # intended target for every instantiation of this configuration.
    CacheClusterId = "${var.team_code}-perm-prd"
  }

  # Evaluation: 5 periods of 60 seconds, all of which must meet the threshold.
  period              = 60
  evaluation_periods  = 5
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 2000

  # Periods with no datapoints count as healthy, so gaps never trigger the alarm.
  treat_missing_data = "notBreaching"

  # Notify the pagerduty_alerts SNS topic both on entering ALARM and on recovery.
  alarm_actions = [aws_sns_topic.pagerduty_alerts.arn]
  ok_actions    = [aws_sns_topic.pagerduty_alerts.arn]
}

# Cache low-memory alarm: enters ALARM when the one-minute average
# FreeableMemory is at or below 1,000,000 for 5 consecutive one-minute
# periods (5 minutes).
resource "aws_cloudwatch_metric_alarm" "avg_freeable_memory" {
  # Identity
  alarm_name        = "${var.team_code}-${var.name}-${var.environment}-cache-freeable-mem"
  alarm_description = "Monitors the average available freeable memory in the ${var.name} redis cache"

  # Metric under watch
  namespace   = "AWS/ElastiCache"
  metric_name = "FreeableMemory"
  statistic   = "Average"

  dimensions = {
    # NOTE(review): the cluster id is pinned to "<team_code>-perm-prd" while the
    # alarm name varies with var.name / var.environment — confirm this is the
    # intended target for every instantiation of this configuration.
    CacheClusterId = "${var.team_code}-perm-prd"
  }

  # Evaluation: 5 periods of 60 seconds, all of which must meet the threshold.
  # Unlike the sibling alarms this one fires on LOW values.
  period              = 60
  evaluation_periods  = 5
  comparison_operator = "LessThanOrEqualToThreshold"
  threshold           = 1000000

  # Periods with no datapoints count as healthy, so gaps never trigger the alarm.
  treat_missing_data = "notBreaching"

  # Notify the pagerduty_alerts SNS topic both on entering ALARM and on recovery.
  alarm_actions = [aws_sns_topic.pagerduty_alerts.arn]
  ok_actions    = [aws_sns_topic.pagerduty_alerts.arn]
}
