# CPU utilization alarm (low priority).
# Goes into ALARM when the Average CPUUtilization for the node Auto Scaling
# group is >= 85 over a single 5-minute period.
resource "aws_cloudwatch_metric_alarm" "cpu-alarm" {
  alarm_name        = "${var.name}-${var.environment}-${var.cluster_id}-cpu-usage"
  alarm_description = "This metric monitor Grid Windows 10 ec2 cpu utilization"

  # Which metric to watch
  namespace   = "AWS/EC2"
  metric_name = "CPUUtilization"

  dimensions = {
    AutoScalingGroupName = "${aws_autoscaling_group.node.name}"
  }

  # How the metric is evaluated
  statistic           = "Average"
  period              = "300" # seconds (5 minutes)
  evaluation_periods  = "1"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = "85"

  # Both state transitions notify the low-priority SNS topic:
  # ALARM opens a low-priority PD incident, OK resolves it.
  alarm_actions = ["${var.sns_arn_low}"]
  ok_actions    = ["${var.sns_arn_low}"]
}

# Instance & system reachability alarm (high priority).
# Goes into ALARM when the Sum of StatusCheckFailed for the node Auto Scaling
# group is >= 1 in each of 2 consecutive 1-minute periods (2 minutes total).
#
# NOTE: the alarm name previously ended in "-cpu-usage-status-check", a
# copy/paste leftover from the CPU alarm. Changing alarm_name makes Terraform
# replace the alarm, so anything that references the old name must be updated.
resource "aws_cloudwatch_metric_alarm" "status-check" {
  alarm_name        = "${var.name}-${var.environment}-${var.cluster_id}-status-check"
  alarm_description = "This metric tracks instance and system reachability"

  # Metric reference:
  # https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/ec2-metricscollected.html
  namespace   = "AWS/EC2"
  metric_name = "StatusCheckFailed"

  dimensions = {
    AutoScalingGroupName = "${aws_autoscaling_group.node.name}"
  }

  # The metric reports 1 for a failing check and 0 for a passing one, so a
  # per-period Sum >= 1 means at least one failure was reported in that period.
  statistic           = "Sum"
  period              = "60" # seconds (1 minute per period)
  evaluation_periods  = "2"  # 2 periods => 2 minutes total
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = "1"

  # Both state transitions notify the high-priority SNS topic:
  # ALARM opens a PD incident, OK resolves it.
  alarm_actions = ["${var.sns_arn_high}"]
  ok_actions    = ["${var.sns_arn_high}"]
}

# Disk free space alarm (low priority).
# Goes into ALARM when the Minimum of "LogicalDisk % Free Space" for the node
# Auto Scaling group is <= 15 (percent) over a single 1-hour period.
resource "aws_cloudwatch_metric_alarm" "disk_free_space" {
  alarm_name        = "${var.name}-${var.environment}-${var.cluster_id}-disk-free"
  alarm_description = "This metric tracks Disk Free Space as a percentage"

  # Which metric to watch (custom namespace, not an AWS/* one)
  namespace   = "Windows/Grid"
  metric_name = "LogicalDisk % Free Space"

  dimensions = {
    AutoScalingGroupName = "${aws_autoscaling_group.node.name}"
  }

  # How the metric is evaluated
  statistic           = "Minimum"
  period              = "3600" # seconds (1 hour)
  evaluation_periods  = "1"
  comparison_operator = "LessThanOrEqualToThreshold"
  threshold           = "15" # percent free

  # Both state transitions notify the low-priority SNS topic:
  # ALARM opens a low-priority PD incident, OK resolves it.
  alarm_actions = ["${var.sns_arn_low}"]
  ok_actions    = ["${var.sns_arn_low}"]
}

# Critical disk free space alarm (high priority).
# Goes into ALARM when the Minimum of "LogicalDisk % Free Space" for the node
# Auto Scaling group is <= 10 (percent) over a single 1-hour period.
resource "aws_cloudwatch_metric_alarm" "disk_free_space_critical" {
  alarm_name        = "${var.name}-${var.environment}-${var.cluster_id}-disk-free-critical"
  alarm_description = "This metric tracks Disk Free Space as a percentage"

  # Which metric to watch (custom namespace, not an AWS/* one)
  namespace   = "Windows/Grid"
  metric_name = "LogicalDisk % Free Space"

  dimensions = {
    AutoScalingGroupName = "${aws_autoscaling_group.node.name}"
  }

  # How the metric is evaluated
  statistic           = "Minimum"
  period              = "3600" # seconds (1 hour)
  evaluation_periods  = "1"
  comparison_operator = "LessThanOrEqualToThreshold"
  threshold           = "10" # percent free

  # Both state transitions notify the high-priority SNS topic:
  # ALARM opens a PD incident, OK resolves it.
  alarm_actions = ["${var.sns_arn_high}"]
  ok_actions    = ["${var.sns_arn_high}"]
}
