# SNS topic that the Consul CloudWatch alarms in this file use for both their
# alarm_actions and ok_actions. The topic name is suffixed with the AWS region.
resource "aws_sns_topic" "consul_alarm" {
  name = "consul-alarm-${var.aws_region}"
}

# Forwards every message published to the consul_alarm topic to a PagerDuty
# events endpoint over HTTPS.
# NOTE(review): the PagerDuty integration key is embedded in the endpoint URL
# below and is therefore stored in source control and in Terraform state;
# consider supplying it through a (sensitive) variable instead.
resource "aws_sns_topic_subscription" "pd_subscription" {
  topic_arn              = aws_sns_topic.consul_alarm.arn
  protocol               = "https"
  endpoint               = "https://events.pagerduty.com/integration/57079c6135914c1eab38af97726295a2/enqueue" #GSOC pagerduty
  # The endpoint is declared as confirming the SNS subscription by itself.
  endpoint_auto_confirms = true
}

# Alarms when the cm_asg Auto Scaling group reports 3 or fewer in-service
# instances (5-minute average) for 2 consecutive evaluation periods.
# NOTE(review): GroupInServiceInstances is only published when group metrics
# collection is enabled on the ASG — confirm cm_asg enables it.
resource "aws_cloudwatch_metric_alarm" "asg_instance_too_low" {
  alarm_name        = "consul-${var.aws_region}-instances-too-low"
  alarm_description = "The number of healthy consul masters in ${var.aws_region} is too low. Runbook https://wiki.twitch.com/display/VID/GSOC+Consul+Runbook#GSOCConsulRunbook-MonitoredAspectsAWSSpecific"

  namespace           = "AWS/AutoScaling"
  metric_name         = "GroupInServiceInstances"
  statistic           = "Average"
  comparison_operator = "LessThanOrEqualToThreshold"
  threshold           = 3
  evaluation_periods  = 2
  period              = 300 # seconds

  # Both the ALARM and the OK transition are published to the same SNS topic.
  alarm_actions = [aws_sns_topic.consul_alarm.arn]
  ok_actions    = [aws_sns_topic.consul_alarm.arn]

  dimensions = {
    AutoScalingGroupName = aws_autoscaling_group.cm_asg.name
  }
}


# Alarms when the consul ALB target group reports 1 or fewer healthy hosts
# (5-minute average) for 3 consecutive evaluation periods.
# NOTE(review): the description below says "no consul masters", but the
# threshold also fires with exactly one healthy host — confirm the wording.
resource "aws_cloudwatch_metric_alarm" "alb_unhealthy" {
  alarm_name        = "consul-alb-${var.aws_region}-failing"
  alarm_description = "There are no consul masters behind the consul ALB in ${var.aws_region}. Runbook https://wiki.twitch.com/display/VID/GSOC+Consul+Runbook#GSOCConsulRunbook-MonitoredAspectsAWSSpecific"

  namespace           = "AWS/ApplicationELB"
  metric_name         = "HealthyHostCount"
  statistic           = "Average"
  comparison_operator = "LessThanOrEqualToThreshold"
  threshold           = 1
  evaluation_periods  = 3
  period              = 300 # seconds

  # Both the ALARM and the OK transition are published to the same SNS topic.
  alarm_actions = [aws_sns_topic.consul_alarm.arn]
  ok_actions    = [aws_sns_topic.consul_alarm.arn]

  # CloudWatch identifies ELB resources by the trailing part of their ARN
  # ("targetgroup/<name>/<id>" and "app/<name>/<id>"); the provider exposes
  # exactly that as arn_suffix, so no manual ARN splitting is needed.
  dimensions = {
    TargetGroup  = aws_lb_target_group.consul_server_tg.arn_suffix
    LoadBalancer = aws_lb.consul_alb.arn_suffix
  }
}

# LAN NLB: alarms when the HealthyHostCount of the consul_nlb_tg target group
# (Sum over a 5-minute period) is 1 or lower for 3 consecutive periods.
# NOTE(review): with statistic = "Sum" every datapoint in the 300 s period is
# added together, so a single healthy host may report a value above 1 and not
# trip this threshold — confirm whether Minimum/Maximum was intended.
resource "aws_cloudwatch_metric_alarm" "nlb_unhealthy" {
  alarm_name        = "consul-nlb-${var.aws_region}-failing"
  alarm_description = "There are no consul masters behind the consul NLB in ${var.aws_region}. Runbook https://wiki.twitch.com/display/VID/GSOC+Consul+Runbook#GSOCConsulRunbook-MonitoredAspectsAWSSpecific"

  namespace           = "AWS/NetworkELB"
  metric_name         = "HealthyHostCount"
  statistic           = "Sum"
  comparison_operator = "LessThanOrEqualToThreshold"
  threshold           = 1
  evaluation_periods  = 3
  period              = 300 # seconds

  # Both the ALARM and the OK transition are published to the same SNS topic.
  alarm_actions = [aws_sns_topic.consul_alarm.arn]
  ok_actions    = [aws_sns_topic.consul_alarm.arn]

  # CloudWatch identifies ELB resources by the trailing part of their ARN
  # ("targetgroup/<name>/<id>" and "net/<name>/<id>"); the provider exposes
  # exactly that as arn_suffix, so no manual ARN splitting is needed.
  dimensions = {
    TargetGroup  = aws_lb_target_group.consul_nlb_tg.arn_suffix
    LoadBalancer = aws_lb.consul_nlb.arn_suffix
  }
}

# WAN NLB: alarms when the HealthyHostCount of the consul_nlb_wan_tg target
# group (Sum over a 5-minute period) is 1 or lower for 3 consecutive periods.
# NOTE(review): with statistic = "Sum" every datapoint in the 300 s period is
# added together, so a single healthy host may report a value above 1 and not
# trip this threshold — confirm whether Minimum/Maximum was intended.
resource "aws_cloudwatch_metric_alarm" "nlb_wan_unhealthy" {
  alarm_name        = "consul-nlb-wan-${var.aws_region}-failing"
  alarm_description = "There are no consul masters behind the consul-wan NLB in ${var.aws_region}. Runbook https://wiki.twitch.com/display/VID/GSOC+Consul+Runbook#GSOCConsulRunbook-MonitoredAspectsAWSSpecific"

  namespace           = "AWS/NetworkELB"
  metric_name         = "HealthyHostCount"
  statistic           = "Sum"
  comparison_operator = "LessThanOrEqualToThreshold"
  threshold           = 1
  evaluation_periods  = 3
  period              = 300 # seconds

  # Both the ALARM and the OK transition are published to the same SNS topic.
  alarm_actions = [aws_sns_topic.consul_alarm.arn]
  ok_actions    = [aws_sns_topic.consul_alarm.arn]

  # CloudWatch identifies ELB resources by the trailing part of their ARN
  # ("targetgroup/<name>/<id>" and "net/<name>/<id>"); the provider exposes
  # exactly that as arn_suffix, so no manual ARN splitting is needed.
  # NOTE(review): LoadBalancer points at aws_lb.consul_nlb, the same NLB as
  # the LAN alarm — confirm the WAN target group is attached to that NLB.
  dimensions = {
    TargetGroup  = aws_lb_target_group.consul_nlb_wan_tg.arn_suffix
    LoadBalancer = aws_lb.consul_nlb.arn_suffix
  }
}

# VPCe NLB: alarms when the HealthyHostCount of the consul_vpce_nlb_tg target
# group (Sum over a 5-minute period) is 1 or lower for 3 consecutive periods.
# NOTE(review): with statistic = "Sum" every datapoint in the 300 s period is
# added together, so a single healthy host may report a value above 1 and not
# trip this threshold — confirm whether Minimum/Maximum was intended.
resource "aws_cloudwatch_metric_alarm" "vpce_nlb_unhealthy" {
  alarm_name        = "consul-vpce-nlb-${var.aws_region}-failing"
  alarm_description = "The target group of the consul NLB used for VPCe's in ${var.aws_region} has too few targets. Runbook https://wiki.twitch.com/display/VID/GSOC+Consul+Runbook#GSOCConsulRunbook-MonitoredAspectsAWSSpecific"

  namespace           = "AWS/NetworkELB"
  metric_name         = "HealthyHostCount"
  statistic           = "Sum"
  comparison_operator = "LessThanOrEqualToThreshold"
  threshold           = 1
  evaluation_periods  = 3
  period              = 300 # seconds

  # Both the ALARM and the OK transition are published to the same SNS topic.
  alarm_actions = [aws_sns_topic.consul_alarm.arn]
  ok_actions    = [aws_sns_topic.consul_alarm.arn]

  # CloudWatch identifies ELB resources by the trailing part of their ARN
  # ("targetgroup/<name>/<id>" and "net/<name>/<id>"); the provider exposes
  # exactly that as arn_suffix, so no manual ARN splitting is needed.
  dimensions = {
    TargetGroup  = aws_lb_target_group.consul_vpce_nlb_tg.arn_suffix
    LoadBalancer = aws_lb.consul_vpce_nlb.arn_suffix
  }
}
