locals {
  # All members: label => IP address. The IP is used as the "ip" metric
  # dimension by the alarms in this file.
  ib_ips = {
    cmh01 = "10.27.40.25"
    iad06 = "10.37.12.25"
    lhr05 = "10.31.12.25"
  }

  # The members that run dhcp. Derived from ib_ips so each address is written
  # down exactly once; only the member labels are listed here.
  ib_dhcp_ips = {
    for member, ip in local.ib_ips : member => ip
    if contains(["cmh01", "iad06"], member)
  }
}

# Primary Infoblox alarm topic. In this file it is the target of the composite
# "all DHCP services down" alarm and of the per-member Member Status and DNS
# Service alarms.
resource "aws_sns_topic" "ib_sns_topic" {
  name = "infoblox-cw-alarm-topic"
}

# HTTPS subscription that delivers messages from the primary topic to a
# PagerDuty integration endpoint.
# NOTE(review): the integration key is part of the endpoint URL and is
# therefore committed to source control - consider supplying it through a
# sensitive variable or a secret store.
resource "aws_sns_topic_subscription" "pd_subscription" {
  topic_arn = aws_sns_topic.ib_sns_topic.arn

  protocol = "https"
  endpoint = "https://events.pagerduty.com/integration/40834d4b44754cb7a3d65da93914e85a/enqueue"

  # Declares that the endpoint confirms the subscription by itself
  # (see the AWS provider documentation for this argument).
  endpoint_auto_confirms = true
}

# "Business hours" Infoblox alarm topic. In this file it is the target of the
# per-member DHCP Service and Database Usage alarms.
resource "aws_sns_topic" "ib_sns_bh_topic" {
  name = "infoblox-cw-alarm-business-hours-topic"
}

# HTTPS subscription that delivers messages from the business-hours topic to a
# PagerDuty integration endpoint.
# NOTE(review): the integration key is part of the endpoint URL and is
# therefore committed to source control - consider supplying it through a
# sensitive variable or a secret store.
resource "aws_sns_topic_subscription" "pd_subscription_bh" {
  topic_arn = aws_sns_topic.ib_sns_bh_topic.arn

  protocol = "https"
  endpoint = "https://events.pagerduty.com/integration/fb79deea275748acb67a80c0b88c7a7d/enqueue"

  # Declares that the endpoint confirms the subscription by itself
  # (see the AWS provider documentation for this argument).
  endpoint_auto_confirms = true
}

# GSOC Infoblox alarm topic. In this file it is the target of the per-member
# DHCP Discover Rate alarm.
resource "aws_sns_topic" "ib_sns_gsoc_topic" {
  name = "infoblox-cw-alarm-gsoc-topic"
}

# HTTPS subscription that delivers messages from the GSOC topic to a PagerDuty
# integration endpoint.
# NOTE(review): the integration key is part of the endpoint URL and is
# therefore committed to source control - consider supplying it through a
# sensitive variable or a secret store.
resource "aws_sns_topic_subscription" "pd_subscription_gsoc" {
  topic_arn = aws_sns_topic.ib_sns_gsoc_topic.arn

  protocol = "https"
  endpoint = "https://events.pagerduty.com/integration/ab6fa97f65074aaa8b5ced5ac5365fb0/enqueue"

  # Declares that the endpoint confirms the subscription by itself
  # (see the AWS provider documentation for this argument).
  endpoint_auto_confirms = true
}

# One alarm per DHCP-serving member (local.ib_dhcp_ips) on the DHCPDISCOVER
# rate. ALARM and OK transitions are published to the GSOC topic.
resource "aws_cloudwatch_metric_alarm" "ib_dhcp_discover_rate" {
  for_each = local.ib_dhcp_ips

  alarm_name        = "Infoblox ${each.key} DHCP Discover Rate"
  alarm_description = "Infoblox Appliance ${each.key} DHCPDISCOVER rate too high. Runbook https://wiki.twitch.com/display/VID/Infoblox+DORA+Traffic+too+high"

  # Breach condition, applied to the e1 series below (the only query with
  # return_data = true): value > 1000 for 3 evaluation periods.
  comparison_operator = "GreaterThanThreshold"
  threshold           = "1000"
  evaluation_periods  = "3"

  # Notification wiring; actions are switched on/off by var.enable_monitoring.
  actions_enabled = var.enable_monitoring
  alarm_actions   = [aws_sns_topic.ib_sns_gsoc_topic.arn]
  ok_actions      = [aws_sns_topic.ib_sns_gsoc_topic.arn]

  # e1: series the alarm evaluates. Per the CloudWatch metric math docs,
  # RATE() is the per-second rate of change and PERIOD() the period in
  # seconds, so RATE(m1)*PERIOD(m1) is the change in m1 across one period.
  # NOTE(review): with the 3600 s period on m1, dividing by 60 presumably
  # gives an average per-minute discover count - confirm the intended unit.
  metric_query {
    id          = "e1"
    expression  = "RATE(m1)*PERIOD(m1) /60"
    return_data = true
  }

  # m1: hourly average of the member's discover metric, selected by IP.
  # NOTE(review): presumably a cumulative counter, given the RATE() above -
  # confirm against whatever publishes the "Infoblox" namespace.
  metric_query {
    id = "m1"
    metric {
      namespace   = "Infoblox"
      metric_name = "ibDhcpTotalNoOfDiscovers.0"
      dimensions = {
        ip = each.value
      }

      stat   = "Average"
      period = "3600"
    }
  }
}

# One alarm per DHCP-serving member (local.ib_dhcp_ips) on the DHCP service
# status metric. ALARM and OK transitions go to the business-hours topic.
# The composite alarm ib_all_dhcp_down in this file refers to these alarms by
# alarm_name, so changing the name template changes that rule as well.
resource "aws_cloudwatch_metric_alarm" "ib_dhcp_service" {
  for_each = local.ib_dhcp_ips

  alarm_name        = "Infoblox ${each.key} DHCP Service"
  alarm_description = "Infoblox Appliance ${each.key} DHCP service affected. Runbook https://wiki.twitch.com/display/VID/Infoblox+POP+DHCP+Service"

  # Metric under watch, selected by the member's IP address.
  namespace   = "Infoblox"
  metric_name = "ibServiceStatus.dhcp"
  dimensions = {
    ip = each.value
  }

  # Breach condition: 300 s average greater than 1 for 3 evaluation periods.
  # NOTE(review): presumably 1 is the healthy status value - confirm against
  # the metric publisher.
  statistic           = "Average"
  period              = "300"
  evaluation_periods  = "3"
  comparison_operator = "GreaterThanThreshold"
  threshold           = "1"

  # Notification wiring; actions are switched on/off by var.enable_monitoring.
  actions_enabled = var.enable_monitoring
  alarm_actions   = [aws_sns_topic.ib_sns_bh_topic.arn]
  ok_actions      = [aws_sns_topic.ib_sns_bh_topic.arn]
}

# Composite alarm that is in ALARM only while every per-member DHCP service
# alarm (aws_cloudwatch_metric_alarm.ib_dhcp_service) is in ALARM at the same
# time. ALARM and OK transitions are published to the primary topic.
resource "aws_cloudwatch_composite_alarm" "ib_all_dhcp_down" {
  alarm_name        = "Infoblox all DHCP Services down"
  alarm_description = "Both DHCP failover members are reporting their DHCP service is down. Runbook https://wiki.twitch.com/display/VID/Infoblox+POP+DHCP+Service"

  # Notification wiring; actions are switched on/off by var.enable_monitoring.
  actions_enabled = var.enable_monitoring
  alarm_actions   = [aws_sns_topic.ib_sns_topic.arn]
  ok_actions      = [aws_sns_topic.ib_sns_topic.arn]

  # Build the rule from every ib_dhcp_service instance instead of naming
  # member keys by hand, so the rule always covers exactly the members in
  # local.ib_dhcp_ips. Operands are emitted in key order; AND is commutative,
  # so the order has no effect on evaluation.
  alarm_rule = join(" AND ", [
    for member_alarm in aws_cloudwatch_metric_alarm.ib_dhcp_service :
    "ALARM(\"${member_alarm.alarm_name}\")"
  ])
}

# One alarm per member (local.ib_ips) on the node status metric. ALARM and OK
# transitions are published to the primary topic.
resource "aws_cloudwatch_metric_alarm" "ib_member_status" {
  for_each = local.ib_ips

  alarm_name        = "Infoblox ${each.key} Member Status"
  alarm_description = "Infoblox Appliance ${each.key} has lost membership. Runbook https://wiki.twitch.com/display/VID/Infoblox+POP+Member+Status"

  # Metric under watch, selected by the member's IP address.
  namespace   = "Infoblox"
  metric_name = "ibNodeServiceStatus.node-status"
  dimensions = {
    ip = each.value
  }

  # Breach condition: 300 s average greater than 1 for 3 evaluation periods.
  # NOTE(review): presumably 1 is the healthy status value - confirm against
  # the metric publisher.
  statistic           = "Average"
  period              = "300"
  evaluation_periods  = "3"
  comparison_operator = "GreaterThanThreshold"
  threshold           = "1"

  # Notification wiring; actions are switched on/off by var.enable_monitoring.
  actions_enabled = var.enable_monitoring
  alarm_actions   = [aws_sns_topic.ib_sns_topic.arn]
  ok_actions      = [aws_sns_topic.ib_sns_topic.arn]
}

# One alarm per member (local.ib_ips) on the DNS service status metric. ALARM
# and OK transitions are published to the primary topic.
resource "aws_cloudwatch_metric_alarm" "ib_dns_service" {
  for_each = local.ib_ips

  alarm_name        = "Infoblox ${each.key} DNS Service"
  alarm_description = "Infoblox Appliance ${each.key} DNS service affected. Runbook https://wiki.twitch.com/display/VID/Infoblox+POP+DNS+Service"

  # Metric under watch, selected by the member's IP address.
  namespace   = "Infoblox"
  metric_name = "ibServiceStatus.dns"
  dimensions = {
    ip = each.value
  }

  # Breach condition: 300 s average greater than 1 for 3 evaluation periods.
  # NOTE(review): presumably 1 is the healthy status value - confirm against
  # the metric publisher.
  statistic           = "Average"
  period              = "300"
  evaluation_periods  = "3"
  comparison_operator = "GreaterThanThreshold"
  threshold           = "1"

  # Notification wiring; actions are switched on/off by var.enable_monitoring.
  actions_enabled = var.enable_monitoring
  alarm_actions   = [aws_sns_topic.ib_sns_topic.arn]
  ok_actions      = [aws_sns_topic.ib_sns_topic.arn]
}

# One alarm per member (local.ib_ips) on the database object status metric.
# ALARM and OK transitions are published to the business-hours topic.
resource "aws_cloudwatch_metric_alarm" "ib_db_usage" {
  for_each = local.ib_ips

  alarm_name        = "Infoblox ${each.key} Database Usage"
  alarm_description = "Infoblox Appliance ${each.key} Database Usage above threshold. Runbook https://wiki.twitch.com/display/VID/Infoblox+POP+DB+Usage"

  # Metric under watch, selected by the member's IP address.
  namespace   = "Infoblox"
  metric_name = "ibNodeServiceStatus.db-object"
  dimensions = {
    ip = each.value
  }

  # Breach condition: 300 s average greater than 1 for 3 evaluation periods.
  # NOTE(review): presumably 1 is the healthy status value - confirm against
  # the metric publisher.
  statistic           = "Average"
  period              = "300"
  evaluation_periods  = "3"
  comparison_operator = "GreaterThanThreshold"
  threshold           = "1"

  # Notification wiring; actions are switched on/off by var.enable_monitoring.
  actions_enabled = var.enable_monitoring
  alarm_actions   = [aws_sns_topic.ib_sns_bh_topic.arn]
  ok_actions      = [aws_sns_topic.ib_sns_bh_topic.arn]
}
