# SNS topic used as the alarm action target for the DNS logs ingestion alarm
# in this file; it is forwarded to PagerDuty by the subscription on this topic.
resource "aws_sns_topic" "pagerduty_business_hours" {
  name = "video-coreservices-dns-logs-ingestion-pagerduty-business-hours-alarm"
  tags = local.tags
}

# Delivers every message published on the pagerduty_business_hours topic to a
# PagerDuty integration endpoint over HTTPS.
resource "aws_sns_topic_subscription" "pd_subscription" {
  topic_arn = aws_sns_topic.pagerduty_business_hours.arn

  # NOTE(review): the PagerDuty integration key is embedded in this URL and is
  # therefore committed to source control — confirm that is acceptable, or
  # source it from a variable / secret store instead.
  protocol = "https"
  endpoint = "https://events.pagerduty.com/integration/fb79deea275748acb67a80c0b88c7a7d/enqueue"

  # Tells the provider that the endpoint confirms the subscription by itself.
  endpoint_auto_confirms = true
}

# Detect when DNS log event ingestion is below the expected volume for at least 25 minutes of the past hour
resource "aws_cloudwatch_metric_alarm" "dns_logs_ingestion_anomaly_too_low" {
  alarm_name        = "dns-resolver Logs Ingestion Volume Too Low Anomaly"
  alarm_description = "dns-resolver log events ingestion volume below expected amount in the past hour. Check if dnstap is working on all dns-resolvers. Runbook: https://wiki.twitch.com/display/VID/dnstap+not+sending+logs+to+cloudwatch"

  # Alarm when the metric (m1) falls below the lower edge of the anomaly
  # detection band produced by the "ad1" expression.
  comparison_operator = "LessThanLowerThreshold"
  threshold_metric_id = "ad1"

  # Window: 12 periods x 300 s = 60 minutes.
  # Trigger: 5 breaching datapoints x 300 s = 25 minutes within that window.
  evaluation_periods  = 12
  datapoints_to_alarm = 5

  # NOTE(review): treat_missing_data is not set, so the provider/CloudWatch
  # default applies. If a complete dnstap outage results in no
  # IncomingLogEvents datapoints at all, the alarm may end up in
  # INSUFFICIENT_DATA, which has no actions configured below — confirm
  # whether treat_missing_data = "breaching" is wanted here.

  # Notify PagerDuty through the SNS topic; nothing is sent on insufficient data.
  alarm_actions             = [aws_sns_topic.pagerduty_business_hours.arn]
  insufficient_data_actions = []

  # Expected-value band for m1; the second argument (2) is the band width
  # passed to ANOMALY_DETECTION_BAND.
  metric_query {
    id          = "ad1"
    expression  = "ANOMALY_DETECTION_BAND(m1,2)"
    label       = "IncomingLogEvents (Expected)"
    return_data = true
  }

  # Observed metric: sum of log events ingested into the svclogs/dnstap log
  # group per 5-minute period.
  metric_query {
    id          = "m1"
    return_data = true

    metric {
      metric_name = "IncomingLogEvents"
      namespace   = "AWS/Logs"
      period      = 300
      stat        = "Sum"
      dimensions = {
        LogGroupName = "svclogs/dnstap"
      }
    }
  }

  tags = local.tags
}
