# AWS provider declared with no inline settings: region and credentials are not
# configured in this block.
# NOTE(review): presumably they are inherited from the environment or from the
# calling module's provider configuration — confirm.
provider "aws" {}

# PagerDuty setup
# SNS topic that the SERVFAIL alarm publishes to. The topic name is suffixed
# with var.pop, so each PoP value yields a distinct topic name.
resource "aws_sns_topic" "pagerduty" {
  name = format("video-coreservices-dns-servfail-pagerduty-alarm-%s", var.pop)
  tags = var.tags
}

# Forwards everything published on the PagerDuty SNS topic to an HTTPS endpoint
# on events.pagerduty.com.
resource "aws_sns_topic_subscription" "pd_subscription" {
  topic_arn = aws_sns_topic.pagerduty.arn

  # NOTE(review): the URL path embeds what appears to be a PagerDuty integration
  # key, committed in plain text. Consider supplying it through a variable or a
  # secret store instead of hard-coding it here.
  protocol = "https"
  endpoint = "https://events.pagerduty.com/integration/40834d4b44754cb7a3d65da93914e85a/enqueue"

  # Declares that the endpoint confirms the SNS subscription on its own, so no
  # manual confirmation step is expected.
  endpoint_auto_confirms = true
}

# Shared identifiers for the custom CloudWatch metric. Both the metric filter
# (which publishes the metric) and the alarm (which reads it) use these values,
# so they must stay in sync.
locals {
  namespace   = "unbound/servfail"
  metric_name = format("ivs-domains-servfail-%s", var.pop)
}

# Metric filter: counts SERVFAIL log events for IVS domains from hosts matching this PoP
# Turns matching lines in the /vidcs/unbound log group into a custom metric.
#
# The pattern is a space-delimited CloudWatch Logs filter with named fields.
# An event matches when all of the following hold:
#   - host contains var.pop
#   - failure equals SERVFAIL
#   - domain matches one of the four wildcard domain expressions
resource "aws_cloudwatch_log_metric_filter" "servfail" {
  log_group_name = "/vidcs/unbound"
  name           = local.metric_name

  # The heredoc is kept verbatim (including its trailing newline) so the
  # pattern string sent to CloudWatch does not change.
  pattern = <<EOS
[month, date, time, host="*${var.pop}*", process, line, error, failure=SERVFAIL, domain="*.live-video.a2z.com*" || domain="*.justin.tv*" || domain="*.live-video.net*" || domain="*.ttvnw.net*", etc]
EOS

  metric_transformation {
    namespace = local.namespace
    name      = local.metric_name

    # Each matching log event contributes 1 to the metric.
    value = "1"
    # Value published when log events are ingested but none match the pattern
    # (per the CloudWatch Logs metric filter documentation).
    default_value = "0"
  }
}


# Alarm when the SERVFAIL count exceeds the expected (anomaly-detection) band in at least 5 of the past 10 one-minute periods
# Anomaly-detection alarm on the per-PoP SERVFAIL metric; notifies the
# PagerDuty SNS topic when it enters the ALARM state.
#
# NOTE: the resource label "dns_logs_ingestion_anomaly_too_low" does not
# describe this alarm, which fires on abnormally HIGH SERVFAIL counts. The
# label is left unchanged because renaming it changes the resource address in
# state; rename it only together with a `moved` block or `terraform state mv`.
resource "aws_cloudwatch_metric_alarm" "dns_logs_ingestion_anomaly_too_low" {
  alarm_name        = "dns-resolver ${var.pop} Servfails for IVS domains abnormally high"
  alarm_description = "dns-resolver (${var.pop}) SERVFAILs for IVS domains abnormally high. Runbook: https://wiki.twitch.com/display/VID/Video+nagios+alerts#Videonagiosalerts-dns-resolver(pop)SERVFAILsforIVSdomainsabnormallyHigh"

  # Breach condition: metric m1 is above the upper edge of band ad1.
  comparison_operator = "GreaterThanUpperThreshold"
  threshold_metric_id = "ad1"

  # "M out of N": 5 breaching datapoints within the last 10 evaluated periods.
  # With the 60-second period on m1 this is 5 of the last 10 minutes.
  evaluation_periods  = 10
  datapoints_to_alarm = 5

  # Direct reference instead of the legacy "${...}" interpolation-only wrapper.
  alarm_actions             = [aws_sns_topic.pagerduty.arn]
  insufficient_data_actions = []

  # Expected range for m1; the second argument (5) is the band width in
  # standard deviations.
  metric_query {
    id          = "ad1"
    expression  = "ANOMALY_DETECTION_BAND(m1,5)"
    label       = "SERVFAILs (Expected)"
    return_data = true
  }

  # Raw metric published by the log metric filter: SERVFAIL matches summed per
  # 60-second period.
  metric_query {
    id          = "m1"
    return_data = true

    metric {
      metric_name = local.metric_name
      namespace   = local.namespace
      period      = 60
      stat        = "Sum"
    }
  }

  tags = var.tags
}
