# Client identifier of this installation; the "-test" suffix matches the test endpoints configured below.
ClientId: "solomon-alerting-test"

# TVM part of the auth settings.
AuthConfig {
  TvmConfig {
    # TVM client id plus a yav:// reference to its secret; the secret value itself
    # is not stored in this file (presumably resolved from the secret store at load time).
    ClientId: 2016815
    Secret: "yav://sec-01dscwa6tj6pvxdb3tknwtn6x1/client_secret"
  }

  InternalAccess {
    # 2010238 is the TVM client id annotated as "solomon-test" in KikimrConfig.tvm_auth.
    Allow: "tvm-2010238"
  }
}

# OAuth part of the auth settings.
# NOTE(review): this is a second top-level AuthConfig entry. It presumably relies on
# AuthConfig being a repeated field or on the parser merging repeated occurrences —
# confirm before folding it into the TVM AuthConfig entry.
AuthConfig {
  OAuthConfig {
    Host: "solomon-test.yandex-team.ru"
  }
}

# Clusters known to this installation: a single "sas" cluster with two address entries.
ClustersConfig: [
  {
    ClusterId: "sas"
    Addresses: ["solomon://metabase", "solomon://stockpile"]
  }
]

# Global metabase client that is used to resolve sensors across all alert evaluations.
MetabaseClientConfig: {
  GrpcConfig: {
    # Metabase nodes for a single cluster.
    Addresses: "solomon://metabase:5710"
    # Thread pool used to deliver gRPC requests to metabase.
    ThreadPoolName: "CpuLowPriority"
    # Limits the size of a response from metabase. A huge response indicates that too many
    # sensors were resolved by the selector, which leads to problems when all of those
    # lines are later loaded from stockpile.
    MaxInboundMessageSize: { Value: 20, Unit: MEGABYTES }
    # Read requests are small; hitting this limit indicates a problem on the executor side.
    MaxOutboundMessageSize: { Value: 1, Unit: MEGABYTES }
    ConnectTimeout: { Value: 5, Unit: SECONDS }
    ReadTimeout: { Value: 30, Unit: SECONDS }
    CircuitBreakerConfig: {
      FailureQuantileThreshold: 0.4
      ResetTimeout: { Value: 30, Unit: SECONDS }
    }
    # Per-endpoint in-flight request limiter on each node.
    limiter_config: [
      {
        min_rtt: { Value: 100, Unit: MILLISECONDS }
        min_limit: 1
        max_limit: 20
        rtt_tolerance: 5
        use_as_default: true
      }
    ]
  }
  FindCacheConfig: {
    RefreshInterval: { Value: 10, Unit: MINUTES }
  }
  UniqueLabelsCacheConfig: {
    # Half of the unroll interval (BrokerConfig.UnrollConfig.UnrollInterval is 8 minutes).
    ExpireTtl: { Value: 4, Unit: MINUTES }
  }
}

# Global stockpile client that is used to load sensors across all alert evaluations.
StockpileClientConfig: {
  GrpcConfig: {
    # Stockpile nodes for a single cluster.
    Addresses: "solomon://stockpile:5700"
    # Thread pool used to deliver gRPC requests to stockpile.
    ThreadPoolName: "CpuLowPriority"
    # Limits the size of the response for a single sensor loaded by the executor.
    MaxInboundMessageSize: { Value: 20, Unit: MEGABYTES }
    # Read requests are small; hitting this limit indicates a problem on the executor side.
    MaxOutboundMessageSize: { Value: 1, Unit: MEGABYTES }
    ReadTimeout: { Value: 25, Unit: SECONDS }
    CircuitBreakerConfig: {
      FailureQuantileThreshold: 0.4
      ResetTimeout: { Value: 30, Unit: SECONDS }
    }
    # Per-endpoint in-flight request limiter on each node.
    limiter_config: [
      {
        min_rtt: { Value: 100, Unit: MILLISECONDS }
        min_limit: 1
        max_limit: 10
        rtt_tolerance: 5
        use_as_default: true
      }
    ]
  }
}

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Persistence layer
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Connection settings and schema root for the database backing the persistence layer.
KikimrConfig {
  ConnectionConfig {
    Endpoint: "ydb-ru-prestable.yandex.net:2135"
    Database: "/ru-prestable/solomon/development/solomon"

    # TVM credentials used for this connection; the secret is a yav:// reference, not a literal value.
    tvm_auth {
      # TODO: use alerting user
      client_id: 2010238 # solomon-test
      client_secret: "yav://sec-01dq7m99vxbhb20ddbfdgszdhv/client_secret"
    }

    ThreadPoolName: "CpuLowPriority"

    # Message size limits and timeouts for the connection.
    MaxInboundMessageSize: { Value: 64, Unit: MEGABYTES }
    MaxOutboundMessageSize: { Value: 64, Unit: MEGABYTES }
    ConnectTimeout: { Value: 5, Unit: SECONDS }
    ReadTimeout: { Value: 25, Unit: SECONDS }
  }
  # Same path as ConnectionConfig.Database.
  SchemaRoot: "/ru-prestable/solomon/development/solomon"
}

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Broker
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Responsibilities:
# 1) Unroll multi alerts into sub alerts
# 2) Assign alert evaluation to one of the nodes
# 3) Accept all user requests for a particular project
# 4) Notify about evaluation status
BrokerConfig: {
  UnrollConfig: {
    # Interval between alert unrolls. A smaller interval requires more resources.
    UnrollInterval: { Value: 8, Unit: MINUTES }
    # Timeout for unrolling a single multi alert.
    UnrollTimeout: { Value: 30, Unit: SECONDS }
  }
  # Configuration for notification channels.
  NotificationConfig: {
    # Defines the behaviour for retryable errors from external systems. For example,
    # for the webhook notification channel an HTTP 5xx status code indicates that
    # the request can be retried after some time.
    RetryConfig: {
      # The number of retries allowed before a task can fail permanently.
      # If both TaskRetryLimit and TaskAgeLimit are specified, then one of the
      # limits must be exceeded before a task can fail permanently.
      # NOTE(review): this contradicts the TaskAgeLimit comment below ("both limits") —
      # confirm which statement matches the retry implementation.
      TaskRetryLimit: 0
      # The maximum age, counted from the first attempt to execute a task, after which
      # any new task failure can be permanent.
      # If both TaskRetryLimit and TaskAgeLimit are specified, then both
      # limits must be exceeded before a task can fail permanently.
      TaskAgeLimit: { Value: 10, Unit: MINUTES }
      # Controls the delay before the first retry. Subsequent retries use this
      # value adjusted according to RetryConfig#RetryDelayMultiplier.
      InitialDelay: { Value: 10, Unit: SECONDS }
      # Controls the change in retry delay. The retry delay of the previous call
      # is multiplied by the multiplier to calculate the retry delay for the next call.
      RetryDelayMultiplier: 2
      # MaxRetryDelay puts a limit on the value of the retry delay, so that
      # RetryConfig#RetryDelayMultiplier can't increase the retry delay
      # beyond this amount.
      MaxRetryDelay: { Value: 10, Unit: MINUTES }
    }
    # Common configuration for email notification channels.
    EmailConfig: {
      # Max number of messages waiting to be sent per notification channel.
      SendQueueSize: 100
      # Path to a mustache template on the file system that defines the subject part.
      #PathToSubjectTemplate: "/Berkanavt/solomon/alert/template/email/subject.mustache"
      # Path to a mustache template on the file system that defines the content part.
      #PathToContentTemplate: "/Berkanavt/solomon/alert/template/email/content.mustache"
      # Used as the "From" part of the email.
      From: "solomon-test@yandex-team.ru"
      MailClientConfig: {
        # https://wiki.yandex-team.ru/Pochta/mx/
        SmtpHost: "outbound-relay.yandex.net"
        SmtpPort: 25
        # NOTE(review): the key is spelled "Smpt" (sic); it presumably matches the config
        # schema, so do not correct it here without changing the schema as well.
        SmptConnectTimeout: { Value: 3, Unit: SECONDS }
      }
      ConnectionCount: 10
      ThreadPoolName: "CpuLowPriority"
    }
    # Common configuration for webhook notification channels.
    WebhookConfig: {
      # Path to a mustache template on the file system that defines the body of the POST request.
      #PathToContentTemplate: "/Berkanavt/solomon/alert/template/webhook/content.mustache"

      # Use the generic async HTTP client for all server requests.
      HttpClientConfig: {
        ConnectTimeout: { Value: 3, Unit: SECONDS }
        RequestTimeout: { Value: 10, Unit: SECONDS }
      }
    }
    JugglerConfig: {
      Address: "juggler-push.search.yandex.net:80"
      # 100 events are packed into 1 batch; the juggler team recommends a limit of
      # 100 batch requests per second.
      RateLimitEventsPerSecond: 5000
      CircuitBreakerFailureQuantileThreshold: 0.4
      CircuitBreakerResetTimeout: { Value: 30, Unit: SECONDS }
      ThreadPoolName: "CpuLowPriority"
    }
    TelegramConfig: {
      BotName: "SolomonTestMonitoringBot"
      Token: "yav://sec-01d2d2br12cq71kz5qd5bcx71q/SolomonTestMonitoringBot"
      # General rate limit for the telegram bot.
      RateLimitEventsPerSecond: 30
      # Telegram allows 20 queries per minute -> 20/60 = 1/3
      RateLimitEventsPerSecondForGroup: 0.3
      # Rate limit for a specific chat.
      RateLimitEventsPerChat: 1
      FetchPeriod: { Value: 5, Unit: SECONDS }
      ThreadPoolName: "CpuLowPriority"
      TtlDeletion: { Value: 3, Unit: HOURS }
    }
    YaChatsConfig: {
      ThreadPoolName: "CpuLowPriority"
      Url: "https://botplatform.yandex.net"
      Token: "yav://sec-01dmjfzn46m7wfs725tdmwz12w/oauth_chats"
    }
    ChartsClientConfig: {
      RequestTimeout: { Value: 30, Unit: SECONDS }
      Token: "yav://sec-01cjs7ct10ppxrqejvxba8pq3w/Token"
    }
    Address: "https://solomon-test.yandex-team.ru"
  }
}

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Executor subsystem
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Executor responsibilities:
# 1) Periodically evaluate the alerts assigned to the node
# 2) Report the evaluation state to the broker of the assigned alert
ExecutorConfig: {
  # Interval between evaluations of a particular alert. A smaller interval requires more resources.
  EvalInterval: { Value: 1, Unit: MINUTES }
  # Timeout for evaluating a particular alert. Evaluation includes resolving sensors via
  # metabase and loading sensors from stockpile.
  AlertTimeout: { Value: 30, Unit: SECONDS }
  # Max number of alerts executed at the same time. This parameter helps avoid OOM when
  # each alert loads a lot of points from stockpile.
  MaxConcurrentAlert: 2000
  # Max number of warmup alerts (first execution after assignment) running at the same time.
  MaxConcurrentWarmupAlert: 20
  # Evaluation lag can occur when alerts have not been evaluated for a long time, for example
  # because the service was shut down or because another system replies too slowly. This
  # parameter allows skipping some time in order to catch up with the current time faster.
  # By default the limit is 5 hours.
  MaxEvaluationLag: { Value: 30, Unit: MINUTES }
  AlertingStatusesShardConfig: {
    Project: "solomon"
    Cluster: "testing"
    Service: "alerting_statuses"
  }
}

# Client for the staff API; the OAuth token is a yav:// reference, not a literal value.
StaffClientConfig {
  Url: "https://staff-api.yandex-team.ru"
  CacheTtl: { Value: 1, Unit: HOURS }
  OAuthToken: "yav://sec-01cjs7ct10ppxrqejvxba8pq3w/OAuthToken"
}

# Client for the Yasms endpoint.
# NOTE(review): the URL is plain http — confirm this is intended for the test environment.
YasmsClientConfig {
  Url: "http://phone-passport-test.yandex.ru"
  Sender: "solomon"
}

# Client for the ambry endpoint; the URL points at the yasm prestable host.
AmbryClientConfig {
  Url: "https://yasm-prestable.yandex-team.ru/srvambry"
  ThreadPoolName: "CpuLowPriority"
}

# Client for the notify endpoint; the OAuth token is a yav:// reference, not a literal value.
NotifyConfig: {
  Host: "https://yc-tools.yandex-team.ru/notify"

  HttpClientConfig: {
    ConnectTimeout: { Value: 3, Unit: SECONDS }
    RequestTimeout: { Value: 10, Unit: SECONDS }
  }

  oauth_token: "yav://sec-01cjs7ct10ppxrqejvxba8pq3w/Token"
}

# HTTP server for exposing sensors
HttpServerConfig: {
  # "::" is the IPv6 unspecified (any) address.
  Bind: "::"
  Port: 4530
  ThreadsCount: 2
}

# gRPC servers: two listeners on separate ports, both on the "CpuLowPriority" pool.
RpcServerConfig {
  GrpcServerConfig: {
    Port: 8799
    ThreadPoolName: "CpuLowPriority"
  }
  # Only the internal server sets an explicit message size limit.
  InternalGrpcServerConfig: {
    Port: 8800
    ThreadPoolName: "CpuLowPriority"
    MaxMessageSize: { Value: 64, Unit: MEGABYTES }
  }
}

# Discovery of alerting nodes via gRPC, refreshed on a fixed interval.
AlertingDiscoveryConfig {
  GrpcClientConfig: {
    Addresses: ["solomon://alerting"]
    ThreadPoolName: "CpuLowPriority"
    CircuitBreakerConfig: {
      FailureQuantileThreshold: 0.4
      ResetTimeout: { Value: 30, Unit: SECONDS }
    }
    # Same 64 MB limit in both directions.
    MaxInboundMessageSize: { Value: 64, Unit: MEGABYTES }
    MaxOutboundMessageSize: { Value: 64, Unit: MEGABYTES }
  }
  RefreshInterval: { Value: 5, Unit: MINUTES }
  ThreadPoolName: "CpuLowPriority"
}

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Thread pools
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Specifies the thread pools available to the application.
# Named thread pools; other sections select one through their ThreadPoolName field.
ThreadPoolsConfig {
  ThreadPools: [
    {
      # The pool referenced by every ThreadPoolName setting in this file;
      # the only entry here that sets ThreadPoolType explicitly.
      Name: "CpuLowPriority"
      Threads: 10
      ThreadPoolType: FORK_JOIN
    },
    # The remaining pools are not referenced by name anywhere in this file
    # (presumably they are looked up from application code — verify before removing).
    {
      Name: "CpuHighPriority"
      Threads: 4
    },
    {
      Name: "Io"
      Threads: 4
    },
    {
      Name: "Scheduler"
      Threads: 4
    }
  ]
}

# Logging goes to a file; all three configured loggers use the INFO level.
LoggingConfig {
  LogTo: FILE
  LogFile: "/logs/alerting.log"
  Loggers: [
    { Name: "root", Level: INFO },
    { Name: "ru.yandex", Level: INFO },
    { Name: "access", Level: INFO }
  ]
}

# Thread pool assigned to the task scheduler; "CpuLowPriority" is defined in ThreadPoolsConfig.
TaskSchedulerConfig {
  ThreadPoolName: "CpuLowPriority"
}
