Files
2026-03-12 15:17:52 +07:00

276 lines
8.0 KiB
JSON

{
"alerts": [
{
"alert": "HighLatency",
"expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{service=\"payment-service\"}[5m])) > 0.5",
"for": "5m",
"labels": {
"severity": "warning",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "High request latency detected",
"description": "95th percentile latency is {{ $value }}s for payment-service",
"runbook_url": "https://runbooks.company.com/high-latency"
},
"historical_data": {
"fires_per_day": 2.5,
"false_positive_rate": 0.15,
"average_duration_minutes": 12
}
},
{
"alert": "ServiceDown",
"expr": "up{service=\"payment-service\"} == 0",
"labels": {
"severity": "critical",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "Payment service is down",
"description": "Payment service has been down for more than 1 minute",
"runbook_url": "https://runbooks.company.com/service-down"
},
"historical_data": {
"fires_per_day": 0.1,
"false_positive_rate": 0.05,
"average_duration_minutes": 3
}
},
{
"alert": "HighErrorRate",
"expr": "sum(rate(http_requests_total{service=\"payment-service\",code=~\"5..\"}[5m])) / sum(rate(http_requests_total{service=\"payment-service\"}[5m])) > 0.01",
"for": "2m",
"labels": {
"severity": "warning",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "High error rate detected",
"description": "Error rate is {{ $value | humanizePercentage }} for payment-service",
"runbook_url": "https://runbooks.company.com/high-error-rate"
},
"historical_data": {
"fires_per_day": 1.8,
"false_positive_rate": 0.25,
"average_duration_minutes": 8
}
},
{
"alert": "HighCPUUsage",
"expr": "rate(process_cpu_seconds_total{service=\"payment-service\"}[5m]) * 100 > 80",
"labels": {
"severity": "warning",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "High CPU usage",
"description": "CPU usage is {{ $value }}% for payment-service"
},
"historical_data": {
"fires_per_day": 15.2,
"false_positive_rate": 0.8,
"average_duration_minutes": 45
}
},
{
"alert": "HighMemoryUsage",
"expr": "process_resident_memory_bytes{service=\"payment-service\"} / process_virtual_memory_max_bytes{service=\"payment-service\"} * 100 > 85",
"labels": {
"severity": "info",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "High memory usage",
"description": "Memory usage is {{ $value }}% for payment-service"
},
"historical_data": {
"fires_per_day": 8.5,
"false_positive_rate": 0.6,
"average_duration_minutes": 30
}
},
{
"alert": "DatabaseConnectionPoolExhaustion",
"expr": "db_connections_active{service=\"payment-service\"} / db_connections_max{service=\"payment-service\"} > 0.9",
"for": "1m",
"labels": {
"severity": "critical",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "Database connection pool near exhaustion",
"description": "Connection pool utilization is {{ $value | humanizePercentage }}",
"runbook_url": "https://runbooks.company.com/db-connections"
},
"historical_data": {
"fires_per_day": 0.3,
"false_positive_rate": 0.1,
"average_duration_minutes": 5
}
},
{
"alert": "LowTraffic",
"expr": "sum(rate(http_requests_total{service=\"payment-service\"}[5m])) < 10",
"for": "10m",
"labels": {
"severity": "warning",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "Unusually low traffic",
"description": "Request rate is {{ $value }} RPS, which is unusually low"
},
"historical_data": {
"fires_per_day": 12.0,
"false_positive_rate": 0.9,
"average_duration_minutes": 120
}
},
{
"alert": "HighLatencyDuplicate",
"expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{service=\"payment-service\"}[5m])) > 0.5",
"for": "5m",
"labels": {
"severity": "warning",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "High request latency detected (duplicate)",
"description": "95th percentile latency is {{ $value }}s for payment-service"
},
"historical_data": {
"fires_per_day": 2.5,
"false_positive_rate": 0.15,
"average_duration_minutes": 12
}
},
{
"alert": "VeryLowErrorRate",
"expr": "sum(rate(http_requests_total{service=\"payment-service\",code=~\"5..\"}[5m])) / sum(rate(http_requests_total{service=\"payment-service\"}[5m])) > 0.001",
"labels": {
"severity": "info",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "Error rate above 0.1%",
"description": "Error rate is {{ $value | humanizePercentage }}"
},
"historical_data": {
"fires_per_day": 25.0,
"false_positive_rate": 0.95,
"average_duration_minutes": 5
}
},
{
"alert": "DiskUsageHigh",
"expr": "disk_usage_percent{service=\"payment-service\"} > 85",
"labels": {
"severity": "warning",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "Disk usage high",
"description": "Disk usage is {{ $value }}%"
},
"historical_data": {
"fires_per_day": 3.2,
"false_positive_rate": 0.4,
"average_duration_minutes": 240
}
}
],
"services": [
{
"name": "payment-service",
"type": "api",
"criticality": "critical",
"team": "payments"
},
{
"name": "user-service",
"type": "api",
"criticality": "high",
"team": "identity"
},
{
"name": "notification-service",
"type": "api",
"criticality": "medium",
"team": "communications"
}
],
"alert_routing": {
"routes": [
{
"match": {
"severity": "critical"
},
"receiver": "pager-critical",
"group_wait": "10s",
"group_interval": "1m",
"repeat_interval": "5m"
},
{
"match": {
"severity": "warning"
},
"receiver": "slack-warnings",
"group_wait": "30s",
"group_interval": "5m",
"repeat_interval": "1h"
},
{
"match": {
"severity": "info"
},
"receiver": "email-info",
"group_wait": "2m",
"group_interval": "10m",
"repeat_interval": "24h"
}
]
},
"receivers": [
{
"name": "pager-critical",
"pagerduty_configs": [
{
"routing_key": "pager-key-critical",
"description": "Critical alert: {{ range .Alerts }}{{ .Annotations.summary }}{{ end }}"
}
]
},
{
"name": "slack-warnings",
"slack_configs": [
{
"api_url": "https://hooks.slack.com/services/warnings",
"channel": "#alerts-warnings",
"title": "Warning Alert",
"text": "{{ range .Alerts }}{{ .Annotations.description }}{{ end }}"
}
]
},
{
"name": "email-info",
"email_configs": [
{
"to": "team-notifications@company.com",
"subject": "Info Alert: {{ .GroupLabels.alertname }}",
"body": "{{ range .Alerts }}{{ .Annotations.description }}{{ end }}"
}
]
}
]
}