add brain

This commit is contained in:
2026-03-12 15:17:52 +07:00
parent fd9f558fa1
commit e7821a7a9d
355 changed files with 93784 additions and 24 deletions

View File

@@ -0,0 +1,276 @@
{
"alerts": [
{
"alert": "HighLatency",
"expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{service=\"payment-service\"}[5m])) > 0.5",
"for": "5m",
"labels": {
"severity": "warning",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "High request latency detected",
"description": "95th percentile latency is {{ $value }}s for payment-service",
"runbook_url": "https://runbooks.company.com/high-latency"
},
"historical_data": {
"fires_per_day": 2.5,
"false_positive_rate": 0.15,
"average_duration_minutes": 12
}
},
{
"alert": "ServiceDown",
"expr": "up{service=\"payment-service\"} == 0",
"labels": {
"severity": "critical",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "Payment service is down",
"description": "Payment service target is failing Prometheus scrapes (up == 0)",
"runbook_url": "https://runbooks.company.com/service-down"
},
"historical_data": {
"fires_per_day": 0.1,
"false_positive_rate": 0.05,
"average_duration_minutes": 3
}
},
{
"alert": "HighErrorRate",
"expr": "sum(rate(http_requests_total{service=\"payment-service\",code=~\"5..\"}[5m])) / sum(rate(http_requests_total{service=\"payment-service\"}[5m])) > 0.01",
"for": "2m",
"labels": {
"severity": "warning",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "High error rate detected",
"description": "Error rate is {{ $value | humanizePercentage }} for payment-service",
"runbook_url": "https://runbooks.company.com/high-error-rate"
},
"historical_data": {
"fires_per_day": 1.8,
"false_positive_rate": 0.25,
"average_duration_minutes": 8
}
},
{
"alert": "HighCPUUsage",
"expr": "rate(process_cpu_seconds_total{service=\"payment-service\"}[5m]) * 100 > 80",
"labels": {
"severity": "warning",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "High CPU usage",
"description": "CPU usage is {{ $value }}% for payment-service"
},
"historical_data": {
"fires_per_day": 15.2,
"false_positive_rate": 0.8,
"average_duration_minutes": 45
}
},
{
"alert": "HighMemoryUsage",
"expr": "process_resident_memory_bytes{service=\"payment-service\"} / process_virtual_memory_max_bytes{service=\"payment-service\"} * 100 > 85",
"labels": {
"severity": "info",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "High memory usage",
"description": "Memory usage is {{ $value }}% for payment-service"
},
"historical_data": {
"fires_per_day": 8.5,
"false_positive_rate": 0.6,
"average_duration_minutes": 30
}
},
{
"alert": "DatabaseConnectionPoolExhaustion",
"expr": "db_connections_active{service=\"payment-service\"} / db_connections_max{service=\"payment-service\"} > 0.9",
"for": "1m",
"labels": {
"severity": "critical",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "Database connection pool near exhaustion",
"description": "Connection pool utilization is {{ $value | humanizePercentage }}",
"runbook_url": "https://runbooks.company.com/db-connections"
},
"historical_data": {
"fires_per_day": 0.3,
"false_positive_rate": 0.1,
"average_duration_minutes": 5
}
},
{
"alert": "LowTraffic",
"expr": "sum(rate(http_requests_total{service=\"payment-service\"}[5m])) < 10",
"for": "10m",
"labels": {
"severity": "warning",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "Unusually low traffic",
"description": "Request rate is {{ $value }} RPS, which is unusually low"
},
"historical_data": {
"fires_per_day": 12.0,
"false_positive_rate": 0.9,
"average_duration_minutes": 120
}
},
{
"alert": "HighLatencyDuplicate",
"expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{service=\"payment-service\"}[5m])) > 0.5",
"for": "5m",
"labels": {
"severity": "warning",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "High request latency detected (duplicate)",
"description": "95th percentile latency is {{ $value }}s for payment-service"
},
"historical_data": {
"fires_per_day": 2.5,
"false_positive_rate": 0.15,
"average_duration_minutes": 12
}
},
{
"alert": "VeryLowErrorRate",
"expr": "sum(rate(http_requests_total{service=\"payment-service\",code=~\"5..\"}[5m])) / sum(rate(http_requests_total{service=\"payment-service\"}[5m])) > 0.001",
"labels": {
"severity": "info",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "Error rate above 0.1%",
"description": "Error rate is {{ $value | humanizePercentage }}"
},
"historical_data": {
"fires_per_day": 25.0,
"false_positive_rate": 0.95,
"average_duration_minutes": 5
}
},
{
"alert": "DiskUsageHigh",
"expr": "disk_usage_percent{service=\"payment-service\"} > 85",
"labels": {
"severity": "warning",
"service": "payment-service",
"team": "payments"
},
"annotations": {
"summary": "Disk usage high",
"description": "Disk usage is {{ $value }}%"
},
"historical_data": {
"fires_per_day": 3.2,
"false_positive_rate": 0.4,
"average_duration_minutes": 240
}
}
],
"services": [
{
"name": "payment-service",
"type": "api",
"criticality": "critical",
"team": "payments"
},
{
"name": "user-service",
"type": "api",
"criticality": "high",
"team": "identity"
},
{
"name": "notification-service",
"type": "api",
"criticality": "medium",
"team": "communications"
}
],
"alert_routing": {
"routes": [
{
"match": {
"severity": "critical"
},
"receiver": "pager-critical",
"group_wait": "10s",
"group_interval": "1m",
"repeat_interval": "5m"
},
{
"match": {
"severity": "warning"
},
"receiver": "slack-warnings",
"group_wait": "30s",
"group_interval": "5m",
"repeat_interval": "1h"
},
{
"match": {
"severity": "info"
},
"receiver": "email-info",
"group_wait": "2m",
"group_interval": "10m",
"repeat_interval": "24h"
}
]
},
"receivers": [
{
"name": "pager-critical",
"pagerduty_configs": [
{
"routing_key": "pager-key-critical",
"description": "Critical alert: {{ range .Alerts }}{{ .Annotations.summary }}{{ end }}"
}
]
},
{
"name": "slack-warnings",
"slack_configs": [
{
"api_url": "https://hooks.slack.com/services/warnings",
"channel": "#alerts-warnings",
"title": "Warning Alert",
"text": "{{ range .Alerts }}{{ .Annotations.description }}{{ end }}"
}
]
},
{
"name": "email-info",
"email_configs": [
{
"to": "team-notifications@company.com",
"subject": "Info Alert: {{ .GroupLabels.alertname }}",
"body": "{{ range .Alerts }}{{ .Annotations.description }}{{ end }}"
}
]
}
]
}

View File

@@ -0,0 +1,83 @@
{
"name": "payment-service",
"type": "api",
"criticality": "critical",
"user_facing": true,
"description": "Handles payment processing and transaction management",
"team": "payments",
"environment": "production",
"dependencies": [
{
"name": "user-service",
"type": "api",
"criticality": "high"
},
{
"name": "payment-gateway",
"type": "external",
"criticality": "critical"
},
{
"name": "fraud-detection",
"type": "ml",
"criticality": "high"
}
],
"endpoints": [
{
"path": "/api/v1/payments",
"method": "POST",
"sla_latency_ms": 500,
"expected_tps": 100
},
{
"path": "/api/v1/payments/{id}",
"method": "GET",
"sla_latency_ms": 200,
"expected_tps": 500
},
{
"path": "/api/v1/payments/{id}/refund",
"method": "POST",
"sla_latency_ms": 1000,
"expected_tps": 10
}
],
"business_metrics": {
"revenue_per_hour": {
"metric": "sum(payment_amount * increase(payments_successful_total[1h]))",
"target": 50000,
"unit": "USD"
},
"conversion_rate": {
"metric": "sum(rate(payments_successful_total[5m])) / sum(rate(payment_attempts_total[5m]))",
"target": 0.95,
"unit": "percentage"
}
},
"infrastructure": {
"container_orchestrator": "kubernetes",
"replicas": 6,
"cpu_limit": "2000m",
"memory_limit": "4Gi",
"database": {
"type": "postgresql",
"connection_pool_size": 20
},
"cache": {
"type": "redis",
"cluster_size": 3
}
},
"compliance_requirements": [
"PCI-DSS",
"SOX",
"GDPR"
],
"tags": [
"payment",
"transaction",
"critical-path",
"revenue-generating"
]
}

View File

@@ -0,0 +1,113 @@
{
"name": "customer-portal",
"type": "web",
"criticality": "high",
"user_facing": true,
"description": "Customer-facing web application for account management and billing",
"team": "frontend",
"environment": "production",
"dependencies": [
{
"name": "user-service",
"type": "api",
"criticality": "high"
},
{
"name": "billing-service",
"type": "api",
"criticality": "high"
},
{
"name": "notification-service",
"type": "api",
"criticality": "medium"
},
{
"name": "cdn",
"type": "external",
"criticality": "medium"
}
],
"pages": [
{
"path": "/dashboard",
"sla_load_time_ms": 2000,
"expected_concurrent_users": 1000
},
{
"path": "/billing",
"sla_load_time_ms": 3000,
"expected_concurrent_users": 200
},
{
"path": "/settings",
"sla_load_time_ms": 1500,
"expected_concurrent_users": 100
}
],
"business_metrics": {
"daily_active_users": {
"metric": "sum(increase(user_sessions_started_total[1d]))",
"target": 10000,
"unit": "users"
},
"session_duration": {
"metric": "avg(user_session_duration_seconds)",
"target": 300,
"unit": "seconds"
},
"bounce_rate": {
"metric": "sum(rate(page_views_bounced_total[1h])) / sum(rate(page_views_total[1h]))",
"target": 0.3,
"unit": "percentage"
}
},
"infrastructure": {
"container_orchestrator": "kubernetes",
"replicas": 4,
"cpu_limit": "1000m",
"memory_limit": "2Gi",
"storage": {
"type": "nfs",
"size": "50Gi"
},
"ingress": {
"type": "nginx",
"ssl_termination": true,
"rate_limiting": {
"requests_per_second": 100,
"burst": 200
}
}
},
"monitoring": {
"synthetic_checks": [
{
"name": "login_flow",
"url": "/auth/login",
"frequency": "1m",
"locations": ["us-east", "eu-west", "ap-south"]
},
{
"name": "checkout_flow",
"url": "/billing/checkout",
"frequency": "5m",
"locations": ["us-east", "eu-west"]
}
],
"rum": {
"enabled": true,
"sampling_rate": 0.1
}
},
"compliance_requirements": [
"GDPR",
"CCPA"
],
"tags": [
"frontend",
"customer-facing",
"billing",
"high-traffic"
]
}