add brain

2026-03-12 15:17:52 +07:00
parent fd9f558fa1
commit e7821a7a9d
355 changed files with 93784 additions and 24 deletions
--- a/.brain/.agent/skills/engineering-advanced-skills/observability-designer/expected_outputs/sample_dashboard.json
+++ b/.brain/.agent/skills/engineering-advanced-skills/observability-designer/expected_outputs/sample_dashboard.json
@@ -0,0 +1,811 @@
+{
+  "metadata": {
+    "title": "customer-portal - SRE Dashboard",
+    "service": {
+      "name": "customer-portal",
+      "type": "web",
+      "criticality": "high",
+      "user_facing": true,
+      "description": "Customer-facing web application for account management and billing",
+      "team": "frontend",
+      "environment": "production",
+      "dependencies": [
+        {
+          "name": "user-service",
+          "type": "api",
+          "criticality": "high"
+        },
+        {
+          "name": "billing-service",
+          "type": "api",
+          "criticality": "high"
+        },
+        {
+          "name": "notification-service",
+          "type": "api",
+          "criticality": "medium"
+        },
+        {
+          "name": "cdn",
+          "type": "external",
+          "criticality": "medium"
+        }
+      ],
+      "pages": [
+        {
+          "path": "/dashboard",
+          "sla_load_time_ms": 2000,
+          "expected_concurrent_users": 1000
+        },
+        {
+          "path": "/billing",
+          "sla_load_time_ms": 3000,
+          "expected_concurrent_users": 200
+        },
+        {
+          "path": "/settings",
+          "sla_load_time_ms": 1500,
+          "expected_concurrent_users": 100
+        }
+      ],
+      "business_metrics": {
+        "daily_active_users": {
+          "metric": "sum(increase(user_sessions_started_total[1d]))",
+          "target": 10000,
+          "unit": "users"
+        },
+        "session_duration": {
+          "metric": "avg(user_session_duration_seconds)",
+          "target": 300,
+          "unit": "seconds"
+        },
+        "bounce_rate": {
+          "metric": "sum(rate(page_views_bounced_total[1h])) / sum(rate(page_views_total[1h]))",
+          "target": 0.3,
+          "unit": "percentage"
+        }
+      },
+      "infrastructure": {
+        "container_orchestrator": "kubernetes",
+        "replicas": 4,
+        "cpu_limit": "1000m",
+        "memory_limit": "2Gi",
+        "storage": {
+          "type": "nfs",
+          "size": "50Gi"
+        },
+        "ingress": {
+          "type": "nginx",
+          "ssl_termination": true,
+          "rate_limiting": {
+            "requests_per_second": 100,
+            "burst": 200
+          }
+        }
+      },
+      "monitoring": {
+        "synthetic_checks": [
+          {
+            "name": "login_flow",
+            "url": "/auth/login",
+            "frequency": "1m",
+            "locations": [
+              "us-east",
+              "eu-west",
+              "ap-south"
+            ]
+          },
+          {
+            "name": "checkout_flow",
+            "url": "/billing/checkout",
+            "frequency": "5m",
+            "locations": [
+              "us-east",
+              "eu-west"
+            ]
+          }
+        ],
+        "rum": {
+          "enabled": true,
+          "sampling_rate": 0.1
+        }
+      },
+      "compliance_requirements": [
+        "GDPR",
+        "CCPA"
+      ],
+      "tags": [
+        "frontend",
+        "customer-facing",
+        "billing",
+        "high-traffic"
+      ]
+    },
+    "target_role": "sre",
+    "generated_at": "2026-02-16T14:02:03.421248Z",
+    "version": "1.0"
+  },
+  "configuration": {
+    "time_ranges": [
+      "1h",
+      "6h",
+      "1d",
+      "7d"
+    ],
+    "default_time_range": "6h",
+    "refresh_interval": "30s",
+    "timezone": "UTC",
+    "theme": "dark"
+  },
+  "layout": {
+    "grid_settings": {
+      "width": 24,
+      "height_unit": "px",
+      "cell_height": 30
+    },
+    "sections": [
+      {
+        "title": "Service Overview",
+        "collapsed": false,
+        "y_position": 0,
+        "panels": [
+          "service_status",
+          "slo_summary",
+          "error_budget"
+        ]
+      },
+      {
+        "title": "Golden Signals",
+        "collapsed": false,
+        "y_position": 8,
+        "panels": [
+          "latency",
+          "traffic",
+          "errors",
+          "saturation"
+        ]
+      },
+      {
+        "title": "Resource Utilization",
+        "collapsed": false,
+        "y_position": 20,
+        "panels": [
+          "cpu_usage",
+          "memory_usage",
+          "network_io",
+          "disk_io"
+        ]
+      },
+      {
+        "title": "Dependencies & Downstream",
+        "collapsed": true,
+        "y_position": 24,
+        "panels": [
+          "dependency_status",
+          "downstream_latency",
+          "circuit_breakers"
+        ]
+      }
+    ]
+  },
+  "panels": [
+    {
+      "id": "service_status",
+      "title": "Service Status",
+      "type": "stat",
+      "grid_pos": {
+        "x": 0,
+        "y": 0,
+        "w": 6,
+        "h": 4
+      },
+      "targets": [
+        {
+          "expr": "up{service=\"customer-portal\"}",
+          "legendFormat": "Status"
+        }
+      ],
+      "field_config": {
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "Status"
+            },
+            "properties": [
+              {
+                "id": "color",
+                "value": {
+                  "mode": "thresholds"
+                }
+              },
+              {
+                "id": "thresholds",
+                "value": {
+                  "steps": [
+                    {
+                      "color": "red",
+                      "value": 0
+                    },
+                    {
+                      "color": "green",
+                      "value": 1
+                    }
+                  ]
+                }
+              },
+              {
+                "id": "mappings",
+                "value": [
+                  {
+                    "options": {
+                      "0": {
+                        "text": "DOWN"
+                      }
+                    },
+                    "type": "value"
+                  },
+                  {
+                    "options": {
+                      "1": {
+                        "text": "UP"
+                      }
+                    },
+                    "type": "value"
+                  }
+                ]
+              }
+            ]
+          }
+        ]
+      },
+      "options": {
+        "orientation": "horizontal",
+        "textMode": "value_and_name"
+      }
+    },
+    {
+      "id": "slo_summary",
+      "title": "SLO Achievement (30d)",
+      "type": "stat",
+      "grid_pos": {
+        "x": 6,
+        "y": 0,
+        "w": 9,
+        "h": 4
+      },
+      "targets": [
+        {
+          "expr": "(1 - (sum(increase(http_requests_total{service=\"customer-portal\",code=~\"5..\"}[30d])) / sum(increase(http_requests_total{service=\"customer-portal\"}[30d])))) * 100",
+          "legendFormat": "Availability"
+        },
+        {
+          "expr": "histogram_quantile(0.95, sum by (le) (increase(http_request_duration_seconds_bucket{service=\"customer-portal\"}[30d]))) * 1000",
+          "legendFormat": "P95 Latency (ms)"
+        }
+      ],
+      "field_config": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "thresholds": {
+            "steps": [
+              {
+                "color": "red",
+                "value": 0
+              },
+              {
+                "color": "yellow",
+                "value": 99.0
+              },
+              {
+                "color": "green",
+                "value": 99.9
+              }
+            ]
+          }
+        }
+      },
+      "options": {
+        "orientation": "horizontal",
+        "textMode": "value_and_name"
+      }
+    },
+    {
+      "id": "error_budget",
+      "title": "Error Budget Remaining",
+      "type": "gauge",
+      "grid_pos": {
+        "x": 15,
+        "y": 0,
+        "w": 9,
+        "h": 4
+      },
+      "targets": [
+        {
+          "expr": "(1 - (sum(increase(http_requests_total{service=\"customer-portal\",code=~\"5..\"}[30d])) / sum(increase(http_requests_total{service=\"customer-portal\"}[30d]))) - 0.999) / 0.001 * 100",
+          "legendFormat": "Error Budget %"
+        }
+      ],
+      "field_config": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "min": 0,
+          "max": 100,
+          "thresholds": {
+            "steps": [
+              {
+                "color": "red",
+                "value": 0
+              },
+              {
+                "color": "yellow",
+                "value": 25
+              },
+              {
+                "color": "green",
+                "value": 50
+              }
+            ]
+          },
+          "unit": "percent"
+        }
+      },
+      "options": {
+        "showThresholdLabels": true,
+        "showThresholdMarkers": true
+      }
+    },
+    {
+      "id": "latency",
+      "title": "Request Latency",
+      "type": "timeseries",
+      "grid_pos": {
+        "x": 0,
+        "y": 8,
+        "w": 12,
+        "h": 6
+      },
+      "targets": [
+        {
+          "expr": "histogram_quantile(0.50, rate(http_request_duration_seconds_bucket{service=\"customer-portal\"}[5m])) * 1000",
+          "legendFormat": "P50 Latency"
+        },
+        {
+          "expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{service=\"customer-portal\"}[5m])) * 1000",
+          "legendFormat": "P95 Latency"
+        },
+        {
+          "expr": "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket{service=\"customer-portal\"}[5m])) * 1000",
+          "legendFormat": "P99 Latency"
+        }
+      ],
+      "field_config": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "unit": "ms",
+          "custom": {
+            "drawStyle": "line",
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "fillOpacity": 10
+          }
+        }
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        },
+        "legend": {
+          "displayMode": "table",
+          "placement": "bottom"
+        }
+      }
+    },
+    {
+      "id": "traffic",
+      "title": "Request Rate",
+      "type": "timeseries",
+      "grid_pos": {
+        "x": 12,
+        "y": 8,
+        "w": 12,
+        "h": 6
+      },
+      "targets": [
+        {
+          "expr": "sum(rate(http_requests_total{service=\"customer-portal\"}[5m]))",
+          "legendFormat": "Total RPS"
+        },
+        {
+          "expr": "sum(rate(http_requests_total{service=\"customer-portal\",code=~\"2..\"}[5m]))",
+          "legendFormat": "2xx RPS"
+        },
+        {
+          "expr": "sum(rate(http_requests_total{service=\"customer-portal\",code=~\"4..\"}[5m]))",
+          "legendFormat": "4xx RPS"
+        },
+        {
+          "expr": "sum(rate(http_requests_total{service=\"customer-portal\",code=~\"5..\"}[5m]))",
+          "legendFormat": "5xx RPS"
+        }
+      ],
+      "field_config": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "unit": "reqps",
+          "custom": {
+            "drawStyle": "line",
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "fillOpacity": 0
+          }
+        }
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        },
+        "legend": {
+          "displayMode": "table",
+          "placement": "bottom"
+        }
+      }
+    },
+    {
+      "id": "errors",
+      "title": "Error Rate",
+      "type": "timeseries",
+      "grid_pos": {
+        "x": 0,
+        "y": 14,
+        "w": 12,
+        "h": 6
+      },
+      "targets": [
+        {
+          "expr": "sum(rate(http_requests_total{service=\"customer-portal\",code=~\"5..\"}[5m])) / sum(rate(http_requests_total{service=\"customer-portal\"}[5m])) * 100",
+          "legendFormat": "5xx Error Rate"
+        },
+        {
+          "expr": "sum(rate(http_requests_total{service=\"customer-portal\",code=~\"4..\"}[5m])) / sum(rate(http_requests_total{service=\"customer-portal\"}[5m])) * 100",
+          "legendFormat": "4xx Error Rate"
+        }
+      ],
+      "field_config": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "unit": "percent",
+          "custom": {
+            "drawStyle": "line",
+            "lineInterpolation": "linear",
+            "lineWidth": 2,
+            "fillOpacity": 20
+          }
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "5xx Error Rate"
+            },
+            "properties": [
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "red"
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        },
+        "legend": {
+          "displayMode": "table",
+          "placement": "bottom"
+        }
+      }
+    },
+    {
+      "id": "saturation",
+      "title": "Saturation Metrics",
+      "type": "timeseries",
+      "grid_pos": {
+        "x": 12,
+        "y": 14,
+        "w": 12,
+        "h": 6
+      },
+      "targets": [
+        {
+          "expr": "rate(process_cpu_seconds_total{service=\"customer-portal\"}[5m]) * 100",
+          "legendFormat": "CPU Usage %"
+        },
+        {
+          "expr": "process_resident_memory_bytes{service=\"customer-portal\"} / process_virtual_memory_max_bytes{service=\"customer-portal\"} * 100",
+          "legendFormat": "Memory Usage %"
+        }
+      ],
+      "field_config": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "unit": "percent",
+          "max": 100,
+          "custom": {
+            "drawStyle": "line",
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "fillOpacity": 10
+          }
+        }
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        },
+        "legend": {
+          "displayMode": "table",
+          "placement": "bottom"
+        }
+      }
+    },
+    {
+      "id": "cpu_usage",
+      "title": "CPU Usage",
+      "type": "gauge",
+      "grid_pos": {
+        "x": 0,
+        "y": 20,
+        "w": 6,
+        "h": 4
+      },
+      "targets": [
+        {
+          "expr": "rate(process_cpu_seconds_total{service=\"customer-portal\"}[5m]) * 100",
+          "legendFormat": "CPU %"
+        }
+      ],
+      "field_config": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "unit": "percent",
+          "min": 0,
+          "max": 100,
+          "thresholds": {
+            "steps": [
+              {
+                "color": "green",
+                "value": 0
+              },
+              {
+                "color": "yellow",
+                "value": 70
+              },
+              {
+                "color": "red",
+                "value": 90
+              }
+            ]
+          }
+        }
+      },
+      "options": {
+        "showThresholdLabels": true,
+        "showThresholdMarkers": true
+      }
+    },
+    {
+      "id": "memory_usage",
+      "title": "Memory Usage",
+      "type": "gauge",
+      "grid_pos": {
+        "x": 6,
+        "y": 20,
+        "w": 6,
+        "h": 4
+      },
+      "targets": [
+        {
+          "expr": "process_resident_memory_bytes{service=\"customer-portal\"}",
+          "legendFormat": "Memory"
+        }
+      ],
+      "field_config": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "unit": "decbytes",
+          "thresholds": {
+            "steps": [
+              {
+                "color": "green",
+                "value": 0
+              },
+              {
+                "color": "yellow",
+                "value": 512000000
+              },
+              {
+                "color": "red",
+                "value": 1024000000
+              }
+            ]
+          }
+        }
+      }
+    },
+    {
+      "id": "network_io",
+      "title": "Network I/O",
+      "type": "timeseries",
+      "grid_pos": {
+        "x": 12,
+        "y": 20,
+        "w": 6,
+        "h": 4
+      },
+      "targets": [
+        {
+          "expr": "rate(process_network_receive_bytes_total{service=\"customer-portal\"}[5m])",
+          "legendFormat": "RX Bytes/s"
+        },
+        {
+          "expr": "rate(process_network_transmit_bytes_total{service=\"customer-portal\"}[5m])",
+          "legendFormat": "TX Bytes/s"
+        }
+      ],
+      "field_config": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "unit": "binBps"
+        }
+      }
+    },
+    {
+      "id": "disk_io",
+      "title": "Disk I/O",
+      "type": "timeseries",
+      "grid_pos": {
+        "x": 18,
+        "y": 20,
+        "w": 6,
+        "h": 4
+      },
+      "targets": [
+        {
+          "expr": "rate(process_disk_read_bytes_total{service=\"customer-portal\"}[5m])",
+          "legendFormat": "Read Bytes/s"
+        },
+        {
+          "expr": "rate(process_disk_write_bytes_total{service=\"customer-portal\"}[5m])",
+          "legendFormat": "Write Bytes/s"
+        }
+      ],
+      "field_config": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "unit": "binBps"
+        }
+      }
+    }
+  ],
+  "variables": [
+    {
+      "name": "environment",
+      "type": "query",
+      "query": "label_values(environment)",
+      "current": {
+        "text": "production",
+        "value": "production"
+      },
+      "includeAll": false,
+      "multi": false,
+      "refresh": "on_dashboard_load"
+    },
+    {
+      "name": "instance",
+      "type": "query",
+      "query": "label_values(up{service=\"customer-portal\"}, instance)",
+      "current": {
+        "text": "All",
+        "value": "$__all"
+      },
+      "includeAll": true,
+      "multi": true,
+      "refresh": "on_time_range_change"
+    },
+    {
+      "name": "handler",
+      "type": "query",
+      "query": "label_values(http_requests_total{service=\"customer-portal\"}, handler)",
+      "current": {
+        "text": "All",
+        "value": "$__all"
+      },
+      "includeAll": true,
+      "multi": true,
+      "refresh": "on_time_range_change"
+    }
+  ],
+  "alerts_integration": {
+    "alert_annotations": true,
+    "alert_rules_query": "ALERTS{service=\"customer-portal\"}",
+    "alert_panels": [
+      {
+        "title": "Active Alerts",
+        "type": "table",
+        "query": "ALERTS{service=\"customer-portal\",alertstate=\"firing\"}",
+        "columns": [
+          "alertname",
+          "severity",
+          "instance",
+          "description"
+        ]
+      }
+    ]
+  },
+  "drill_down_paths": {
+    "service_overview": {
+      "from": "service_status",
+      "to": "detailed_health_dashboard",
+      "url": "/d/service-health/customer-portal-health",
+      "params": [
+        "var-service",
+        "var-environment"
+      ]
+    },
+    "error_investigation": {
+      "from": "errors",
+      "to": "error_details_dashboard",
+      "url": "/d/errors/customer-portal-errors",
+      "params": [
+        "var-service",
+        "var-time_range"
+      ]
+    },
+    "latency_analysis": {
+      "from": "latency",
+      "to": "trace_analysis_dashboard",
+      "url": "/d/traces/customer-portal-traces",
+      "params": [
+        "var-service",
+        "var-handler"
+      ]
+    },
+    "capacity_planning": {
+      "from": "saturation",
+      "to": "capacity_dashboard",
+      "url": "/d/capacity/customer-portal-capacity",
+      "params": [
+        "var-service",
+        "var-time_range"
+      ]
+    }
+  }
+}
--- a/.brain/.agent/skills/engineering-advanced-skills/observability-designer/expected_outputs/sample_slo_framework.json
+++ b/.brain/.agent/skills/engineering-advanced-skills/observability-designer/expected_outputs/sample_slo_framework.json
@@ -0,0 +1,545 @@
+{
+  "metadata": {
+    "service": {
+      "name": "payment-service",
+      "type": "api",
+      "criticality": "critical",
+      "user_facing": true,
+      "description": "Handles payment processing and transaction management",
+      "team": "payments",
+      "environment": "production",
+      "dependencies": [
+        {
+          "name": "user-service",
+          "type": "api",
+          "criticality": "high"
+        },
+        {
+          "name": "payment-gateway",
+          "type": "external",
+          "criticality": "critical"
+        },
+        {
+          "name": "fraud-detection",
+          "type": "ml",
+          "criticality": "high"
+        }
+      ],
+      "endpoints": [
+        {
+          "path": "/api/v1/payments",
+          "method": "POST",
+          "sla_latency_ms": 500,
+          "expected_tps": 100
+        },
+        {
+          "path": "/api/v1/payments/{id}",
+          "method": "GET",
+          "sla_latency_ms": 200,
+          "expected_tps": 500
+        },
+        {
+          "path": "/api/v1/payments/{id}/refund",
+          "method": "POST",
+          "sla_latency_ms": 1000,
+          "expected_tps": 10
+        }
+      ],
+      "business_metrics": {
+        "revenue_per_hour": {
+          "metric": "sum(payment_amount * rate(payments_successful_total[1h]))",
+          "target": 50000,
+          "unit": "USD"
+        },
+        "conversion_rate": {
+          "metric": "sum(rate(payments_successful_total[5m])) / sum(rate(payment_attempts_total[5m]))",
+          "target": 0.95,
+          "unit": "percentage"
+        }
+      },
+      "infrastructure": {
+        "container_orchestrator": "kubernetes",
+        "replicas": 6,
+        "cpu_limit": "2000m",
+        "memory_limit": "4Gi",
+        "database": {
+          "type": "postgresql",
+          "connection_pool_size": 20
+        },
+        "cache": {
+          "type": "redis",
+          "cluster_size": 3
+        }
+      },
+      "compliance_requirements": [
+        "PCI-DSS",
+        "SOX",
+        "GDPR"
+      ],
+      "tags": [
+        "payment",
+        "transaction",
+        "critical-path",
+        "revenue-generating"
+      ]
+    },
+    "generated_at": "2026-02-16T14:01:57.572080Z",
+    "framework_version": "1.0"
+  },
+  "slis": [
+    {
+      "name": "Availability",
+      "description": "Percentage of successful requests",
+      "type": "ratio",
+      "good_events": "sum(rate(http_requests_total{service=\"payment-service\",code!~\"5..\"}[5m]))",
+      "total_events": "sum(rate(http_requests_total{service=\"payment-service\"}[5m]))",
+      "unit": "percentage"
+    },
+    {
+      "name": "Request Latency P95",
+      "description": "95th percentile of request latency",
+      "type": "threshold",
+      "query": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{service=\"payment-service\"}[5m]))",
+      "unit": "seconds"
+    },
+    {
+      "name": "Error Rate",
+      "description": "Rate of 5xx errors",
+      "type": "ratio",
+      "good_events": "sum(rate(http_requests_total{service=\"payment-service\",code!~\"5..\"}[5m]))",
+      "total_events": "sum(rate(http_requests_total{service=\"payment-service\"}[5m]))",
+      "unit": "percentage"
+    },
+    {
+      "name": "Request Throughput",
+      "description": "Requests per second",
+      "type": "gauge",
+      "query": "sum(rate(http_requests_total{service=\"payment-service\"}[5m]))",
+      "unit": "requests/sec"
+    },
+    {
+      "name": "User Journey Success Rate",
+      "description": "Percentage of successful complete user journeys",
+      "type": "ratio",
+      "good_events": "sum(rate(user_journey_total{service=\"payment-service\",status=\"success\"}[5m]))",
+      "total_events": "sum(rate(user_journey_total{service=\"payment-service\"}[5m]))",
+      "unit": "percentage"
+    },
+    {
+      "name": "Feature Availability",
+      "description": "Percentage of time key features are available",
+      "type": "ratio",
+      "good_events": "sum(rate(feature_checks_total{service=\"payment-service\",status=\"available\"}[5m]))",
+      "total_events": "sum(rate(feature_checks_total{service=\"payment-service\"}[5m]))",
+      "unit": "percentage"
+    }
+  ],
+  "slos": [
+    {
+      "name": "Availability SLO",
+      "description": "Service level objective for percentage of successful requests",
+      "sli_name": "Availability",
+      "target_value": 0.9999,
+      "target_display": "99.99%",
+      "operator": ">=",
+      "time_windows": [
+        "1h",
+        "1d",
+        "7d",
+        "30d"
+      ],
+      "measurement_window": "30d",
+      "service": "payment-service",
+      "criticality": "critical"
+    },
+    {
+      "name": "Request Latency P95 SLO",
+      "description": "Service level objective for 95th percentile of request latency",
+      "sli_name": "Request Latency P95",
+      "target_value": 0.1,
+      "target_display": "0.1s",
+      "operator": "<=",
+      "time_windows": [
+        "1h",
+        "1d",
+        "7d",
+        "30d"
+      ],
+      "measurement_window": "30d",
+      "service": "payment-service",
+      "criticality": "critical"
+    },
+    {
+      "name": "Error Rate SLO",
+      "description": "Service level objective for rate of 5xx errors",
+      "sli_name": "Error Rate",
+      "target_value": 0.001,
+      "target_display": "0.1%",
+      "operator": "<=",
+      "time_windows": [
+        "1h",
+        "1d",
+        "7d",
+        "30d"
+      ],
+      "measurement_window": "30d",
+      "service": "payment-service",
+      "criticality": "critical"
+    },
+    {
+      "name": "User Journey Success Rate SLO",
+      "description": "Service level objective for percentage of successful complete user journeys",
+      "sli_name": "User Journey Success Rate",
+      "target_value": 0.9999,
+      "target_display": "99.99%",
+      "operator": ">=",
+      "time_windows": [
+        "1h",
+        "1d",
+        "7d",
+        "30d"
+      ],
+      "measurement_window": "30d",
+      "service": "payment-service",
+      "criticality": "critical"
+    },
+    {
+      "name": "Feature Availability SLO",
+      "description": "Service level objective for percentage of time key features are available",
+      "sli_name": "Feature Availability",
+      "target_value": 0.9999,
+      "target_display": "99.99%",
+      "operator": ">=",
+      "time_windows": [
+        "1h",
+        "1d",
+        "7d",
+        "30d"
+      ],
+      "measurement_window": "30d",
+      "service": "payment-service",
+      "criticality": "critical"
+    }
+  ],
+  "error_budgets": [
+    {
+      "slo_name": "Availability SLO",
+      "error_budget_rate": 0.0001,
+      "error_budget_percentage": "0.010%",
+      "budgets_by_window": {
+        "1h": "0.4 seconds",
+        "1d": "8.6 seconds",
+        "7d": "1.0 minutes",
+        "30d": "4.3 minutes"
+      },
+      "burn_rate_alerts": [
+        {
+          "name": "Availability Burn Rate 2% Alert",
+          "description": "Alert when Availability is consuming error budget at 14.4x rate",
+          "severity": "critical",
+          "short_window": "5m",
+          "long_window": "1h",
+          "burn_rate_threshold": 14.4,
+          "budget_consumed": "2%",
+          "condition": "((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'})) / sum(rate(http_requests_total{service='payment-service'}))))_short > 14.4) and ((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'})) / sum(rate(http_requests_total{service='payment-service'}))))_long > 14.4)",
+          "annotations": {
+            "summary": "High burn rate detected for Availability",
+            "description": "Error budget consumption rate is 14.4x normal, will exhaust 2% of monthly budget"
+          }
+        },
+        {
+          "name": "Availability Burn Rate 5% Alert",
+          "description": "Alert when Availability is consuming error budget at 6x rate",
+          "severity": "warning",
+          "short_window": "30m",
+          "long_window": "6h",
+          "burn_rate_threshold": 6,
+          "budget_consumed": "5%",
+          "condition": "((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'})) / sum(rate(http_requests_total{service='payment-service'}))))_short > 6) and ((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'})) / sum(rate(http_requests_total{service='payment-service'}))))_long > 6)",
+          "annotations": {
+            "summary": "High burn rate detected for Availability",
+            "description": "Error budget consumption rate is 6x normal, will exhaust 5% of monthly budget"
+          }
+        },
+        {
+          "name": "Availability Burn Rate 10% Alert",
+          "description": "Alert when Availability is consuming error budget at 3x rate",
+          "severity": "info",
+          "short_window": "2h",
+          "long_window": "1d",
+          "burn_rate_threshold": 3,
+          "budget_consumed": "10%",
+          "condition": "((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'})) / sum(rate(http_requests_total{service='payment-service'}))))_short > 3) and ((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'})) / sum(rate(http_requests_total{service='payment-service'}))))_long > 3)",
+          "annotations": {
+            "summary": "High burn rate detected for Availability",
+            "description": "Error budget consumption rate is 3x normal, will exhaust 10% of monthly budget"
+          }
+        },
+        {
+          "name": "Availability Burn Rate 10% (3d) Alert",
+          "description": "Alert when Availability is consuming error budget at 1x rate",
+          "severity": "info",
+          "short_window": "6h",
+          "long_window": "3d",
+          "burn_rate_threshold": 1,
+          "budget_consumed": "10%",
+          "condition": "((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'})) / sum(rate(http_requests_total{service='payment-service'}))))_short > 1) and ((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'})) / sum(rate(http_requests_total{service='payment-service'}))))_long > 1)",
+          "annotations": {
+            "summary": "High burn rate detected for Availability",
+            "description": "Error budget consumption rate is 1x normal, will exhaust 10% of monthly budget"
+          }
+        }
+      ]
+    },
+    {
+      "slo_name": "User Journey Success Rate SLO",
+      "error_budget_rate": 0.0001,
+      "error_budget_percentage": "0.010%",
+      "budgets_by_window": {
+        "1h": "0.4 seconds",
+        "1d": "8.6 seconds",
+        "7d": "1.0 minutes",
+        "30d": "4.3 minutes"
+      },
+      "burn_rate_alerts": [
+        {
+          "name": "User Journey Success Rate Burn Rate 2% Alert",
+          "description": "Alert when User Journey Success Rate is consuming error budget at 14.4x rate",
+          "severity": "critical",
+          "short_window": "5m",
+          "long_window": "1h",
+          "burn_rate_threshold": 14.4,
+          "budget_consumed": "2%",
+          "condition": "((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'}[5m])) / sum(rate(http_requests_total{service='payment-service'}[5m])))) > (14.4 * 0.0001)) and ((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'}[1h])) / sum(rate(http_requests_total{service='payment-service'}[1h])))) > (14.4 * 0.0001))",
+          "annotations": {
+            "summary": "High burn rate detected for User Journey Success Rate",
+            "description": "Error budget consumption rate is 14.4x normal, will exhaust 2% of monthly budget"
+          }
+        },
+        {
+          "name": "User Journey Success Rate Burn Rate 5% Alert",
+          "description": "Alert when User Journey Success Rate is consuming error budget at 6x rate",
+          "severity": "warning",
+          "short_window": "30m",
+          "long_window": "6h",
+          "burn_rate_threshold": 6,
+          "budget_consumed": "5%",
+          "condition": "((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'}[30m])) / sum(rate(http_requests_total{service='payment-service'}[30m])))) > (6 * 0.0001)) and ((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'}[6h])) / sum(rate(http_requests_total{service='payment-service'}[6h])))) > (6 * 0.0001))",
+          "annotations": {
+            "summary": "High burn rate detected for User Journey Success Rate",
+            "description": "Error budget consumption rate is 6x normal, will exhaust 5% of monthly budget"
+          }
+        },
+        {
+          "name": "User Journey Success Rate Burn Rate 10% Alert",
+          "description": "Alert when User Journey Success Rate is consuming error budget at 3x rate",
+          "severity": "info",
+          "short_window": "2h",
+          "long_window": "1d",
+          "burn_rate_threshold": 3,
+          "budget_consumed": "10%",
+          "condition": "((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'}[2h])) / sum(rate(http_requests_total{service='payment-service'}[2h])))) > (3 * 0.0001)) and ((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'}[1d])) / sum(rate(http_requests_total{service='payment-service'}[1d])))) > (3 * 0.0001))",
+          "annotations": {
+            "summary": "High burn rate detected for User Journey Success Rate",
+            "description": "Error budget consumption rate is 3x normal, will exhaust 10% of monthly budget"
+          }
+        },
+        {
+          "name": "User Journey Success Rate Burn Rate 10% Alert (3d window)",
+          "description": "Alert when User Journey Success Rate is consuming error budget at 1x rate",
+          "severity": "info",
+          "short_window": "6h",
+          "long_window": "3d",
+          "burn_rate_threshold": 1,
+          "budget_consumed": "10%",
+          "condition": "((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'}[6h])) / sum(rate(http_requests_total{service='payment-service'}[6h])))) > (1 * 0.0001)) and ((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'}[3d])) / sum(rate(http_requests_total{service='payment-service'}[3d])))) > (1 * 0.0001))",
+          "annotations": {
+            "summary": "High burn rate detected for User Journey Success Rate",
+            "description": "Error budget consumption rate is 1x normal, will exhaust 10% of monthly budget"
+          }
+        }
+      ]
+    },
+    {
+      "slo_name": "Feature Availability SLO",
+      "error_budget_rate": 0.0001,
+      "error_budget_percentage": "0.010%",
+      "budgets_by_window": {
+        "1h": "0.4 seconds",
+        "1d": "8.6 seconds",
+        "7d": "1.0 minutes",
+        "30d": "4.3 minutes"
+      },
+      "burn_rate_alerts": [
+        {
+          "name": "Feature Availability Burn Rate 2% Alert",
+          "description": "Alert when Feature Availability is consuming error budget at 14.4x rate",
+          "severity": "critical",
+          "short_window": "5m",
+          "long_window": "1h",
+          "burn_rate_threshold": 14.4,
+          "budget_consumed": "2%",
+          "condition": "((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'}[5m])) / sum(rate(http_requests_total{service='payment-service'}[5m])))) > (14.4 * 0.0001)) and ((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'}[1h])) / sum(rate(http_requests_total{service='payment-service'}[1h])))) > (14.4 * 0.0001))",
+          "annotations": {
+            "summary": "High burn rate detected for Feature Availability",
+            "description": "Error budget consumption rate is 14.4x normal, will exhaust 2% of monthly budget"
+          }
+        },
+        {
+          "name": "Feature Availability Burn Rate 5% Alert",
+          "description": "Alert when Feature Availability is consuming error budget at 6x rate",
+          "severity": "warning",
+          "short_window": "30m",
+          "long_window": "6h",
+          "burn_rate_threshold": 6,
+          "budget_consumed": "5%",
+          "condition": "((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'}[30m])) / sum(rate(http_requests_total{service='payment-service'}[30m])))) > (6 * 0.0001)) and ((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'}[6h])) / sum(rate(http_requests_total{service='payment-service'}[6h])))) > (6 * 0.0001))",
+          "annotations": {
+            "summary": "High burn rate detected for Feature Availability",
+            "description": "Error budget consumption rate is 6x normal, will exhaust 5% of monthly budget"
+          }
+        },
+        {
+          "name": "Feature Availability Burn Rate 10% Alert",
+          "description": "Alert when Feature Availability is consuming error budget at 3x rate",
+          "severity": "info",
+          "short_window": "2h",
+          "long_window": "1d",
+          "burn_rate_threshold": 3,
+          "budget_consumed": "10%",
+          "condition": "((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'}[2h])) / sum(rate(http_requests_total{service='payment-service'}[2h])))) > (3 * 0.0001)) and ((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'}[1d])) / sum(rate(http_requests_total{service='payment-service'}[1d])))) > (3 * 0.0001))",
+          "annotations": {
+            "summary": "High burn rate detected for Feature Availability",
+            "description": "Error budget consumption rate is 3x normal, will exhaust 10% of monthly budget"
+          }
+        },
+        {
+          "name": "Feature Availability Burn Rate 10% Alert (3d window)",
+          "description": "Alert when Feature Availability is consuming error budget at 1x rate",
+          "severity": "info",
+          "short_window": "6h",
+          "long_window": "3d",
+          "burn_rate_threshold": 1,
+          "budget_consumed": "10%",
+          "condition": "((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'}[6h])) / sum(rate(http_requests_total{service='payment-service'}[6h])))) > (1 * 0.0001)) and ((1 - (sum(rate(http_requests_total{service='payment-service',code!~'5..'}[3d])) / sum(rate(http_requests_total{service='payment-service'}[3d])))) > (1 * 0.0001))",
+          "annotations": {
+            "summary": "High burn rate detected for Feature Availability",
+            "description": "Error budget consumption rate is 1x normal, will exhaust 10% of monthly budget"
+          }
+        }
+      ]
+    }
+  ],
+  "sla_recommendations": {
+    "applicable": true,
+    "service": "payment-service",
+    "commitments": [
+      {
+        "metric": "Availability",
+        "target": 0.9989,
+        "target_display": "99.89%",
+        "measurement_window": "monthly",
+        "measurement_method": "Uptime monitoring with 1-minute granularity"
+      },
+      {
+        "metric": "Feature Availability",
+        "target": 0.9989,
+        "target_display": "99.89%",
+        "measurement_window": "monthly",
+        "measurement_method": "Uptime monitoring with 1-minute granularity"
+      }
+    ],
+    "penalties": [
+      {
+        "breach_threshold": "< 99.89%",
+        "credit_percentage": 10
+      },
+      {
+        "breach_threshold": "< 99.5%",
+        "credit_percentage": 25
+      },
+      {
+        "breach_threshold": "< 99%",
+        "credit_percentage": 50
+      }
+    ],
+    "measurement_methodology": "External synthetic monitoring from multiple geographic locations",
+    "exclusions": [
+      "Planned maintenance windows (with 72h advance notice)",
+      "Customer-side network or infrastructure issues",
+      "Force majeure events",
+      "Third-party service dependencies beyond our control"
+    ]
+  },
+  "monitoring_recommendations": {
+    "metrics": {
+      "collection": "Prometheus with service discovery",
+      "retention": "90 days for raw metrics, 1 year for aggregated",
+      "alerting": "Prometheus Alertmanager with multi-window burn rate alerts"
+    },
+    "logging": {
+      "format": "Structured JSON logs with correlation IDs",
+      "aggregation": "ELK stack or equivalent with proper indexing",
+      "retention": "30 days for debug logs, 90 days for error logs"
+    },
+    "tracing": {
+      "sampling": "Adaptive sampling with 1% base rate",
+      "storage": "Jaeger or Zipkin with 7-day retention",
+      "integration": "OpenTelemetry instrumentation"
+    }
+  },
+  "implementation_guide": {
+    "prerequisites": [
+      "Service instrumented with metrics collection (Prometheus format)",
+      "Structured logging with correlation IDs",
+      "Monitoring infrastructure (Prometheus, Grafana, Alertmanager)",
+      "Incident response processes and escalation policies"
+    ],
+    "implementation_steps": [
+      {
+        "step": 1,
+        "title": "Instrument Service",
+        "description": "Add metrics collection for all defined SLIs",
+        "estimated_effort": "1-2 days"
+      },
+      {
+        "step": 2,
+        "title": "Configure Recording Rules",
+        "description": "Set up Prometheus recording rules for SLI calculations",
+        "estimated_effort": "4-8 hours"
+      },
+      {
+        "step": 3,
+        "title": "Implement Burn Rate Alerts",
+        "description": "Configure multi-window burn rate alerting rules",
+        "estimated_effort": "1 day"
+      },
+      {
+        "step": 4,
+        "title": "Create SLO Dashboard",
+        "description": "Build Grafana dashboard for SLO tracking and error budget monitoring",
+        "estimated_effort": "4-6 hours"
+      },
+      {
+        "step": 5,
+        "title": "Test and Validate",
+        "description": "Test alerting and validate SLI measurements against expectations",
+        "estimated_effort": "1-2 days"
+      },
+      {
+        "step": 6,
+        "title": "Documentation and Training",
+        "description": "Document runbooks and train team on SLO monitoring",
+        "estimated_effort": "1 day"
+      }
+    ],
+    "validation_checklist": [
+      "All SLIs produce expected metric values",
+      "Burn rate alerts fire correctly during simulated outages",
+      "Error budget calculations match manual verification",
+      "Dashboard displays accurate SLO achievement rates",
+      "Alert routing reaches correct escalation paths",
+      "Runbooks are complete and tested"
+    ]
+  }
+}