CleanArchitecture-template/.brain/.agent/skills/engineering-advanced-skills/observability-designer/scripts/dashboard_generator.py

#!/usr/bin/env python3
"""
Dashboard Generator - Generate comprehensive dashboard specifications

This script generates dashboard specifications based on service/system descriptions:
- Panel layout optimized for different screen sizes and roles
- Metric queries (Prometheus-style) for comprehensive monitoring
- Visualization types appropriate for different metric types
- Drill-down paths for effective troubleshooting workflows
- Golden signals coverage (latency, traffic, errors, saturation)
- RED/USE method implementation
- Business metrics integration

Usage:
    python dashboard_generator.py --input service_definition.json --output dashboard_spec.json
    python dashboard_generator.py --service-type api --name "Payment Service" --output payment_dashboard.json
"""

import argparse
import json
import math
import sys
from datetime import datetime, timedelta, timezone
from typing import Dict, List, Any, Tuple


class DashboardGenerator:
    """Generate comprehensive dashboard specifications."""

    # Dashboard layout templates by role.
    #
    # Keyed by target role name. Each entry carries:
    #   primary_focus   - focus areas for the role; _generate_panels adds the
    #                     business-metrics panels when 'business_metrics' is
    #                     listed here and the capacity panels when 'capacity' is
    #   secondary_focus - further focus areas (not read by the code visible in
    #                     this part of the file)
    #   time_ranges     - selectable time ranges; the second entry becomes the
    #                     dashboard's default time range
    #   default_refresh - copied into the dashboard's refresh interval
    # generate_dashboard_specification falls back to the 'sre' entry for any
    # role name that is not a key here.
    ROLE_LAYOUTS = {
        'sre': {
            'primary_focus': ['availability', 'latency', 'errors', 'resource_utilization'],
            'secondary_focus': ['throughput', 'capacity', 'dependencies'],
            'time_ranges': ['1h', '6h', '1d', '7d'],
            'default_refresh': '30s'
        },
        'developer': {
            'primary_focus': ['latency', 'errors', 'throughput', 'business_metrics'],
            'secondary_focus': ['resource_utilization', 'dependencies'],
            'time_ranges': ['15m', '1h', '6h', '1d'],
            'default_refresh': '1m'
        },
        'executive': {
            'primary_focus': ['availability', 'business_metrics', 'user_experience'],
            'secondary_focus': ['cost', 'capacity_trends'],
            'time_ranges': ['1d', '7d', '30d'],
            'default_refresh': '5m'
        },
        'ops': {
            'primary_focus': ['resource_utilization', 'capacity', 'alerts', 'deployments'],
            'secondary_focus': ['throughput', 'latency'],
            'time_ranges': ['5m', '30m', '2h', '1d'],
            'default_refresh': '15s'
        }
    }

    # Service type specific metric configurations.
    #
    # Keyed by service type ('api', 'web', 'database', 'queue'). Each entry lists:
    #   golden_signals   - the golden signals covered (identical for every type here)
    #   key_metrics      - metric names characteristic of that service type
    #   resource_metrics - resource-level metric names for that service type
    # NOTE(review): nothing in the part of the file visible here reads this
    # table - confirm where (or whether) it is consumed before changing it.
    SERVICE_METRICS = {
        'api': {
            'golden_signals': ['latency', 'traffic', 'errors', 'saturation'],
            'key_metrics': [
                'http_requests_total',
                'http_request_duration_seconds',
                'http_request_size_bytes',
                'http_response_size_bytes'
            ],
            'resource_metrics': ['cpu_usage', 'memory_usage', 'goroutines']
        },
        'web': {
            'golden_signals': ['latency', 'traffic', 'errors', 'saturation'],
            'key_metrics': [
                'http_requests_total',
                'http_request_duration_seconds',
                'page_load_time',
                'user_sessions'
            ],
            'resource_metrics': ['cpu_usage', 'memory_usage', 'connections']
        },
        'database': {
            'golden_signals': ['latency', 'traffic', 'errors', 'saturation'],
            'key_metrics': [
                'db_connections_active',
                'db_query_duration_seconds',
                'db_queries_total',
                'db_slow_queries_total'
            ],
            'resource_metrics': ['cpu_usage', 'memory_usage', 'disk_io', 'connections']
        },
        'queue': {
            'golden_signals': ['latency', 'traffic', 'errors', 'saturation'],
            'key_metrics': [
                'queue_depth',
                'message_processing_duration',
                'messages_published_total',
                'messages_consumed_total'
            ],
            'resource_metrics': ['cpu_usage', 'memory_usage', 'disk_usage']
        }
    }

    # Visualization type recommendations.
    #
    # Maps a metric category name to a recommended visualization name.
    # NOTE(review): nothing in the part of the file visible here reads this
    # table, and the panel builders below hard-code their own 'type' values
    # ('stat', 'gauge', 'timeseries', ...) - confirm how this mapping is used.
    VISUALIZATION_TYPES = {
        'latency': 'line_chart',
        'throughput': 'line_chart',
        'error_rate': 'line_chart',
        'success_rate': 'stat',
        'resource_utilization': 'gauge',
        'queue_depth': 'bar_chart',
        'status': 'stat',
        'distribution': 'heatmap',
        'alerts': 'table',
        'logs': 'logs_panel'
    }

    def __init__(self):
        """Initialize the Dashboard Generator."""
        self.service_config = {}
        self.dashboard_spec = {}

    def load_service_definition(self, file_path: str) -> Dict[str, Any]:
        """Load service definition from JSON file."""
        try:
            with open(file_path, 'r') as f:
                return json.load(f)
        except FileNotFoundError:
            raise ValueError(f"Service definition file not found: {file_path}")
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in service definition: {e}")

    def create_service_definition(self, service_type: str, name: str,
                                criticality: str = 'medium') -> Dict[str, Any]:
        """Create a service definition from parameters."""
        return {
            'name': name,
            'type': service_type,
            'criticality': criticality,
            'description': f'{name} - A {criticality} criticality {service_type} service',
            'team': 'platform',
            'environment': 'production',
            'dependencies': [],
            'tags': []
        }

    def generate_dashboard_specification(self, service_def: Dict[str, Any],
                                       target_role: str = 'sre') -> Dict[str, Any]:
        """Generate the complete dashboard specification for a service.

        Args:
            service_def: Service definition dict. Its 'name' (default
                'Service') goes into the title; the whole dict is embedded in
                the metadata and handed to the section generators.
            target_role: Key into ``ROLE_LAYOUTS``. An unknown role uses the
                'sre' layout, but the given role name is still what appears
                in the title and metadata.

        Returns:
            A dict with the keys 'metadata', 'configuration', 'layout',
            'panels', 'variables', 'alerts_integration' and
            'drill_down_paths'.
        """
        service_name = service_def.get('name', 'Service')

        # Get role-specific configuration
        role_config = self.ROLE_LAYOUTS.get(target_role, self.ROLE_LAYOUTS['sre'])

        # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
        # timestamp and drop the tzinfo so the rendered value keeps the exact
        # "<ISO timestamp>Z" shape produced before.
        generated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + 'Z'

        dashboard_spec = {
            'metadata': {
                'title': f"{service_name} - {target_role.upper()} Dashboard",
                'service': service_def,
                'target_role': target_role,
                'generated_at': generated_at,
                'version': '1.0'
            },
            'configuration': {
                'time_ranges': role_config['time_ranges'],
                'default_time_range': role_config['time_ranges'][1],  # Second option as default
                'refresh_interval': role_config['default_refresh'],
                'timezone': 'UTC',
                'theme': 'dark'
            },
            'layout': self._generate_dashboard_layout(service_def, role_config),
            'panels': self._generate_panels(service_def, role_config),
            'variables': self._generate_template_variables(service_def),
            'alerts_integration': self._generate_alerts_integration(service_def),
            'drill_down_paths': self._generate_drill_down_paths(service_def)
        }

        return dashboard_spec

    def _generate_dashboard_layout(self, service_def: Dict[str, Any],
                                 role_config: Dict[str, Any]) -> Dict[str, Any]:
        """Generate dashboard layout configuration."""
        return {
            'grid_settings': {
                'width': 24,  # Grafana-style 24-column grid
                'height_unit': 'px',
                'cell_height': 30
            },
            'sections': [
                {
                    'title': 'Service Overview',
                    'collapsed': False,
                    'y_position': 0,
                    'panels': ['service_status', 'slo_summary', 'error_budget']
                },
                {
                    'title': 'Golden Signals',
                    'collapsed': False,
                    'y_position': 8,
                    'panels': ['latency', 'traffic', 'errors', 'saturation']
                },
                {
                    'title': 'Resource Utilization',
                    'collapsed': False,
                    'y_position': 16,
                    'panels': ['cpu_usage', 'memory_usage', 'network_io', 'disk_io']
                },
                {
                    'title': 'Dependencies & Downstream',
                    'collapsed': True,
                    'y_position': 24,
                    'panels': ['dependency_status', 'downstream_latency', 'circuit_breakers']
                }
            ]
        }

    def _generate_panels(self, service_def: Dict[str, Any],
                        role_config: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Generate dashboard panels based on service and role."""
        service_name = service_def.get('name', 'service')
        service_type = service_def.get('type', 'api')
        panels = []

        # Service Overview Panels
        panels.extend(self._create_overview_panels(service_def))

        # Golden Signals Panels
        panels.extend(self._create_golden_signals_panels(service_def))

        # Resource Utilization Panels
        panels.extend(self._create_resource_panels(service_def))

        # Service-specific panels
        if service_type == 'api':
            panels.extend(self._create_api_specific_panels(service_def))
        elif service_type == 'database':
            panels.extend(self._create_database_specific_panels(service_def))
        elif service_type == 'queue':
            panels.extend(self._create_queue_specific_panels(service_def))

        # Role-specific additional panels
        if 'business_metrics' in role_config['primary_focus']:
            panels.extend(self._create_business_metrics_panels(service_def))

        if 'capacity' in role_config['primary_focus']:
            panels.extend(self._create_capacity_panels(service_def))

        return panels

    def _create_overview_panels(self, service_def: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Create service overview panels."""
        service_name = service_def.get('name', 'service')

        return [
            {
                'id': 'service_status',
                'title': 'Service Status',
                'type': 'stat',
                'grid_pos': {'x': 0, 'y': 0, 'w': 6, 'h': 4},
                'targets': [
                    {
                        'expr': f'up{{service="{service_name}"}}',
                        'legendFormat': 'Status'
                    }
                ],
                'field_config': {
                    'overrides': [
                        {
                            'matcher': {'id': 'byName', 'options': 'Status'},
                            'properties': [
                                {'id': 'color', 'value': {'mode': 'thresholds'}},
                                {'id': 'thresholds', 'value': {
                                    'steps': [
                                        {'color': 'red', 'value': 0},
                                        {'color': 'green', 'value': 1}
                                    ]
                                }},
                                {'id': 'mappings', 'value': [
                                    {'options': {'0': {'text': 'DOWN'}}, 'type': 'value'},
                                    {'options': {'1': {'text': 'UP'}}, 'type': 'value'}
                                ]}
                            ]
                        }
                    ]
                },
                'options': {
                    'orientation': 'horizontal',
                    'textMode': 'value_and_name'
                }
            },
            {
                'id': 'slo_summary',
                'title': 'SLO Achievement (30d)',
                'type': 'stat',
                'grid_pos': {'x': 6, 'y': 0, 'w': 9, 'h': 4},
                'targets': [
                    {
                        'expr': f'(1 - (increase(http_requests_total{{service="{service_name}",code=~"5.."}}[30d]) / increase(http_requests_total{{service="{service_name}"}}[30d]))) * 100',
                        'legendFormat': 'Availability'
                    },
                    {
                        'expr': f'histogram_quantile(0.95, increase(http_request_duration_seconds_bucket{{service="{service_name}"}}[30d])) * 1000',
                        'legendFormat': 'P95 Latency (ms)'
                    }
                ],
                'field_config': {
                    'defaults': {
                        'color': {'mode': 'thresholds'},
                        'thresholds': {
                            'steps': [
                                {'color': 'red', 'value': 0},
                                {'color': 'yellow', 'value': 99.0},
                                {'color': 'green', 'value': 99.9}
                            ]
                        }
                    }
                },
                'options': {
                    'orientation': 'horizontal',
                    'textMode': 'value_and_name'
                }
            },
            {
                'id': 'error_budget',
                'title': 'Error Budget Remaining',
                'type': 'gauge',
                'grid_pos': {'x': 15, 'y': 0, 'w': 9, 'h': 4},
                'targets': [
                    {
                        'expr': f'(1 - (increase(http_requests_total{{service="{service_name}",code=~"5.."}}[30d]) / increase(http_requests_total{{service="{service_name}"}}[30d])) - 0.999) / 0.001 * 100',
                        'legendFormat': 'Error Budget %'
                    }
                ],
                'field_config': {
                    'defaults': {
                        'color': {'mode': 'thresholds'},
                        'min': 0,
                        'max': 100,
                        'thresholds': {
                            'steps': [
                                {'color': 'red', 'value': 0},
                                {'color': 'yellow', 'value': 25},
                                {'color': 'green', 'value': 50}
                            ]
                        },
                        'unit': 'percent'
                    }
                },
                'options': {
                    'showThresholdLabels': True,
                    'showThresholdMarkers': True
                }
            }
        ]

    def _create_golden_signals_panels(self, service_def: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Create golden signals monitoring panels."""
        service_name = service_def.get('name', 'service')

        return [
            {
                'id': 'latency',
                'title': 'Request Latency',
                'type': 'timeseries',
                'grid_pos': {'x': 0, 'y': 8, 'w': 12, 'h': 6},
                'targets': [
                    {
                        'expr': f'histogram_quantile(0.50, rate(http_request_duration_seconds_bucket{{service="{service_name}"}}[5m])) * 1000',
                        'legendFormat': 'P50 Latency'
                    },
                    {
                        'expr': f'histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{{service="{service_name}"}}[5m])) * 1000',
                        'legendFormat': 'P95 Latency'
                    },
                    {
                        'expr': f'histogram_quantile(0.99, rate(http_request_duration_seconds_bucket{{service="{service_name}"}}[5m])) * 1000',
                        'legendFormat': 'P99 Latency'
                    }
                ],
                'field_config': {
                    'defaults': {
                        'color': {'mode': 'palette-classic'},
                        'unit': 'ms',
                        'custom': {
                            'drawStyle': 'line',
                            'lineInterpolation': 'linear',
                            'lineWidth': 1,
                            'fillOpacity': 10
                        }
                    }
                },
                'options': {
                    'tooltip': {'mode': 'multi', 'sort': 'desc'},
                    'legend': {'displayMode': 'table', 'placement': 'bottom'}
                }
            },
            {
                'id': 'traffic',
                'title': 'Request Rate',
                'type': 'timeseries',
                'grid_pos': {'x': 12, 'y': 8, 'w': 12, 'h': 6},
                'targets': [
                    {
                        'expr': f'sum(rate(http_requests_total{{service="{service_name}"}}[5m]))',
                        'legendFormat': 'Total RPS'
                    },
                    {
                        'expr': f'sum(rate(http_requests_total{{service="{service_name}",code=~"2.."}}[5m]))',
                        'legendFormat': '2xx RPS'
                    },
                    {
                        'expr': f'sum(rate(http_requests_total{{service="{service_name}",code=~"4.."}}[5m]))',
                        'legendFormat': '4xx RPS'
                    },
                    {
                        'expr': f'sum(rate(http_requests_total{{service="{service_name}",code=~"5.."}}[5m]))',
                        'legendFormat': '5xx RPS'
                    }
                ],
                'field_config': {
                    'defaults': {
                        'color': {'mode': 'palette-classic'},
                        'unit': 'reqps',
                        'custom': {
                            'drawStyle': 'line',
                            'lineInterpolation': 'linear',
                            'lineWidth': 1,
                            'fillOpacity': 0
                        }
                    }
                },
                'options': {
                    'tooltip': {'mode': 'multi', 'sort': 'desc'},
                    'legend': {'displayMode': 'table', 'placement': 'bottom'}
                }
            },
            {
                'id': 'errors',
                'title': 'Error Rate',
                'type': 'timeseries',
                'grid_pos': {'x': 0, 'y': 14, 'w': 12, 'h': 6},
                'targets': [
                    {
                        'expr': f'sum(rate(http_requests_total{{service="{service_name}",code=~"5.."}}[5m])) / sum(rate(http_requests_total{{service="{service_name}"}}[5m])) * 100',
                        'legendFormat': '5xx Error Rate'
                    },
                    {
                        'expr': f'sum(rate(http_requests_total{{service="{service_name}",code=~"4.."}}[5m])) / sum(rate(http_requests_total{{service="{service_name}"}}[5m])) * 100',
                        'legendFormat': '4xx Error Rate'
                    }
                ],
                'field_config': {
                    'defaults': {
                        'color': {'mode': 'palette-classic'},
                        'unit': 'percent',
                        'custom': {
                            'drawStyle': 'line',
                            'lineInterpolation': 'linear',
                            'lineWidth': 2,
                            'fillOpacity': 20
                        }
                    },
                    'overrides': [
                        {
                            'matcher': {'id': 'byName', 'options': '5xx Error Rate'},
                            'properties': [{'id': 'color', 'value': {'fixedColor': 'red'}}]
                        }
                    ]
                },
                'options': {
                    'tooltip': {'mode': 'multi', 'sort': 'desc'},
                    'legend': {'displayMode': 'table', 'placement': 'bottom'}
                }
            },
            {
                'id': 'saturation',
                'title': 'Saturation Metrics',
                'type': 'timeseries',
                'grid_pos': {'x': 12, 'y': 14, 'w': 12, 'h': 6},
                'targets': [
                    {
                        'expr': f'rate(process_cpu_seconds_total{{service="{service_name}"}}[5m]) * 100',
                        'legendFormat': 'CPU Usage %'
                    },
                    {
                        'expr': f'process_resident_memory_bytes{{service="{service_name}"}} / process_virtual_memory_max_bytes{{service="{service_name}"}} * 100',
                        'legendFormat': 'Memory Usage %'
                    }
                ],
                'field_config': {
                    'defaults': {
                        'color': {'mode': 'palette-classic'},
                        'unit': 'percent',
                        'max': 100,
                        'custom': {
                            'drawStyle': 'line',
                            'lineInterpolation': 'linear',
                            'lineWidth': 1,
                            'fillOpacity': 10
                        }
                    }
                },
                'options': {
                    'tooltip': {'mode': 'multi', 'sort': 'desc'},
                    'legend': {'displayMode': 'table', 'placement': 'bottom'}
                }
            }
        ]

    def _create_resource_panels(self, service_def: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Create resource utilization panels."""
        service_name = service_def.get('name', 'service')

        return [
            {
                'id': 'cpu_usage',
                'title': 'CPU Usage',
                'type': 'gauge',
                'grid_pos': {'x': 0, 'y': 20, 'w': 6, 'h': 4},
                'targets': [
                    {
                        'expr': f'rate(process_cpu_seconds_total{{service="{service_name}"}}[5m]) * 100',
                        'legendFormat': 'CPU %'
                    }
                ],
                'field_config': {
                    'defaults': {
                        'color': {'mode': 'thresholds'},
                        'unit': 'percent',
                        'min': 0,
                        'max': 100,
                        'thresholds': {
                            'steps': [
                                {'color': 'green', 'value': 0},
                                {'color': 'yellow', 'value': 70},
                                {'color': 'red', 'value': 90}
                            ]
                        }
                    }
                },
                'options': {
                    'showThresholdLabels': True,
                    'showThresholdMarkers': True
                }
            },
            {
                'id': 'memory_usage',
                'title': 'Memory Usage',
                'type': 'gauge',
                'grid_pos': {'x': 6, 'y': 20, 'w': 6, 'h': 4},
                'targets': [
                    {
                        'expr': f'process_resident_memory_bytes{{service="{service_name}"}} / 1024 / 1024',
                        'legendFormat': 'Memory MB'
                    }
                ],
                'field_config': {
                    'defaults': {
                        'color': {'mode': 'thresholds'},
                        'unit': 'decbytes',
                        'thresholds': {
                            'steps': [
                                {'color': 'green', 'value': 0},
                                {'color': 'yellow', 'value': 512000000},  # 512MB
                                {'color': 'red', 'value': 1024000000}     # 1GB
                            ]
                        }
                    }
                }
            },
            {
                'id': 'network_io',
                'title': 'Network I/O',
                'type': 'timeseries',
                'grid_pos': {'x': 12, 'y': 20, 'w': 6, 'h': 4},
                'targets': [
                    {
                        'expr': f'rate(process_network_receive_bytes_total{{service="{service_name}"}}[5m])',
                        'legendFormat': 'RX Bytes/s'
                    },
                    {
                        'expr': f'rate(process_network_transmit_bytes_total{{service="{service_name}"}}[5m])',
                        'legendFormat': 'TX Bytes/s'
                    }
                ],
                'field_config': {
                    'defaults': {
                        'color': {'mode': 'palette-classic'},
                        'unit': 'binBps'
                    }
                }
            },
            {
                'id': 'disk_io',
                'title': 'Disk I/O',
                'type': 'timeseries',
                'grid_pos': {'x': 18, 'y': 20, 'w': 6, 'h': 4},
                'targets': [
                    {
                        'expr': f'rate(process_disk_read_bytes_total{{service="{service_name}"}}[5m])',
                        'legendFormat': 'Read Bytes/s'
                    },
                    {
                        'expr': f'rate(process_disk_write_bytes_total{{service="{service_name}"}}[5m])',
                        'legendFormat': 'Write Bytes/s'
                    }
                ],
                'field_config': {
                    'defaults': {
                        'color': {'mode': 'palette-classic'},
                        'unit': 'binBps'
                    }
                }
            }
        ]

    def _create_api_specific_panels(self, service_def: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Create API-specific panels."""
        service_name = service_def.get('name', 'service')

        return [
            {
                'id': 'endpoint_latency',
                'title': 'Top Slowest Endpoints',
                'type': 'table',
                'grid_pos': {'x': 0, 'y': 24, 'w': 12, 'h': 6},
                'targets': [
                    {
                        'expr': f'topk(10, histogram_quantile(0.95, sum by (handler) (rate(http_request_duration_seconds_bucket{{service="{service_name}"}}[5m])))) * 1000',
                        'legendFormat': '{{handler}}',
                        'format': 'table',
                        'instant': True
                    }
                ],
                'transformations': [
                    {
                        'id': 'organize',
                        'options': {
                            'excludeByName': {'Time': True},
                            'renameByName': {'Value': 'P95 Latency (ms)'}
                        }
                    }
                ],
                'field_config': {
                    'overrides': [
                        {
                            'matcher': {'id': 'byName', 'options': 'P95 Latency (ms)'},
                            'properties': [
                                {'id': 'color', 'value': {'mode': 'thresholds'}},
                                {'id': 'thresholds', 'value': {
                                    'steps': [
                                        {'color': 'green', 'value': 0},
                                        {'color': 'yellow', 'value': 100},
                                        {'color': 'red', 'value': 500}
                                    ]
                                }}
                            ]
                        }
                    ]
                }
            },
            {
                'id': 'request_size_distribution',
                'title': 'Request Size Distribution',
                'type': 'heatmap',
                'grid_pos': {'x': 12, 'y': 24, 'w': 12, 'h': 6},
                'targets': [
                    {
                        'expr': f'sum by (le) (rate(http_request_size_bytes_bucket{{service="{service_name}"}}[5m]))',
                        'legendFormat': '{{le}}'
                    }
                ],
                'options': {
                    'calculate': True,
                    'yAxis': {'unit': 'bytes'},
                    'color': {'scheme': 'Spectral'}
                }
            }
        ]

    def _create_database_specific_panels(self, service_def: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Create database-specific panels."""
        service_name = service_def.get('name', 'service')

        return [
            {
                'id': 'db_connections',
                'title': 'Database Connections',
                'type': 'timeseries',
                'grid_pos': {'x': 0, 'y': 24, 'w': 8, 'h': 6},
                'targets': [
                    {
                        'expr': f'db_connections_active{{service="{service_name}"}}',
                        'legendFormat': 'Active Connections'
                    },
                    {
                        'expr': f'db_connections_idle{{service="{service_name}"}}',
                        'legendFormat': 'Idle Connections'
                    },
                    {
                        'expr': f'db_connections_max{{service="{service_name}"}}',
                        'legendFormat': 'Max Connections'
                    }
                ]
            },
            {
                'id': 'query_performance',
                'title': 'Query Performance',
                'type': 'timeseries',
                'grid_pos': {'x': 8, 'y': 24, 'w': 8, 'h': 6},
                'targets': [
                    {
                        'expr': f'rate(db_queries_total{{service="{service_name}"}}[5m])',
                        'legendFormat': 'Queries/sec'
                    },
                    {
                        'expr': f'rate(db_slow_queries_total{{service="{service_name}"}}[5m])',
                        'legendFormat': 'Slow Queries/sec'
                    }
                ]
            },
            {
                'id': 'db_locks',
                'title': 'Database Locks',
                'type': 'stat',
                'grid_pos': {'x': 16, 'y': 24, 'w': 8, 'h': 6},
                'targets': [
                    {
                        'expr': f'db_locks_waiting{{service="{service_name}"}}',
                        'legendFormat': 'Waiting Locks'
                    }
                ],
                'field_config': {
                    'defaults': {
                        'color': {'mode': 'thresholds'},
                        'thresholds': {
                            'steps': [
                                {'color': 'green', 'value': 0},
                                {'color': 'yellow', 'value': 1},
                                {'color': 'red', 'value': 5}
                            ]
                        }
                    }
                }
            }
        ]

    def _create_queue_specific_panels(self, service_def: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Create queue-specific panels."""
        service_name = service_def.get('name', 'service')

        return [
            {
                'id': 'queue_depth',
                'title': 'Queue Depth',
                'type': 'timeseries',
                'grid_pos': {'x': 0, 'y': 24, 'w': 12, 'h': 6},
                'targets': [
                    {
                        'expr': f'queue_depth{{service="{service_name}"}}',
                        'legendFormat': 'Messages in Queue'
                    }
                ]
            },
            {
                'id': 'message_throughput',
                'title': 'Message Throughput',
                'type': 'timeseries',
                'grid_pos': {'x': 12, 'y': 24, 'w': 12, 'h': 6},
                'targets': [
                    {
                        'expr': f'rate(messages_published_total{{service="{service_name}"}}[5m])',
                        'legendFormat': 'Published/sec'
                    },
                    {
                        'expr': f'rate(messages_consumed_total{{service="{service_name}"}}[5m])',
                        'legendFormat': 'Consumed/sec'
                    }
                ]
            }
        ]

    def _create_business_metrics_panels(self, service_def: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Create business metrics panels."""
        service_name = service_def.get('name', 'service')

        return [
            {
                'id': 'business_kpis',
                'title': 'Business KPIs',
                'type': 'stat',
                'grid_pos': {'x': 0, 'y': 30, 'w': 24, 'h': 4},
                'targets': [
                    {
                        'expr': f'rate(business_transactions_total{{service="{service_name}"}}[1h])',
                        'legendFormat': 'Transactions/hour'
                    },
                    {
                        'expr': f'avg(business_transaction_value{{service="{service_name}"}}) * rate(business_transactions_total{{service="{service_name}"}}[1h])',
                        'legendFormat': 'Revenue/hour'
                    },
                    {
                        'expr': f'rate(user_registrations_total{{service="{service_name}"}}[1h])',
                        'legendFormat': 'New Users/hour'
                    }
                ],
                'field_config': {
                    'defaults': {
                        'color': {'mode': 'palette-classic'},
                        'custom': {
                            'displayMode': 'basic'
                        }
                    }
                },
                'options': {
                    'orientation': 'horizontal',
                    'textMode': 'value_and_name'
                }
            }
        ]

    def _create_capacity_panels(self, service_def: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Create capacity planning panels."""
        service_name = service_def.get('name', 'service')

        return [
            {
                'id': 'capacity_trends',
                'title': 'Capacity Trends (7d)',
                'type': 'timeseries',
                'grid_pos': {'x': 0, 'y': 34, 'w': 24, 'h': 6},
                'targets': [
                    {
                        'expr': f'predict_linear(avg_over_time(rate(http_requests_total{{service="{service_name}"}}[5m])[7d:1h]), 7*24*3600)',
                        'legendFormat': 'Predicted Traffic (7d)'
                    },
                    {
                        'expr': f'predict_linear(avg_over_time(process_resident_memory_bytes{{service="{service_name}"}}[7d:1h]), 7*24*3600)',
                        'legendFormat': 'Predicted Memory Usage (7d)'
                    }
                ],
                'field_config': {
                    'defaults': {
                        'color': {'mode': 'palette-classic'},
                        'custom': {
                            'drawStyle': 'line',
                            'lineStyle': {'dash': [10, 10]}
                        }
                    }
                }
            }
        ]

    def _generate_template_variables(self, service_def: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Generate template variables for dynamic dashboard filtering."""
        service_name = service_def.get('name', 'service')

        return [
            {
                'name': 'environment',
                'type': 'query',
                'query': 'label_values(environment)',
                'current': {'text': 'production', 'value': 'production'},
                'includeAll': False,
                'multi': False,
                'refresh': 'on_dashboard_load'
            },
            {
                'name': 'instance',
                'type': 'query',
                'query': f'label_values(up{{service="{service_name}"}}, instance)',
                'current': {'text': 'All', 'value': '$__all'},
                'includeAll': True,
                'multi': True,
                'refresh': 'on_time_range_change'
            },
            {
                'name': 'handler',
                'type': 'query',
                'query': f'label_values(http_requests_total{{service="{service_name}"}}, handler)',
                'current': {'text': 'All', 'value': '$__all'},
                'includeAll': True,
                'multi': True,
                'refresh': 'on_time_range_change'
            }
        ]

    def _generate_alerts_integration(self, service_def: Dict[str, Any]) -> Dict[str, Any]:
        """Generate alerts integration configuration."""
        service_name = service_def.get('name', 'service')

        return {
            'alert_annotations': True,
            'alert_rules_query': f'ALERTS{{service="{service_name}"}}',
            'alert_panels': [
                {
                    'title': 'Active Alerts',
                    'type': 'table',
                    'query': f'ALERTS{{service="{service_name}",alertstate="firing"}}',
                    'columns': ['alertname', 'severity', 'instance', 'description']
                }
            ]
        }

    def _generate_drill_down_paths(self, service_def: Dict[str, Any]) -> Dict[str, Any]:
        """Generate drill-down navigation paths."""
        service_name = service_def.get('name', 'service')

        return {
            'service_overview': {
                'from': 'service_status',
                'to': 'detailed_health_dashboard',
                'url': f'/d/service-health/{service_name}-health',
                'params': ['var-service', 'var-environment']
            },
            'error_investigation': {
                'from': 'errors',
                'to': 'error_details_dashboard',
                'url': f'/d/errors/{service_name}-errors',
                'params': ['var-service', 'var-time_range']
            },
            'latency_analysis': {
                'from': 'latency',
                'to': 'trace_analysis_dashboard',
                'url': f'/d/traces/{service_name}-traces',
                'params': ['var-service', 'var-handler']
            },
            'capacity_planning': {
                'from': 'saturation',
                'to': 'capacity_dashboard',
                'url': f'/d/capacity/{service_name}-capacity',
                'params': ['var-service', 'var-time_range']
            }
        }

    def generate_grafana_json(self, dashboard_spec: Dict[str, Any]) -> Dict[str, Any]:
        """Convert dashboard specification to Grafana JSON format."""
        metadata = dashboard_spec['metadata']
        config = dashboard_spec['configuration']

        grafana_json = {
            'dashboard': {
                'id': None,
                'title': metadata['title'],
                'tags': [metadata['service']['type'], metadata['target_role'], 'generated'],
                'timezone': config['timezone'],
                'refresh': config['refresh_interval'],
                'time': {
                    'from': 'now-1h',
                    'to': 'now'
                },
                'templating': {
                    'list': dashboard_spec['variables']
                },
                'panels': self._convert_panels_to_grafana_format(dashboard_spec['panels']),
                'version': 1,
                'schemaVersion': 30
            },
            'overwrite': True
        }

        return grafana_json

    def _convert_panels_to_grafana_format(self, panels: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Convert panel specifications to Grafana format."""
        grafana_panels = []

        for panel in panels:
            grafana_panel = {
                'id': hash(panel['id']) % 1000,  # Generate numeric ID
                'title': panel['title'],
                'type': panel['type'],
                'gridPos': panel['grid_pos'],
                'targets': panel['targets'],
                'fieldConfig': panel.get('field_config', {}),
                'options': panel.get('options', {}),
                'transformations': panel.get('transformations', [])
            }
            grafana_panels.append(grafana_panel)

        return grafana_panels

    def generate_documentation(self, dashboard_spec: Dict[str, Any]) -> str:
        """Generate documentation for the dashboard."""
        metadata = dashboard_spec['metadata']
        service = metadata['service']

        doc_content = f"""# {metadata['title']} Documentation

## Overview
This dashboard provides comprehensive monitoring for {service['name']}, a {service['type']} service with {service['criticality']} criticality.

**Target Audience:** {metadata['target_role'].upper()} teams
**Generated:** {metadata['generated_at']}

## Dashboard Sections

### Service Overview
- **Service Status**: Real-time availability status
- **SLO Achievement**: 30-day SLO compliance metrics
- **Error Budget**: Remaining error budget visualization

### Golden Signals Monitoring
- **Latency**: P50, P95, P99 response times
- **Traffic**: Request rate by status code
- **Errors**: Error rates for 4xx and 5xx responses
- **Saturation**: CPU and memory utilization

### Resource Utilization
- **CPU Usage**: Process CPU consumption
- **Memory Usage**: Memory utilization tracking
- **Network I/O**: Network throughput metrics
- **Disk I/O**: Disk read/write operations

## Key Metrics

### SLIs Tracked
"""

        # Add service-type specific metrics
        service_type = service.get('type', 'api')
        if service_type in self.SERVICE_METRICS:
            metrics = self.SERVICE_METRICS[service_type]['key_metrics']
            for metric in metrics:
                doc_content += f"- `{metric}`: Core service metric\n"

        doc_content += f"""
## Alert Integration
- Active alerts are displayed in context with relevant panels
- Alert annotations show on time series charts
- Click-through to alert management system available

## Drill-Down Paths
"""

        drill_downs = dashboard_spec.get('drill_down_paths', {})
        for path_name, path_config in drill_downs.items():
            doc_content += f"- **{path_name}**: From {path_config['from']} → {path_config['to']}\n"

        doc_content += f"""
## Usage Guidelines

### Time Ranges
Use appropriate time ranges for different investigation types:
- **Real-time monitoring**: 15m - 1h
- **Recent incident investigation**: 1h - 6h
- **Trend analysis**: 1d - 7d
- **Capacity planning**: 7d - 30d

### Variables
- **environment**: Filter by deployment environment
- **instance**: Focus on specific service instances
- **handler**: Filter by API endpoint or handler

### Performance Optimization
- Use longer time ranges for capacity planning
- Refresh intervals are optimized per role:
  - SRE: 30s for operational awareness
  - Developer: 1m for troubleshooting
  - Executive: 5m for high-level monitoring

## Maintenance
- Dashboard panels automatically adapt to service changes
- Template variables refresh based on actual metric labels
- Review and update business metrics quarterly
"""

        return doc_content

    def export_specification(self, dashboard_spec: Dict[str, Any], output_file: str,
                           format_type: str = 'json'):
        """Export dashboard specification."""
        if format_type.lower() == 'json':
            with open(output_file, 'w') as f:
                json.dump(dashboard_spec, f, indent=2)
        elif format_type.lower() == 'grafana':
            grafana_json = self.generate_grafana_json(dashboard_spec)
            with open(output_file, 'w') as f:
                json.dump(grafana_json, f, indent=2)
        else:
            raise ValueError(f"Unsupported format: {format_type}")

    def print_summary(self, dashboard_spec: Dict[str, Any]):
        """Print human-readable summary of dashboard specification."""
        metadata = dashboard_spec['metadata']
        service = metadata['service']
        config = dashboard_spec['configuration']
        panels = dashboard_spec['panels']

        print(f"\n{'='*60}")
        print(f"DASHBOARD SPECIFICATION SUMMARY")
        print(f"{'='*60}")

        print(f"\nDashboard Details:")
        print(f"  Title: {metadata['title']}")
        print(f"  Target Role: {metadata['target_role'].upper()}")
        print(f"  Service: {service['name']} ({service['type']})")
        print(f"  Criticality: {service['criticality']}")
        print(f"  Generated: {metadata['generated_at']}")

        print(f"\nConfiguration:")
        print(f"  Default Time Range: {config['default_time_range']}")
        print(f"  Refresh Interval: {config['refresh_interval']}")
        print(f"  Available Time Ranges: {', '.join(config['time_ranges'])}")

        print(f"\nPanels ({len(panels)}):")
        panel_types = {}
        for panel in panels:
            panel_type = panel['type']
            panel_types[panel_type] = panel_types.get(panel_type, 0) + 1

        for panel_type, count in panel_types.items():
            print(f"  {panel_type}: {count}")

        variables = dashboard_spec.get('variables', [])
        print(f"\nTemplate Variables ({len(variables)}):")
        for var in variables:
            print(f"  {var['name']} ({var['type']})")

        drill_downs = dashboard_spec.get('drill_down_paths', {})
        print(f"\nDrill-down Paths: {len(drill_downs)}")

        print(f"\nKey Features:")
        print(f"  • Golden Signals monitoring")
        print(f"  • Resource utilization tracking")
        print(f"  • Alert integration")
        print(f"  • Role-optimized layout")
        print(f"  • Service-type specific panels")

        print(f"\n{'='*60}\n")


def main():
    """Main function for CLI usage.

    Parses the command line, obtains a service definition (from ``--input``
    or from ``--service-type``/``--name``/``--criticality``), generates the
    dashboard specification for the requested role and, unless
    ``--summary-only`` is given, writes it to the output file together with
    the optional documentation file. A summary is always printed.

    Exits via ``parser.error`` when neither an input file nor a service
    type and name are supplied, and with status 1 (message on stderr) when
    any step raises an exception.
    """
    parser = argparse.ArgumentParser(
        description='Generate comprehensive dashboard specifications',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    # Generate from service definition file
    python dashboard_generator.py --input service.json --output dashboard.json

    # Generate from command line parameters
    python dashboard_generator.py --service-type api --name "Payment Service" --output payment_dashboard.json

    # Generate Grafana-compatible JSON
    python dashboard_generator.py --input service.json --output dashboard.json --format grafana

    # Generate with specific role focus
    python dashboard_generator.py --service-type web --name "Frontend" --role developer --output frontend_dev.json
        """
    )

    parser.add_argument('--input', '-i',
                       help='Input service definition JSON file')
    parser.add_argument('--output', '-o',
                       help='Output dashboard specification file')
    parser.add_argument('--service-type',
                       choices=['api', 'web', 'database', 'queue', 'batch', 'ml'],
                       help='Service type')
    parser.add_argument('--name',
                       help='Service name')
    parser.add_argument('--criticality',
                       choices=['critical', 'high', 'medium', 'low'],
                       default='medium',
                       help='Service criticality level')
    parser.add_argument('--role',
                       choices=['sre', 'developer', 'executive', 'ops'],
                       default='sre',
                       help='Target role for dashboard optimization')
    parser.add_argument('--format',
                       choices=['json', 'grafana'],
                       default='json',
                       help='Output format (json specification or grafana compatible)')
    parser.add_argument('--doc-output',
                       help='Generate documentation file')
    parser.add_argument('--summary-only', action='store_true',
                       help='Only display summary, do not save files')

    args = parser.parse_args()

    # A service must be described either by a file or by type + name.
    if not args.input and not (args.service_type and args.name):
        parser.error("Must provide either --input file or --service-type and --name")

    generator = DashboardGenerator()

    try:
        # Load or create service definition
        if args.input:
            service_def = generator.load_service_definition(args.input)
        else:
            service_def = generator.create_service_definition(
                args.service_type, args.name, args.criticality
            )

        # Generate dashboard specification
        dashboard_spec = generator.generate_dashboard_specification(service_def, args.role)

        # Output results
        if not args.summary_only:
            # Default file name is derived from the service name.
            output_file = args.output or f"{service_def['name'].replace(' ', '_').lower()}_dashboard.json"
            generator.export_specification(dashboard_spec, output_file, args.format)
            print(f"Dashboard specification saved to: {output_file}")

            # Generate documentation if requested
            if args.doc_output:
                documentation = generator.generate_documentation(dashboard_spec)
                # The generated Markdown contains non-ASCII characters (the
                # drill-down arrow), so write UTF-8 explicitly instead of
                # relying on the platform default encoding.
                with open(args.doc_output, 'w', encoding='utf-8') as f:
                    f.write(documentation)
                print(f"Documentation saved to: {args.doc_output}")

        # Always show summary
        generator.print_summary(dashboard_spec)

    except Exception as e:
        # Top-level boundary: report any failure and signal it via exit code.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()