#!/usr/bin/env python3
"""
Database Index Optimizer

Analyzes schema definitions and query patterns to recommend optimal indexes:
- Identifies missing indexes for common query patterns
- Detects redundant and overlapping indexes
- Suggests composite index column ordering
- Estimates selectivity and performance impact
- Generates CREATE INDEX statements with rationale

Input: Schema JSON + Query patterns JSON
Output: Index recommendations + CREATE INDEX SQL + before/after analysis

Usage:
    python index_optimizer.py --schema schema.json --queries queries.json --output recommendations.json
    python index_optimizer.py --schema schema.json --queries queries.json --format text
    python index_optimizer.py --schema schema.json --queries queries.json --analyze-existing
"""

import argparse
import json
import re
import sys
from collections import defaultdict, namedtuple, Counter
from typing import Dict, List, Set, Tuple, Optional, Any
from dataclasses import dataclass, asdict, field
import hashlib


@dataclass
class Column:
    """A table column as described by the schema JSON."""
    name: str
    data_type: str
    nullable: bool = True
    unique: bool = False
    # Distinct-value count, if the schema supplies one; used to refine
    # selectivity estimates.
    cardinality_estimate: Optional[int] = None


@dataclass
class Index:
    """A database index — either one that exists or one we recommend."""
    name: str
    table: str
    columns: List[str]
    unique: bool = False
    index_type: str = "btree"
    partial_condition: Optional[str] = None
    # BUGFIX: was annotated `List[str]` but defaulted to None (and a bare
    # list default would be a shared mutable). A default factory keeps the
    # field a per-instance list, matching the annotation.
    include_columns: List[str] = field(default_factory=list)
    size_estimate: Optional[int] = None


@dataclass
class QueryPattern:
    """One observed query shape plus how often / how slowly it runs."""
    query_id: str
    query_type: str  # SELECT, INSERT, UPDATE, DELETE
    table: str
    where_conditions: List[Dict[str, Any]]
    join_conditions: List[Dict[str, Any]]
    order_by: List[Dict[str, str]]  # column, direction
    group_by: List[str]
    frequency: int = 1
    avg_execution_time_ms: Optional[float] = None


@dataclass
class IndexRecommendation:
    """A proposed index, with rationale and estimated impact."""
    recommendation_id: str
    table: str
    recommended_index: Index
    reason: str
    query_patterns_helped: List[str]
    estimated_benefit: str
    estimated_overhead: str
    priority: int  # 1 = highest priority
    sql_statement: str
    selectivity_analysis: Dict[str, Any]


@dataclass
class RedundancyIssue:
    """A problem found among existing indexes."""
    issue_type: str  # DUPLICATE, OVERLAPPING, UNUSED
    affected_indexes: List[str]
    table: str
    description: str
    recommendation: str
    sql_statements: List[str]


class SelectivityEstimator:
    """Estimates column selectivity based on naming patterns and data types."""

    def __init__(self):
        # Selectivity patterns based on common column names and types
        self.high_selectivity_patterns = [
            r'.*_id$', r'^id$', r'uuid', r'guid', r'email', r'username',
            r'ssn', r'account.*number', r'transaction.*id', r'reference.*number'
        ]
        self.medium_selectivity_patterns = [
            r'name$', r'title$', r'description$', r'address', r'phone',
            r'zip', r'postal.*code', r'serial.*number', r'sku', r'product.*code'
        ]
        self.low_selectivity_patterns = [
            r'status$', r'type$', r'category', r'state$', r'flag$',
            r'active$', r'enabled$', r'deleted$', r'visible$', r'gender$', r'priority$'
        ]
        self.very_low_selectivity_patterns = [
            r'is_.*', r'has_.*', r'can_.*', r'boolean', r'bool'
        ]

    def estimate_selectivity(self, column: Column, table_size_estimate: int = 10000) -> float:
        """Estimate column selectivity (0.0 = all same values, 1.0 = all unique values)."""
        column_name_lower = column.name.lower()

        # Primary key or unique columns
        if column.unique or column.name.lower() in ['id', 'uuid', 'guid']:
            return 1.0

        # Check cardinality estimate if available
        if column.cardinality_estimate:
            return min(column.cardinality_estimate / table_size_estimate, 1.0)

        # Pattern-based estimation; note the lists are checked in priority
        # order, so e.g. "is_active" matches `active$` (low) before `is_.*`.
        for pattern in self.high_selectivity_patterns:
            if re.search(pattern, column_name_lower):
                return 0.9  # Very high selectivity
        for pattern in self.medium_selectivity_patterns:
            if re.search(pattern, column_name_lower):
                return 0.7  # Good selectivity
        for pattern in self.low_selectivity_patterns:
            if re.search(pattern, column_name_lower):
                return 0.2  # Poor selectivity
        for pattern in self.very_low_selectivity_patterns:
            if re.search(pattern, column_name_lower):
                return 0.1  # Very poor selectivity

        # Data type based estimation
        data_type_upper = column.data_type.upper()
        if data_type_upper.startswith('BOOL'):
            return 0.1
        elif data_type_upper.startswith(('TINYINT', 'SMALLINT')):
            return 0.3
        elif data_type_upper.startswith('INT'):
            return 0.8
        elif data_type_upper.startswith(('VARCHAR', 'TEXT')):
            # Estimate based on column name
            if 'name' in column_name_lower:
                return 0.7
            elif 'description' in column_name_lower or 'comment' in column_name_lower:
                return 0.9
            else:
                return 0.6

        # Default moderate selectivity
        return 0.5


class IndexOptimizer:
    """Core analysis engine: loads schema + query patterns, recommends
    missing indexes, and flags redundant or unused existing indexes."""

    def __init__(self):
        # table name -> {column name -> Column}
        self.tables: Dict[str, Dict[str, Column]] = {}
        # table name -> existing Index objects
        self.existing_indexes: Dict[str, List[Index]] = {}
        self.query_patterns: List[QueryPattern] = []
        self.selectivity_estimator = SelectivityEstimator()

        # Configuration
        self.max_composite_index_columns = 6
        self.min_selectivity_for_index = 0.1
        self.redundancy_overlap_threshold = 0.8

    def load_schema(self, schema_data: Dict[str, Any]) -> None:
        """Load schema definition.

        Raises:
            ValueError: if the top-level 'tables' key is missing.
        """
        if 'tables' not in schema_data:
            raise ValueError("Schema must contain 'tables' key")

        for table_name, table_def in schema_data['tables'].items():
            self.tables[table_name] = {}
            self.existing_indexes[table_name] = []

            # Load columns
            for col_name, col_def in table_def.get('columns', {}).items():
                column = Column(
                    name=col_name,
                    data_type=col_def.get('type', 'VARCHAR(255)'),
                    nullable=col_def.get('nullable', True),
                    unique=col_def.get('unique', False),
                    cardinality_estimate=col_def.get('cardinality_estimate')
                )
                self.tables[table_name][col_name] = column

            # Load existing indexes
            for idx_def in table_def.get('indexes', []):
                index = Index(
                    name=idx_def['name'],
                    table=table_name,
                    columns=idx_def['columns'],
                    unique=idx_def.get('unique', False),
                    index_type=idx_def.get('type', 'btree'),
                    partial_condition=idx_def.get('partial_condition'),
                    include_columns=idx_def.get('include_columns', [])
                )
                self.existing_indexes[table_name].append(index)

    def load_query_patterns(self, query_data: Dict[str, Any]) -> None:
        """Load query patterns for analysis.

        Raises:
            ValueError: if the top-level 'queries' key is missing.
        """
        if 'queries' not in query_data:
            raise ValueError("Query data must contain 'queries' key")

        for query_def in query_data['queries']:
            pattern = QueryPattern(
                query_id=query_def['id'],
                query_type=query_def.get('type', 'SELECT').upper(),
                table=query_def['table'],
                where_conditions=query_def.get('where_conditions', []),
                join_conditions=query_def.get('join_conditions', []),
                order_by=query_def.get('order_by', []),
                group_by=query_def.get('group_by', []),
                frequency=query_def.get('frequency', 1),
                avg_execution_time_ms=query_def.get('avg_execution_time_ms')
            )
            self.query_patterns.append(pattern)

    def analyze_missing_indexes(self) -> List[IndexRecommendation]:
        """Identify missing indexes based on query patterns."""
        recommendations = []

        for pattern in self.query_patterns:
            table_name = pattern.table
            if table_name not in self.tables:
                continue

            # Analyze WHERE conditions for single-column indexes
            for condition in pattern.where_conditions:
                column = condition.get('column')
                operator = condition.get('operator', '=')
                if column and column in self.tables[table_name]:
                    if not self._has_covering_index(table_name, [column]):
                        recommendation = self._create_single_column_recommendation(
                            table_name, column, pattern, operator
                        )
                        if recommendation:
                            recommendations.append(recommendation)

            # Analyze composite indexes for multi-column WHERE conditions.
            # BUGFIX: dedupe while preserving order — two conditions on the
            # same column (e.g. a range: col > x AND col < y) previously
            # produced a composite index with a repeated column.
            where_columns = [cond.get('column') for cond in pattern.where_conditions
                             if cond.get('column') and cond.get('column') in self.tables[table_name]]
            where_columns = list(dict.fromkeys(where_columns))
            if len(where_columns) > 1:
                composite_recommendation = self._create_composite_recommendation(
                    table_name, where_columns, pattern
                )
                if composite_recommendation:
                    recommendations.append(composite_recommendation)

            # Analyze covering indexes for SELECT with ORDER BY
            if pattern.order_by and where_columns:
                covering_recommendation = self._create_covering_index_recommendation(
                    table_name, where_columns, pattern
                )
                if covering_recommendation:
                    recommendations.append(covering_recommendation)

            # Analyze JOIN conditions
            for join_condition in pattern.join_conditions:
                local_column = join_condition.get('local_column')
                if local_column and local_column in self.tables[table_name]:
                    if not self._has_covering_index(table_name, [local_column]):
                        recommendation = self._create_join_index_recommendation(
                            table_name, local_column, pattern, join_condition
                        )
                        if recommendation:
                            recommendations.append(recommendation)

        # Remove duplicates and prioritize
        recommendations = self._deduplicate_recommendations(recommendations)
        recommendations = self._prioritize_recommendations(recommendations)
        return recommendations

    def _has_covering_index(self, table_name: str, columns: List[str]) -> bool:
        """Check if existing indexes cover the specified columns."""
        if table_name not in self.existing_indexes:
            return False

        for index in self.existing_indexes[table_name]:
            # Check if index starts with required columns (prefix match for composite)
            if len(index.columns) >= len(columns):
                if index.columns[:len(columns)] == columns:
                    return True
        return False

    def _stable_hash_suffix(self, key: str) -> int:
        """Deterministic 4-digit suffix for truncated index names.

        BUGFIX: previously used the builtin hash(), which is randomized per
        process (PYTHONHASHSEED), so generated index names differed between
        runs. md5 gives a stable, reproducible value.
        """
        return int(hashlib.md5(key.encode()).hexdigest(), 16) % 10000

    def _create_single_column_recommendation(
        self, table_name: str, column: str, pattern: QueryPattern, operator: str
    ) -> Optional[IndexRecommendation]:
        """Create recommendation for single-column index."""
        column_obj = self.tables[table_name][column]
        selectivity = self.selectivity_estimator.estimate_selectivity(column_obj)

        # Skip very low selectivity columns unless frequently used
        if selectivity < self.min_selectivity_for_index and pattern.frequency < 100:
            return None

        index_name = f"idx_{table_name}_{column}"
        index = Index(
            name=index_name,
            table=table_name,
            columns=[column],
            unique=column_obj.unique,
            index_type="btree"
        )

        reason = f"Optimize WHERE {column} {operator} queries"
        if pattern.frequency > 10:
            reason += f" (used {pattern.frequency} times)"

        return IndexRecommendation(
            recommendation_id=self._generate_recommendation_id(table_name, [column]),
            table=table_name,
            recommended_index=index,
            reason=reason,
            query_patterns_helped=[pattern.query_id],
            estimated_benefit=self._estimate_benefit(selectivity, pattern.frequency),
            estimated_overhead="Low (single column)",
            priority=self._calculate_priority(selectivity, pattern.frequency, 1),
            sql_statement=f"CREATE INDEX {index_name} ON {table_name} ({column});",
            selectivity_analysis={
                "column_selectivity": selectivity,
                "estimated_reduction": f"{int(selectivity * 100)}%"
            }
        )

    def _create_composite_recommendation(
        self, table_name: str, columns: List[str], pattern: QueryPattern
    ) -> Optional[IndexRecommendation]:
        """Create recommendation for composite index."""
        if len(columns) > self.max_composite_index_columns:
            columns = columns[:self.max_composite_index_columns]

        # Order columns by selectivity (most selective first)
        column_selectivities = []
        for col in columns:
            col_obj = self.tables[table_name][col]
            selectivity = self.selectivity_estimator.estimate_selectivity(col_obj)
            column_selectivities.append((col, selectivity))

        # Sort by selectivity descending
        column_selectivities.sort(key=lambda x: x[1], reverse=True)
        ordered_columns = [col for col, _ in column_selectivities]

        # Calculate combined selectivity (capped average — a heuristic)
        combined_selectivity = min(sum(sel for _, sel in column_selectivities) / len(columns), 0.95)

        index_name = f"idx_{table_name}_{'_'.join(ordered_columns)}"
        if len(index_name) > 63:  # PostgreSQL limit
            index_name = f"idx_{table_name}_composite_{self._stable_hash_suffix('_'.join(ordered_columns))}"

        index = Index(
            name=index_name,
            table=table_name,
            columns=ordered_columns,
            index_type="btree"
        )

        reason = f"Optimize multi-column WHERE conditions: {', '.join(ordered_columns)}"

        return IndexRecommendation(
            recommendation_id=self._generate_recommendation_id(table_name, ordered_columns),
            table=table_name,
            recommended_index=index,
            reason=reason,
            query_patterns_helped=[pattern.query_id],
            estimated_benefit=self._estimate_benefit(combined_selectivity, pattern.frequency),
            estimated_overhead=f"Medium (composite index with {len(ordered_columns)} columns)",
            priority=self._calculate_priority(combined_selectivity, pattern.frequency, len(ordered_columns)),
            sql_statement=f"CREATE INDEX {index_name} ON {table_name} ({', '.join(ordered_columns)});",
            selectivity_analysis={
                "column_selectivities": {col: sel for col, sel in column_selectivities},
                "combined_selectivity": combined_selectivity,
                "column_order_rationale": "Ordered by selectivity (most selective first)"
            }
        )

    def _create_covering_index_recommendation(
        self, table_name: str, where_columns: List[str], pattern: QueryPattern
    ) -> Optional[IndexRecommendation]:
        """Create recommendation for covering index."""
        order_columns = [col['column'] for col in pattern.order_by
                         if col['column'] in self.tables[table_name]]

        # Combine WHERE and ORDER BY columns
        index_columns = where_columns.copy()
        include_columns = []

        # Add ORDER BY columns to index columns
        for col in order_columns:
            if col not in index_columns:
                index_columns.append(col)

        # Limit index columns; the overflow becomes INCLUDE columns
        if len(index_columns) > self.max_composite_index_columns:
            include_columns = index_columns[self.max_composite_index_columns:]
            index_columns = index_columns[:self.max_composite_index_columns]

        index_name = f"idx_{table_name}_covering_{'_'.join(index_columns[:3])}"
        if len(index_name) > 63:
            index_name = f"idx_{table_name}_covering_{self._stable_hash_suffix('_'.join(index_columns))}"

        index = Index(
            name=index_name,
            table=table_name,
            columns=index_columns,
            include_columns=include_columns,
            index_type="btree"
        )

        reason = f"Covering index for WHERE + ORDER BY optimization"

        # Calculate selectivity for main columns
        main_selectivity = 0.5  # Default for covering indexes
        if where_columns:
            selectivities = [
                self.selectivity_estimator.estimate_selectivity(self.tables[table_name][col])
                for col in where_columns[:2]  # Consider first 2 columns
            ]
            main_selectivity = max(selectivities)

        sql_parts = [f"CREATE INDEX {index_name} ON {table_name} ({', '.join(index_columns)})"]
        if include_columns:
            sql_parts.append(f" INCLUDE ({', '.join(include_columns)})")
        sql_statement = ''.join(sql_parts) + ";"

        return IndexRecommendation(
            recommendation_id=self._generate_recommendation_id(table_name, index_columns, "covering"),
            table=table_name,
            recommended_index=index,
            reason=reason,
            query_patterns_helped=[pattern.query_id],
            estimated_benefit="High (eliminates table lookups for SELECT)",
            estimated_overhead=f"High (covering index with {len(index_columns)} columns)",
            priority=self._calculate_priority(main_selectivity, pattern.frequency, len(index_columns)),
            sql_statement=sql_statement,
            selectivity_analysis={
                "main_columns_selectivity": main_selectivity,
                "covering_benefit": "Eliminates table lookup for SELECT queries"
            }
        )

    def _create_join_index_recommendation(
        self, table_name: str, column: str, pattern: QueryPattern,
        join_condition: Dict[str, Any]
    ) -> Optional[IndexRecommendation]:
        """Create recommendation for JOIN optimization index."""
        column_obj = self.tables[table_name][column]
        selectivity = self.selectivity_estimator.estimate_selectivity(column_obj)

        index_name = f"idx_{table_name}_{column}_join"
        index = Index(
            name=index_name,
            table=table_name,
            columns=[column],
            index_type="btree"
        )

        foreign_table = join_condition.get('foreign_table', 'unknown')
        reason = f"Optimize JOIN with {foreign_table} table on {column}"

        return IndexRecommendation(
            recommendation_id=self._generate_recommendation_id(table_name, [column], "join"),
            table=table_name,
            recommended_index=index,
            reason=reason,
            query_patterns_helped=[pattern.query_id],
            estimated_benefit=self._estimate_join_benefit(pattern.frequency),
            estimated_overhead="Low (single column for JOIN)",
            priority=2,  # JOINs are generally high priority
            sql_statement=f"CREATE INDEX {index_name} ON {table_name} ({column});",
            selectivity_analysis={
                "column_selectivity": selectivity,
                "join_optimization": True
            }
        )

    def _generate_recommendation_id(self, table: str, columns: List[str], suffix: str = "") -> str:
        """Generate unique recommendation ID."""
        content = f"{table}_{'_'.join(sorted(columns))}_{suffix}"
        return hashlib.md5(content.encode()).hexdigest()[:8]

    def _estimate_benefit(self, selectivity: float, frequency: int) -> str:
        """Estimate performance benefit of index."""
        if selectivity > 0.8 and frequency > 50:
            return "Very High"
        elif selectivity > 0.6 and frequency > 20:
            return "High"
        elif selectivity > 0.4 or frequency > 10:
            return "Medium"
        else:
            return "Low"

    def _estimate_join_benefit(self, frequency: int) -> str:
        """Estimate benefit for JOIN indexes."""
        if frequency > 50:
            return "Very High (frequent JOINs)"
        elif frequency > 20:
            return "High (regular JOINs)"
        elif frequency > 5:
            return "Medium (occasional JOINs)"
        else:
            return "Low (rare JOINs)"

    def _calculate_priority(self, selectivity: float, frequency: int, column_count: int) -> int:
        """Calculate priority score (1 = highest priority)."""
        # Base score calculation
        score = 0

        # Selectivity contribution (0-50 points)
        score += int(selectivity * 50)

        # Frequency contribution (0-30 points)
        score += min(frequency, 30)

        # Penalty for complex indexes (subtract points)
        score -= (column_count - 1) * 5

        # Convert to priority levels
        if score >= 70:
            return 1  # Highest
        elif score >= 50:
            return 2  # High
        elif score >= 30:
            return 3  # Medium
        else:
            return 4  # Low

    def _deduplicate_recommendations(self, recommendations: List[IndexRecommendation]) -> List[IndexRecommendation]:
        """Remove duplicate recommendations, merging the query IDs they help."""
        seen_indexes = set()
        unique_recommendations = []

        for rec in recommendations:
            index_signature = (rec.table, tuple(rec.recommended_index.columns))
            if index_signature not in seen_indexes:
                seen_indexes.add(index_signature)
                unique_recommendations.append(rec)
            else:
                # Merge query patterns helped into the kept recommendation.
                # BUGFIX: plain extend() could append the same query ID twice.
                for existing_rec in unique_recommendations:
                    if (existing_rec.table == rec.table and
                            existing_rec.recommended_index.columns == rec.recommended_index.columns):
                        for query_id in rec.query_patterns_helped:
                            if query_id not in existing_rec.query_patterns_helped:
                                existing_rec.query_patterns_helped.append(query_id)
                        break

        return unique_recommendations

    def _prioritize_recommendations(self, recommendations: List[IndexRecommendation]) -> List[IndexRecommendation]:
        """Sort recommendations by priority, then by how many queries they help."""
        return sorted(recommendations, key=lambda x: (x.priority, -len(x.query_patterns_helped)))

    def analyze_redundant_indexes(self) -> List[RedundancyIssue]:
        """Identify redundant, overlapping, and potentially unused indexes."""
        redundancy_issues = []

        for table_name, indexes in self.existing_indexes.items():
            if len(indexes) < 2:
                continue

            # Find duplicate indexes
            duplicates = self._find_duplicate_indexes(table_name, indexes)
            redundancy_issues.extend(duplicates)

            # Find overlapping indexes
            overlapping = self._find_overlapping_indexes(table_name, indexes)
            redundancy_issues.extend(overlapping)

            # Find potentially unused indexes
            unused = self._find_unused_indexes(table_name, indexes)
            redundancy_issues.extend(unused)

        return redundancy_issues

    def _find_duplicate_indexes(self, table_name: str, indexes: List[Index]) -> List[RedundancyIssue]:
        """Find exactly duplicate indexes."""
        issues = []
        seen_signatures = {}

        for index in indexes:
            signature = (tuple(index.columns), index.unique, index.partial_condition)
            if signature in seen_signatures:
                existing_index = seen_signatures[signature]
                issues.append(RedundancyIssue(
                    issue_type="DUPLICATE",
                    affected_indexes=[existing_index.name, index.name],
                    table=table_name,
                    description=f"Indexes '{existing_index.name}' and '{index.name}' are identical",
                    recommendation=f"Drop one of the duplicate indexes",
                    sql_statements=[f"DROP INDEX {index.name};"]
                ))
            else:
                seen_signatures[signature] = index

        return issues

    def _find_overlapping_indexes(self, table_name: str, indexes: List[Index]) -> List[RedundancyIssue]:
        """Find overlapping indexes that might be redundant."""
        issues = []

        for i, index1 in enumerate(indexes):
            for index2 in indexes[i+1:]:
                overlap_ratio = self._calculate_overlap_ratio(index1, index2)
                if overlap_ratio >= self.redundancy_overlap_threshold:
                    # Determine which index to keep (prefer the wider one)
                    if len(index1.columns) <= len(index2.columns):
                        redundant_index = index1
                        keep_index = index2
                    else:
                        redundant_index = index2
                        keep_index = index1

                    issues.append(RedundancyIssue(
                        issue_type="OVERLAPPING",
                        affected_indexes=[index1.name, index2.name],
                        table=table_name,
                        description=f"Index '{redundant_index.name}' overlaps {int(overlap_ratio * 100)}% "
                                    f"with '{keep_index.name}'",
                        recommendation=f"Consider dropping '{redundant_index.name}' as it's largely "
                                       f"covered by '{keep_index.name}'",
                        sql_statements=[f"DROP INDEX {redundant_index.name};"]
                    ))

        return issues

    def _calculate_overlap_ratio(self, index1: Index, index2: Index) -> float:
        """Calculate overlap ratio (Jaccard similarity) between two indexes."""
        cols1 = set(index1.columns)
        cols2 = set(index2.columns)
        if not cols1 or not cols2:
            return 0.0
        intersection = len(cols1.intersection(cols2))
        union = len(cols1.union(cols2))
        return intersection / union if union > 0 else 0.0

    def _find_unused_indexes(self, table_name: str, indexes: List[Index]) -> List[RedundancyIssue]:
        """Find potentially unused indexes based on query patterns."""
        issues = []

        # Collect all columns used in query patterns for this table
        used_columns = set()
        table_patterns = [p for p in self.query_patterns if p.table == table_name]
        for pattern in table_patterns:
            # Add WHERE condition columns
            for condition in pattern.where_conditions:
                if condition.get('column'):
                    used_columns.add(condition['column'])
            # Add JOIN columns
            for join in pattern.join_conditions:
                if join.get('local_column'):
                    used_columns.add(join['local_column'])
            # Add ORDER BY columns
            for order in pattern.order_by:
                if order.get('column'):
                    used_columns.add(order['column'])
            # Add GROUP BY columns
            used_columns.update(pattern.group_by)

        if not used_columns:
            return issues  # Can't determine usage without query patterns

        for index in indexes:
            index_columns = set(index.columns)
            if not index_columns.intersection(used_columns):
                issues.append(RedundancyIssue(
                    issue_type="UNUSED",
                    affected_indexes=[index.name],
                    table=table_name,
                    description=f"Index '{index.name}' columns {index.columns} are not used in any query patterns",
                    recommendation="Consider dropping this index if it's truly unused (verify with query logs)",
                    sql_statements=[f"-- Review usage before dropping\n-- DROP INDEX {index.name};"]
                ))

        return issues

    def estimate_index_sizes(self) -> Dict[str, Dict[str, Any]]:
        """Estimate storage requirements for recommended indexes."""
        size_estimates = {}
        # This is a simplified estimation - in practice, would need actual table statistics
        for table_name in self.tables:
            size_estimates[table_name] = {
                "estimated_table_rows": 10000,  # Default estimate
                "existing_indexes_size_mb": len(self.existing_indexes.get(table_name, [])) * 5,  # Rough estimate
                "index_overhead_per_column_mb": 2  # Rough estimate per column
            }
        return size_estimates

    def generate_analysis_report(self) -> Dict[str, Any]:
        """Generate comprehensive analysis report."""
        recommendations = self.analyze_missing_indexes()
        redundancy_issues = self.analyze_redundant_indexes()
        size_estimates = self.estimate_index_sizes()

        # Calculate statistics
        total_existing_indexes = sum(len(indexes) for indexes in self.existing_indexes.values())
        tables_analyzed = len(self.tables)
        query_patterns_analyzed = len(self.query_patterns)

        # Categorize recommendations by priority
        high_priority = [r for r in recommendations if r.priority <= 2]
        medium_priority = [r for r in recommendations if r.priority == 3]
        low_priority = [r for r in recommendations if r.priority >= 4]

        return {
            "analysis_summary": {
                "tables_analyzed": tables_analyzed,
                "query_patterns_analyzed": query_patterns_analyzed,
                "existing_indexes": total_existing_indexes,
                "total_recommendations": len(recommendations),
                "high_priority_recommendations": len(high_priority),
                "redundancy_issues_found": len(redundancy_issues)
            },
            "index_recommendations": {
                "high_priority": [asdict(r) for r in high_priority],
                "medium_priority": [asdict(r) for r in medium_priority],
                "low_priority": [asdict(r) for r in low_priority]
            },
            "redundancy_analysis": [asdict(issue) for issue in redundancy_issues],
            "size_estimates": size_estimates,
            "sql_statements": {
                "create_indexes": [rec.sql_statement for rec in recommendations],
                "drop_redundant": [
                    stmt for issue in redundancy_issues
                    for stmt in issue.sql_statements
                ]
            },
            "performance_impact": self._generate_performance_impact_analysis(recommendations)
        }

    def _generate_performance_impact_analysis(self, recommendations: List[IndexRecommendation]) -> Dict[str, Any]:
        """Generate performance impact analysis."""
        impact_analysis = {
            "query_optimization": {},
            "write_overhead": {},
            "storage_impact": {}
        }

        # Analyze query optimization impact
        query_benefits = defaultdict(list)
        for rec in recommendations:
            for query_id in rec.query_patterns_helped:
                query_benefits[query_id].append(rec.estimated_benefit)

        impact_analysis["query_optimization"] = {
            "queries_improved": len(query_benefits),
            "high_impact_queries": len([q for q, benefits in query_benefits.items()
                                        if any("High" in benefit for benefit in benefits)]),
            "benefit_distribution": dict(Counter(
                rec.estimated_benefit for rec in recommendations
            ))
        }

        # Analyze write overhead
        impact_analysis["write_overhead"] = {
            "total_new_indexes": len(recommendations),
            "estimated_insert_overhead": f"{len(recommendations) * 5}%",  # Rough estimate
            "tables_most_affected": list(Counter(rec.table for rec in recommendations).most_common(3))
        }

        return impact_analysis

    def format_text_report(self, analysis: Dict[str, Any]) -> str:
        """Format analysis as human-readable text report."""
        lines = []
        lines.append("DATABASE INDEX OPTIMIZATION REPORT")
        lines.append("=" * 50)
        lines.append("")

        # Summary
        summary = analysis["analysis_summary"]
        lines.append("ANALYSIS SUMMARY")
        lines.append("-" * 16)
        lines.append(f"Tables Analyzed: {summary['tables_analyzed']}")
        lines.append(f"Query Patterns: {summary['query_patterns_analyzed']}")
        lines.append(f"Existing Indexes: {summary['existing_indexes']}")
        lines.append(f"New Recommendations: {summary['total_recommendations']}")
        lines.append(f"High Priority: {summary['high_priority_recommendations']}")
        lines.append(f"Redundancy Issues: {summary['redundancy_issues_found']}")
        lines.append("")

        # High Priority Recommendations
        high_priority = analysis["index_recommendations"]["high_priority"]
        if high_priority:
            lines.append(f"HIGH PRIORITY RECOMMENDATIONS ({len(high_priority)})")
            lines.append("-" * 35)
            for i, rec in enumerate(high_priority[:10], 1):  # Show top 10
                lines.append(f"{i}. {rec['table']}: {rec['reason']}")
                lines.append(f"   Columns: {', '.join(rec['recommended_index']['columns'])}")
                lines.append(f"   Benefit: {rec['estimated_benefit']}")
                lines.append(f"   SQL: {rec['sql_statement']}")
                lines.append("")

        # Redundancy Issues
        redundancy = analysis["redundancy_analysis"]
        if redundancy:
            lines.append(f"REDUNDANCY ISSUES ({len(redundancy)})")
            lines.append("-" * 20)
            for issue in redundancy[:5]:  # Show first 5
                lines.append(f"• {issue['issue_type']}: {issue['description']}")
                lines.append(f"  Recommendation: {issue['recommendation']}")
                if issue['sql_statements']:
                    lines.append(f"  SQL: {issue['sql_statements'][0]}")
                lines.append("")

        # Performance Impact
        perf_impact = analysis["performance_impact"]
        lines.append("PERFORMANCE IMPACT ANALYSIS")
        lines.append("-" * 30)
        query_opt = perf_impact["query_optimization"]
        lines.append(f"Queries to be optimized: {query_opt['queries_improved']}")
        lines.append(f"High impact optimizations: {query_opt['high_impact_queries']}")
        write_overhead = perf_impact["write_overhead"]
        lines.append(f"Estimated insert overhead: {write_overhead['estimated_insert_overhead']}")
        lines.append("")

        # SQL Statements Summary
        sql_statements = analysis["sql_statements"]
        create_statements = sql_statements["create_indexes"]
        if create_statements:
            lines.append("RECOMMENDED CREATE INDEX STATEMENTS")
            lines.append("-" * 36)
            for i, stmt in enumerate(create_statements[:10], 1):
                lines.append(f"{i}. {stmt}")
            if len(create_statements) > 10:
                lines.append(f"... and {len(create_statements) - 10} more")
            lines.append("")

        return "\n".join(lines)


def main():
    parser = argparse.ArgumentParser(description="Optimize database indexes based on schema and query patterns")
    parser.add_argument("--schema", "-s", required=True, help="Schema definition JSON file")
    parser.add_argument("--queries", "-q", required=True, help="Query patterns JSON file")
    parser.add_argument("--output", "-o", help="Output file (default: stdout)")
    parser.add_argument("--format", "-f", choices=["json", "text"], default="text", help="Output format")
    parser.add_argument("--analyze-existing", "-e", action="store_true",
                        help="Include analysis of existing indexes")
    parser.add_argument("--min-priority", "-p", type=int, default=4,
                        help="Minimum priority level to include (1=highest, 4=lowest)")

    args = parser.parse_args()

    try:
        # Load schema
        with open(args.schema, 'r') as f:
            schema_data = json.load(f)

        # Load queries
        with open(args.queries, 'r') as f:
            query_data = json.load(f)

        # Initialize optimizer
        optimizer = IndexOptimizer()
        optimizer.load_schema(schema_data)
        optimizer.load_query_patterns(query_data)

        # Generate analysis
        analysis = optimizer.generate_analysis_report()

        # Filter by priority if specified
        if args.min_priority < 4:
            for priority_level in ["high_priority", "medium_priority", "low_priority"]:
                analysis["index_recommendations"][priority_level] = [
                    rec for rec in analysis["index_recommendations"][priority_level]
                    if rec["priority"] <= args.min_priority
                ]

        # Format output
        if args.format == "json":
            output = json.dumps(analysis, indent=2)
        else:
            output = optimizer.format_text_report(analysis)

        # Write output
        if args.output:
            with open(args.output, 'w') as f:
                f.write(output)
        else:
            print(output)

        return 0

    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1


if __name__ == "__main__":
    sys.exit(main())