add brain

This commit is contained in:
2026-03-12 15:17:52 +07:00
parent fd9f558fa1
commit e7821a7a9d
355 changed files with 93784 additions and 24 deletions

View File

@@ -0,0 +1,684 @@
#!/usr/bin/env python3
"""
Tech Debt Scanner
Scans a codebase directory for tech debt signals using AST parsing (Python) and
regex patterns (any language). Detects various forms of technical debt and generates
both JSON inventory and human-readable reports.
Usage:
python debt_scanner.py /path/to/codebase
python debt_scanner.py /path/to/codebase --config config.json
python debt_scanner.py /path/to/codebase --output report.json --format both
"""
import ast
import json
import argparse
import os
import re
import sys
from collections import defaultdict, Counter
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Any, Optional, Set, Tuple
class DebtScanner:
    """Scan a directory tree for technical-debt signals.

    Python files are analyzed with the ``ast`` module (via
    ``PythonASTAnalyzer``); other recognized languages are checked with
    regex/heuristic patterns.  Findings are collected as "debt items" and
    summarized into a JSON-serializable report.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Create a scanner, merging ``config`` over the built-in defaults."""
        self.config = self._load_default_config()
        if config:
            self.config.update(config)
        self.debt_items: List[Dict[str, Any]] = []
        self.stats = defaultdict(int)
        self.file_stats: Dict[str, Dict[str, Any]] = {}
        # Compile regex patterns once up front for performance.
        self._compile_patterns()

    def _load_default_config(self) -> Dict[str, Any]:
        """Return the default configuration for debt detection."""
        return {
            "max_function_length": 50,
            "max_complexity": 10,
            "max_nesting_depth": 4,
            "max_file_size_lines": 500,
            "min_duplicate_lines": 3,
            "ignore_patterns": [
                "*.pyc", "__pycache__", ".git", ".svn", "node_modules",
                "build", "dist", "*.min.js", "*.map"
            ],
            "file_extensions": {
                "python": [".py"],
                "javascript": [".js", ".jsx", ".ts", ".tsx"],
                "java": [".java"],
                "csharp": [".cs"],
                "cpp": [".cpp", ".cc", ".cxx", ".c", ".h", ".hpp"],
                "ruby": [".rb"],
                "php": [".php"],
                "go": [".go"],
                "rust": [".rs"],
                "kotlin": [".kt"]
            },
            "comment_patterns": {
                "todo": r"(?i)(TODO|FIXME|HACK|XXX|BUG)[\s:]*(.+)",
                "commented_code": r"^\s*#.*[=(){}\[\];].*",
                "magic_numbers": r"\b\d{2,}\b",
                "long_strings": r'["\'](.{100,})["\']'
            },
            "severity_weights": {
                "critical": 10,
                "high": 7,
                "medium": 5,
                "low": 2,
                "info": 1
            }
        }

    def _compile_patterns(self):
        """Compile configured regex patterns for better performance."""
        self.comment_regexes = {}
        for name, pattern in self.config["comment_patterns"].items():
            self.comment_regexes[name] = re.compile(pattern)
        # Common code-smell patterns applied to whole-file content.
        self.smell_patterns = {
            "empty_catch": re.compile(r"except[^:]*:\s*pass\s*$", re.MULTILINE),
            "print_debug": re.compile(r"print\s*\([^)]*debug[^)]*\)", re.IGNORECASE),
            "hardcoded_paths": re.compile(r'["\'][/\\][^"\']*[/\\][^"\']*["\']'),
            "sql_injection_risk": re.compile(r'["\'].*%s.*["\'].*execute', re.IGNORECASE),
        }

    def scan_directory(self, directory: str) -> Dict[str, Any]:
        """Scan a directory for tech debt.

        Args:
            directory: Path to the directory to scan.

        Returns:
            Dictionary containing the debt inventory and statistics.

        Raises:
            ValueError: If ``directory`` does not exist.
        """
        directory_path = Path(directory)
        if not directory_path.exists():
            raise ValueError(f"Directory does not exist: {directory}")
        print(f"Scanning directory: {directory}")
        print("=" * 50)
        # Reset state so the scanner instance can be reused.
        self.debt_items = []
        self.stats = defaultdict(int)
        self.file_stats = {}
        # Walk through the directory tree.
        for root, dirs, files in os.walk(directory):
            # Prune ignored directories in place so os.walk skips them.
            dirs[:] = [d for d in dirs if not self._should_ignore(d)]
            for file in files:
                if self._should_ignore(file):
                    continue
                file_path = os.path.join(root, file)
                relative_path = os.path.relpath(file_path, directory)
                try:
                    self._scan_file(file_path, relative_path)
                except Exception as e:
                    # Best-effort scan: record the failure and keep going.
                    print(f"Error scanning {relative_path}: {e}")
                    self.stats["scan_errors"] += 1
        # Post-process results.
        self._detect_duplicates(directory)
        self._calculate_priorities()
        return self._generate_report(directory)

    def _should_ignore(self, name: str) -> bool:
        """Return True if a file/directory name matches an ignore pattern.

        Patterns containing ``*`` are treated as globs; other patterns match
        by substring (so ``.git`` also ignores ``.gitignore`` — deliberately
        loose).
        """
        for pattern in self.config["ignore_patterns"]:
            if "*" in pattern:
                # Escape regex metacharacters first so e.g. the "." in
                # "*.pyc" is literal (the previous code produced ".*.pyc",
                # where the dot matched any character), then turn the glob
                # "*" into ".*" and require a full match.
                regex = re.escape(pattern).replace(r"\*", ".*")
                if re.fullmatch(regex, name):
                    return True
            elif pattern in name:
                return True
        return False

    def _scan_file(self, file_path: str, relative_path: str):
        """Scan a single file for tech debt and record its statistics."""
        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
                lines = content.splitlines()
        except Exception as e:
            print(f"Cannot read {relative_path}: {e}")
            return
        file_ext = Path(file_path).suffix.lower()
        file_info = {
            "path": relative_path,
            "lines": len(lines),
            "size_kb": os.path.getsize(file_path) / 1024,
            "language": self._detect_language(file_ext),
            "debt_count": 0
        }
        self.stats["files_scanned"] += 1
        self.stats["total_lines"] += len(lines)
        # File-size debt.
        if len(lines) > self.config["max_file_size_lines"]:
            self._add_debt_item(
                "large_file",
                f"File is too large: {len(lines)} lines",
                relative_path,
                "medium",
                {"lines": len(lines), "recommended_max": self.config["max_file_size_lines"]}
            )
            file_info["debt_count"] += 1
        # Language-specific analysis: Python goes through AST parsing,
        # everything else through the generic heuristics.
        if file_info["language"] == "python" and file_ext == ".py":
            self._scan_python_file(relative_path, content, lines)
        else:
            self._scan_generic_file(relative_path, content, lines, file_info["language"])
        # Common patterns for all languages.
        self._scan_common_patterns(relative_path, content, lines)
        self.file_stats[relative_path] = file_info

    def _detect_language(self, file_ext: str) -> str:
        """Detect programming language from a (lowercased) file extension."""
        for lang, extensions in self.config["file_extensions"].items():
            if file_ext in extensions:
                return lang
        return "unknown"

    def _scan_python_file(self, file_path: str, content: str, lines: List[str]):
        """Scan a Python file using AST parsing; a syntax error becomes debt."""
        try:
            tree = ast.parse(content)
            analyzer = PythonASTAnalyzer(self.config)
            debt_items = analyzer.analyze(tree, file_path, lines)
            self.debt_items.extend(debt_items)
            self.stats["python_files"] += 1
        except SyntaxError as e:
            self._add_debt_item(
                "syntax_error",
                f"Python syntax error: {e}",
                file_path,
                "high",
                {"line": e.lineno, "error": str(e)}
            )

    def _scan_generic_file(self, file_path: str, content: str, lines: List[str], language: str):
        """Scan non-Python files with line-based heuristics.

        Flags over-long lines for every language and, for brace-delimited
        languages, approximates nesting depth by tracking a running count of
        unmatched '{' characters.  (The previous implementation re-scanned
        the whole file prefix with ``content.find`` for every line — O(n^2)
        and unreliable — and computed an unused indent level for a Python
        branch that was unreachable, since .py files go to the AST path.)
        """
        # Long lines (any language).
        for i, line in enumerate(lines):
            if len(line) > 120:
                self._add_debt_item(
                    "long_line",
                    f"Line too long: {len(line)} characters",
                    file_path,
                    "low",
                    {"line_number": i + 1, "length": len(line)}
                )
        # Approximate nesting for brace-based languages.  Braces inside
        # strings/comments are counted too — this is a cheap heuristic,
        # not a parser.
        if language in ["javascript", "java", "csharp", "cpp"]:
            max_depth = self.config["max_nesting_depth"]
            depth = 0
            for i, line in enumerate(lines):
                depth += line.count('{') - line.count('}')
                if depth > max_depth:
                    self._add_debt_item(
                        "deep_nesting",
                        f"Deep nesting detected: {depth} levels",
                        file_path,
                        "medium",
                        {"line_number": i + 1, "nesting_level": depth}
                    )

    def _scan_common_patterns(self, file_path: str, content: str, lines: List[str]):
        """Scan for language-agnostic patterns (TODO comments, code smells)."""
        # TODO/FIXME-style comments.  Only the "todo" comment pattern
        # produces a debt item; the other configured comment patterns are
        # currently informational and unused, so we only run the one regex
        # instead of all of them per line.
        todo_regex = self.comment_regexes.get("todo")
        if todo_regex is not None:
            for i, line in enumerate(lines):
                match = todo_regex.search(line)
                if match:
                    self._add_debt_item(
                        "todo_comment",
                        f"TODO/FIXME comment: {match.group(0)}",
                        file_path,
                        "low",
                        {"line_number": i + 1, "comment": match.group(0).strip()}
                    )
        # Code smells detected over the whole file content.
        for smell_name, pattern in self.smell_patterns.items():
            for match in pattern.finditer(content):
                line_num = content[:match.start()].count('\n') + 1
                self._add_debt_item(
                    smell_name,
                    f"Code smell detected: {smell_name}",
                    file_path,
                    "medium",
                    {"line_number": line_num, "pattern": match.group(0)[:100]}
                )

    def _detect_duplicates(self, directory: str):
        """Detect duplicate code blocks across the scanned files.

        Simple exact-match detection: every sliding window of
        ``min_duplicate_lines`` lines is hashed, and windows that appear in
        more than one place are reported.  ``hash()`` is salted per process
        but stable within a single run, which is all this needs.
        """
        line_hashes = defaultdict(list)
        for file_path, file_info in self.file_stats.items():
            try:
                full_path = os.path.join(directory, file_path)
                with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
                    lines = f.readlines()
                for i in range(len(lines) - self.config["min_duplicate_lines"] + 1):
                    block = ''.join(lines[i:i + self.config["min_duplicate_lines"]])
                    block_hash = hash(block.strip())
                    if len(block.strip()) > 50:  # Only consider substantial blocks
                        line_hashes[block_hash].append((file_path, i + 1, block))
            except Exception:
                # Best-effort: unreadable files are simply skipped here.
                continue
        # Report each occurrence of every duplicated block.
        for block_hash, occurrences in line_hashes.items():
            if len(occurrences) > 1:
                for file_path, line_num, block in occurrences:
                    self._add_debt_item(
                        "duplicate_code",
                        f"Duplicate code block found in {len(occurrences)} files",
                        file_path,
                        "medium",
                        {
                            "line_number": line_num,
                            "duplicate_count": len(occurrences),
                            "other_files": [f[0] for f in occurrences if f[0] != file_path]
                        }
                    )

    def _calculate_priorities(self):
        """Assign a priority score and category to every debt item."""
        severity_weights = self.config["severity_weights"]
        for item in self.debt_items:
            base_score = severity_weights.get(item["severity"], 1)
            # Weight the base severity by debt type.
            type_multipliers = {
                "syntax_error": 2.0,
                "security_risk": 1.8,
                "large_function": 1.5,
                "high_complexity": 1.4,
                "duplicate_code": 1.3,
                "todo_comment": 0.5
            }
            multiplier = type_multipliers.get(item["type"], 1.0)
            item["priority_score"] = int(base_score * multiplier)
            # Bucket the score into a priority category.
            if item["priority_score"] >= 15:
                item["priority"] = "critical"
            elif item["priority_score"] >= 10:
                item["priority"] = "high"
            elif item["priority_score"] >= 5:
                item["priority"] = "medium"
            else:
                item["priority"] = "low"

    def _add_debt_item(self, debt_type: str, description: str, file_path: str,
                       severity: str, metadata: Dict[str, Any]):
        """Append a debt item to the inventory and update counters."""
        item = {
            "id": f"DEBT-{len(self.debt_items) + 1:04d}",
            "type": debt_type,
            "description": description,
            "file_path": file_path,
            "severity": severity,
            "metadata": metadata,
            "detected_date": datetime.now().isoformat(),
            "status": "identified"
        }
        self.debt_items.append(item)
        self.stats[f"debt_{debt_type}"] += 1
        self.stats["total_debt_items"] += 1
        if file_path in self.file_stats:
            self.file_stats[file_path]["debt_count"] += 1

    def _generate_report(self, directory: str) -> Dict[str, Any]:
        """Build the final report dict (metadata, summary, items, stats)."""
        # Sort debt items so the highest-priority items come first.
        self.debt_items.sort(key=lambda x: x.get("priority_score", 0), reverse=True)
        # Summary statistics.
        priority_counts = Counter(item["priority"] for item in self.debt_items)
        type_counts = Counter(item["type"] for item in self.debt_items)
        # Health score (0-100, higher is better), derived from debt density.
        total_files = self.stats.get("files_scanned", 1)
        debt_density = len(self.debt_items) / total_files
        health_score = max(0, 100 - (debt_density * 10))
        report = {
            "scan_metadata": {
                "directory": directory,
                "scan_date": datetime.now().isoformat(),
                "scanner_version": "1.0.0",
                "config": self.config
            },
            "summary": {
                "total_files_scanned": self.stats.get("files_scanned", 0),
                "total_lines_scanned": self.stats.get("total_lines", 0),
                "total_debt_items": len(self.debt_items),
                "health_score": round(health_score, 1),
                "debt_density": round(debt_density, 2),
                "priority_breakdown": dict(priority_counts),
                "type_breakdown": dict(type_counts)
            },
            "debt_items": self.debt_items,
            "file_statistics": self.file_stats,
            "recommendations": self._generate_recommendations()
        }
        return report

    def _generate_recommendations(self) -> List[str]:
        """Generate actionable recommendations based on the findings."""
        recommendations = []
        # Priority-based recommendations.
        high_priority_count = len([item for item in self.debt_items
                                   if item.get("priority") in ["critical", "high"]])
        if high_priority_count > 10:
            recommendations.append(
                f"Address {high_priority_count} high-priority debt items immediately - "
                "they pose significant risk to code quality and maintainability."
            )
        # Type-specific recommendations.
        type_counts = Counter(item["type"] for item in self.debt_items)
        if type_counts.get("large_function", 0) > 5:
            recommendations.append(
                "Consider refactoring large functions into smaller, more focused units. "
                "This will improve readability and testability."
            )
        if type_counts.get("duplicate_code", 0) > 3:
            recommendations.append(
                "Extract duplicate code into reusable functions or modules. "
                "This reduces maintenance burden and potential for inconsistent changes."
            )
        if type_counts.get("todo_comment", 0) > 20:
            recommendations.append(
                "Review and address TODO/FIXME comments. Consider creating proper "
                "tickets for substantial work items."
            )
        # General recommendations.
        total_files = self.stats.get("files_scanned", 1)
        if len(self.debt_items) / total_files > 2:
            recommendations.append(
                "High debt density detected. Consider establishing coding standards "
                "and regular code review processes to prevent debt accumulation."
            )
        if not recommendations:
            recommendations.append("Code quality looks good! Continue current practices.")
        return recommendations
class PythonASTAnalyzer(ast.NodeVisitor):
    """AST visitor that detects Python-specific debt (length, complexity,
    missing docstrings, parameter counts, oversized classes).

    Fix over the previous version: async functions (``ast.AsyncFunctionDef``)
    are now analyzed and counted as class methods; previously they were
    silently skipped.  Parameter counting now includes positional-only and
    keyword-only parameters as well.
    """

    def __init__(self, config: Dict[str, Any]):
        """Store the scanner config (thresholds) and reset per-file state."""
        self.config = config
        self.debt_items: List[Dict[str, Any]] = []
        self.current_file = ""
        self.lines: List[str] = []
        self.function_stack: List[str] = []

    def analyze(self, tree: ast.AST, file_path: str, lines: List[str]) -> List[Dict[str, Any]]:
        """Walk ``tree`` and return the list of debt items found."""
        self.debt_items = []
        self.current_file = file_path
        self.lines = lines
        self.function_stack = []
        self.visit(tree)
        return self.debt_items

    def visit_FunctionDef(self, node: ast.FunctionDef):
        """Analyze a (sync) function definition."""
        self._check_function(node)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
        """Analyze an async function definition with the same checks."""
        self._check_function(node)

    def _check_function(self, node):
        """Shared checks for sync and async function definitions."""
        self.function_stack.append(node.name)
        # Function length in source lines (end_lineno is available on 3.8+).
        func_length = node.end_lineno - node.lineno + 1
        if func_length > self.config["max_function_length"]:
            self._add_debt(
                "large_function",
                f"Function '{node.name}' is too long: {func_length} lines",
                node.lineno,
                "medium",
                {"function_name": node.name, "length": func_length}
            )
        # Missing docstring.
        if not ast.get_docstring(node):
            self._add_debt(
                "missing_docstring",
                f"Function '{node.name}' missing docstring",
                node.lineno,
                "low",
                {"function_name": node.name}
            )
        # Cyclomatic complexity.
        complexity = self._calculate_complexity(node)
        if complexity > self.config["max_complexity"]:
            self._add_debt(
                "high_complexity",
                f"Function '{node.name}' has high complexity: {complexity}",
                node.lineno,
                "high",
                {"function_name": node.name, "complexity": complexity}
            )
        # Parameter count, including positional-only and keyword-only args
        # (the previous version counted only node.args.args).
        param_count = (len(node.args.args)
                       + len(node.args.posonlyargs)
                       + len(node.args.kwonlyargs))
        if param_count > 5:
            self._add_debt(
                "too_many_parameters",
                f"Function '{node.name}' has too many parameters: {param_count}",
                node.lineno,
                "medium",
                {"function_name": node.name, "parameter_count": param_count}
            )
        self.generic_visit(node)
        self.function_stack.pop()

    def visit_ClassDef(self, node: ast.ClassDef):
        """Analyze a class definition (docstring, method count)."""
        if not ast.get_docstring(node):
            self._add_debt(
                "missing_docstring",
                f"Class '{node.name}' missing docstring",
                node.lineno,
                "low",
                {"class_name": node.name}
            )
        # Count both sync and async methods defined directly in the class body.
        methods = [n for n in node.body
                   if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))]
        if len(methods) > 20:
            self._add_debt(
                "large_class",
                f"Class '{node.name}' has too many methods: {len(methods)}",
                node.lineno,
                "medium",
                {"class_name": node.name, "method_count": len(methods)}
            )
        self.generic_visit(node)

    def _calculate_complexity(self, node) -> int:
        """Return the cyclomatic complexity of a function node.

        Counts branch points: if/while/for/async-for, exception handlers,
        and each extra operand of a boolean expression.
        """
        complexity = 1  # Base complexity: the single entry path.
        for child in ast.walk(node):
            if isinstance(child, (ast.If, ast.While, ast.For, ast.AsyncFor)):
                complexity += 1
            elif isinstance(child, ast.ExceptHandler):
                complexity += 1
            elif isinstance(child, ast.BoolOp):
                complexity += len(child.values) - 1
        return complexity

    def _add_debt(self, debt_type: str, description: str, line_number: int,
                  severity: str, metadata: Dict[str, Any]):
        """Append a debt item for the current file."""
        item = {
            "id": f"DEBT-{len(self.debt_items) + 1:04d}",
            "type": debt_type,
            "description": description,
            "file_path": self.current_file,
            "line_number": line_number,
            "severity": severity,
            "metadata": metadata,
            "detected_date": datetime.now().isoformat(),
            "status": "identified"
        }
        self.debt_items.append(item)
def format_human_readable_report(report: Dict[str, Any]) -> str:
    """Render a scan report as plain text for terminal display.

    Sections: header with scan metadata, summary statistics, priority
    breakdown, the ten highest-priority debt items, and recommendations.
    """
    out: List[str] = []
    emit = out.append

    # Header block.
    meta = report["scan_metadata"]
    emit("=" * 60)
    emit("TECHNICAL DEBT SCAN REPORT")
    emit("=" * 60)
    emit(f"Directory: {meta['directory']}")
    emit(f"Scan Date: {meta['scan_date']}")
    emit(f"Scanner Version: {meta['scanner_version']}")
    emit("")

    # Summary statistics.
    summary = report["summary"]
    emit("SUMMARY")
    emit("-" * 30)
    emit(f"Files Scanned: {summary['total_files_scanned']}")
    emit(f"Lines Scanned: {summary['total_lines_scanned']:,}")
    emit(f"Total Debt Items: {summary['total_debt_items']}")
    emit(f"Health Score: {summary['health_score']}/100")
    emit(f"Debt Density: {summary['debt_density']} items/file")
    emit("")

    # Count of items per priority bucket.
    emit("PRIORITY BREAKDOWN")
    emit("-" * 30)
    for priority, count in summary["priority_breakdown"].items():
        emit(f"{priority.capitalize()}: {count}")
    emit("")

    # Ten highest-priority items (report items are pre-sorted by score).
    emit("TOP DEBT ITEMS")
    emit("-" * 30)
    for rank, item in enumerate(report["debt_items"][:10], 1):
        emit(f"{rank}. [{item['priority'].upper()}] {item['description']}")
        emit(f"   File: {item['file_path']}")
        if 'line_number' in item:
            emit(f"   Line: {item['line_number']}")
        emit("")

    # Actionable recommendations.
    emit("RECOMMENDATIONS")
    emit("-" * 30)
    for rank, rec in enumerate(report["recommendations"], 1):
        emit(f"{rank}. {rec}")
    emit("")

    return "\n".join(out)
def main():
    """CLI entry point: parse arguments, run a scan, and emit the report(s).

    Exits with status 1 if the config file cannot be loaded or the scan
    itself fails.
    """
    parser = argparse.ArgumentParser(description="Scan codebase for technical debt")
    parser.add_argument("directory", help="Directory to scan")
    parser.add_argument("--config", help="Configuration file (JSON)")
    parser.add_argument("--output", help="Output file path")
    parser.add_argument("--format", choices=["json", "text", "both"],
                        default="both", help="Output format")
    args = parser.parse_args()

    # Optional JSON configuration overrides.
    config = None
    if args.config:
        try:
            with open(args.config, 'r') as f:
                config = json.load(f)
        except Exception as e:
            print(f"Error loading config: {e}")
            sys.exit(1)

    # Run the scan.
    scanner = DebtScanner(config)
    try:
        report = scanner.scan_directory(args.directory)
    except Exception as e:
        print(f"Scan failed: {e}")
        sys.exit(1)

    def _emit(payload: str, ext: str, label: str, banner: str):
        """Write payload to the --output path (suffixed with ext if needed),
        or print it to stdout under a banner."""
        if args.output:
            path = args.output if args.output.endswith(ext) else f"{args.output}{ext}"
            with open(path, 'w') as f:
                f.write(payload)
            print(f"{label} report written to: {path}")
        else:
            print(banner)
            print("=" * 50)
            print(payload)

    # Emit the requested format(s).  default=str keeps non-JSON values
    # (e.g. compiled patterns in the embedded config) serializable.
    if args.format in ["json", "both"]:
        _emit(json.dumps(report, indent=2, default=str), ".json", "JSON", "\nJSON REPORT:")
    if args.format in ["text", "both"]:
        _emit(format_human_readable_report(report), ".txt", "Text", "\nTEXT REPORT:")


if __name__ == "__main__":
    main()