Files
CleanArchitecture-template/.brain/.agent/skills/engineering-advanced-skills/dependency-auditor/scripts/dep_scanner.py
2026-03-12 15:17:52 +07:00

794 lines
30 KiB
Python

#!/usr/bin/env python3
"""
Dependency Scanner - Multi-language dependency vulnerability and analysis tool.
This script parses dependency files from various package managers, extracts direct
and transitive dependencies, checks against built-in vulnerability databases,
and provides comprehensive security analysis with actionable recommendations.
Author: Claude Skills Engineering Team
License: MIT
"""
import argparse
import hashlib
import json
import os
import re
import subprocess
import sys
from dataclasses import asdict, dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
@dataclass
class Vulnerability:
    """Represents a security vulnerability affecting a range of package versions."""
    id: str  # advisory identifier, e.g. 'CVE-2021-23337' or 'RUSTSEC-2022-0061'
    summary: str  # one-line human-readable description
    severity: str  # 'HIGH' / 'MEDIUM' / 'LOW' — drives the report counters
    cvss_score: float  # CVSS base score, 0.0-10.0
    affected_versions: str  # range pattern, e.g. '<4.17.21' or '>=1.0.0 <1.6.0'
    fixed_version: Optional[str]  # first version containing the fix, if any
    published_date: str  # ISO date the advisory was published
    references: List[str]  # advisory / NVD URLs
@dataclass
class Dependency:
    """Represents a project dependency parsed from a manifest or lock file."""
    name: str  # package name as written in the manifest
    version: str  # declared or locked version (specifier prefixes stripped by parsers)
    ecosystem: str  # 'npm', 'pypi', 'go', 'cargo' or 'rubygems'
    direct: bool  # True when the project declares it directly; False for transitive
    license: Optional[str] = None
    description: Optional[str] = None
    homepage: Optional[str] = None
    # default_factory gives every instance its own list; the previous
    # `= None` default relied entirely on __post_init__ and was mistyped
    # (a List field whose default was None).
    vulnerabilities: List["Vulnerability"] = field(default_factory=list)

    def __post_init__(self):
        # Kept for backward compatibility: callers may still pass
        # vulnerabilities=None explicitly and expect an empty list.
        if self.vulnerabilities is None:
            self.vulnerabilities = []
class DependencyScanner:
    """Scans project trees for dependency manifests and known vulnerabilities.

    Holds a built-in (offline) advisory database and a registry mapping
    supported manifest/lock filenames to their parser methods; no network
    access is performed.
    """

    def __init__(self):
        # Static built-in advisory data, keyed by package name.
        self.known_vulnerabilities = self._load_vulnerability_database()
        # Filename -> bound parser method; scan_project() rglobs for each key.
        self.supported_files = {
            'package.json': self._parse_package_json,
            'package-lock.json': self._parse_package_lock,
            'yarn.lock': self._parse_yarn_lock,
            'requirements.txt': self._parse_requirements_txt,
            'pyproject.toml': self._parse_pyproject_toml,
            'Pipfile.lock': self._parse_pipfile_lock,
            'poetry.lock': self._parse_poetry_lock,
            'go.mod': self._parse_go_mod,
            'go.sum': self._parse_go_sum,
            'Cargo.toml': self._parse_cargo_toml,
            'Cargo.lock': self._parse_cargo_lock,
            'Gemfile': self._parse_gemfile,
            'Gemfile.lock': self._parse_gemfile_lock,
        }
def _load_vulnerability_database(self) -> Dict[str, List[Vulnerability]]:
    """Load built-in vulnerability database with common CVE patterns.

    Returns a mapping of package name -> list of known advisories.
    The data is embedded (no network / feed lookup), so coverage is
    limited to this hard-coded sample set.
    """
    return {
        # JavaScript/Node.js vulnerabilities
        'lodash': [
            Vulnerability(
                id='CVE-2021-23337',
                summary='Prototype pollution in lodash',
                severity='HIGH',
                cvss_score=7.2,
                affected_versions='<4.17.21',
                fixed_version='4.17.21',
                published_date='2021-02-15',
                references=['https://nvd.nist.gov/vuln/detail/CVE-2021-23337']
            )
        ],
        'axios': [
            Vulnerability(
                id='CVE-2023-45857',
                summary='Cross-site request forgery in axios',
                severity='MEDIUM',
                cvss_score=6.1,
                affected_versions='>=1.0.0 <1.6.0',
                fixed_version='1.6.0',
                published_date='2023-10-11',
                references=['https://nvd.nist.gov/vuln/detail/CVE-2023-45857']
            )
        ],
        'express': [
            Vulnerability(
                id='CVE-2022-24999',
                summary='Open redirect in express',
                severity='MEDIUM',
                cvss_score=6.1,
                affected_versions='<4.18.2',
                fixed_version='4.18.2',
                published_date='2022-11-26',
                references=['https://nvd.nist.gov/vuln/detail/CVE-2022-24999']
            )
        ],
        # Python vulnerabilities
        'django': [
            Vulnerability(
                id='CVE-2024-27351',
                summary='SQL injection in Django',
                severity='HIGH',
                cvss_score=9.8,
                affected_versions='>=3.2 <4.2.11',
                fixed_version='4.2.11',
                published_date='2024-02-06',
                references=['https://nvd.nist.gov/vuln/detail/CVE-2024-27351']
            )
        ],
        'requests': [
            Vulnerability(
                id='CVE-2023-32681',
                summary='Proxy-authorization header leak in requests',
                severity='MEDIUM',
                cvss_score=6.1,
                affected_versions='>=2.3.0 <2.31.0',
                fixed_version='2.31.0',
                published_date='2023-05-26',
                references=['https://nvd.nist.gov/vuln/detail/CVE-2023-32681']
            )
        ],
        'pillow': [
            Vulnerability(
                id='CVE-2023-50447',
                summary='Arbitrary code execution in Pillow',
                severity='HIGH',
                cvss_score=8.8,
                affected_versions='<10.2.0',
                fixed_version='10.2.0',
                published_date='2024-01-02',
                references=['https://nvd.nist.gov/vuln/detail/CVE-2023-50447']
            )
        ],
        # Go vulnerabilities
        'github.com/gin-gonic/gin': [
            Vulnerability(
                id='CVE-2023-26125',
                summary='Path traversal in gin',
                severity='HIGH',
                cvss_score=7.5,
                affected_versions='<1.9.1',
                fixed_version='1.9.1',
                published_date='2023-02-28',
                references=['https://nvd.nist.gov/vuln/detail/CVE-2023-26125']
            )
        ],
        # Rust vulnerabilities
        'serde': [
            Vulnerability(
                id='RUSTSEC-2022-0061',
                summary='Deserialization vulnerability in serde',
                severity='HIGH',
                cvss_score=8.2,
                affected_versions='<1.0.152',
                fixed_version='1.0.152',
                published_date='2022-12-07',
                references=['https://rustsec.org/advisories/RUSTSEC-2022-0061']
            )
        ],
        # Ruby vulnerabilities
        'rails': [
            Vulnerability(
                id='CVE-2023-28362',
                summary='ReDoS vulnerability in Rails',
                severity='HIGH',
                cvss_score=7.5,
                affected_versions='>=7.0.0 <7.0.4.3',
                fixed_version='7.0.4.3',
                published_date='2023-03-13',
                references=['https://nvd.nist.gov/vuln/detail/CVE-2023-28362']
            )
        ]
    }
def scan_project(self, project_path: str) -> Dict[str, Any]:
    """Scan a project directory for dependencies and vulnerabilities.

    Walks the tree under *project_path* for every supported manifest/lock
    file, parses each, annotates every dependency with matching known
    vulnerabilities, and aggregates severity counters.

    Args:
        project_path: Root directory of the project to scan.

    Returns:
        Dict with keys: timestamp, project_path, dependencies (Dependency
        objects), vulnerabilities_found, high/medium/low_severity_count,
        ecosystems (list of str), scan_summary, recommendations.

    Raises:
        FileNotFoundError: If *project_path* does not exist.
    """
    project_path = Path(project_path)
    if not project_path.exists():
        raise FileNotFoundError(f"Project path does not exist: {project_path}")
    scan_results = {
        'timestamp': datetime.now().isoformat(),
        'project_path': str(project_path),
        'dependencies': [],
        'vulnerabilities_found': 0,
        'high_severity_count': 0,
        'medium_severity_count': 0,
        'low_severity_count': 0,
        'ecosystems': set(),  # converted to a list before returning
        'scan_summary': {},
        'recommendations': []
    }
    # Find and parse dependency files
    for file_pattern, parser in self.supported_files.items():
        matching_files = list(project_path.rglob(file_pattern))
        for dep_file in matching_files:
            try:
                dependencies = parser(dep_file)
                scan_results['dependencies'].extend(dependencies)
                for dep in dependencies:
                    scan_results['ecosystems'].add(dep.ecosystem)
                    # Check for vulnerabilities
                    vulnerabilities = self._check_vulnerabilities(dep)
                    dep.vulnerabilities = vulnerabilities
                    scan_results['vulnerabilities_found'] += len(vulnerabilities)
                    # Severity counters: anything not HIGH/MEDIUM counts as low.
                    for vuln in vulnerabilities:
                        if vuln.severity == 'HIGH':
                            scan_results['high_severity_count'] += 1
                        elif vuln.severity == 'MEDIUM':
                            scan_results['medium_severity_count'] += 1
                        else:
                            scan_results['low_severity_count'] += 1
            except Exception as e:
                # Best-effort: a malformed manifest must not abort the scan.
                print(f"Error parsing {dep_file}: {e}")
                continue
    scan_results['ecosystems'] = list(scan_results['ecosystems'])
    scan_results['scan_summary'] = self._generate_scan_summary(scan_results)
    scan_results['recommendations'] = self._generate_recommendations(scan_results)
    return scan_results
def _check_vulnerabilities(self, dependency: Dependency) -> List[Vulnerability]:
"""Check if a dependency has known vulnerabilities."""
vulnerabilities = []
# Check package name (exact match and common variations)
package_names = [dependency.name, dependency.name.lower()]
for pkg_name in package_names:
if pkg_name in self.known_vulnerabilities:
for vuln in self.known_vulnerabilities[pkg_name]:
if self._version_matches_vulnerability(dependency.version, vuln.affected_versions):
vulnerabilities.append(vuln)
return vulnerabilities
def _version_matches_vulnerability(self, version: str, affected_pattern: str) -> bool:
"""Check if a version matches a vulnerability pattern."""
# Simple version matching - in production, use proper semver library
try:
# Handle common patterns like "<4.17.21", ">=1.0.0 <1.6.0"
if '<' in affected_pattern and '>' not in affected_pattern:
# Pattern like "<4.17.21"
max_version = affected_pattern.replace('<', '').strip()
return self._compare_versions(version, max_version) < 0
elif '>=' in affected_pattern and '<' in affected_pattern:
# Pattern like ">=1.0.0 <1.6.0"
parts = affected_pattern.split('<')
min_part = parts[0].replace('>=', '').strip()
max_part = parts[1].strip()
return (self._compare_versions(version, min_part) >= 0 and
self._compare_versions(version, max_part) < 0)
except:
pass
return False
def _compare_versions(self, v1: str, v2: str) -> int:
"""Simple version comparison. Returns -1, 0, or 1."""
try:
def normalize(v):
return [int(x) for x in re.sub(r'(\.0+)*$','', v).split('.')]
v1_parts = normalize(v1)
v2_parts = normalize(v2)
if v1_parts < v2_parts:
return -1
elif v1_parts > v2_parts:
return 1
else:
return 0
except:
return 0
# Package file parsers
def _parse_package_json(self, file_path: Path) -> List[Dependency]:
    """Extract the direct npm dependencies declared in a package.json file."""
    found = []
    try:
        with open(file_path, 'r') as handle:
            manifest = json.load(handle)
        # Both runtime and development dependencies count as direct.
        for section in ('dependencies', 'devDependencies'):
            for pkg_name, raw_version in manifest.get(section, {}).items():
                # Strip the common range prefixes to approximate a version.
                cleaned = (raw_version.replace('^', '')
                                      .replace('~', '')
                                      .replace('>=', '')
                                      .replace('<=', ''))
                found.append(Dependency(
                    name=pkg_name,
                    version=cleaned,
                    ecosystem='npm',
                    direct=True
                ))
    except Exception as e:
        print(f"Error parsing package.json: {e}")
    return found
def _parse_package_lock(self, file_path: Path) -> List[Dependency]:
    """Parse package-lock.json ('packages' map, npm v7+) for transitive deps.

    Fixes scoped-package handling: a key such as
    'node_modules/@babel/core' must yield the name '@babel/core', not
    'core' (which the previous last-path-segment split produced).
    """
    dependencies = []
    try:
        with open(file_path, 'r') as f:
            data = json.load(f)
        if 'packages' in data:
            marker = 'node_modules/'
            for path, pkg_info in data['packages'].items():
                if path == '':  # the root project entry, not a dependency
                    continue
                # Take everything after the LAST 'node_modules/' so scoped
                # packages and nested installs keep their full names.
                idx = path.rfind(marker)
                name = path[idx + len(marker):] if idx != -1 else path
                dep = Dependency(
                    name=name,
                    version=pkg_info.get('version', ''),
                    ecosystem='npm',
                    direct=False,
                    description=pkg_info.get('description', '')
                )
                dependencies.append(dep)
    except Exception as e:
        print(f"Error parsing package-lock.json: {e}")
    return dependencies
def _parse_yarn_lock(self, file_path: Path) -> List[Dependency]:
    """Parse yarn.lock for resolved npm dependencies.

    Fixes scoped-package names: for a spec like '"@babel/core@^7.0.0"'
    splitting on the FIRST '@' produced an empty name; the name/range
    separator is the LAST '@' of the spec.
    """
    dependencies = []
    try:
        with open(file_path, 'r') as f:
            content = f.read()
        # Each entry header is '<spec>[, <spec>...]:' followed by an
        # indented block that contains 'version "x.y.z"'.
        packages = re.findall(r'^([^#\s][^:]+):\s*\n(?:\s+.*\n)*?\s+version\s+"([^"]+)"', content, re.MULTILINE)
        for package_spec, version in packages:
            # Use the first spec of a comma-separated header; drop quotes.
            spec = package_spec.split(',')[0].strip().strip('"')
            # Split on the last '@', ignoring a leading scope '@'.
            name = spec.rsplit('@', 1)[0] if '@' in spec[1:] else spec
            dep = Dependency(
                name=name,
                version=version,
                ecosystem='npm',
                direct=False
            )
            dependencies.append(dep)
    except Exception as e:
        print(f"Error parsing yarn.lock: {e}")
    return dependencies
def _parse_requirements_txt(self, file_path: Path) -> List[Dependency]:
    """Parse requirements.txt for versioned Python dependencies.

    Improvements over the original: inline '#' comments are stripped,
    dotted names (e.g. 'zope.interface') are accepted, extras
    ('pkg[security]') are tolerated, and '~=' specifiers are recognized.
    Unversioned requirement lines are still skipped, as before.
    """
    dependencies = []
    try:
        with open(file_path, 'r') as f:
            lines = f.readlines()
        for line in lines:
            # Drop inline comments before matching; a full-line comment
            # becomes empty and is skipped below.
            line = line.split('#', 1)[0].strip()
            if line and not line.startswith('-'):
                # package[extras]<op>version, e.g. requests>=2.31.0
                match = re.match(r'^([A-Za-z0-9][A-Za-z0-9._-]*)(?:\[[^\]]*\])?\s*([><=!~]+)\s*(.+)$', line)
                if match:
                    name, operator, version = match.groups()
                    dep = Dependency(
                        name=name,
                        version=version.strip(),
                        ecosystem='pypi',
                        direct=True
                    )
                    dependencies.append(dep)
    except Exception as e:
        print(f"Error parsing requirements.txt: {e}")
    return dependencies
def _parse_pyproject_toml(self, file_path: Path) -> List[Dependency]:
    """Parse pyproject.toml for Python dependencies.

    NOTE(review): only the [tool.poetry.dependencies] table is recognized;
    PEP 621 [project].dependencies entries are not picked up — confirm
    whether that is intended.
    """
    dependencies = []
    try:
        with open(file_path, 'r') as f:
            content = f.read()
        # Simple TOML parsing for dependencies
        dep_section = re.search(r'\[tool\.poetry\.dependencies\](.*?)(?=\[|\Z)', content, re.DOTALL)
        if dep_section:
            for line in dep_section.group(1).split('\n'):
                # name = "version" pairs only (inline tables are skipped)
                match = re.match(r'^([a-zA-Z0-9_-]+)\s*=\s*["\']([^"\']+)["\']', line.strip())
                if match:
                    name, version = match.groups()
                    # the interpreter constraint is not a package
                    if name != 'python':
                        dep = Dependency(
                            name=name,
                            version=version.replace('^', '').replace('~', ''),
                            ecosystem='pypi',
                            direct=True
                        )
                        dependencies.append(dep)
    except Exception as e:
        print(f"Error parsing pyproject.toml: {e}")
    return dependencies
def _parse_pipfile_lock(self, file_path: Path) -> List[Dependency]:
    """Extract pinned Python dependencies from a Pipfile.lock."""
    collected = []
    try:
        with open(file_path, 'r') as handle:
            lockfile = json.load(handle)
        # 'default' holds runtime deps (direct); 'develop' holds dev deps.
        for section_name in ('default', 'develop'):
            for pkg_name, pkg_meta in lockfile.get(section_name, {}).items():
                collected.append(Dependency(
                    name=pkg_name,
                    version=pkg_meta.get('version', '').replace('==', ''),
                    ecosystem='pypi',
                    direct=section_name == 'default'
                ))
    except Exception as e:
        print(f"Error parsing Pipfile.lock: {e}")
    return collected
def _parse_poetry_lock(self, file_path: Path) -> List[Dependency]:
    """Collect locked Python packages recorded in a poetry.lock file."""
    locked = []
    entry_re = re.compile(r'\[\[package\]\]\nname\s*=\s*"([^"]+)"\nversion\s*=\s*"([^"]+)"')
    try:
        with open(file_path, 'r') as handle:
            lock_text = handle.read()
        # Each [[package]] stanza begins with its name and version lines.
        for match in entry_re.finditer(lock_text):
            locked.append(Dependency(
                name=match.group(1),
                version=match.group(2),
                ecosystem='pypi',
                direct=False
            ))
    except Exception as e:
        print(f"Error parsing poetry.lock: {e}")
    return locked
def _parse_go_mod(self, file_path: Path) -> List[Dependency]:
    """Parse go.mod for Go module requirements.

    Handles both the block form `require ( ... )` and single-line
    `require module v1.2.3` directives (the original only parsed the
    block form), and strips trailing `// indirect`-style comments so
    comment-only lines cannot be mis-parsed as modules.
    """
    dependencies = []
    try:
        with open(file_path, 'r') as f:
            content = f.read()
        candidate_lines = []
        # Block form: require ( ... )
        for block in re.findall(r'require\s*\((.*?)\)', content, re.DOTALL):
            candidate_lines.extend(block.split('\n'))
        # Single-line form: require example.com/mod v1.2.3
        candidate_lines.extend(re.findall(r'(?m)^require\s+([^(\s].*)$', content))
        for line in candidate_lines:
            line = line.split('//', 1)[0].strip()  # drop '// indirect' comments
            match = re.match(r'^(\S+)\s+v?(\S+)', line)
            if match:
                name, version = match.groups()
                dep = Dependency(
                    name=name,
                    version=version,
                    ecosystem='go',
                    direct=True
                )
                dependencies.append(dep)
    except Exception as e:
        print(f"Error parsing go.mod: {e}")
    return dependencies
def _parse_go_sum(self, file_path: Path) -> List[Dependency]:
    """Yield nothing: go.sum records checksums only; go.mod is authoritative."""
    return []
def _parse_cargo_toml(self, file_path: Path) -> List[Dependency]:
    """Parse Cargo.toml [dependencies] for direct Rust dependencies.

    Fixes two issues in the original: the section regex stopped at ANY
    '[' character (so a `features = ["..."]` array truncated the
    section), and inline-table specs (`serde = { version = "1.0", ... }`)
    were skipped entirely.
    """
    dependencies = []
    try:
        with open(file_path, 'r') as f:
            content = f.read()
        # Section ends at the next table header at the START of a line.
        dep_section = re.search(r'\[dependencies\](.*?)(?=^\[|\Z)', content, re.DOTALL | re.MULTILINE)
        if dep_section:
            for raw_line in dep_section.group(1).split('\n'):
                line = raw_line.strip()
                # Simple form: name = "1.0"
                match = re.match(r'^([a-zA-Z0-9_-]+)\s*=\s*["\']([^"\']+)["\']', line)
                if not match:
                    # Inline-table form: name = { version = "1.0", ... }
                    match = re.match(r'^([a-zA-Z0-9_-]+)\s*=\s*\{.*?version\s*=\s*["\']([^"\']+)["\']', line)
                if match:
                    name, version = match.groups()
                    dep = Dependency(
                        name=name,
                        version=version,
                        ecosystem='cargo',
                        direct=True
                    )
                    dependencies.append(dep)
    except Exception as e:
        print(f"Error parsing Cargo.toml: {e}")
    return dependencies
def _parse_cargo_lock(self, file_path: Path) -> List[Dependency]:
    """Collect locked Rust crates recorded in Cargo.lock."""
    crates = []
    entry_re = re.compile(r'\[\[package\]\]\nname\s*=\s*"([^"]+)"\nversion\s*=\s*"([^"]+)"')
    try:
        with open(file_path, 'r') as handle:
            lock_text = handle.read()
        # Every [[package]] stanza opens with its name and version lines.
        for match in entry_re.finditer(lock_text):
            crates.append(Dependency(
                name=match.group(1),
                version=match.group(2),
                ecosystem='cargo',
                direct=False
            ))
    except Exception as e:
        print(f"Error parsing Cargo.lock: {e}")
    return crates
def _parse_gemfile(self, file_path: Path) -> List[Dependency]:
    """Collect gems declared directly in a Gemfile."""
    declared = []
    gem_re = re.compile(r'gem\s+["\']([^"\']+)["\'](?:\s*,\s*["\']([^"\']+)["\'])?')
    try:
        with open(file_path, 'r') as handle:
            gemfile_text = handle.read()
        # findall yields 2-tuples; the constraint group is '' when absent.
        for gem_name, constraint in gem_re.findall(gemfile_text):
            declared.append(Dependency(
                name=gem_name,
                version=constraint or '',
                ecosystem='rubygems',
                direct=True
            ))
    except Exception as e:
        print(f"Error parsing Gemfile: {e}")
    return declared
def _parse_gemfile_lock(self, file_path: Path) -> List[Dependency]:
    """Collect resolved gems from the GEM section of a Gemfile.lock."""
    resolved = []
    try:
        with open(file_path, 'r') as handle:
            lock_text = handle.read()
        # The GEM section runs until the next non-indented line.
        section = re.search(r'GEM\s*\n(.*?)(?=\n\S|\Z)', lock_text, re.DOTALL)
        if section:
            # Indented 'name (version)' spec lines.
            for gem_name, gem_version in re.findall(r'\s+([a-zA-Z0-9_-]+)\s+\(([^)]+)\)', section.group(1)):
                resolved.append(Dependency(
                    name=gem_name,
                    version=gem_version,
                    ecosystem='rubygems',
                    direct=False
                ))
    except Exception as e:
        print(f"Error parsing Gemfile.lock: {e}")
    return resolved
def _generate_scan_summary(self, scan_results: Dict[str, Any]) -> Dict[str, Any]:
"""Generate a summary of the scan results."""
total_deps = len(scan_results['dependencies'])
unique_deps = len(set(dep.name for dep in scan_results['dependencies']))
return {
'total_dependencies': total_deps,
'unique_dependencies': unique_deps,
'ecosystems_found': len(scan_results['ecosystems']),
'vulnerable_dependencies': len([dep for dep in scan_results['dependencies'] if dep.vulnerabilities]),
'vulnerability_breakdown': {
'high': scan_results['high_severity_count'],
'medium': scan_results['medium_severity_count'],
'low': scan_results['low_severity_count']
}
}
def _generate_recommendations(self, scan_results: Dict[str, Any]) -> List[str]:
"""Generate actionable recommendations based on scan results."""
recommendations = []
high_count = scan_results['high_severity_count']
medium_count = scan_results['medium_severity_count']
if high_count > 0:
recommendations.append(f"URGENT: Address {high_count} high-severity vulnerabilities immediately")
if medium_count > 0:
recommendations.append(f"Schedule fixes for {medium_count} medium-severity vulnerabilities within 30 days")
vulnerable_deps = [dep for dep in scan_results['dependencies'] if dep.vulnerabilities]
if vulnerable_deps:
for dep in vulnerable_deps[:3]: # Top 3 most critical
for vuln in dep.vulnerabilities:
if vuln.fixed_version:
recommendations.append(f"Update {dep.name} from {dep.version} to {vuln.fixed_version} to fix {vuln.id}")
if len(scan_results['ecosystems']) > 3:
recommendations.append("Consider consolidating package managers to reduce complexity")
return recommendations
def generate_report(self, scan_results: Dict[str, Any], format: str = 'text') -> str:
    """Render scan results as a human-readable report or JSON.

    Args:
        scan_results: Dict produced by scan_project().
        format: 'text' (default) or 'json'.

    Returns:
        The rendered report as a single string.
    """
    if format == 'json':
        # Convert Dependency objects to dicts for JSON serialization
        serializable_results = scan_results.copy()
        serializable_results['dependencies'] = [
            {
                'name': dep.name,
                'version': dep.version,
                'ecosystem': dep.ecosystem,
                'direct': dep.direct,
                'license': dep.license,
                'vulnerabilities': [asdict(vuln) for vuln in dep.vulnerabilities]
            }
            for dep in scan_results['dependencies']
        ]
        # default=str stringifies anything json can't serialize (e.g. Path)
        return json.dumps(serializable_results, indent=2, default=str)
    # Text format report
    report = []
    report.append("=" * 60)
    report.append("DEPENDENCY SECURITY SCAN REPORT")
    report.append("=" * 60)
    report.append(f"Scan Date: {scan_results['timestamp']}")
    report.append(f"Project: {scan_results['project_path']}")
    report.append("")
    # Summary
    summary = scan_results['scan_summary']
    report.append("SUMMARY:")
    report.append(f" Total Dependencies: {summary['total_dependencies']}")
    report.append(f" Unique Dependencies: {summary['unique_dependencies']}")
    report.append(f" Ecosystems: {', '.join(scan_results['ecosystems'])}")
    report.append(f" Vulnerabilities Found: {scan_results['vulnerabilities_found']}")
    report.append(f" High Severity: {summary['vulnerability_breakdown']['high']}")
    report.append(f" Medium Severity: {summary['vulnerability_breakdown']['medium']}")
    report.append(f" Low Severity: {summary['vulnerability_breakdown']['low']}")
    report.append("")
    # Vulnerable dependencies
    vulnerable_deps = [dep for dep in scan_results['dependencies'] if dep.vulnerabilities]
    if vulnerable_deps:
        report.append("VULNERABLE DEPENDENCIES:")
        report.append("-" * 30)
        for dep in vulnerable_deps:
            report.append(f"Package: {dep.name} v{dep.version} ({dep.ecosystem})")
            for vuln in dep.vulnerabilities:
                report.append(f"{vuln.id}: {vuln.summary}")
                report.append(f" Severity: {vuln.severity} (CVSS: {vuln.cvss_score})")
                if vuln.fixed_version:
                    report.append(f" Fixed in: {vuln.fixed_version}")
            report.append("")
    # Recommendations
    if scan_results['recommendations']:
        report.append("RECOMMENDATIONS:")
        report.append("-" * 20)
        for i, rec in enumerate(scan_results['recommendations'], 1):
            report.append(f"{i}. {rec}")
        report.append("")
    report.append("=" * 60)
    return '\n'.join(report)
def main():
    """CLI entry point: parse arguments, run the scan, emit the report.

    Exit codes: 0 on success; 1 on any error, or (with --fail-on-high)
    when high-severity vulnerabilities are present.
    """
    parser = argparse.ArgumentParser(
        description='Scan project dependencies for vulnerabilities and security issues',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python dep_scanner.py /path/to/project
python dep_scanner.py . --format json --output results.json
python dep_scanner.py /app --fail-on-high
"""
    )
    parser.add_argument('project_path',
                        help='Path to the project directory to scan')
    parser.add_argument('--format', choices=['text', 'json'], default='text',
                        help='Output format (default: text)')
    parser.add_argument('--output', '-o',
                        help='Output file path (default: stdout)')
    parser.add_argument('--fail-on-high', action='store_true',
                        help='Exit with error code if high-severity vulnerabilities found')
    # NOTE(review): --quick-scan is accepted but never consulted below —
    # confirm whether it should gate transitive-dependency parsing.
    parser.add_argument('--quick-scan', action='store_true',
                        help='Perform quick scan (skip transitive dependencies)')
    args = parser.parse_args()
    try:
        scanner = DependencyScanner()
        results = scanner.scan_project(args.project_path)
        report = scanner.generate_report(results, args.format)
        if args.output:
            with open(args.output, 'w') as f:
                f.write(report)
            print(f"Report saved to {args.output}")
        else:
            print(report)
        # Exit with error if high-severity vulnerabilities found and --fail-on-high is set
        if args.fail_on_high and results['high_severity_count'] > 0:
            sys.exit(1)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()