add brain

This commit is contained in:
2026-03-12 15:17:52 +07:00
parent fd9f558fa1
commit e7821a7a9d
355 changed files with 93784 additions and 24 deletions

View File

@@ -0,0 +1,794 @@
#!/usr/bin/env python3
"""
Dependency Scanner - Multi-language dependency vulnerability and analysis tool.
This script parses dependency files from various package managers, extracts direct
and transitive dependencies, checks against built-in vulnerability databases,
and provides comprehensive security analysis with actionable recommendations.
Author: Claude Skills Engineering Team
License: MIT
"""
import json
import os
import re
import sys
import argparse
from typing import Dict, List, Set, Any, Optional, Tuple
from pathlib import Path
from dataclasses import dataclass, asdict
from datetime import datetime
import hashlib
import subprocess
@dataclass
class Vulnerability:
    """A single known security advisory for a package.

    The scanner's built-in database maps a package name to a list of these
    records; matching ones are attached to a Dependency during a scan.
    """
    id: str  # advisory identifier, e.g. 'CVE-2021-23337' or 'RUSTSEC-2022-0061'
    summary: str  # one-line description shown in reports
    severity: str  # label; the scanner counts 'HIGH' and 'MEDIUM' explicitly, anything else as low
    cvss_score: float  # numeric score printed next to the severity
    affected_versions: str  # range expression such as '<4.17.21' or '>=1.0.0 <1.6.0'
    fixed_version: Optional[str]  # version recommended as the upgrade target, if any
    published_date: str  # date string; built-in entries use YYYY-MM-DD
    references: List[str]  # advisory URLs
@dataclass
class Dependency:
    """A dependency read from a manifest or lock file.

    ``vulnerabilities`` defaults to ``None`` only so that the dataclass does
    not share one mutable list between instances; ``__post_init__`` replaces
    it with a fresh empty list.
    """
    name: str
    version: str  # version text as extracted by the parser (may be empty)
    ecosystem: str  # set by the parser, e.g. 'npm', 'pypi', 'go', 'cargo', 'rubygems'
    direct: bool  # whether the producing parser considers this a direct dependency
    license: Optional[str] = None
    description: Optional[str] = None
    homepage: Optional[str] = None
    vulnerabilities: Optional[List[Vulnerability]] = None

    def __post_init__(self):
        # Give every instance its own list when the caller did not pass one.
        if self.vulnerabilities is None:
            self.vulnerabilities = []
class DependencyScanner:
"""Main dependency scanner class."""
def __init__(self):
    """Load the advisory table and register one parser per recognised file name."""
    self.known_vulnerabilities = self._load_vulnerability_database()

    # File name -> parser. The order is the order in which scan_project
    # looks for (and parses) each kind of file.
    registry = (
        # Node.js
        ('package.json', self._parse_package_json),
        ('package-lock.json', self._parse_package_lock),
        ('yarn.lock', self._parse_yarn_lock),
        # Python
        ('requirements.txt', self._parse_requirements_txt),
        ('pyproject.toml', self._parse_pyproject_toml),
        ('Pipfile.lock', self._parse_pipfile_lock),
        ('poetry.lock', self._parse_poetry_lock),
        # Go
        ('go.mod', self._parse_go_mod),
        ('go.sum', self._parse_go_sum),
        # Rust
        ('Cargo.toml', self._parse_cargo_toml),
        ('Cargo.lock', self._parse_cargo_lock),
        # Ruby
        ('Gemfile', self._parse_gemfile),
        ('Gemfile.lock', self._parse_gemfile_lock),
    )
    self.supported_files = dict(registry)
def _load_vulnerability_database(self) -> Dict[str, List[Vulnerability]]:
    """Return the built-in advisory table keyed by package name.

    Each key maps to a list of Vulnerability records. A new dict with new
    records is built on every call.
    """
    # Columns: package, id, summary, severity, cvss, affected, fixed, published, reference
    advisory_rows = [
        # JavaScript/Node.js vulnerabilities
        ('lodash', 'CVE-2021-23337', 'Prototype pollution in lodash', 'HIGH', 7.2,
         '<4.17.21', '4.17.21', '2021-02-15',
         'https://nvd.nist.gov/vuln/detail/CVE-2021-23337'),
        ('axios', 'CVE-2023-45857', 'Cross-site request forgery in axios', 'MEDIUM', 6.1,
         '>=1.0.0 <1.6.0', '1.6.0', '2023-10-11',
         'https://nvd.nist.gov/vuln/detail/CVE-2023-45857'),
        ('express', 'CVE-2022-24999', 'Open redirect in express', 'MEDIUM', 6.1,
         '<4.18.2', '4.18.2', '2022-11-26',
         'https://nvd.nist.gov/vuln/detail/CVE-2022-24999'),
        # Python vulnerabilities
        ('django', 'CVE-2024-27351', 'SQL injection in Django', 'HIGH', 9.8,
         '>=3.2 <4.2.11', '4.2.11', '2024-02-06',
         'https://nvd.nist.gov/vuln/detail/CVE-2024-27351'),
        ('requests', 'CVE-2023-32681', 'Proxy-authorization header leak in requests', 'MEDIUM', 6.1,
         '>=2.3.0 <2.31.0', '2.31.0', '2023-05-26',
         'https://nvd.nist.gov/vuln/detail/CVE-2023-32681'),
        ('pillow', 'CVE-2023-50447', 'Arbitrary code execution in Pillow', 'HIGH', 8.8,
         '<10.2.0', '10.2.0', '2024-01-02',
         'https://nvd.nist.gov/vuln/detail/CVE-2023-50447'),
        # Go vulnerabilities
        ('github.com/gin-gonic/gin', 'CVE-2023-26125', 'Path traversal in gin', 'HIGH', 7.5,
         '<1.9.1', '1.9.1', '2023-02-28',
         'https://nvd.nist.gov/vuln/detail/CVE-2023-26125'),
        # Rust vulnerabilities
        ('serde', 'RUSTSEC-2022-0061', 'Deserialization vulnerability in serde', 'HIGH', 8.2,
         '<1.0.152', '1.0.152', '2022-12-07',
         'https://rustsec.org/advisories/RUSTSEC-2022-0061'),
        # Ruby vulnerabilities
        ('rails', 'CVE-2023-28362', 'ReDoS vulnerability in Rails', 'HIGH', 7.5,
         '>=7.0.0 <7.0.4.3', '7.0.4.3', '2023-03-13',
         'https://nvd.nist.gov/vuln/detail/CVE-2023-28362'),
    ]

    database = {}
    for (package, advisory_id, summary, severity, score,
         affected, fixed, published, reference) in advisory_rows:
        record = Vulnerability(
            id=advisory_id,
            summary=summary,
            severity=severity,
            cvss_score=score,
            affected_versions=affected,
            fixed_version=fixed,
            published_date=published,
            references=[reference],
        )
        database.setdefault(package, []).append(record)
    return database
def scan_project(self, project_path: str) -> Dict[str, Any]:
    """Scan a project directory for dependencies and known vulnerabilities.

    Every file whose name is a key of ``self.supported_files`` is searched
    for recursively under *project_path* and handed to its parser. Each
    dependency returned is checked with ``self._check_vulnerabilities`` and
    the findings are tallied by severity.

    Args:
        project_path: Directory to scan.

    Returns:
        A dict with the keys ``timestamp``, ``project_path``,
        ``dependencies``, ``vulnerabilities_found``, the three
        ``*_severity_count`` counters, ``ecosystems`` (sorted list),
        ``scan_summary`` and ``recommendations``.

    Raises:
        FileNotFoundError: If *project_path* does not exist.
    """
    project_path = Path(project_path)
    if not project_path.exists():
        raise FileNotFoundError(f"Project path does not exist: {project_path}")

    # Manifests below these directories belong to installed packages or
    # tooling, not to the project; parsing them would report other
    # packages' dependencies as this project's own.
    skipped_dirs = {'node_modules', '.git', '.venv', 'venv',
                    'site-packages', '__pycache__', '.tox'}

    scan_results = {
        'timestamp': datetime.now().isoformat(),
        'project_path': str(project_path),
        'dependencies': [],
        'vulnerabilities_found': 0,
        'high_severity_count': 0,
        'medium_severity_count': 0,
        'low_severity_count': 0,
        'ecosystems': set(),
        'scan_summary': {},
        'recommendations': []
    }

    # Find and parse dependency files
    for file_pattern, parser in self.supported_files.items():
        for dep_file in sorted(project_path.rglob(file_pattern)):
            parent_dirs = dep_file.relative_to(project_path).parts[:-1]
            if skipped_dirs.intersection(parent_dirs):
                continue
            try:
                dependencies = parser(dep_file)
                scan_results['dependencies'].extend(dependencies)
                for dep in dependencies:
                    scan_results['ecosystems'].add(dep.ecosystem)
                    # Check for vulnerabilities
                    vulnerabilities = self._check_vulnerabilities(dep)
                    dep.vulnerabilities = vulnerabilities
                    scan_results['vulnerabilities_found'] += len(vulnerabilities)
                    for vuln in vulnerabilities:
                        if vuln.severity == 'HIGH':
                            scan_results['high_severity_count'] += 1
                        elif vuln.severity == 'MEDIUM':
                            scan_results['medium_severity_count'] += 1
                        else:
                            scan_results['low_severity_count'] += 1
            except Exception as e:
                # Best effort: one unreadable file must not abort the scan.
                # Diagnostics go to stderr so that a report printed on
                # stdout (e.g. JSON) stays machine-readable.
                print(f"Error parsing {dep_file}: {e}", file=sys.stderr)

    # Sorted so that repeated scans of the same tree give identical output.
    scan_results['ecosystems'] = sorted(scan_results['ecosystems'])
    scan_results['scan_summary'] = self._generate_scan_summary(scan_results)
    scan_results['recommendations'] = self._generate_recommendations(scan_results)
    return scan_results
def _check_vulnerabilities(self, dependency: Dependency) -> List[Vulnerability]:
    """Return the known advisories that apply to *dependency*.

    The advisory table is consulted under the dependency's exact name and
    under its lower-cased name. An advisory is returned when
    ``self._version_matches_vulnerability`` accepts the dependency's version
    for the advisory's affected range.
    """
    # dict.fromkeys keeps order and drops the duplicate that arises when the
    # name is already lower-case; without it every advisory for such a
    # package would be reported (and counted) twice.
    candidate_names = dict.fromkeys((dependency.name, dependency.name.lower()))

    vulnerabilities = []
    for pkg_name in candidate_names:
        for vuln in self.known_vulnerabilities.get(pkg_name, ()):
            if self._version_matches_vulnerability(dependency.version, vuln.affected_versions):
                vulnerabilities.append(vuln)
    return vulnerabilities
def _version_matches_vulnerability(self, version: str, affected_pattern: str) -> bool:
    """Return True when *version* satisfies every constraint in *affected_pattern*.

    The pattern is a sequence of ``<op><version>`` constraints separated by
    whitespace or commas, e.g. ``"<4.17.21"`` or ``">=1.0.0 <1.6.0"``.
    Supported operators are ``<``, ``<=``, ``>``, ``>=``, ``=`` and ``==``.
    Versions are ordered with ``self._compare_versions``.

    An empty version, an empty pattern, or a pattern without any recognised
    constraint never matches.
    """
    if not version or not affected_pattern:
        return False

    # Two-character operators come first in the alternation so that '>='
    # is not read as '>' followed by a version starting with '='.
    constraints = re.findall(r'(<=|>=|==|<|>|=)\s*([^\s,<>=]+)', affected_pattern)
    if not constraints:
        return False

    for operator, bound in constraints:
        order = self._compare_versions(version, bound)
        satisfied = {
            '<': order < 0,
            '<=': order <= 0,
            '>': order > 0,
            '>=': order >= 0,
            '=': order == 0,
            '==': order == 0,
        }[operator]
        if not satisfied:
            return False
    return True
def _compare_versions(self, v1: str, v2: str) -> int:
    """Compare two dotted version strings.

    Only the leading numeric release part is compared: an optional ``v`` or
    ``=`` prefix is skipped and anything after the last numeric component
    (pre-release tags, build metadata, ``.x`` wildcards) is ignored.
    Trailing zero components are insignificant, so ``1.0`` equals ``1.0.0``.

    Returns:
        -1 if *v1* is older than *v2*, 1 if it is newer, 0 if they are equal
        or if either string has no numeric release part.
    """
    def release_parts(raw):
        found = re.match(r'\s*[vV=]?\s*(\d+(?:\.\d+)*)', raw)
        if found is None:
            raise ValueError(f"no numeric version in {raw!r}")
        parts = [int(piece) for piece in found.group(1).split('.')]
        # Drop trailing zeros so differently padded versions compare equal.
        while parts and parts[-1] == 0:
            parts.pop()
        return parts

    try:
        v1_parts = release_parts(v1)
        v2_parts = release_parts(v2)
    except (TypeError, ValueError):
        # Not comparable; callers treat 0 as "no ordering information".
        return 0

    if v1_parts < v2_parts:
        return -1
    if v1_parts > v2_parts:
        return 1
    return 0
# Package file parsers
def _parse_package_json(self, file_path: Path) -> List[Dependency]:
    """Parse package.json for Node.js dependencies.

    Reads the ``dependencies`` and ``devDependencies`` sections. The
    recorded version is the first version literal of the range with its
    leading operator removed (``^1.2.3`` -> ``1.2.3``,
    ``>=1.0.0 <2.0.0`` -> ``1.0.0``). Entries whose value is not a string
    are skipped.

    Returns:
        The dependencies found, all marked ``direct=True``; an empty or
        partial list if the file cannot be read or parsed (the error is
        reported on stderr).
    """
    dependencies = []
    try:
        # package.json is JSON and therefore UTF-8, whatever the locale.
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        for dep_type in ('dependencies', 'devDependencies'):
            section = data.get(dep_type) or {}
            for name, spec in section.items():
                if not isinstance(spec, str):
                    continue
                # Remove the leading range operator, then keep only the
                # first comparator of a compound range.
                trimmed = spec.strip().lstrip('^~<>= ')
                version = trimmed.split()[0] if trimmed else ''
                dependencies.append(Dependency(
                    name=name,
                    version=version,
                    ecosystem='npm',
                    direct=True
                ))
    except Exception as e:
        # stderr keeps diagnostics out of a report printed on stdout.
        print(f"Error parsing package.json: {e}", file=sys.stderr)
    return dependencies
def _parse_package_lock(self, file_path: Path) -> List[Dependency]:
    """Parse package-lock.json for Node.js transitive dependencies.

    Reads the ``packages`` map. The package name is the part of each key
    after the last ``node_modules/`` so that scoped packages keep their
    scope (``node_modules/@babel/core`` -> ``@babel/core``). Keys without
    ``node_modules/`` fall back to their last path segment. The root entry
    (empty key) is skipped.

    Returns:
        The dependencies found, all marked ``direct=False``; an empty or
        partial list if the file cannot be read or parsed (the error is
        reported on stderr).
    """
    marker = 'node_modules/'
    dependencies = []
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        for path, pkg_info in data.get('packages', {}).items():
            if path == '':  # Skip root package
                continue
            if marker in path:
                name = path.rpartition(marker)[2]
            else:
                name = path.split('/')[-1]
            dependencies.append(Dependency(
                name=name,
                version=pkg_info.get('version', ''),
                ecosystem='npm',
                direct=False,
                description=pkg_info.get('description', '')
            ))
    except Exception as e:
        # stderr keeps diagnostics out of a report printed on stdout.
        print(f"Error parsing package-lock.json: {e}", file=sys.stderr)
    return dependencies
def _parse_yarn_lock(self, file_path: Path) -> List[Dependency]:
    """Parse yarn.lock for Node.js dependencies.

    Each entry header may list several comma-separated specifiers
    (``lodash@^4.17.0, lodash@^4.17.4:``); the name is taken from the first
    one. A leading ``@`` is part of a scoped name, so the name ends at the
    first ``@`` *after* the first character
    (``"@babel/core@^7.0.0"`` -> ``@babel/core``).

    Returns:
        The dependencies found, all marked ``direct=False``; an empty or
        partial list if the file cannot be read (the error is reported on
        stderr).
    """
    dependencies = []
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        # Simple yarn.lock parsing: header line, then the quoted version
        # found in the indented lines that follow it.
        packages = re.findall(r'^([^#\s][^:]+):\s*\n(?:\s+.*\n)*?\s+version\s+"([^"]+)"', content, re.MULTILINE)
        for package_spec, version in packages:
            first_spec = package_spec.split(',')[0].strip().strip('"')
            if not first_spec:
                continue
            name = first_spec[0] + first_spec[1:].split('@', 1)[0]
            dependencies.append(Dependency(
                name=name,
                version=version,
                ecosystem='npm',
                direct=False
            ))
    except Exception as e:
        # stderr keeps diagnostics out of a report printed on stdout.
        print(f"Error parsing yarn.lock: {e}", file=sys.stderr)
    return dependencies
def _parse_requirements_txt(self, file_path: Path) -> List[Dependency]:
    """Parse requirements.txt for Python dependencies.

    Recognises ``name <op> version`` requirements where the name may contain
    dots, an extras list may follow the name, whitespace may surround the
    operator, and the operator is one of ``===``, ``==``, ``~=``, ``!=``,
    ``>=``, ``<=``, ``>``, ``<``. Only the first version of a multi-clause
    specifier is recorded; environment markers and inline comments are not
    part of the version. Blank lines, comments, option lines (starting with
    ``-``) and requirements without a version are skipped.

    Returns:
        The dependencies found, all marked ``direct=True``; an empty or
        partial list if the file cannot be read (the error is reported on
        stderr).
    """
    requirement = re.compile(
        r'^([A-Za-z0-9][A-Za-z0-9._-]*)'   # distribution name
        r'\s*(?:\[[^\]]*\])?'              # optional [extras]
        r'\s*(===|==|~=|!=|>=|<=|>|<)'     # comparison operator
        r'\s*([^\s,;#]+)'                  # first version literal
    )
    dependencies = []
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line or line.startswith(('#', '-')):
                    continue
                match = requirement.match(line)
                if match:
                    name, _operator, version = match.groups()
                    dependencies.append(Dependency(
                        name=name,
                        version=version,
                        ecosystem='pypi',
                        direct=True
                    ))
    except Exception as e:
        # stderr keeps diagnostics out of a report printed on stdout.
        print(f"Error parsing requirements.txt: {e}", file=sys.stderr)
    return dependencies
def _parse_pyproject_toml(self, file_path: Path) -> List[Dependency]:
    """Parse pyproject.toml for Poetry-style Python dependencies.

    Only the ``[tool.poetry.dependencies]`` table is read. The table ends at
    the next line that starts a table header, not at the first ``[``
    character, so arrays and inline tables inside the section do not cut it
    short. Both ``name = "^1.2"`` and ``name = {version = "^1.2", ...}``
    entries are recognised; the ``python`` entry is skipped and ``^``/``~``
    are removed from the version.

    Returns:
        The dependencies found, all marked ``direct=True``; an empty or
        partial list if the file cannot be read (the error is reported on
        stderr).
    """
    entry = re.compile(
        r'^([A-Za-z0-9_.-]+)\s*=\s*'
        r'(?:["\']([^"\']+)["\']'                              # name = "spec"
        r'|\{[^}]*?\bversion\s*=\s*["\']([^"\']+)["\'])'       # name = {version = "spec"}
    )
    dependencies = []
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        dep_section = re.search(
            r'^[ \t]*\[tool\.poetry\.dependencies\](.*?)(?=^[ \t]*\[|\Z)',
            content, re.DOTALL | re.MULTILINE)
        if dep_section:
            for line in dep_section.group(1).split('\n'):
                match = entry.match(line.strip())
                if not match:
                    continue
                name = match.group(1)
                version = match.group(2) or match.group(3)
                if name != 'python':
                    dependencies.append(Dependency(
                        name=name,
                        version=version.replace('^', '').replace('~', ''),
                        ecosystem='pypi',
                        direct=True
                    ))
    except Exception as e:
        # stderr keeps diagnostics out of a report printed on stdout.
        print(f"Error parsing pyproject.toml: {e}", file=sys.stderr)
    return dependencies
def _parse_pipfile_lock(self, file_path: Path) -> List[Dependency]:
    """Read the ``default`` and ``develop`` sections of a Pipfile.lock.

    The ``==`` marker is removed from each version. Entries from ``default``
    are marked direct, entries from ``develop`` are not. On any error a
    message is printed and whatever was collected so far is returned.
    """
    collected = []
    try:
        with open(file_path, 'r') as handle:
            lock_data = json.load(handle)
        for section in ('default', 'develop'):
            if section not in lock_data:
                continue
            from_default = section == 'default'
            for package, details in lock_data[section].items():
                pinned = details.get('version', '').replace('==', '')
                collected.append(Dependency(
                    name=package,
                    version=pinned,
                    ecosystem='pypi',
                    direct=from_default,
                ))
    except Exception as e:
        print(f"Error parsing Pipfile.lock: {e}")
    return collected
def _parse_poetry_lock(self, file_path: Path) -> List[Dependency]:
    """Read the ``[[package]]`` entries of a poetry.lock file.

    An entry is recognised when its ``name`` and ``version`` lines directly
    follow the ``[[package]]`` header. All results are marked as not direct.
    On any error a message is printed and whatever was collected so far is
    returned.
    """
    entry_pattern = r'\[\[package\]\]\nname\s*=\s*"([^"]+)"\nversion\s*=\s*"([^"]+)"'
    collected = []
    try:
        with open(file_path, 'r') as handle:
            text = handle.read()
        for entry in re.finditer(entry_pattern, text):
            collected.append(Dependency(
                name=entry.group(1),
                version=entry.group(2),
                ecosystem='pypi',
                direct=False,
            ))
    except Exception as e:
        print(f"Error parsing poetry.lock: {e}")
    return collected
def _parse_go_mod(self, file_path: Path) -> List[Dependency]:
    """Parse go.mod for Go dependencies.

    Handles every ``require ( ... )`` block in the file as well as
    single-line ``require module version`` directives. ``//`` comments are
    removed before a line is interpreted; a requirement whose comment
    contains ``indirect`` is recorded with ``direct=False``. A leading ``v``
    is removed from the version.

    Returns:
        The dependencies found; an empty or partial list if the file cannot
        be read (the error is reported on stderr).
    """
    dependencies = []
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.read().splitlines()

        inside_require_block = False
        for raw_line in lines:
            code, _, comment = raw_line.partition('//')
            code = code.strip()

            if inside_require_block:
                if code.startswith(')'):
                    inside_require_block = False
                    continue
                entry = code
            elif re.fullmatch(r'require\s*\(', code):
                inside_require_block = True
                continue
            else:
                single = re.match(r'require\s+(.+)', code)
                if single is None:
                    continue
                entry = single.group(1)

            match = re.fullmatch(r'(\S+)\s+v?(\S+)', entry.strip())
            if match:
                name, version = match.groups()
                dependencies.append(Dependency(
                    name=name,
                    version=version,
                    ecosystem='go',
                    direct='indirect' not in comment
                ))
    except Exception as e:
        # stderr keeps diagnostics out of a report printed on stdout.
        print(f"Error parsing go.mod: {e}", file=sys.stderr)
    return dependencies
def _parse_go_sum(self, file_path: Path) -> List[Dependency]:
    """Return no dependencies for go.sum; the file is not read."""
    # go.sum mainly contains checksums, dependencies are in go.mod
    nothing_to_report = []
    return nothing_to_report
def _parse_cargo_toml(self, file_path: Path) -> List[Dependency]:
    """Parse Cargo.toml for Rust dependencies.

    Only the ``[dependencies]`` table is read. The table ends at the next
    line that starts a table header, not at the first ``[`` character, so a
    feature list such as ``features = ["derive"]`` does not cut it short.
    Both ``name = "1.0"`` and ``name = { version = "1.0", ... }`` entries
    are recognised.

    Returns:
        The dependencies found, all marked ``direct=True``; an empty or
        partial list if the file cannot be read (the error is reported on
        stderr).
    """
    entry = re.compile(
        r'^([A-Za-z0-9_-]+)\s*=\s*'
        r'(?:["\']([^"\']+)["\']'                              # name = "1.0"
        r'|\{[^}]*?\bversion\s*=\s*["\']([^"\']+)["\'])'       # name = { version = "1.0" }
    )
    dependencies = []
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        dep_section = re.search(
            r'^[ \t]*\[dependencies\](.*?)(?=^[ \t]*\[|\Z)',
            content, re.DOTALL | re.MULTILINE)
        if dep_section:
            for line in dep_section.group(1).split('\n'):
                match = entry.match(line.strip())
                if match:
                    dependencies.append(Dependency(
                        name=match.group(1),
                        version=match.group(2) or match.group(3),
                        ecosystem='cargo',
                        direct=True
                    ))
    except Exception as e:
        # stderr keeps diagnostics out of a report printed on stdout.
        print(f"Error parsing Cargo.toml: {e}", file=sys.stderr)
    return dependencies
def _parse_cargo_lock(self, file_path: Path) -> List[Dependency]:
    """Read the ``[[package]]`` entries of a Cargo.lock file.

    An entry is recognised when its ``name`` and ``version`` lines directly
    follow the ``[[package]]`` header. All results are marked as not direct.
    On any error a message is printed and whatever was collected so far is
    returned.
    """
    entry_pattern = r'\[\[package\]\]\nname\s*=\s*"([^"]+)"\nversion\s*=\s*"([^"]+)"'
    collected = []
    try:
        with open(file_path, 'r') as handle:
            text = handle.read()
        for entry in re.finditer(entry_pattern, text):
            collected.append(Dependency(
                name=entry.group(1),
                version=entry.group(2),
                ecosystem='cargo',
                direct=False,
            ))
    except Exception as e:
        print(f"Error parsing Cargo.lock: {e}")
    return collected
def _parse_gemfile(self, file_path: Path) -> List[Dependency]:
    """Read ``gem`` declarations from a Gemfile.

    The first quoted argument is the gem name; a second quoted argument, if
    present, is stored verbatim as the version, otherwise the version is
    the empty string. All results are marked direct. On any error a message
    is printed and whatever was collected so far is returned.
    """
    declaration = r'gem\s+["\']([^"\']+)["\'](?:\s*,\s*["\']([^"\']+)["\'])?'
    collected = []
    try:
        with open(file_path, 'r') as handle:
            text = handle.read()
        # findall yields (name, version) pairs; version is '' when the
        # optional second argument is absent.
        for gem_name, gem_version in re.findall(declaration, text):
            collected.append(Dependency(
                name=gem_name,
                version=gem_version or '',
                ecosystem='rubygems',
                direct=True,
            ))
    except Exception as e:
        print(f"Error parsing Gemfile: {e}")
    return collected
def _parse_gemfile_lock(self, file_path: Path) -> List[Dependency]:
    """Parse Gemfile.lock for Ruby dependencies.

    Only resolved gems are reported: lines inside the ``GEM`` section that
    are indented by exactly four spaces, e.g. ``    rails (7.0.4.3)``. The
    deeper-indented lines below each gem are that gem's own requirements
    (``      rack (>= 2.0)``) and are not dependencies with a version, so
    they are ignored.

    Returns:
        The dependencies found, all marked ``direct=False``; an empty or
        partial list if the file cannot be read (the error is reported on
        stderr).
    """
    dependencies = []
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        # Extract GEM section (up to the next non-indented line)
        gem_section = re.search(r'GEM\s*\n(.*?)(?=\n\S|\Z)', content, re.DOTALL)
        if gem_section:
            resolved = re.findall(r'^ {4}(\S+) \(([^)]+)\)\s*$',
                                  gem_section.group(1), re.MULTILINE)
            for name, version in resolved:
                dependencies.append(Dependency(
                    name=name,
                    version=version,
                    ecosystem='rubygems',
                    direct=False
                ))
    except Exception as e:
        # stderr keeps diagnostics out of a report printed on stdout.
        print(f"Error parsing Gemfile.lock: {e}", file=sys.stderr)
    return dependencies
def _generate_scan_summary(self, scan_results: Dict[str, Any]) -> Dict[str, Any]:
    """Condense scan results into headline counts.

    Reads ``dependencies``, ``ecosystems`` and the three
    ``*_severity_count`` entries of *scan_results* and returns totals,
    the number of distinct dependency names, the number of ecosystems, the
    number of dependencies with at least one vulnerability, and the
    per-severity breakdown.
    """
    deps = scan_results['dependencies']
    distinct_names = {dep.name for dep in deps}
    affected = sum(1 for dep in deps if dep.vulnerabilities)

    breakdown = {
        'high': scan_results['high_severity_count'],
        'medium': scan_results['medium_severity_count'],
        'low': scan_results['low_severity_count']
    }
    summary = {
        'total_dependencies': len(deps),
        'unique_dependencies': len(distinct_names),
        'ecosystems_found': len(scan_results['ecosystems']),
        'vulnerable_dependencies': affected,
        'vulnerability_breakdown': breakdown
    }
    return summary
def _generate_recommendations(self, scan_results: Dict[str, Any]) -> List[str]:
    """Turn scan results into a list of recommendation sentences.

    Produces, in this order: an urgency line when high-severity findings
    exist, a scheduling line when medium-severity findings exist, one
    upgrade line per fixable vulnerability of the first three vulnerable
    dependencies (in scan order), and a consolidation hint when more than
    three ecosystems are present.
    """
    advice = []

    high = scan_results['high_severity_count']
    if high > 0:
        advice.append(f"URGENT: Address {high} high-severity vulnerabilities immediately")

    medium = scan_results['medium_severity_count']
    if medium > 0:
        advice.append(f"Schedule fixes for {medium} medium-severity vulnerabilities within 30 days")

    affected = [dep for dep in scan_results['dependencies'] if dep.vulnerabilities]
    # Only the first three vulnerable dependencies get explicit upgrade lines.
    for dep in affected[:3]:
        for vuln in dep.vulnerabilities:
            if not vuln.fixed_version:
                continue
            advice.append(f"Update {dep.name} from {dep.version} to {vuln.fixed_version} to fix {vuln.id}")

    if len(scan_results['ecosystems']) > 3:
        advice.append("Consider consolidating package managers to reduce complexity")

    return advice
def generate_report(self, scan_results: Dict[str, Any], format: str = 'text') -> str:
    """Render scan results as JSON or as a plain-text report.

    Args:
        scan_results: Result dict in the shape produced by ``scan_project``;
            the text report reads ``timestamp``, ``project_path``,
            ``scan_summary``, ``ecosystems``, ``vulnerabilities_found``,
            ``dependencies`` and ``recommendations``.
        format: ``'json'`` for JSON output; any other value yields the
            text report.

    Returns:
        The report as a single string.
    """
    if format == 'json':
        # Convert Dependency objects to dicts for JSON serialization.
        # The copy is shallow: only the 'dependencies' entry is replaced, so
        # the caller's dict keeps its Dependency objects.
        serializable_results = scan_results.copy()
        serializable_results['dependencies'] = [
            {
                'name': dep.name,
                'version': dep.version,
                'ecosystem': dep.ecosystem,
                'direct': dep.direct,
                'license': dep.license,
                'vulnerabilities': [asdict(vuln) for vuln in dep.vulnerabilities]
            }
            for dep in scan_results['dependencies']
        ]
        # default=str stringifies any remaining value json cannot encode.
        return json.dumps(serializable_results, indent=2, default=str)
    # Text format report: collected line by line and joined at the end.
    report = []
    report.append("=" * 60)
    report.append("DEPENDENCY SECURITY SCAN REPORT")
    report.append("=" * 60)
    report.append(f"Scan Date: {scan_results['timestamp']}")
    report.append(f"Project: {scan_results['project_path']}")
    report.append("")
    # Summary
    summary = scan_results['scan_summary']
    report.append("SUMMARY:")
    report.append(f" Total Dependencies: {summary['total_dependencies']}")
    report.append(f" Unique Dependencies: {summary['unique_dependencies']}")
    report.append(f" Ecosystems: {', '.join(scan_results['ecosystems'])}")
    report.append(f" Vulnerabilities Found: {scan_results['vulnerabilities_found']}")
    report.append(f" High Severity: {summary['vulnerability_breakdown']['high']}")
    report.append(f" Medium Severity: {summary['vulnerability_breakdown']['medium']}")
    report.append(f" Low Severity: {summary['vulnerability_breakdown']['low']}")
    report.append("")
    # Vulnerable dependencies (section omitted when there are none)
    vulnerable_deps = [dep for dep in scan_results['dependencies'] if dep.vulnerabilities]
    if vulnerable_deps:
        report.append("VULNERABLE DEPENDENCIES:")
        report.append("-" * 30)
        for dep in vulnerable_deps:
            report.append(f"Package: {dep.name} v{dep.version} ({dep.ecosystem})")
            for vuln in dep.vulnerabilities:
                report.append(f"{vuln.id}: {vuln.summary}")
                report.append(f" Severity: {vuln.severity} (CVSS: {vuln.cvss_score})")
                if vuln.fixed_version:
                    report.append(f" Fixed in: {vuln.fixed_version}")
            report.append("")
    # Recommendations (section omitted when the list is empty)
    if scan_results['recommendations']:
        report.append("RECOMMENDATIONS:")
        report.append("-" * 20)
        for i, rec in enumerate(scan_results['recommendations'], 1):
            report.append(f"{i}. {rec}")
        report.append("")
    report.append("=" * 60)
    return '\n'.join(report)
def main():
    """Command-line entry point: scan a project, then print or save the report.

    Exits with status 1 when scanning or reporting raises, or when
    ``--fail-on-high`` is given and high-severity vulnerabilities were found.
    """
    cli = argparse.ArgumentParser(
        description='Scan project dependencies for vulnerabilities and security issues',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python dep_scanner.py /path/to/project
python dep_scanner.py . --format json --output results.json
python dep_scanner.py /app --fail-on-high
"""
    )
    cli.add_argument('project_path',
                     help='Path to the project directory to scan')
    cli.add_argument('--format', choices=['text', 'json'], default='text',
                     help='Output format (default: text)')
    cli.add_argument('--output', '-o',
                     help='Output file path (default: stdout)')
    cli.add_argument('--fail-on-high', action='store_true',
                     help='Exit with error code if high-severity vulnerabilities found')
    # NOTE(review): --quick-scan is accepted but args.quick_scan is never
    # consulted below, so the flag currently has no effect.
    cli.add_argument('--quick-scan', action='store_true',
                     help='Perform quick scan (skip transitive dependencies)')
    args = cli.parse_args()

    try:
        scanner = DependencyScanner()
        results = scanner.scan_project(args.project_path)
        report = scanner.generate_report(results, args.format)

        destination = args.output
        if not destination:
            print(report)
        else:
            with open(destination, 'w') as f:
                f.write(report)
            print(f"Report saved to {destination}")

        # Exit with error if high-severity vulnerabilities found and --fail-on-high is set.
        # SystemExit is not an Exception subclass, so the handler below lets it through.
        found_high = results['high_severity_count'] > 0
        if args.fail_on_high and found_high:
            sys.exit(1)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,996 @@
#!/usr/bin/env python3
"""
License Checker - Dependency license compliance and conflict analysis tool.
This script analyzes dependency licenses from package metadata, classifies them
into risk categories, detects license conflicts, and generates compliance
reports with actionable recommendations for legal risk management.
Author: Claude Skills Engineering Team
License: MIT
"""
import json
import os
import sys
import argparse
from typing import Dict, List, Set, Any, Optional, Tuple
from pathlib import Path
from dataclasses import dataclass, asdict
from datetime import datetime
import re
from enum import Enum
class LicenseType(Enum):
    """License classification types."""
    PERMISSIVE = "permissive"  # MIT, Apache-2.0, BSD and ISC in the built-in database
    COPYLEFT_STRONG = "copyleft_strong"  # GPL-2.0, GPL-3.0 and AGPL-3.0 in the built-in database
    COPYLEFT_WEAK = "copyleft_weak"  # MPL-2.0, LGPL-2.1 and LGPL-3.0 in the built-in database
    PROPRIETARY = "proprietary"  # the 'PROPRIETARY' placeholder entry
    DUAL = "dual"  # not used by any built-in database entry
    UNKNOWN = "unknown"  # the 'UNKNOWN' placeholder entry
class RiskLevel(Enum):
    """Risk assessment levels."""
    LOW = "low"  # assigned to the permissive licenses in the built-in database
    MEDIUM = "medium"  # assigned to the weak-copyleft licenses
    HIGH = "high"  # assigned to GPL-2.0, GPL-3.0 and the proprietary placeholder
    CRITICAL = "critical"  # assigned to AGPL-3.0 and the unknown-license placeholder
@dataclass
class LicenseInfo:
    """Describes one license: its classification, risk and terms."""
    name: str  # human-readable license name, e.g. 'MIT License'
    spdx_id: Optional[str]  # identifier such as 'MIT'; None for the placeholder entries
    license_type: LicenseType
    risk_level: RiskLevel
    description: str  # one-line characterisation of the license
    restrictions: List[str]  # conditions attached to use, as short phrases
    obligations: List[str]  # duties the license imposes, as short phrases
    # Permission flags; built-in entries use the keys 'commercial',
    # 'modification', 'distribution', 'private_use' and 'patent_grant'.
    compatibility: Dict[str, bool]
@dataclass
class DependencyLicense:
    """Represents a dependency with its license information."""
    name: str
    version: str
    ecosystem: str
    direct: bool
    license_declared: Optional[str]  # presumably the license string from package metadata; confirm against the producer
    license_detected: Optional[LicenseInfo]  # database entry resolved for this dependency, if any
    license_files: List[str]  # presumably paths of license files found for the dependency; confirm against the producer
    confidence: float  # NOTE(review): scale is not visible in this part of the file
@dataclass
class LicenseConflict:
    """Represents a license compatibility conflict between two parties."""
    dependency1: str  # first party of the conflict
    license1: str  # license of the first party
    dependency2: str  # second party of the conflict
    license2: str  # license of the second party
    conflict_type: str  # NOTE(review): the set of values is not visible in this part of the file
    severity: RiskLevel
    description: str
    resolution_options: List[str]  # suggested ways to resolve the conflict
class LicenseChecker:
"""Main license checking and compliance analysis class."""
def __init__(self):
    """Build the three lookup tables the checker works from."""
    # License id -> LicenseInfo
    known_licenses = self._build_license_database()
    self.license_database = known_licenses

    # License id -> {other license id -> bool}
    pairwise_compatibility = self._build_compatibility_matrix()
    self.compatibility_matrix = pairwise_compatibility

    # License id -> list of regex patterns for text detection
    detection_patterns = self._build_license_patterns()
    self.license_patterns = detection_patterns
def _build_license_database(self) -> Dict[str, LicenseInfo]:
    """Return the built-in license catalogue keyed by license identifier.

    Every entry is a LicenseInfo carrying the license's classification, risk
    level, restrictions, obligations and a set of permission flags. A new
    dict with new records is built on every call.
    """
    def flags(commercial, modification, distribution, private_use, patent_grant):
        # A fresh dict per entry so that records never share mutable state.
        return {
            'commercial': commercial, 'modification': modification,
            'distribution': distribution, 'private_use': private_use,
            'patent_grant': patent_grant
        }

    notice = 'Include copyright notice'
    text = 'Include license text'
    permissive, weak, strong = (LicenseType.PERMISSIVE, LicenseType.COPYLEFT_WEAK,
                                LicenseType.COPYLEFT_STRONG)
    low, medium, high, critical = (RiskLevel.LOW, RiskLevel.MEDIUM,
                                   RiskLevel.HIGH, RiskLevel.CRITICAL)

    # Columns: key, name, spdx id, type, risk, description,
    #          restrictions, obligations, permission flags
    rows = [
        # Permissive Licenses (Low Risk)
        ('MIT', 'MIT License', 'MIT', permissive, low,
         'Very permissive license with minimal restrictions',
         [notice, text],
         ['Attribution'],
         flags(True, True, True, True, False)),
        ('Apache-2.0', 'Apache License 2.0', 'Apache-2.0', permissive, low,
         'Permissive license with patent protection',
         [notice, text, 'State changes', 'Include NOTICE file'],
         ['Attribution', 'Patent grant'],
         flags(True, True, True, True, True)),
        ('BSD-3-Clause', 'BSD 3-Clause License', 'BSD-3-Clause', permissive, low,
         'Permissive license with non-endorsement clause',
         [notice, text, 'No endorsement using author names'],
         ['Attribution'],
         flags(True, True, True, True, False)),
        ('BSD-2-Clause', 'BSD 2-Clause License', 'BSD-2-Clause', permissive, low,
         'Very permissive license similar to MIT',
         [notice, text],
         ['Attribution'],
         flags(True, True, True, True, False)),
        ('ISC', 'ISC License', 'ISC', permissive, low,
         'Functionally equivalent to MIT license',
         [notice],
         ['Attribution'],
         flags(True, True, True, True, False)),
        # Weak Copyleft Licenses (Medium Risk)
        ('MPL-2.0', 'Mozilla Public License 2.0', 'MPL-2.0', weak, medium,
         'File-level copyleft license',
         ['Disclose source of modified files', notice, text, 'State changes'],
         ['Source disclosure (modified files only)'],
         flags(True, True, True, True, True)),
        ('LGPL-2.1', 'GNU Lesser General Public License 2.1', 'LGPL-2.1', weak, medium,
         'Library-level copyleft license',
         ['Disclose source of library modifications', notice, text, 'Allow relinking'],
         ['Source disclosure (library modifications)', 'Dynamic linking preferred'],
         flags(True, True, True, True, False)),
        ('LGPL-3.0', 'GNU Lesser General Public License 3.0', 'LGPL-3.0', weak, medium,
         'Library-level copyleft with patent provisions',
         ['Disclose source of library modifications', notice, text,
          'Allow relinking', 'Anti-tivoization'],
         ['Source disclosure (library modifications)', 'Patent grant'],
         flags(True, True, True, True, True)),
        # Strong Copyleft Licenses (High Risk)
        ('GPL-2.0', 'GNU General Public License 2.0', 'GPL-2.0', strong, high,
         'Strong copyleft requiring full source disclosure',
         ['Disclose entire source code', notice, text, 'Use same license'],
         ['Full source disclosure', 'License compatibility'],
         flags(False, True, True, True, False)),
        ('GPL-3.0', 'GNU General Public License 3.0', 'GPL-3.0', strong, high,
         'Strong copyleft with patent and hardware provisions',
         ['Disclose entire source code', notice, text, 'Use same license', 'Anti-tivoization'],
         ['Full source disclosure', 'Patent grant', 'License compatibility'],
         flags(False, True, True, True, True)),
        ('AGPL-3.0', 'GNU Affero General Public License 3.0', 'AGPL-3.0', strong, critical,
         'Network copyleft extending GPL to SaaS',
         ['Disclose entire source code', notice, text, 'Use same license',
          'Network use triggers copyleft'],
         ['Full source disclosure', 'Network service source disclosure'],
         flags(False, True, True, True, True)),
        # Proprietary/Commercial Licenses (High Risk)
        ('PROPRIETARY', 'Proprietary License', None, LicenseType.PROPRIETARY, high,
         'Commercial or custom proprietary license',
         ['Varies by license', 'Often no redistribution', 'May require commercial license'],
         ['License agreement compliance', 'Payment obligations'],
         flags(False, False, False, True, False)),
        # Unknown/Unlicensed (Critical Risk)
        ('UNKNOWN', 'Unknown License', None, LicenseType.UNKNOWN, critical,
         'No license detected or ambiguous licensing',
         ['Unknown', 'Assume no rights granted'],
         ['Investigate and clarify licensing'],
         flags(False, False, False, False, False)),
    ]

    catalogue = {}
    for (key, name, spdx_id, license_type, risk_level,
         description, restrictions, obligations, compatibility) in rows:
        catalogue[key] = LicenseInfo(
            name=name,
            spdx_id=spdx_id,
            license_type=license_type,
            risk_level=risk_level,
            description=description,
            restrictions=restrictions,
            obligations=obligations,
            compatibility=compatibility,
        )
    return catalogue
def _build_compatibility_matrix(self) -> Dict[str, Dict[str, bool]]:
    """Return the pairwise license compatibility table.

    The outer key is one license identifier, the inner key another; the
    value is the stored compatibility flag for that pair. Rows exist for
    MIT, Apache-2.0, GPL-2.0, GPL-3.0 and AGPL-3.0 only.
    """
    columns = (
        'MIT', 'Apache-2.0', 'BSD-3-Clause', 'BSD-2-Clause',
        'ISC', 'MPL-2.0', 'LGPL-2.1', 'LGPL-3.0',
        'GPL-2.0', 'GPL-3.0', 'AGPL-3.0', 'PROPRIETARY',
    )
    # One character per column, in column order: Y = True, N = False.
    rows = (
        ('MIT',        'YYYY' 'YYYY' 'NNNN'),
        ('Apache-2.0', 'YYYY' 'YYNY' 'NYYN'),
        ('GPL-2.0',    'YNYY' 'YNYN' 'YNNN'),
        ('GPL-3.0',    'YYYY' 'YYNY' 'NYYN'),
        ('AGPL-3.0',   'YYYY' 'YYNY' 'NYYN'),
    )
    matrix = {}
    for license_id, marks in rows:
        matrix[license_id] = {
            other: mark == 'Y' for other, mark in zip(columns, marks)
        }
    return matrix
def _build_license_patterns(self) -> Dict[str, List[str]]:
    """Return regex patterns used to recognise licenses in text.

    Maps a license identifier to a list of pattern strings; the patterns
    are returned uncompiled.
    """
    patterns = {}

    patterns['MIT'] = [
        r'MIT License',
        r'Permission is hereby granted, free of charge',
        r'THE SOFTWARE IS PROVIDED "AS IS"',
    ]
    patterns['Apache-2.0'] = [
        r'Apache License, Version 2\.0',
        r'Licensed under the Apache License',
        r'http://www\.apache\.org/licenses/LICENSE-2\.0',
    ]
    patterns['GPL-2.0'] = [
        r'GNU GENERAL PUBLIC LICENSE\s+Version 2',
        r'This program is free software.*GPL.*version 2',
        r'http://www\.gnu\.org/licenses/gpl-2\.0',
    ]
    patterns['GPL-3.0'] = [
        r'GNU GENERAL PUBLIC LICENSE\s+Version 3',
        r'This program is free software.*GPL.*version 3',
        r'http://www\.gnu\.org/licenses/gpl-3\.0',
    ]
    patterns['BSD-3-Clause'] = [
        r'BSD 3-Clause License',
        r'Redistributions of source code must retain',
        r'Neither the name.*may be used to endorse',
    ]
    patterns['BSD-2-Clause'] = [
        r'BSD 2-Clause License',
        r'Redistributions of source code must retain.*Redistributions in binary form',
    ]
    return patterns
def analyze_project(self, project_path: str, dependency_inventory: Optional[str] = None) -> Dict[str, Any]:
    """Run the complete license-compliance analysis for one project.

    Args:
        project_path: Directory of the project to analyze.
        dependency_inventory: Optional path to a JSON dependency inventory.
            When omitted (or empty) the project directory is scanned instead.

    Returns:
        A dict with the keys ``timestamp``, ``project_path``,
        ``project_license``, ``dependencies``, ``license_summary``,
        ``conflicts``, ``compliance_score``, ``risk_assessment`` and
        ``recommendations``.
    """
    root = Path(project_path)
    started_at = datetime.now().isoformat()
    project_license = self._detect_project_license(root)

    # Dependencies come either from an explicit inventory or from a scan.
    if dependency_inventory:
        raw_dependencies = self._load_dependency_inventory(dependency_inventory)
    else:
        raw_dependencies = self._scan_project_dependencies(root)

    # Per-dependency license analysis feeds every later stage.
    licensed = [self._analyze_dependency_license(raw, root) for raw in raw_dependencies]

    summary = self._generate_license_summary(licensed)
    conflicts = self._detect_license_conflicts(project_license, licensed)
    score = self._calculate_compliance_score(licensed, conflicts)
    risks = self._generate_risk_assessment(licensed, conflicts)

    results: Dict[str, Any] = {
        'timestamp': started_at,
        'project_path': str(root),
        'project_license': project_license,
        'dependencies': licensed,
        'license_summary': summary,
        'conflicts': conflicts,
        'compliance_score': score,
        'risk_assessment': risks,
        'recommendations': [],
    }

    # Recommendations are derived from the otherwise complete result set.
    results['recommendations'] = self._generate_compliance_recommendations(results)
    return results
def _detect_project_license(self, project_path: Path) -> Optional[str]:
"""Detect the main project license."""
license_files = ['LICENSE', 'LICENSE.txt', 'LICENSE.md', 'COPYING', 'COPYING.txt']
for license_file in license_files:
license_path = project_path / license_file
if license_path.exists():
try:
with open(license_path, 'r', encoding='utf-8') as f:
content = f.read()
# Analyze license content
detected_license = self._detect_license_from_text(content)
if detected_license:
return detected_license
except Exception as e:
print(f"Error reading license file {license_path}: {e}")
return None
def _detect_license_from_text(self, text: str) -> Optional[str]:
"""Detect license type from text content."""
text_upper = text.upper()
for license_id, patterns in self.license_patterns.items():
for pattern in patterns:
if re.search(pattern, text, re.IGNORECASE):
return license_id
# Common license text patterns
if 'MIT' in text_upper and 'PERMISSION IS HEREBY GRANTED' in text_upper:
return 'MIT'
elif 'APACHE LICENSE' in text_upper and 'VERSION 2.0' in text_upper:
return 'Apache-2.0'
elif 'GPL' in text_upper and 'VERSION 2' in text_upper:
return 'GPL-2.0'
elif 'GPL' in text_upper and 'VERSION 3' in text_upper:
return 'GPL-3.0'
return None
def _load_dependency_inventory(self, inventory_path: str) -> List[Dict[str, Any]]:
"""Load dependencies from JSON inventory file."""
try:
with open(inventory_path, 'r') as f:
data = json.load(f)
if 'dependencies' in data:
return data['dependencies']
else:
return data if isinstance(data, list) else []
except Exception as e:
print(f"Error loading dependency inventory: {e}")
return []
def _scan_project_dependencies(self, project_path: Path) -> List[Dict[str, Any]]:
"""Basic dependency scanning - in practice, would integrate with dep_scanner.py."""
dependencies = []
# Simple package.json parsing as example
package_json = project_path / 'package.json'
if package_json.exists():
try:
with open(package_json, 'r') as f:
data = json.load(f)
for dep_type in ['dependencies', 'devDependencies']:
if dep_type in data:
for name, version in data[dep_type].items():
dependencies.append({
'name': name,
'version': version,
'ecosystem': 'npm',
'direct': True
})
except Exception as e:
print(f"Error parsing package.json: {e}")
return dependencies
def _analyze_dependency_license(self, dependency: Dict[str, Any], project_path: Path) -> DependencyLicense:
    """Build the license record for one dependency.

    Resolution order: the license declared on the dependency record
    (confidence 0.9), then, for npm packages only, the package's directory
    under ``node_modules`` (confidence 0.7), and finally the database's
    ``'UNKNOWN'`` entry (confidence 0.0).

    Args:
        dependency: Raw dependency record; ``name`` is required, while
            ``version``, ``ecosystem``, ``direct`` and ``license`` are optional.
        project_path: Project root, used to locate ``node_modules``.

    Returns:
        A ``DependencyLicense`` with ``license_detected`` and ``confidence`` set.
    """
    declared = dependency.get('license')
    record = DependencyLicense(
        name=dependency['name'],
        version=dependency.get('version', ''),
        ecosystem=dependency.get('ecosystem', ''),
        direct=dependency.get('direct', False),
        license_declared=declared,
        license_detected=None,
        license_files=[],
        confidence=0.0
    )

    detected, confidence = None, 0.0

    # 1. Trust an explicit declaration when it resolves to a known license.
    if declared:
        resolved = self._resolve_license_info(declared)
        if resolved:
            detected, confidence = resolved, 0.9

    # 2. For npm packages, fall back to files shipped in node_modules.
    if not detected and record.ecosystem == 'npm':
        package_dir = project_path / 'node_modules' / record.name
        if package_dir.exists():
            scanned = self._scan_package_directory(package_dir)
            if scanned:
                detected, confidence = scanned, 0.7

    # 3. Nothing found: mark the dependency as unknown.
    if not detected:
        detected, confidence = self.license_database['UNKNOWN'], 0.0

    record.license_detected = detected
    record.confidence = confidence
    return record
def _resolve_license_info(self, license_string: str) -> Optional[LicenseInfo]:
"""Resolve license string to LicenseInfo object."""
if not license_string:
return None
license_string = license_string.strip()
# Direct SPDX ID match
if license_string in self.license_database:
return self.license_database[license_string]
# Common variations and mappings
license_mappings = {
'mit': 'MIT',
'apache': 'Apache-2.0',
'apache-2.0': 'Apache-2.0',
'apache 2.0': 'Apache-2.0',
'bsd': 'BSD-3-Clause',
'bsd-3-clause': 'BSD-3-Clause',
'bsd-2-clause': 'BSD-2-Clause',
'gpl-2.0': 'GPL-2.0',
'gpl-3.0': 'GPL-3.0',
'lgpl-2.1': 'LGPL-2.1',
'lgpl-3.0': 'LGPL-3.0',
'mpl-2.0': 'MPL-2.0',
'isc': 'ISC',
'unlicense': 'MIT', # Treat as permissive
'public domain': 'MIT', # Treat as permissive
'proprietary': 'PROPRIETARY',
'commercial': 'PROPRIETARY'
}
license_lower = license_string.lower()
for pattern, mapped_license in license_mappings.items():
if pattern in license_lower:
return self.license_database.get(mapped_license)
return None
def _scan_package_directory(self, package_path: Path) -> Optional[LicenseInfo]:
"""Scan package directory for license information."""
license_files = ['LICENSE', 'LICENSE.txt', 'LICENSE.md', 'COPYING', 'README.md', 'package.json']
for license_file in license_files:
file_path = package_path / license_file
if file_path.exists():
try:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
# Try to detect license from content
if license_file == 'package.json':
# Parse JSON for license field
try:
data = json.loads(content)
license_field = data.get('license')
if license_field:
return self._resolve_license_info(license_field)
except:
continue
else:
# Analyze text content
detected_license = self._detect_license_from_text(content)
if detected_license:
return self.license_database.get(detected_license)
except Exception:
continue
return None
def _generate_license_summary(self, dependencies: List[DependencyLicense]) -> Dict[str, Any]:
    """Tally the analyzed dependencies by license type, risk level and origin.

    Args:
        dependencies: Analyzed dependencies; each must have
            ``license_detected`` populated.

    Returns:
        A dict with ``total_dependencies``, per-value counts in
        ``license_types`` and ``risk_levels``, and the scalar counters
        ``unknown_licenses``, ``direct_dependencies`` and
        ``transitive_dependencies``.
    """
    type_counts: Dict[str, int] = {}
    risk_counts: Dict[str, int] = {}
    unknown_total = 0
    direct_total = 0

    for dep in dependencies:
        detected = dep.license_detected

        type_key = detected.license_type.value
        type_counts[type_key] = type_counts.get(type_key, 0) + 1

        risk_key = detected.risk_level.value
        risk_counts[risk_key] = risk_counts.get(risk_key, 0) + 1

        if detected.license_type == LicenseType.UNKNOWN:
            unknown_total += 1
        if dep.direct:
            direct_total += 1

    return {
        'total_dependencies': len(dependencies),
        'license_types': type_counts,
        'risk_levels': risk_counts,
        'unknown_licenses': unknown_total,
        'direct_dependencies': direct_total,
        # Every dependency that is not direct counts as transitive.
        'transitive_dependencies': len(dependencies) - direct_total
    }
def _detect_license_conflicts(self, project_license: Optional[str],
                              dependencies: List[DependencyLicense]) -> List[LicenseConflict]:
    """Find dependencies whose license conflicts with the project license.

    Args:
        project_license: License identifier of the project, or ``None``
            when it could not be detected.
        dependencies: Analyzed dependencies with ``license_detected`` set.

    Returns:
        A list of ``LicenseConflict`` records:

        * No project license: one conflict per dependency whose license risk
          is HIGH or CRITICAL.
        * Project license missing from ``license_database``: empty list.
        * Otherwise: one conflict per dependency the compatibility matrix
          does not mark compatible, plus, for permissive projects, one
          "GPL contamination" conflict per strong-copyleft dependency.
    """
    if not project_license:
        # Without a project license only risky dependencies can be flagged.
        return [
            LicenseConflict(
                dependency1='Project',
                license1='Unknown',
                dependency2=dep.name,
                license2=dep.license_detected.spdx_id or dep.license_detected.name,
                conflict_type='Unknown project license',
                severity=RiskLevel.HIGH,
                description=f'Project license unknown, dependency {dep.name} has {dep.license_detected.risk_level.value} risk license',
                resolution_options=['Define project license', 'Review dependency usage']
            )
            for dep in dependencies
            if dep.license_detected.risk_level in [RiskLevel.HIGH, RiskLevel.CRITICAL]
        ]

    project_license_info = self.license_database.get(project_license)
    if not project_license_info:
        return []

    found = []

    # Pass 1: compatibility-matrix lookup for every dependency.
    for dep in dependencies:
        dep_license_id = dep.license_detected.spdx_id or 'UNKNOWN'

        if project_license not in self.compatibility_matrix:
            continue
        # Licenses absent from the project's row count as incompatible.
        if self.compatibility_matrix[project_license].get(dep_license_id, False):
            continue

        severity = self._determine_conflict_severity(project_license_info, dep.license_detected)
        found.append(LicenseConflict(
            dependency1='Project',
            license1=project_license,
            dependency2=dep.name,
            license2=dep_license_id,
            conflict_type='License incompatibility',
            severity=severity,
            description=f'Project license {project_license} is incompatible with dependency license {dep_license_id}',
            resolution_options=self._generate_conflict_resolutions(project_license, dep_license_id)
        ))

    # Pass 2: strong-copyleft dependencies inside a permissive project.
    if project_license_info.license_type == LicenseType.PERMISSIVE:
        found.extend(
            LicenseConflict(
                dependency1='Project',
                license1=project_license,
                dependency2=dep.name,
                license2=dep.license_detected.spdx_id or dep.license_detected.name,
                conflict_type='GPL contamination',
                severity=RiskLevel.CRITICAL,
                description=f'GPL dependency {dep.name} may contaminate permissive project',
                resolution_options=['Remove GPL dependency', 'Change project license to GPL',
                                    'Use dynamic linking', 'Find alternative dependency']
            )
            for dep in dependencies
            if dep.license_detected.license_type == LicenseType.COPYLEFT_STRONG
        )

    return found
def _determine_conflict_severity(self, project_license: LicenseInfo, dep_license: LicenseInfo) -> RiskLevel:
    """Rate how serious a project/dependency license conflict is.

    Args:
        project_license: License info of the project.
        dep_license: License info of the conflicting dependency.

    Returns:
        CRITICAL for an unknown dependency license, or for strong copyleft
        inside a permissive project; HIGH for a proprietary dependency;
        MEDIUM for everything else.
    """
    dep_kind = dep_license.license_type

    if dep_kind == LicenseType.UNKNOWN:
        return RiskLevel.CRITICAL

    if (project_license.license_type == LicenseType.PERMISSIVE
            and dep_kind == LicenseType.COPYLEFT_STRONG):
        return RiskLevel.CRITICAL

    if dep_kind == LicenseType.PROPRIETARY:
        return RiskLevel.HIGH

    return RiskLevel.MEDIUM
def _generate_conflict_resolutions(self, project_license: str, dep_license: str) -> List[str]:
"""Generate resolution options for license conflicts."""
resolutions = []
if 'GPL' in dep_license:
resolutions.extend([
'Find alternative non-GPL dependency',
'Use dynamic linking if possible',
'Consider changing project license to GPL-compatible',
'Remove the dependency if not essential'
])
elif dep_license == 'PROPRIETARY':
resolutions.extend([
'Obtain commercial license',
'Find open-source alternative',
'Remove dependency if not essential',
'Negotiate license terms'
])
else:
resolutions.extend([
'Review license compatibility carefully',
'Consult legal counsel',
'Find alternative dependency',
'Consider license exception'
])
return resolutions
def _calculate_compliance_score(self, dependencies: List[DependencyLicense],
                                conflicts: List[LicenseConflict]) -> float:
    """Compute the overall compliance score on a 0-100 scale.

    Starting from 100, the score loses up to 30 points in proportion to the
    share of unknown licenses, up to 20 points in proportion to the share of
    HIGH/CRITICAL-risk licenses, 15 points per CRITICAL conflict and 10 per
    HIGH conflict. The result is never below 0.

    Args:
        dependencies: Analyzed dependencies.
        conflicts: Conflicts detected for the project.

    Returns:
        The score; ``100.0`` when there are no dependencies.
    """
    if not dependencies:
        return 100.0

    dependency_count = len(dependencies)
    unknown_total = len([dep for dep in dependencies
                         if dep.license_detected.license_type == LicenseType.UNKNOWN])
    risky_total = len([dep for dep in dependencies
                       if dep.license_detected.risk_level in [RiskLevel.HIGH, RiskLevel.CRITICAL]])

    score = 100.0
    # Proportional penalties: unknown licenses first, then risky ones.
    score -= (unknown_total / dependency_count) * 30
    score -= (risky_total / dependency_count) * 20

    # Flat penalties per conflict.
    if conflicts:
        critical_total = len([c for c in conflicts if c.severity == RiskLevel.CRITICAL])
        high_total = len([c for c in conflicts if c.severity == RiskLevel.HIGH])
        score -= critical_total * 15
        score -= high_total * 10

    return max(0.0, score)
def _generate_risk_assessment(self, dependencies: List[DependencyLicense],
                              conflicts: List[LicenseConflict]) -> Dict[str, Any]:
    """Assemble the risk-assessment section of the analysis.

    Args:
        dependencies: Analyzed dependencies.
        conflicts: Conflicts detected for the project.

    Returns:
        A dict with ``overall_risk``, ``license_risk_breakdown``,
        ``conflict_summary`` (total, critical and high counts),
        ``distribution_risks`` and ``commercial_risks``.
    """
    assessment: Dict[str, Any] = {}

    assessment['overall_risk'] = self._calculate_overall_risk(dependencies, conflicts)
    assessment['license_risk_breakdown'] = self._calculate_license_risks(dependencies)
    assessment['conflict_summary'] = {
        'total_conflicts': len(conflicts),
        'critical_conflicts': sum(1 for c in conflicts if c.severity == RiskLevel.CRITICAL),
        'high_conflicts': sum(1 for c in conflicts if c.severity == RiskLevel.HIGH)
    }
    assessment['distribution_risks'] = self._assess_distribution_risks(dependencies)
    assessment['commercial_risks'] = self._assess_commercial_risks(dependencies)

    return assessment
def _calculate_overall_risk(self, dependencies: List[DependencyLicense],
                            conflicts: List[LicenseConflict]) -> str:
    """Reduce all conflicts and dependency risks to one project-level rating.

    Args:
        dependencies: Analyzed dependencies.
        conflicts: Conflicts detected for the project.

    Returns:
        ``'CRITICAL'``, ``'HIGH'``, ``'MEDIUM'`` or ``'LOW'``: the highest
        level found among conflict severities and dependency risk levels.
        Conflicts can only raise the rating to HIGH or CRITICAL.
    """
    def conflict_at(level):
        return any(c.severity == level for c in conflicts)

    def dependency_at(level):
        return any(dep.license_detected.risk_level == level for dep in dependencies)

    if conflict_at(RiskLevel.CRITICAL) or dependency_at(RiskLevel.CRITICAL):
        return 'CRITICAL'
    if conflict_at(RiskLevel.HIGH) or dependency_at(RiskLevel.HIGH):
        return 'HIGH'
    if dependency_at(RiskLevel.MEDIUM):
        return 'MEDIUM'
    return 'LOW'
def _calculate_license_risks(self, dependencies: List[DependencyLicense]) -> Dict[str, int]:
    """Count dependencies per license risk level.

    Args:
        dependencies: Analyzed dependencies.

    Returns:
        A dict with the keys ``low``, ``medium``, ``high`` and ``critical``,
        each holding the number of dependencies at that level.
    """
    tally = dict.fromkeys(('low', 'medium', 'high', 'critical'), 0)
    for dep in dependencies:
        tally[dep.license_detected.risk_level.value] += 1
    return tally
def _assess_distribution_risks(self, dependencies: List[DependencyLicense]) -> List[str]:
    """Describe the risks that apply when the software is distributed.

    Args:
        dependencies: Analyzed dependencies.

    Returns:
        Up to three messages, in this order: strong-copyleft, proprietary,
        and unknown-license dependencies. Each message names the affected
        dependencies; a category with no matches is omitted.
    """
    def names_with(license_type):
        return [dep.name for dep in dependencies
                if dep.license_detected.license_type == license_type]

    findings = []

    copyleft = names_with(LicenseType.COPYLEFT_STRONG)
    if copyleft:
        findings.append(f"GPL dependencies require source code disclosure: {copyleft}")

    proprietary = names_with(LicenseType.PROPRIETARY)
    if proprietary:
        findings.append(f"Proprietary dependencies may require commercial licenses: {proprietary}")

    unknown = names_with(LicenseType.UNKNOWN)
    if unknown:
        findings.append(f"Unknown licenses pose legal uncertainty: {unknown}")

    return findings
def _assess_commercial_risks(self, dependencies: List[DependencyLicense]) -> List[str]:
    """Describe the risks that apply to commercial use.

    Args:
        dependencies: Analyzed dependencies.

    Returns:
        A single message naming every dependency whose detected SPDX id is
        ``'AGPL-3.0'``, or an empty list when there is none.
    """
    agpl_names = [dep.name for dep in dependencies
                  if dep.license_detected.spdx_id == 'AGPL-3.0']
    if not agpl_names:
        return []
    return [f"AGPL dependencies trigger copyleft for network services: {agpl_names}"]
def _generate_compliance_recommendations(self, analysis_results: Dict[str, Any]) -> List[str]:
    """Turn the analysis results into a prioritised list of actions.

    Args:
        analysis_results: Result dict of ``analyze_project``; the keys
            ``conflicts``, ``license_summary``, ``dependencies``,
            ``project_license`` and ``compliance_score`` are read.

    Returns:
        Recommendation strings: critical conflicts first (at most three
        descriptions), then unknown licenses, GPL use in a permissive
        project, and finally a low overall score.
    """
    advice = []

    # 1. Critical conflicts come first; only the first three are spelled out.
    critical = [conflict for conflict in analysis_results['conflicts']
                if conflict.severity == RiskLevel.CRITICAL]
    if critical:
        advice.append("CRITICAL: Address license conflicts immediately before any distribution")
        advice.extend(f"{conflict.description}" for conflict in critical[:3])

    # 2. Dependencies whose license could not be determined.
    unknown_count = analysis_results['license_summary']['unknown_licenses']
    if unknown_count > 0:
        advice.append(f"Investigate and clarify licenses for {unknown_count} dependencies with unknown licensing")

    # 3. Strong copyleft inside a permissively licensed project.
    copyleft = [dep for dep in analysis_results['dependencies']
                if dep.license_detected.license_type == LicenseType.COPYLEFT_STRONG]
    if copyleft and analysis_results.get('project_license') in ['MIT', 'Apache-2.0', 'BSD-3-Clause']:
        advice.append("Consider removing GPL dependencies or changing project license for permissive project")

    # 4. Overall score below the threshold of 70.
    if analysis_results['compliance_score'] < 70:
        advice.append("Overall compliance score is low - prioritize license cleanup")

    return advice
def generate_report(self, analysis_results: Dict[str, Any], format: str = 'text') -> str:
    """Render the analysis results as a report.

    Args:
        analysis_results: Result dict produced by ``analyze_project``.
        format: ``'json'`` for a JSON document; any other value produces
            the plain-text report.

    Returns:
        The report as a single string.
    """
    if format == 'json':
        def encode_dependency(dep):
            # The listed fields are copied one by one; the nested license
            # record is expanded with asdict().
            return {
                'name': dep.name,
                'version': dep.version,
                'ecosystem': dep.ecosystem,
                'direct': dep.direct,
                'license_declared': dep.license_declared,
                'license_detected': asdict(dep.license_detected) if dep.license_detected else None,
                'confidence': dep.confidence
            }

        payload = dict(analysis_results)
        payload['dependencies'] = [encode_dependency(dep)
                                   for dep in analysis_results['dependencies']]
        payload['conflicts'] = [asdict(conflict) for conflict in analysis_results['conflicts']]
        # default=str covers values json cannot encode natively.
        return json.dumps(payload, indent=2, default=str)

    # Plain-text report.
    banner = "=" * 60
    lines = [
        banner,
        "LICENSE COMPLIANCE REPORT",
        banner,
        f"Analysis Date: {analysis_results['timestamp']}",
        f"Project: {analysis_results['project_path']}",
        f"Project License: {analysis_results['project_license'] or 'Unknown'}",
        "",
    ]

    summary = analysis_results['license_summary']
    lines += [
        "SUMMARY:",
        f" Total Dependencies: {summary['total_dependencies']}",
        f" Compliance Score: {analysis_results['compliance_score']:.1f}/100",
        f" Overall Risk: {analysis_results['risk_assessment']['overall_risk']}",
        f" License Conflicts: {len(analysis_results['conflicts'])}",
        "",
    ]

    lines.append("LICENSE DISTRIBUTION:")
    lines.extend(f" {license_type.title()}: {count}"
                 for license_type, count in summary['license_types'].items())
    lines.append("")

    lines.append("RISK BREAKDOWN:")
    lines.extend(f" {risk_level.title()}: {count}"
                 for risk_level, count in summary['risk_levels'].items())
    lines.append("")

    conflicts = analysis_results['conflicts']
    if conflicts:
        lines += ["LICENSE CONFLICTS:", "-" * 30]
        for conflict in conflicts:
            lines += [
                f"Conflict: {conflict.dependency2} ({conflict.license2})",
                f" Issue: {conflict.description}",
                f" Severity: {conflict.severity.value.upper()}",
                # Only the first two resolution options are shown.
                f" Resolutions: {', '.join(conflict.resolution_options[:2])}",
                "",
            ]

    risky = [dep for dep in analysis_results['dependencies']
             if dep.license_detected.risk_level in [RiskLevel.HIGH, RiskLevel.CRITICAL]]
    if risky:
        lines += ["HIGH-RISK DEPENDENCIES:", "-" * 30]
        for dep in risky[:10]:  # listing is capped at ten entries
            license_name = dep.license_detected.spdx_id or dep.license_detected.name
            lines.append(f" {dep.name} v{dep.version}: {license_name} ({dep.license_detected.risk_level.value.upper()})")
        lines.append("")

    recommendations = analysis_results['recommendations']
    if recommendations:
        lines += ["RECOMMENDATIONS:", "-" * 20]
        lines.extend(f"{i}. {rec}" for i, rec in enumerate(recommendations, 1))
        lines.append("")

    lines.append(banner)
    return '\n'.join(lines)
def main():
    """Command-line entry point of the license checker.

    Parses the arguments, runs the analysis, then writes the report to
    ``--output`` or prints it.

    Exit codes: 1 when ``--policy strict`` is set and the compliance score is
    below 80, or when any error occurs; 2 when ``--warn-conflicts`` is set
    and conflicts were found.
    """
    parser = argparse.ArgumentParser(
        description='Analyze dependency licenses for compliance and conflicts',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python license_checker.py /path/to/project
python license_checker.py . --format json --output compliance.json
python license_checker.py /app --inventory deps.json --policy strict
"""
    )

    # (flags, keyword options) in the order they appear in --help.
    argument_specs = [
        (('project_path',),
         {'help': 'Path to the project directory to analyze'}),
        (('--inventory',),
         {'help': 'Path to dependency inventory JSON file'}),
        (('--format',),
         {'choices': ['text', 'json'], 'default': 'text',
          'help': 'Output format (default: text)'}),
        (('--output', '-o'),
         {'help': 'Output file path (default: stdout)'}),
        (('--policy',),
         {'choices': ['permissive', 'strict'], 'default': 'permissive',
          'help': 'License policy strictness (default: permissive)'}),
        (('--warn-conflicts',),
         {'action': 'store_true',
          'help': 'Show warnings for potential conflicts'}),
    ]
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)

    args = parser.parse_args()

    try:
        checker = LicenseChecker()
        results = checker.analyze_project(args.project_path, args.inventory)
        report = checker.generate_report(results, args.format)

        if not args.output:
            print(report)
        else:
            with open(args.output, 'w') as f:
                f.write(report)
            print(f"Compliance report saved to {args.output}")

        # A strict policy fails the run when the score is below 80.
        score_too_low = results['compliance_score'] < 80
        if args.policy == 'strict' and score_too_low:
            sys.exit(1)

        if args.warn_conflicts and results['conflicts']:
            print("\nWARNING: License conflicts detected!")
            sys.exit(2)

    except Exception as e:
        # sys.exit() raises SystemExit, which is not an Exception, so the
        # exit codes above pass through this handler untouched.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()