Files
CleanArchitecture-template/.brain/.agent/skills/engineering-advanced-skills/dependency-auditor/scripts/license_checker.py
2026-03-12 15:17:52 +07:00

996 lines
43 KiB
Python

#!/usr/bin/env python3
"""
License Checker - Dependency license compliance and conflict analysis tool.
This script analyzes dependency licenses from package metadata, classifies them
into risk categories, detects license conflicts, and generates compliance
reports with actionable recommendations for legal risk management.
Author: Claude Skills Engineering Team
License: MIT
"""
import json
import os
import sys
import argparse
from typing import Dict, List, Set, Any, Optional, Tuple
from pathlib import Path
from dataclasses import dataclass, asdict
from datetime import datetime
import re
from enum import Enum
class LicenseType(Enum):
"""License classification types."""
PERMISSIVE = "permissive"
COPYLEFT_STRONG = "copyleft_strong"
COPYLEFT_WEAK = "copyleft_weak"
PROPRIETARY = "proprietary"
DUAL = "dual"
UNKNOWN = "unknown"
class RiskLevel(Enum):
"""Risk assessment levels."""
LOW = "low"
MEDIUM = "medium"
HIGH = "high"
CRITICAL = "critical"
@dataclass
class LicenseInfo:
"""Represents license information for a dependency."""
name: str
spdx_id: Optional[str]
license_type: LicenseType
risk_level: RiskLevel
description: str
restrictions: List[str]
obligations: List[str]
compatibility: Dict[str, bool]
@dataclass
class DependencyLicense:
"""Represents a dependency with its license information."""
name: str
version: str
ecosystem: str
direct: bool
license_declared: Optional[str]
license_detected: Optional[LicenseInfo]
license_files: List[str]
confidence: float
@dataclass
class LicenseConflict:
"""Represents a license compatibility conflict."""
dependency1: str
license1: str
dependency2: str
license2: str
conflict_type: str
severity: RiskLevel
description: str
resolution_options: List[str]
class LicenseChecker:
"""Main license checking and compliance analysis class."""
def __init__(self):
self.license_database = self._build_license_database()
self.compatibility_matrix = self._build_compatibility_matrix()
self.license_patterns = self._build_license_patterns()
def _build_license_database(self) -> Dict[str, LicenseInfo]:
"""Build comprehensive license database with risk classifications."""
return {
# Permissive Licenses (Low Risk)
'MIT': LicenseInfo(
name='MIT License',
spdx_id='MIT',
license_type=LicenseType.PERMISSIVE,
risk_level=RiskLevel.LOW,
description='Very permissive license with minimal restrictions',
restrictions=['Include copyright notice', 'Include license text'],
obligations=['Attribution'],
compatibility={
'commercial': True, 'modification': True, 'distribution': True,
'private_use': True, 'patent_grant': False
}
),
'Apache-2.0': LicenseInfo(
name='Apache License 2.0',
spdx_id='Apache-2.0',
license_type=LicenseType.PERMISSIVE,
risk_level=RiskLevel.LOW,
description='Permissive license with patent protection',
restrictions=['Include copyright notice', 'Include license text',
'State changes', 'Include NOTICE file'],
obligations=['Attribution', 'Patent grant'],
compatibility={
'commercial': True, 'modification': True, 'distribution': True,
'private_use': True, 'patent_grant': True
}
),
'BSD-3-Clause': LicenseInfo(
name='BSD 3-Clause License',
spdx_id='BSD-3-Clause',
license_type=LicenseType.PERMISSIVE,
risk_level=RiskLevel.LOW,
description='Permissive license with non-endorsement clause',
restrictions=['Include copyright notice', 'Include license text',
'No endorsement using author names'],
obligations=['Attribution'],
compatibility={
'commercial': True, 'modification': True, 'distribution': True,
'private_use': True, 'patent_grant': False
}
),
'BSD-2-Clause': LicenseInfo(
name='BSD 2-Clause License',
spdx_id='BSD-2-Clause',
license_type=LicenseType.PERMISSIVE,
risk_level=RiskLevel.LOW,
description='Very permissive license similar to MIT',
restrictions=['Include copyright notice', 'Include license text'],
obligations=['Attribution'],
compatibility={
'commercial': True, 'modification': True, 'distribution': True,
'private_use': True, 'patent_grant': False
}
),
'ISC': LicenseInfo(
name='ISC License',
spdx_id='ISC',
license_type=LicenseType.PERMISSIVE,
risk_level=RiskLevel.LOW,
description='Functionally equivalent to MIT license',
restrictions=['Include copyright notice'],
obligations=['Attribution'],
compatibility={
'commercial': True, 'modification': True, 'distribution': True,
'private_use': True, 'patent_grant': False
}
),
# Weak Copyleft Licenses (Medium Risk)
'MPL-2.0': LicenseInfo(
name='Mozilla Public License 2.0',
spdx_id='MPL-2.0',
license_type=LicenseType.COPYLEFT_WEAK,
risk_level=RiskLevel.MEDIUM,
description='File-level copyleft license',
restrictions=['Disclose source of modified files', 'Include copyright notice',
'Include license text', 'State changes'],
obligations=['Source disclosure (modified files only)'],
compatibility={
'commercial': True, 'modification': True, 'distribution': True,
'private_use': True, 'patent_grant': True
}
),
'LGPL-2.1': LicenseInfo(
name='GNU Lesser General Public License 2.1',
spdx_id='LGPL-2.1',
license_type=LicenseType.COPYLEFT_WEAK,
risk_level=RiskLevel.MEDIUM,
description='Library-level copyleft license',
restrictions=['Disclose source of library modifications', 'Include copyright notice',
'Include license text', 'Allow relinking'],
obligations=['Source disclosure (library modifications)', 'Dynamic linking preferred'],
compatibility={
'commercial': True, 'modification': True, 'distribution': True,
'private_use': True, 'patent_grant': False
}
),
'LGPL-3.0': LicenseInfo(
name='GNU Lesser General Public License 3.0',
spdx_id='LGPL-3.0',
license_type=LicenseType.COPYLEFT_WEAK,
risk_level=RiskLevel.MEDIUM,
description='Library-level copyleft with patent provisions',
restrictions=['Disclose source of library modifications', 'Include copyright notice',
'Include license text', 'Allow relinking', 'Anti-tivoization'],
obligations=['Source disclosure (library modifications)', 'Patent grant'],
compatibility={
'commercial': True, 'modification': True, 'distribution': True,
'private_use': True, 'patent_grant': True
}
),
# Strong Copyleft Licenses (High Risk)
'GPL-2.0': LicenseInfo(
name='GNU General Public License 2.0',
spdx_id='GPL-2.0',
license_type=LicenseType.COPYLEFT_STRONG,
risk_level=RiskLevel.HIGH,
description='Strong copyleft requiring full source disclosure',
restrictions=['Disclose entire source code', 'Include copyright notice',
'Include license text', 'Use same license'],
obligations=['Full source disclosure', 'License compatibility'],
compatibility={
'commercial': False, 'modification': True, 'distribution': True,
'private_use': True, 'patent_grant': False
}
),
'GPL-3.0': LicenseInfo(
name='GNU General Public License 3.0',
spdx_id='GPL-3.0',
license_type=LicenseType.COPYLEFT_STRONG,
risk_level=RiskLevel.HIGH,
description='Strong copyleft with patent and hardware provisions',
restrictions=['Disclose entire source code', 'Include copyright notice',
'Include license text', 'Use same license', 'Anti-tivoization'],
obligations=['Full source disclosure', 'Patent grant', 'License compatibility'],
compatibility={
'commercial': False, 'modification': True, 'distribution': True,
'private_use': True, 'patent_grant': True
}
),
'AGPL-3.0': LicenseInfo(
name='GNU Affero General Public License 3.0',
spdx_id='AGPL-3.0',
license_type=LicenseType.COPYLEFT_STRONG,
risk_level=RiskLevel.CRITICAL,
description='Network copyleft extending GPL to SaaS',
restrictions=['Disclose entire source code', 'Include copyright notice',
'Include license text', 'Use same license', 'Network use triggers copyleft'],
obligations=['Full source disclosure', 'Network service source disclosure'],
compatibility={
'commercial': False, 'modification': True, 'distribution': True,
'private_use': True, 'patent_grant': True
}
),
# Proprietary/Commercial Licenses (High Risk)
'PROPRIETARY': LicenseInfo(
name='Proprietary License',
spdx_id=None,
license_type=LicenseType.PROPRIETARY,
risk_level=RiskLevel.HIGH,
description='Commercial or custom proprietary license',
restrictions=['Varies by license', 'Often no redistribution',
'May require commercial license'],
obligations=['License agreement compliance', 'Payment obligations'],
compatibility={
'commercial': False, 'modification': False, 'distribution': False,
'private_use': True, 'patent_grant': False
}
),
# Unknown/Unlicensed (Critical Risk)
'UNKNOWN': LicenseInfo(
name='Unknown License',
spdx_id=None,
license_type=LicenseType.UNKNOWN,
risk_level=RiskLevel.CRITICAL,
description='No license detected or ambiguous licensing',
restrictions=['Unknown', 'Assume no rights granted'],
obligations=['Investigate and clarify licensing'],
compatibility={
'commercial': False, 'modification': False, 'distribution': False,
'private_use': False, 'patent_grant': False
}
)
}
def _build_compatibility_matrix(self) -> Dict[str, Dict[str, bool]]:
"""Build license compatibility matrix."""
return {
'MIT': {
'MIT': True, 'Apache-2.0': True, 'BSD-3-Clause': True, 'BSD-2-Clause': True,
'ISC': True, 'MPL-2.0': True, 'LGPL-2.1': True, 'LGPL-3.0': True,
'GPL-2.0': False, 'GPL-3.0': False, 'AGPL-3.0': False, 'PROPRIETARY': False
},
'Apache-2.0': {
'MIT': True, 'Apache-2.0': True, 'BSD-3-Clause': True, 'BSD-2-Clause': True,
'ISC': True, 'MPL-2.0': True, 'LGPL-2.1': False, 'LGPL-3.0': True,
'GPL-2.0': False, 'GPL-3.0': True, 'AGPL-3.0': True, 'PROPRIETARY': False
},
'GPL-2.0': {
'MIT': True, 'Apache-2.0': False, 'BSD-3-Clause': True, 'BSD-2-Clause': True,
'ISC': True, 'MPL-2.0': False, 'LGPL-2.1': True, 'LGPL-3.0': False,
'GPL-2.0': True, 'GPL-3.0': False, 'AGPL-3.0': False, 'PROPRIETARY': False
},
'GPL-3.0': {
'MIT': True, 'Apache-2.0': True, 'BSD-3-Clause': True, 'BSD-2-Clause': True,
'ISC': True, 'MPL-2.0': True, 'LGPL-2.1': False, 'LGPL-3.0': True,
'GPL-2.0': False, 'GPL-3.0': True, 'AGPL-3.0': True, 'PROPRIETARY': False
},
'AGPL-3.0': {
'MIT': True, 'Apache-2.0': True, 'BSD-3-Clause': True, 'BSD-2-Clause': True,
'ISC': True, 'MPL-2.0': True, 'LGPL-2.1': False, 'LGPL-3.0': True,
'GPL-2.0': False, 'GPL-3.0': True, 'AGPL-3.0': True, 'PROPRIETARY': False
}
}
def _build_license_patterns(self) -> Dict[str, List[str]]:
"""Build license detection patterns for text analysis."""
return {
'MIT': [
r'MIT License',
r'Permission is hereby granted, free of charge',
r'THE SOFTWARE IS PROVIDED "AS IS"'
],
'Apache-2.0': [
r'Apache License, Version 2\.0',
r'Licensed under the Apache License',
r'http://www\.apache\.org/licenses/LICENSE-2\.0'
],
'GPL-2.0': [
r'GNU GENERAL PUBLIC LICENSE\s+Version 2',
r'This program is free software.*GPL.*version 2',
r'http://www\.gnu\.org/licenses/gpl-2\.0'
],
'GPL-3.0': [
r'GNU GENERAL PUBLIC LICENSE\s+Version 3',
r'This program is free software.*GPL.*version 3',
r'http://www\.gnu\.org/licenses/gpl-3\.0'
],
'BSD-3-Clause': [
r'BSD 3-Clause License',
r'Redistributions of source code must retain',
r'Neither the name.*may be used to endorse'
],
'BSD-2-Clause': [
r'BSD 2-Clause License',
r'Redistributions of source code must retain.*Redistributions in binary form'
]
}
def analyze_project(self, project_path: str, dependency_inventory: Optional[str] = None) -> Dict[str, Any]:
"""Analyze license compliance for a project."""
project_path = Path(project_path)
analysis_results = {
'timestamp': datetime.now().isoformat(),
'project_path': str(project_path),
'project_license': self._detect_project_license(project_path),
'dependencies': [],
'license_summary': {},
'conflicts': [],
'compliance_score': 0.0,
'risk_assessment': {},
'recommendations': []
}
# Load dependencies from inventory or scan project
if dependency_inventory:
dependencies = self._load_dependency_inventory(dependency_inventory)
else:
dependencies = self._scan_project_dependencies(project_path)
# Analyze each dependency's license
for dep in dependencies:
license_info = self._analyze_dependency_license(dep, project_path)
analysis_results['dependencies'].append(license_info)
# Generate license summary
analysis_results['license_summary'] = self._generate_license_summary(
analysis_results['dependencies']
)
# Detect conflicts
analysis_results['conflicts'] = self._detect_license_conflicts(
analysis_results['project_license'],
analysis_results['dependencies']
)
# Calculate compliance score
analysis_results['compliance_score'] = self._calculate_compliance_score(
analysis_results['dependencies'],
analysis_results['conflicts']
)
# Generate risk assessment
analysis_results['risk_assessment'] = self._generate_risk_assessment(
analysis_results['dependencies'],
analysis_results['conflicts']
)
# Generate recommendations
analysis_results['recommendations'] = self._generate_compliance_recommendations(
analysis_results
)
return analysis_results
def _detect_project_license(self, project_path: Path) -> Optional[str]:
"""Detect the main project license."""
license_files = ['LICENSE', 'LICENSE.txt', 'LICENSE.md', 'COPYING', 'COPYING.txt']
for license_file in license_files:
license_path = project_path / license_file
if license_path.exists():
try:
with open(license_path, 'r', encoding='utf-8') as f:
content = f.read()
# Analyze license content
detected_license = self._detect_license_from_text(content)
if detected_license:
return detected_license
except Exception as e:
print(f"Error reading license file {license_path}: {e}")
return None
def _detect_license_from_text(self, text: str) -> Optional[str]:
"""Detect license type from text content."""
text_upper = text.upper()
for license_id, patterns in self.license_patterns.items():
for pattern in patterns:
if re.search(pattern, text, re.IGNORECASE):
return license_id
# Common license text patterns
if 'MIT' in text_upper and 'PERMISSION IS HEREBY GRANTED' in text_upper:
return 'MIT'
elif 'APACHE LICENSE' in text_upper and 'VERSION 2.0' in text_upper:
return 'Apache-2.0'
elif 'GPL' in text_upper and 'VERSION 2' in text_upper:
return 'GPL-2.0'
elif 'GPL' in text_upper and 'VERSION 3' in text_upper:
return 'GPL-3.0'
return None
def _load_dependency_inventory(self, inventory_path: str) -> List[Dict[str, Any]]:
"""Load dependencies from JSON inventory file."""
try:
with open(inventory_path, 'r') as f:
data = json.load(f)
if 'dependencies' in data:
return data['dependencies']
else:
return data if isinstance(data, list) else []
except Exception as e:
print(f"Error loading dependency inventory: {e}")
return []
def _scan_project_dependencies(self, project_path: Path) -> List[Dict[str, Any]]:
"""Basic dependency scanning - in practice, would integrate with dep_scanner.py."""
dependencies = []
# Simple package.json parsing as example
package_json = project_path / 'package.json'
if package_json.exists():
try:
with open(package_json, 'r') as f:
data = json.load(f)
for dep_type in ['dependencies', 'devDependencies']:
if dep_type in data:
for name, version in data[dep_type].items():
dependencies.append({
'name': name,
'version': version,
'ecosystem': 'npm',
'direct': True
})
except Exception as e:
print(f"Error parsing package.json: {e}")
return dependencies
def _analyze_dependency_license(self, dependency: Dict[str, Any], project_path: Path) -> DependencyLicense:
"""Analyze license information for a single dependency."""
dep_license = DependencyLicense(
name=dependency['name'],
version=dependency.get('version', ''),
ecosystem=dependency.get('ecosystem', ''),
direct=dependency.get('direct', False),
license_declared=dependency.get('license'),
license_detected=None,
license_files=[],
confidence=0.0
)
# Try to detect license from various sources
declared_license = dependency.get('license')
if declared_license:
license_info = self._resolve_license_info(declared_license)
if license_info:
dep_license.license_detected = license_info
dep_license.confidence = 0.9
# For unknown licenses, try to find license files in node_modules (example)
if not dep_license.license_detected and dep_license.ecosystem == 'npm':
node_modules_path = project_path / 'node_modules' / dep_license.name
if node_modules_path.exists():
license_info = self._scan_package_directory(node_modules_path)
if license_info:
dep_license.license_detected = license_info
dep_license.confidence = 0.7
# Default to unknown if no license detected
if not dep_license.license_detected:
dep_license.license_detected = self.license_database['UNKNOWN']
dep_license.confidence = 0.0
return dep_license
def _resolve_license_info(self, license_string: str) -> Optional[LicenseInfo]:
"""Resolve license string to LicenseInfo object."""
if not license_string:
return None
license_string = license_string.strip()
# Direct SPDX ID match
if license_string in self.license_database:
return self.license_database[license_string]
# Common variations and mappings
license_mappings = {
'mit': 'MIT',
'apache': 'Apache-2.0',
'apache-2.0': 'Apache-2.0',
'apache 2.0': 'Apache-2.0',
'bsd': 'BSD-3-Clause',
'bsd-3-clause': 'BSD-3-Clause',
'bsd-2-clause': 'BSD-2-Clause',
'gpl-2.0': 'GPL-2.0',
'gpl-3.0': 'GPL-3.0',
'lgpl-2.1': 'LGPL-2.1',
'lgpl-3.0': 'LGPL-3.0',
'mpl-2.0': 'MPL-2.0',
'isc': 'ISC',
'unlicense': 'MIT', # Treat as permissive
'public domain': 'MIT', # Treat as permissive
'proprietary': 'PROPRIETARY',
'commercial': 'PROPRIETARY'
}
license_lower = license_string.lower()
for pattern, mapped_license in license_mappings.items():
if pattern in license_lower:
return self.license_database.get(mapped_license)
return None
def _scan_package_directory(self, package_path: Path) -> Optional[LicenseInfo]:
"""Scan package directory for license information."""
license_files = ['LICENSE', 'LICENSE.txt', 'LICENSE.md', 'COPYING', 'README.md', 'package.json']
for license_file in license_files:
file_path = package_path / license_file
if file_path.exists():
try:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
# Try to detect license from content
if license_file == 'package.json':
# Parse JSON for license field
try:
data = json.loads(content)
license_field = data.get('license')
if license_field:
return self._resolve_license_info(license_field)
except:
continue
else:
# Analyze text content
detected_license = self._detect_license_from_text(content)
if detected_license:
return self.license_database.get(detected_license)
except Exception:
continue
return None
def _generate_license_summary(self, dependencies: List[DependencyLicense]) -> Dict[str, Any]:
"""Generate summary of license distribution."""
summary = {
'total_dependencies': len(dependencies),
'license_types': {},
'risk_levels': {},
'unknown_licenses': 0,
'direct_dependencies': 0,
'transitive_dependencies': 0
}
for dep in dependencies:
# Count by license type
license_type = dep.license_detected.license_type.value
summary['license_types'][license_type] = summary['license_types'].get(license_type, 0) + 1
# Count by risk level
risk_level = dep.license_detected.risk_level.value
summary['risk_levels'][risk_level] = summary['risk_levels'].get(risk_level, 0) + 1
# Count unknowns
if dep.license_detected.license_type == LicenseType.UNKNOWN:
summary['unknown_licenses'] += 1
# Count direct vs transitive
if dep.direct:
summary['direct_dependencies'] += 1
else:
summary['transitive_dependencies'] += 1
return summary
def _detect_license_conflicts(self, project_license: Optional[str],
dependencies: List[DependencyLicense]) -> List[LicenseConflict]:
"""Detect license compatibility conflicts."""
conflicts = []
if not project_license:
# If no project license detected, flag as potential issue
for dep in dependencies:
if dep.license_detected.risk_level in [RiskLevel.HIGH, RiskLevel.CRITICAL]:
conflicts.append(LicenseConflict(
dependency1='Project',
license1='Unknown',
dependency2=dep.name,
license2=dep.license_detected.spdx_id or dep.license_detected.name,
conflict_type='Unknown project license',
severity=RiskLevel.HIGH,
description=f'Project license unknown, dependency {dep.name} has {dep.license_detected.risk_level.value} risk license',
resolution_options=['Define project license', 'Review dependency usage']
))
return conflicts
project_license_info = self.license_database.get(project_license)
if not project_license_info:
return conflicts
# Check compatibility with project license
for dep in dependencies:
dep_license_id = dep.license_detected.spdx_id or 'UNKNOWN'
# Check compatibility matrix
if project_license in self.compatibility_matrix:
compatibility = self.compatibility_matrix[project_license].get(dep_license_id, False)
if not compatibility:
severity = self._determine_conflict_severity(project_license_info, dep.license_detected)
conflicts.append(LicenseConflict(
dependency1='Project',
license1=project_license,
dependency2=dep.name,
license2=dep_license_id,
conflict_type='License incompatibility',
severity=severity,
description=f'Project license {project_license} is incompatible with dependency license {dep_license_id}',
resolution_options=self._generate_conflict_resolutions(project_license, dep_license_id)
))
# Check for GPL contamination in permissive projects
if project_license_info.license_type == LicenseType.PERMISSIVE:
for dep in dependencies:
if dep.license_detected.license_type == LicenseType.COPYLEFT_STRONG:
conflicts.append(LicenseConflict(
dependency1='Project',
license1=project_license,
dependency2=dep.name,
license2=dep.license_detected.spdx_id or dep.license_detected.name,
conflict_type='GPL contamination',
severity=RiskLevel.CRITICAL,
description=f'GPL dependency {dep.name} may contaminate permissive project',
resolution_options=['Remove GPL dependency', 'Change project license to GPL',
'Use dynamic linking', 'Find alternative dependency']
))
return conflicts
def _determine_conflict_severity(self, project_license: LicenseInfo, dep_license: LicenseInfo) -> RiskLevel:
"""Determine severity of a license conflict."""
if dep_license.license_type == LicenseType.UNKNOWN:
return RiskLevel.CRITICAL
elif (project_license.license_type == LicenseType.PERMISSIVE and
dep_license.license_type == LicenseType.COPYLEFT_STRONG):
return RiskLevel.CRITICAL
elif dep_license.license_type == LicenseType.PROPRIETARY:
return RiskLevel.HIGH
else:
return RiskLevel.MEDIUM
def _generate_conflict_resolutions(self, project_license: str, dep_license: str) -> List[str]:
"""Generate resolution options for license conflicts."""
resolutions = []
if 'GPL' in dep_license:
resolutions.extend([
'Find alternative non-GPL dependency',
'Use dynamic linking if possible',
'Consider changing project license to GPL-compatible',
'Remove the dependency if not essential'
])
elif dep_license == 'PROPRIETARY':
resolutions.extend([
'Obtain commercial license',
'Find open-source alternative',
'Remove dependency if not essential',
'Negotiate license terms'
])
else:
resolutions.extend([
'Review license compatibility carefully',
'Consult legal counsel',
'Find alternative dependency',
'Consider license exception'
])
return resolutions
def _calculate_compliance_score(self, dependencies: List[DependencyLicense],
conflicts: List[LicenseConflict]) -> float:
"""Calculate overall compliance score (0-100)."""
if not dependencies:
return 100.0
base_score = 100.0
# Deduct points for unknown licenses
unknown_count = sum(1 for dep in dependencies
if dep.license_detected.license_type == LicenseType.UNKNOWN)
base_score -= (unknown_count / len(dependencies)) * 30
# Deduct points for high-risk licenses
high_risk_count = sum(1 for dep in dependencies
if dep.license_detected.risk_level in [RiskLevel.HIGH, RiskLevel.CRITICAL])
base_score -= (high_risk_count / len(dependencies)) * 20
# Deduct points for conflicts
if conflicts:
critical_conflicts = sum(1 for c in conflicts if c.severity == RiskLevel.CRITICAL)
high_conflicts = sum(1 for c in conflicts if c.severity == RiskLevel.HIGH)
base_score -= critical_conflicts * 15
base_score -= high_conflicts * 10
return max(0.0, base_score)
def _generate_risk_assessment(self, dependencies: List[DependencyLicense],
conflicts: List[LicenseConflict]) -> Dict[str, Any]:
"""Generate comprehensive risk assessment."""
return {
'overall_risk': self._calculate_overall_risk(dependencies, conflicts),
'license_risk_breakdown': self._calculate_license_risks(dependencies),
'conflict_summary': {
'total_conflicts': len(conflicts),
'critical_conflicts': len([c for c in conflicts if c.severity == RiskLevel.CRITICAL]),
'high_conflicts': len([c for c in conflicts if c.severity == RiskLevel.HIGH])
},
'distribution_risks': self._assess_distribution_risks(dependencies),
'commercial_risks': self._assess_commercial_risks(dependencies)
}
def _calculate_overall_risk(self, dependencies: List[DependencyLicense],
conflicts: List[LicenseConflict]) -> str:
"""Calculate overall project risk level."""
if any(c.severity == RiskLevel.CRITICAL for c in conflicts):
return 'CRITICAL'
elif any(dep.license_detected.risk_level == RiskLevel.CRITICAL for dep in dependencies):
return 'CRITICAL'
elif any(c.severity == RiskLevel.HIGH for c in conflicts):
return 'HIGH'
elif any(dep.license_detected.risk_level == RiskLevel.HIGH for dep in dependencies):
return 'HIGH'
elif any(dep.license_detected.risk_level == RiskLevel.MEDIUM for dep in dependencies):
return 'MEDIUM'
else:
return 'LOW'
def _calculate_license_risks(self, dependencies: List[DependencyLicense]) -> Dict[str, int]:
"""Calculate breakdown of license risks."""
risks = {'low': 0, 'medium': 0, 'high': 0, 'critical': 0}
for dep in dependencies:
risk_level = dep.license_detected.risk_level.value
risks[risk_level] += 1
return risks
def _assess_distribution_risks(self, dependencies: List[DependencyLicense]) -> List[str]:
"""Assess risks related to software distribution."""
risks = []
gpl_deps = [dep for dep in dependencies
if dep.license_detected.license_type == LicenseType.COPYLEFT_STRONG]
if gpl_deps:
risks.append(f"GPL dependencies require source code disclosure: {[d.name for d in gpl_deps]}")
proprietary_deps = [dep for dep in dependencies
if dep.license_detected.license_type == LicenseType.PROPRIETARY]
if proprietary_deps:
risks.append(f"Proprietary dependencies may require commercial licenses: {[d.name for d in proprietary_deps]}")
unknown_deps = [dep for dep in dependencies
if dep.license_detected.license_type == LicenseType.UNKNOWN]
if unknown_deps:
risks.append(f"Unknown licenses pose legal uncertainty: {[d.name for d in unknown_deps]}")
return risks
def _assess_commercial_risks(self, dependencies: List[DependencyLicense]) -> List[str]:
"""Assess risks for commercial usage."""
risks = []
agpl_deps = [dep for dep in dependencies
if dep.license_detected.spdx_id == 'AGPL-3.0']
if agpl_deps:
risks.append(f"AGPL dependencies trigger copyleft for network services: {[d.name for d in agpl_deps]}")
return risks
def _generate_compliance_recommendations(self, analysis_results: Dict[str, Any]) -> List[str]:
"""Generate actionable compliance recommendations."""
recommendations = []
# Address critical issues first
critical_conflicts = [c for c in analysis_results['conflicts']
if c.severity == RiskLevel.CRITICAL]
if critical_conflicts:
recommendations.append("CRITICAL: Address license conflicts immediately before any distribution")
for conflict in critical_conflicts[:3]: # Top 3
recommendations.append(f"{conflict.description}")
# Unknown licenses
unknown_count = analysis_results['license_summary']['unknown_licenses']
if unknown_count > 0:
recommendations.append(f"Investigate and clarify licenses for {unknown_count} dependencies with unknown licensing")
# GPL contamination
gpl_deps = [dep for dep in analysis_results['dependencies']
if dep.license_detected.license_type == LicenseType.COPYLEFT_STRONG]
if gpl_deps and analysis_results.get('project_license') in ['MIT', 'Apache-2.0', 'BSD-3-Clause']:
recommendations.append("Consider removing GPL dependencies or changing project license for permissive project")
# Compliance score
if analysis_results['compliance_score'] < 70:
recommendations.append("Overall compliance score is low - prioritize license cleanup")
return recommendations
def generate_report(self, analysis_results: Dict[str, Any], format: str = 'text') -> str:
"""Generate compliance report in specified format."""
if format == 'json':
# Convert dataclass objects for JSON serialization
serializable_results = analysis_results.copy()
serializable_results['dependencies'] = [
{
'name': dep.name,
'version': dep.version,
'ecosystem': dep.ecosystem,
'direct': dep.direct,
'license_declared': dep.license_declared,
'license_detected': asdict(dep.license_detected) if dep.license_detected else None,
'confidence': dep.confidence
}
for dep in analysis_results['dependencies']
]
serializable_results['conflicts'] = [asdict(conflict) for conflict in analysis_results['conflicts']]
return json.dumps(serializable_results, indent=2, default=str)
# Text format report
report = []
report.append("=" * 60)
report.append("LICENSE COMPLIANCE REPORT")
report.append("=" * 60)
report.append(f"Analysis Date: {analysis_results['timestamp']}")
report.append(f"Project: {analysis_results['project_path']}")
report.append(f"Project License: {analysis_results['project_license'] or 'Unknown'}")
report.append("")
# Summary
summary = analysis_results['license_summary']
report.append("SUMMARY:")
report.append(f" Total Dependencies: {summary['total_dependencies']}")
report.append(f" Compliance Score: {analysis_results['compliance_score']:.1f}/100")
report.append(f" Overall Risk: {analysis_results['risk_assessment']['overall_risk']}")
report.append(f" License Conflicts: {len(analysis_results['conflicts'])}")
report.append("")
# License distribution
report.append("LICENSE DISTRIBUTION:")
for license_type, count in summary['license_types'].items():
report.append(f" {license_type.title()}: {count}")
report.append("")
# Risk breakdown
report.append("RISK BREAKDOWN:")
for risk_level, count in summary['risk_levels'].items():
report.append(f" {risk_level.title()}: {count}")
report.append("")
# Conflicts
if analysis_results['conflicts']:
report.append("LICENSE CONFLICTS:")
report.append("-" * 30)
for conflict in analysis_results['conflicts']:
report.append(f"Conflict: {conflict.dependency2} ({conflict.license2})")
report.append(f" Issue: {conflict.description}")
report.append(f" Severity: {conflict.severity.value.upper()}")
report.append(f" Resolutions: {', '.join(conflict.resolution_options[:2])}")
report.append("")
# High-risk dependencies
high_risk_deps = [dep for dep in analysis_results['dependencies']
if dep.license_detected.risk_level in [RiskLevel.HIGH, RiskLevel.CRITICAL]]
if high_risk_deps:
report.append("HIGH-RISK DEPENDENCIES:")
report.append("-" * 30)
for dep in high_risk_deps[:10]: # Top 10
license_name = dep.license_detected.spdx_id or dep.license_detected.name
report.append(f" {dep.name} v{dep.version}: {license_name} ({dep.license_detected.risk_level.value.upper()})")
report.append("")
# Recommendations
if analysis_results['recommendations']:
report.append("RECOMMENDATIONS:")
report.append("-" * 20)
for i, rec in enumerate(analysis_results['recommendations'], 1):
report.append(f"{i}. {rec}")
report.append("")
report.append("=" * 60)
return '\n'.join(report)
def main():
"""Main entry point for the license checker."""
parser = argparse.ArgumentParser(
description='Analyze dependency licenses for compliance and conflicts',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
python license_checker.py /path/to/project
python license_checker.py . --format json --output compliance.json
python license_checker.py /app --inventory deps.json --policy strict
"""
)
parser.add_argument('project_path',
help='Path to the project directory to analyze')
parser.add_argument('--inventory',
help='Path to dependency inventory JSON file')
parser.add_argument('--format', choices=['text', 'json'], default='text',
help='Output format (default: text)')
parser.add_argument('--output', '-o',
help='Output file path (default: stdout)')
parser.add_argument('--policy', choices=['permissive', 'strict'], default='permissive',
help='License policy strictness (default: permissive)')
parser.add_argument('--warn-conflicts', action='store_true',
help='Show warnings for potential conflicts')
args = parser.parse_args()
try:
checker = LicenseChecker()
results = checker.analyze_project(args.project_path, args.inventory)
report = checker.generate_report(results, args.format)
if args.output:
with open(args.output, 'w') as f:
f.write(report)
print(f"Compliance report saved to {args.output}")
else:
print(report)
# Exit with error code for policy violations
if args.policy == 'strict' and results['compliance_score'] < 80:
sys.exit(1)
if args.warn_conflicts and results['conflicts']:
print("\nWARNING: License conflicts detected!")
sys.exit(2)
except Exception as e:
print(f"Error: {e}", file=sys.stderr)
sys.exit(1)
if __name__ == '__main__':
main()