996 lines
43 KiB
Python
996 lines
43 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
License Checker - Dependency license compliance and conflict analysis tool.
|
|
|
|
This script analyzes dependency licenses from package metadata, classifies them
|
|
into risk categories, detects license conflicts, and generates compliance
|
|
reports with actionable recommendations for legal risk management.
|
|
|
|
Author: Claude Skills Engineering Team
|
|
License: MIT
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
import argparse
|
|
from typing import Dict, List, Set, Any, Optional, Tuple
|
|
from pathlib import Path
|
|
from dataclasses import dataclass, asdict
|
|
from datetime import datetime
|
|
import re
|
|
from enum import Enum
|
|
|
|
class LicenseType(Enum):
    """License classification types.

    Each member's string value is what the summary code uses as a dictionary
    key when tallying dependencies by license type.
    """
    # Assigned in the built-in database to MIT, Apache-2.0, BSD and ISC.
    PERMISSIVE = "permissive"
    # Assigned in the built-in database to GPL-2.0, GPL-3.0 and AGPL-3.0.
    COPYLEFT_STRONG = "copyleft_strong"
    # Assigned in the built-in database to MPL-2.0, LGPL-2.1 and LGPL-3.0.
    COPYLEFT_WEAK = "copyleft_weak"
    # Assigned to the catch-all 'PROPRIETARY' database entry.
    PROPRIETARY = "proprietary"
    # NOTE(review): no entry in the visible database uses DUAL - confirm usage.
    DUAL = "dual"
    # Assigned to the 'UNKNOWN' fallback entry used when nothing is detected.
    UNKNOWN = "unknown"
|
|
|
|
class RiskLevel(Enum):
    """Risk assessment levels.

    Used both as the per-license risk rating and as the severity of a
    detected conflict. The string values double as dictionary keys in the
    risk breakdown ('low', 'medium', 'high', 'critical').
    """
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
|
|
|
|
@dataclass
class LicenseInfo:
    """Represents license information for a dependency.

    One instance describes one license: how it is identified, how risky it is
    considered, and what it requires of a project that uses it.
    """
    # Human-readable license name, e.g. 'MIT License'.
    name: str
    # SPDX identifier; None for the 'PROPRIETARY' and 'UNKNOWN' database entries.
    spdx_id: Optional[str]
    # Broad classification (permissive, copyleft, proprietary, ...).
    license_type: LicenseType
    # Risk rating attached to the license itself.
    risk_level: RiskLevel
    # One-line summary of the license.
    description: str
    # Conditions the license imposes, as free-text strings.
    restrictions: List[str]
    # Duties the license creates for users, as free-text strings.
    obligations: List[str]
    # Boolean usage flags; the built-in entries use the keys 'commercial',
    # 'modification', 'distribution', 'private_use' and 'patent_grant'.
    compatibility: Dict[str, bool]
|
|
|
|
@dataclass
class DependencyLicense:
    """Represents a dependency with its license information."""
    # Package name as given in the dependency record.
    name: str
    # Version string from the dependency record ('' when absent).
    version: str
    # Package ecosystem, e.g. 'npm' for entries read from package.json.
    ecosystem: str
    # True for a direct dependency; counted as transitive otherwise.
    direct: bool
    # Raw 'license' value taken from the dependency record, if any.
    license_declared: Optional[str]
    # Resolved license; the analyzer falls back to the 'UNKNOWN' database entry.
    license_detected: Optional[LicenseInfo]
    # Initialised to an empty list by the analyzer; not populated in the visible code.
    license_files: List[str]
    # Detection confidence: the analyzer sets 0.9 for a resolved declared
    # license, 0.7 for one found by scanning node_modules, 0.0 when unknown.
    confidence: float
|
|
|
|
@dataclass
class LicenseConflict:
    """Represents a license compatibility conflict.

    A conflict pairs two parties and their licenses. In the visible detection
    code the first party is always the literal 'Project'.
    """
    # First party of the conflict ('Project' in the visible detection code).
    dependency1: str
    # License identifier of the first party.
    license1: str
    # Name of the conflicting dependency.
    dependency2: str
    # License identifier (or name) of the conflicting dependency.
    license2: str
    # Category label, e.g. 'License incompatibility' or 'GPL contamination'.
    conflict_type: str
    # How serious the conflict is.
    severity: RiskLevel
    # Human-readable explanation of the conflict.
    description: str
    # Suggested ways to resolve the conflict.
    resolution_options: List[str]
|
|
|
|
class LicenseChecker:
|
|
"""Main license checking and compliance analysis class."""
|
|
|
|
def __init__(self) -> None:
    """Initialise the checker by building its three lookup tables."""
    # License identifier -> LicenseInfo catalogue.
    self.license_database = self._build_license_database()
    # Project license -> {dependency license -> compatible?}.
    self.compatibility_matrix = self._build_compatibility_matrix()
    # License identifier -> regex patterns used for text detection.
    self.license_patterns = self._build_license_patterns()
|
|
|
|
def _build_license_database(self) -> Dict[str, LicenseInfo]:
    """Assemble the built-in catalogue of known licenses.

    Returns:
        A dict mapping a license key (the SPDX id, or 'PROPRIETARY' /
        'UNKNOWN' for the two catch-all entries) to its ``LicenseInfo``.
        Entries are inserted in order of increasing risk category.
    """
    def usage_flags(commercial, modification, distribution, private_use, patent_grant):
        # A fresh mapping per entry so no two entries share a mutable dict.
        return {
            'commercial': commercial, 'modification': modification,
            'distribution': distribution, 'private_use': private_use,
            'patent_grant': patent_grant,
        }

    # Wording shared by many entries.
    notice = 'Include copyright notice'
    text = 'Include license text'

    permissive = LicenseType.PERMISSIVE
    weak = LicenseType.COPYLEFT_WEAK
    strong = LicenseType.COPYLEFT_STRONG

    catalogue: Dict[str, LicenseInfo] = {}

    # --- Permissive licenses (low risk) ---
    catalogue['MIT'] = LicenseInfo(
        name='MIT License',
        spdx_id='MIT',
        license_type=permissive,
        risk_level=RiskLevel.LOW,
        description='Very permissive license with minimal restrictions',
        restrictions=[notice, text],
        obligations=['Attribution'],
        compatibility=usage_flags(True, True, True, True, False),
    )
    catalogue['Apache-2.0'] = LicenseInfo(
        name='Apache License 2.0',
        spdx_id='Apache-2.0',
        license_type=permissive,
        risk_level=RiskLevel.LOW,
        description='Permissive license with patent protection',
        restrictions=[notice, text, 'State changes', 'Include NOTICE file'],
        obligations=['Attribution', 'Patent grant'],
        compatibility=usage_flags(True, True, True, True, True),
    )
    catalogue['BSD-3-Clause'] = LicenseInfo(
        name='BSD 3-Clause License',
        spdx_id='BSD-3-Clause',
        license_type=permissive,
        risk_level=RiskLevel.LOW,
        description='Permissive license with non-endorsement clause',
        restrictions=[notice, text, 'No endorsement using author names'],
        obligations=['Attribution'],
        compatibility=usage_flags(True, True, True, True, False),
    )
    catalogue['BSD-2-Clause'] = LicenseInfo(
        name='BSD 2-Clause License',
        spdx_id='BSD-2-Clause',
        license_type=permissive,
        risk_level=RiskLevel.LOW,
        description='Very permissive license similar to MIT',
        restrictions=[notice, text],
        obligations=['Attribution'],
        compatibility=usage_flags(True, True, True, True, False),
    )
    catalogue['ISC'] = LicenseInfo(
        name='ISC License',
        spdx_id='ISC',
        license_type=permissive,
        risk_level=RiskLevel.LOW,
        description='Functionally equivalent to MIT license',
        restrictions=[notice],
        obligations=['Attribution'],
        compatibility=usage_flags(True, True, True, True, False),
    )

    # --- Weak copyleft licenses (medium risk) ---
    catalogue['MPL-2.0'] = LicenseInfo(
        name='Mozilla Public License 2.0',
        spdx_id='MPL-2.0',
        license_type=weak,
        risk_level=RiskLevel.MEDIUM,
        description='File-level copyleft license',
        restrictions=['Disclose source of modified files', notice, text, 'State changes'],
        obligations=['Source disclosure (modified files only)'],
        compatibility=usage_flags(True, True, True, True, True),
    )
    catalogue['LGPL-2.1'] = LicenseInfo(
        name='GNU Lesser General Public License 2.1',
        spdx_id='LGPL-2.1',
        license_type=weak,
        risk_level=RiskLevel.MEDIUM,
        description='Library-level copyleft license',
        restrictions=['Disclose source of library modifications', notice, text,
                      'Allow relinking'],
        obligations=['Source disclosure (library modifications)', 'Dynamic linking preferred'],
        compatibility=usage_flags(True, True, True, True, False),
    )
    catalogue['LGPL-3.0'] = LicenseInfo(
        name='GNU Lesser General Public License 3.0',
        spdx_id='LGPL-3.0',
        license_type=weak,
        risk_level=RiskLevel.MEDIUM,
        description='Library-level copyleft with patent provisions',
        restrictions=['Disclose source of library modifications', notice, text,
                      'Allow relinking', 'Anti-tivoization'],
        obligations=['Source disclosure (library modifications)', 'Patent grant'],
        compatibility=usage_flags(True, True, True, True, True),
    )

    # --- Strong copyleft licenses (high risk) ---
    catalogue['GPL-2.0'] = LicenseInfo(
        name='GNU General Public License 2.0',
        spdx_id='GPL-2.0',
        license_type=strong,
        risk_level=RiskLevel.HIGH,
        description='Strong copyleft requiring full source disclosure',
        restrictions=['Disclose entire source code', notice, text, 'Use same license'],
        obligations=['Full source disclosure', 'License compatibility'],
        compatibility=usage_flags(False, True, True, True, False),
    )
    catalogue['GPL-3.0'] = LicenseInfo(
        name='GNU General Public License 3.0',
        spdx_id='GPL-3.0',
        license_type=strong,
        risk_level=RiskLevel.HIGH,
        description='Strong copyleft with patent and hardware provisions',
        restrictions=['Disclose entire source code', notice, text, 'Use same license',
                      'Anti-tivoization'],
        obligations=['Full source disclosure', 'Patent grant', 'License compatibility'],
        compatibility=usage_flags(False, True, True, True, True),
    )
    catalogue['AGPL-3.0'] = LicenseInfo(
        name='GNU Affero General Public License 3.0',
        spdx_id='AGPL-3.0',
        license_type=strong,
        risk_level=RiskLevel.CRITICAL,
        description='Network copyleft extending GPL to SaaS',
        restrictions=['Disclose entire source code', notice, text, 'Use same license',
                      'Network use triggers copyleft'],
        obligations=['Full source disclosure', 'Network service source disclosure'],
        compatibility=usage_flags(False, True, True, True, True),
    )

    # --- Proprietary / commercial licenses (high risk) ---
    catalogue['PROPRIETARY'] = LicenseInfo(
        name='Proprietary License',
        spdx_id=None,
        license_type=LicenseType.PROPRIETARY,
        risk_level=RiskLevel.HIGH,
        description='Commercial or custom proprietary license',
        restrictions=['Varies by license', 'Often no redistribution',
                      'May require commercial license'],
        obligations=['License agreement compliance', 'Payment obligations'],
        compatibility=usage_flags(False, False, False, True, False),
    )

    # --- Unknown / unlicensed (critical risk) ---
    catalogue['UNKNOWN'] = LicenseInfo(
        name='Unknown License',
        spdx_id=None,
        license_type=LicenseType.UNKNOWN,
        risk_level=RiskLevel.CRITICAL,
        description='No license detected or ambiguous licensing',
        restrictions=['Unknown', 'Assume no rights granted'],
        obligations=['Investigate and clarify licensing'],
        compatibility=usage_flags(False, False, False, False, False),
    )

    return catalogue
|
|
|
|
def _build_compatibility_matrix(self) -> Dict[str, Dict[str, bool]]:
|
|
"""Build license compatibility matrix."""
|
|
return {
|
|
'MIT': {
|
|
'MIT': True, 'Apache-2.0': True, 'BSD-3-Clause': True, 'BSD-2-Clause': True,
|
|
'ISC': True, 'MPL-2.0': True, 'LGPL-2.1': True, 'LGPL-3.0': True,
|
|
'GPL-2.0': False, 'GPL-3.0': False, 'AGPL-3.0': False, 'PROPRIETARY': False
|
|
},
|
|
'Apache-2.0': {
|
|
'MIT': True, 'Apache-2.0': True, 'BSD-3-Clause': True, 'BSD-2-Clause': True,
|
|
'ISC': True, 'MPL-2.0': True, 'LGPL-2.1': False, 'LGPL-3.0': True,
|
|
'GPL-2.0': False, 'GPL-3.0': True, 'AGPL-3.0': True, 'PROPRIETARY': False
|
|
},
|
|
'GPL-2.0': {
|
|
'MIT': True, 'Apache-2.0': False, 'BSD-3-Clause': True, 'BSD-2-Clause': True,
|
|
'ISC': True, 'MPL-2.0': False, 'LGPL-2.1': True, 'LGPL-3.0': False,
|
|
'GPL-2.0': True, 'GPL-3.0': False, 'AGPL-3.0': False, 'PROPRIETARY': False
|
|
},
|
|
'GPL-3.0': {
|
|
'MIT': True, 'Apache-2.0': True, 'BSD-3-Clause': True, 'BSD-2-Clause': True,
|
|
'ISC': True, 'MPL-2.0': True, 'LGPL-2.1': False, 'LGPL-3.0': True,
|
|
'GPL-2.0': False, 'GPL-3.0': True, 'AGPL-3.0': True, 'PROPRIETARY': False
|
|
},
|
|
'AGPL-3.0': {
|
|
'MIT': True, 'Apache-2.0': True, 'BSD-3-Clause': True, 'BSD-2-Clause': True,
|
|
'ISC': True, 'MPL-2.0': True, 'LGPL-2.1': False, 'LGPL-3.0': True,
|
|
'GPL-2.0': False, 'GPL-3.0': True, 'AGPL-3.0': True, 'PROPRIETARY': False
|
|
}
|
|
}
|
|
|
|
def _build_license_patterns(self) -> Dict[str, List[str]]:
|
|
"""Build license detection patterns for text analysis."""
|
|
return {
|
|
'MIT': [
|
|
r'MIT License',
|
|
r'Permission is hereby granted, free of charge',
|
|
r'THE SOFTWARE IS PROVIDED "AS IS"'
|
|
],
|
|
'Apache-2.0': [
|
|
r'Apache License, Version 2\.0',
|
|
r'Licensed under the Apache License',
|
|
r'http://www\.apache\.org/licenses/LICENSE-2\.0'
|
|
],
|
|
'GPL-2.0': [
|
|
r'GNU GENERAL PUBLIC LICENSE\s+Version 2',
|
|
r'This program is free software.*GPL.*version 2',
|
|
r'http://www\.gnu\.org/licenses/gpl-2\.0'
|
|
],
|
|
'GPL-3.0': [
|
|
r'GNU GENERAL PUBLIC LICENSE\s+Version 3',
|
|
r'This program is free software.*GPL.*version 3',
|
|
r'http://www\.gnu\.org/licenses/gpl-3\.0'
|
|
],
|
|
'BSD-3-Clause': [
|
|
r'BSD 3-Clause License',
|
|
r'Redistributions of source code must retain',
|
|
r'Neither the name.*may be used to endorse'
|
|
],
|
|
'BSD-2-Clause': [
|
|
r'BSD 2-Clause License',
|
|
r'Redistributions of source code must retain.*Redistributions in binary form'
|
|
]
|
|
}
|
|
|
|
def analyze_project(self, project_path: str, dependency_inventory: Optional[str] = None) -> Dict[str, Any]:
    """Run the full license compliance analysis for one project.

    Args:
        project_path: Directory of the project to analyse.
        dependency_inventory: Optional path to a JSON dependency inventory.
            When omitted (or falsy), dependencies are scanned from the
            project directory instead.

    Returns:
        A results dict with the keys 'timestamp', 'project_path',
        'project_license', 'dependencies', 'license_summary', 'conflicts',
        'compliance_score', 'risk_assessment' and 'recommendations'.
    """
    root = Path(project_path)

    # Start from a fully-keyed skeleton; later stages fill in the placeholders.
    results: Dict[str, Any] = {
        'timestamp': datetime.now().isoformat(),
        'project_path': str(root),
        'project_license': self._detect_project_license(root),
        'dependencies': [],
        'license_summary': {},
        'conflicts': [],
        'compliance_score': 0.0,
        'risk_assessment': {},
        'recommendations': [],
    }

    # An explicit inventory takes precedence over scanning the project.
    raw_dependencies = (
        self._load_dependency_inventory(dependency_inventory)
        if dependency_inventory
        else self._scan_project_dependencies(root)
    )

    # Resolve a license for every dependency record.
    analysed = results['dependencies']
    analysed.extend(
        self._analyze_dependency_license(entry, root) for entry in raw_dependencies
    )

    results['license_summary'] = self._generate_license_summary(analysed)

    found_conflicts = self._detect_license_conflicts(results['project_license'], analysed)
    results['conflicts'] = found_conflicts

    results['compliance_score'] = self._calculate_compliance_score(analysed, found_conflicts)
    results['risk_assessment'] = self._generate_risk_assessment(analysed, found_conflicts)

    # Recommendations read the other sections, so they are generated last.
    results['recommendations'] = self._generate_compliance_recommendations(results)

    return results
|
|
|
|
def _detect_project_license(self, project_path: Path) -> Optional[str]:
|
|
"""Detect the main project license."""
|
|
license_files = ['LICENSE', 'LICENSE.txt', 'LICENSE.md', 'COPYING', 'COPYING.txt']
|
|
|
|
for license_file in license_files:
|
|
license_path = project_path / license_file
|
|
if license_path.exists():
|
|
try:
|
|
with open(license_path, 'r', encoding='utf-8') as f:
|
|
content = f.read()
|
|
|
|
# Analyze license content
|
|
detected_license = self._detect_license_from_text(content)
|
|
if detected_license:
|
|
return detected_license
|
|
except Exception as e:
|
|
print(f"Error reading license file {license_path}: {e}")
|
|
|
|
return None
|
|
|
|
def _detect_license_from_text(self, text: str) -> Optional[str]:
|
|
"""Detect license type from text content."""
|
|
text_upper = text.upper()
|
|
|
|
for license_id, patterns in self.license_patterns.items():
|
|
for pattern in patterns:
|
|
if re.search(pattern, text, re.IGNORECASE):
|
|
return license_id
|
|
|
|
# Common license text patterns
|
|
if 'MIT' in text_upper and 'PERMISSION IS HEREBY GRANTED' in text_upper:
|
|
return 'MIT'
|
|
elif 'APACHE LICENSE' in text_upper and 'VERSION 2.0' in text_upper:
|
|
return 'Apache-2.0'
|
|
elif 'GPL' in text_upper and 'VERSION 2' in text_upper:
|
|
return 'GPL-2.0'
|
|
elif 'GPL' in text_upper and 'VERSION 3' in text_upper:
|
|
return 'GPL-3.0'
|
|
|
|
return None
|
|
|
|
def _load_dependency_inventory(self, inventory_path: str) -> List[Dict[str, Any]]:
|
|
"""Load dependencies from JSON inventory file."""
|
|
try:
|
|
with open(inventory_path, 'r') as f:
|
|
data = json.load(f)
|
|
|
|
if 'dependencies' in data:
|
|
return data['dependencies']
|
|
else:
|
|
return data if isinstance(data, list) else []
|
|
except Exception as e:
|
|
print(f"Error loading dependency inventory: {e}")
|
|
return []
|
|
|
|
def _scan_project_dependencies(self, project_path: Path) -> List[Dict[str, Any]]:
|
|
"""Basic dependency scanning - in practice, would integrate with dep_scanner.py."""
|
|
dependencies = []
|
|
|
|
# Simple package.json parsing as example
|
|
package_json = project_path / 'package.json'
|
|
if package_json.exists():
|
|
try:
|
|
with open(package_json, 'r') as f:
|
|
data = json.load(f)
|
|
|
|
for dep_type in ['dependencies', 'devDependencies']:
|
|
if dep_type in data:
|
|
for name, version in data[dep_type].items():
|
|
dependencies.append({
|
|
'name': name,
|
|
'version': version,
|
|
'ecosystem': 'npm',
|
|
'direct': True
|
|
})
|
|
except Exception as e:
|
|
print(f"Error parsing package.json: {e}")
|
|
|
|
return dependencies
|
|
|
|
def _analyze_dependency_license(self, dependency: Dict[str, Any], project_path: Path) -> DependencyLicense:
    """Resolve the license of a single dependency record.

    Resolution order: the record's declared 'license' value, then (for npm
    packages only) the package's directory under ``node_modules``, and
    finally the 'UNKNOWN' database entry.

    Args:
        dependency: Dependency record; 'name' is required, while 'version',
            'ecosystem', 'direct' and 'license' are optional.
        project_path: Project root used to locate ``node_modules``.

    Returns:
        A ``DependencyLicense`` whose ``license_detected`` is always set and
        whose ``confidence`` is 0.9 (declared), 0.7 (scanned) or 0.0 (unknown).
    """
    record = DependencyLicense(
        name=dependency['name'],
        version=dependency.get('version', ''),
        ecosystem=dependency.get('ecosystem', ''),
        direct=dependency.get('direct', False),
        license_declared=dependency.get('license'),
        license_detected=None,
        license_files=[],
        confidence=0.0
    )

    detected = None
    confidence = 0.0

    # 1) Trust the declared license when it resolves to a known entry.
    declared = record.license_declared
    if declared:
        resolved = self._resolve_license_info(declared)
        if resolved:
            detected, confidence = resolved, 0.9

    # 2) Otherwise look inside the installed npm package, if present.
    if not detected and record.ecosystem == 'npm':
        package_dir = project_path / 'node_modules' / record.name
        if package_dir.exists():
            scanned = self._scan_package_directory(package_dir)
            if scanned:
                detected, confidence = scanned, 0.7

    # 3) Nothing found: fall back to the explicit unknown entry.
    if not detected:
        detected, confidence = self.license_database['UNKNOWN'], 0.0

    record.license_detected = detected
    record.confidence = confidence
    return record
|
|
|
|
def _resolve_license_info(self, license_string: str) -> Optional[LicenseInfo]:
|
|
"""Resolve license string to LicenseInfo object."""
|
|
if not license_string:
|
|
return None
|
|
|
|
license_string = license_string.strip()
|
|
|
|
# Direct SPDX ID match
|
|
if license_string in self.license_database:
|
|
return self.license_database[license_string]
|
|
|
|
# Common variations and mappings
|
|
license_mappings = {
|
|
'mit': 'MIT',
|
|
'apache': 'Apache-2.0',
|
|
'apache-2.0': 'Apache-2.0',
|
|
'apache 2.0': 'Apache-2.0',
|
|
'bsd': 'BSD-3-Clause',
|
|
'bsd-3-clause': 'BSD-3-Clause',
|
|
'bsd-2-clause': 'BSD-2-Clause',
|
|
'gpl-2.0': 'GPL-2.0',
|
|
'gpl-3.0': 'GPL-3.0',
|
|
'lgpl-2.1': 'LGPL-2.1',
|
|
'lgpl-3.0': 'LGPL-3.0',
|
|
'mpl-2.0': 'MPL-2.0',
|
|
'isc': 'ISC',
|
|
'unlicense': 'MIT', # Treat as permissive
|
|
'public domain': 'MIT', # Treat as permissive
|
|
'proprietary': 'PROPRIETARY',
|
|
'commercial': 'PROPRIETARY'
|
|
}
|
|
|
|
license_lower = license_string.lower()
|
|
for pattern, mapped_license in license_mappings.items():
|
|
if pattern in license_lower:
|
|
return self.license_database.get(mapped_license)
|
|
|
|
return None
|
|
|
|
def _scan_package_directory(self, package_path: Path) -> Optional[LicenseInfo]:
|
|
"""Scan package directory for license information."""
|
|
license_files = ['LICENSE', 'LICENSE.txt', 'LICENSE.md', 'COPYING', 'README.md', 'package.json']
|
|
|
|
for license_file in license_files:
|
|
file_path = package_path / license_file
|
|
if file_path.exists():
|
|
try:
|
|
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
|
content = f.read()
|
|
|
|
# Try to detect license from content
|
|
if license_file == 'package.json':
|
|
# Parse JSON for license field
|
|
try:
|
|
data = json.loads(content)
|
|
license_field = data.get('license')
|
|
if license_field:
|
|
return self._resolve_license_info(license_field)
|
|
except:
|
|
continue
|
|
else:
|
|
# Analyze text content
|
|
detected_license = self._detect_license_from_text(content)
|
|
if detected_license:
|
|
return self.license_database.get(detected_license)
|
|
except Exception:
|
|
continue
|
|
|
|
return None
|
|
|
|
def _generate_license_summary(self, dependencies: List[DependencyLicense]) -> Dict[str, Any]:
|
|
"""Generate summary of license distribution."""
|
|
summary = {
|
|
'total_dependencies': len(dependencies),
|
|
'license_types': {},
|
|
'risk_levels': {},
|
|
'unknown_licenses': 0,
|
|
'direct_dependencies': 0,
|
|
'transitive_dependencies': 0
|
|
}
|
|
|
|
for dep in dependencies:
|
|
# Count by license type
|
|
license_type = dep.license_detected.license_type.value
|
|
summary['license_types'][license_type] = summary['license_types'].get(license_type, 0) + 1
|
|
|
|
# Count by risk level
|
|
risk_level = dep.license_detected.risk_level.value
|
|
summary['risk_levels'][risk_level] = summary['risk_levels'].get(risk_level, 0) + 1
|
|
|
|
# Count unknowns
|
|
if dep.license_detected.license_type == LicenseType.UNKNOWN:
|
|
summary['unknown_licenses'] += 1
|
|
|
|
# Count direct vs transitive
|
|
if dep.direct:
|
|
summary['direct_dependencies'] += 1
|
|
else:
|
|
summary['transitive_dependencies'] += 1
|
|
|
|
return summary
|
|
|
|
def _detect_license_conflicts(self, project_license: Optional[str],
|
|
dependencies: List[DependencyLicense]) -> List[LicenseConflict]:
|
|
"""Detect license compatibility conflicts."""
|
|
conflicts = []
|
|
|
|
if not project_license:
|
|
# If no project license detected, flag as potential issue
|
|
for dep in dependencies:
|
|
if dep.license_detected.risk_level in [RiskLevel.HIGH, RiskLevel.CRITICAL]:
|
|
conflicts.append(LicenseConflict(
|
|
dependency1='Project',
|
|
license1='Unknown',
|
|
dependency2=dep.name,
|
|
license2=dep.license_detected.spdx_id or dep.license_detected.name,
|
|
conflict_type='Unknown project license',
|
|
severity=RiskLevel.HIGH,
|
|
description=f'Project license unknown, dependency {dep.name} has {dep.license_detected.risk_level.value} risk license',
|
|
resolution_options=['Define project license', 'Review dependency usage']
|
|
))
|
|
return conflicts
|
|
|
|
project_license_info = self.license_database.get(project_license)
|
|
if not project_license_info:
|
|
return conflicts
|
|
|
|
# Check compatibility with project license
|
|
for dep in dependencies:
|
|
dep_license_id = dep.license_detected.spdx_id or 'UNKNOWN'
|
|
|
|
# Check compatibility matrix
|
|
if project_license in self.compatibility_matrix:
|
|
compatibility = self.compatibility_matrix[project_license].get(dep_license_id, False)
|
|
|
|
if not compatibility:
|
|
severity = self._determine_conflict_severity(project_license_info, dep.license_detected)
|
|
|
|
conflicts.append(LicenseConflict(
|
|
dependency1='Project',
|
|
license1=project_license,
|
|
dependency2=dep.name,
|
|
license2=dep_license_id,
|
|
conflict_type='License incompatibility',
|
|
severity=severity,
|
|
description=f'Project license {project_license} is incompatible with dependency license {dep_license_id}',
|
|
resolution_options=self._generate_conflict_resolutions(project_license, dep_license_id)
|
|
))
|
|
|
|
# Check for GPL contamination in permissive projects
|
|
if project_license_info.license_type == LicenseType.PERMISSIVE:
|
|
for dep in dependencies:
|
|
if dep.license_detected.license_type == LicenseType.COPYLEFT_STRONG:
|
|
conflicts.append(LicenseConflict(
|
|
dependency1='Project',
|
|
license1=project_license,
|
|
dependency2=dep.name,
|
|
license2=dep.license_detected.spdx_id or dep.license_detected.name,
|
|
conflict_type='GPL contamination',
|
|
severity=RiskLevel.CRITICAL,
|
|
description=f'GPL dependency {dep.name} may contaminate permissive project',
|
|
resolution_options=['Remove GPL dependency', 'Change project license to GPL',
|
|
'Use dynamic linking', 'Find alternative dependency']
|
|
))
|
|
|
|
return conflicts
|
|
|
|
def _determine_conflict_severity(self, project_license: LicenseInfo, dep_license: LicenseInfo) -> RiskLevel:
    """Rate how severe a conflict between two licenses is.

    Args:
        project_license: License entry of the project.
        dep_license: License entry of the conflicting dependency.

    Returns:
        CRITICAL for an unknown dependency license or for strong copyleft in
        a permissive project, HIGH for a proprietary dependency, and MEDIUM
        otherwise.
    """
    dep_kind = dep_license.license_type

    if dep_kind == LicenseType.UNKNOWN:
        return RiskLevel.CRITICAL

    permissive_project = project_license.license_type == LicenseType.PERMISSIVE
    if permissive_project and dep_kind == LicenseType.COPYLEFT_STRONG:
        return RiskLevel.CRITICAL

    if dep_kind == LicenseType.PROPRIETARY:
        return RiskLevel.HIGH

    return RiskLevel.MEDIUM
|
|
|
|
def _generate_conflict_resolutions(self, project_license: str, dep_license: str) -> List[str]:
|
|
"""Generate resolution options for license conflicts."""
|
|
resolutions = []
|
|
|
|
if 'GPL' in dep_license:
|
|
resolutions.extend([
|
|
'Find alternative non-GPL dependency',
|
|
'Use dynamic linking if possible',
|
|
'Consider changing project license to GPL-compatible',
|
|
'Remove the dependency if not essential'
|
|
])
|
|
elif dep_license == 'PROPRIETARY':
|
|
resolutions.extend([
|
|
'Obtain commercial license',
|
|
'Find open-source alternative',
|
|
'Remove dependency if not essential',
|
|
'Negotiate license terms'
|
|
])
|
|
else:
|
|
resolutions.extend([
|
|
'Review license compatibility carefully',
|
|
'Consult legal counsel',
|
|
'Find alternative dependency',
|
|
'Consider license exception'
|
|
])
|
|
|
|
return resolutions
|
|
|
|
def _calculate_compliance_score(self, dependencies: List[DependencyLicense],
|
|
conflicts: List[LicenseConflict]) -> float:
|
|
"""Calculate overall compliance score (0-100)."""
|
|
if not dependencies:
|
|
return 100.0
|
|
|
|
base_score = 100.0
|
|
|
|
# Deduct points for unknown licenses
|
|
unknown_count = sum(1 for dep in dependencies
|
|
if dep.license_detected.license_type == LicenseType.UNKNOWN)
|
|
base_score -= (unknown_count / len(dependencies)) * 30
|
|
|
|
# Deduct points for high-risk licenses
|
|
high_risk_count = sum(1 for dep in dependencies
|
|
if dep.license_detected.risk_level in [RiskLevel.HIGH, RiskLevel.CRITICAL])
|
|
base_score -= (high_risk_count / len(dependencies)) * 20
|
|
|
|
# Deduct points for conflicts
|
|
if conflicts:
|
|
critical_conflicts = sum(1 for c in conflicts if c.severity == RiskLevel.CRITICAL)
|
|
high_conflicts = sum(1 for c in conflicts if c.severity == RiskLevel.HIGH)
|
|
|
|
base_score -= critical_conflicts * 15
|
|
base_score -= high_conflicts * 10
|
|
|
|
return max(0.0, base_score)
|
|
|
|
def _generate_risk_assessment(self, dependencies: List[DependencyLicense],
|
|
conflicts: List[LicenseConflict]) -> Dict[str, Any]:
|
|
"""Generate comprehensive risk assessment."""
|
|
return {
|
|
'overall_risk': self._calculate_overall_risk(dependencies, conflicts),
|
|
'license_risk_breakdown': self._calculate_license_risks(dependencies),
|
|
'conflict_summary': {
|
|
'total_conflicts': len(conflicts),
|
|
'critical_conflicts': len([c for c in conflicts if c.severity == RiskLevel.CRITICAL]),
|
|
'high_conflicts': len([c for c in conflicts if c.severity == RiskLevel.HIGH])
|
|
},
|
|
'distribution_risks': self._assess_distribution_risks(dependencies),
|
|
'commercial_risks': self._assess_commercial_risks(dependencies)
|
|
}
|
|
|
|
def _calculate_overall_risk(self, dependencies: List[DependencyLicense],
|
|
conflicts: List[LicenseConflict]) -> str:
|
|
"""Calculate overall project risk level."""
|
|
if any(c.severity == RiskLevel.CRITICAL for c in conflicts):
|
|
return 'CRITICAL'
|
|
elif any(dep.license_detected.risk_level == RiskLevel.CRITICAL for dep in dependencies):
|
|
return 'CRITICAL'
|
|
elif any(c.severity == RiskLevel.HIGH for c in conflicts):
|
|
return 'HIGH'
|
|
elif any(dep.license_detected.risk_level == RiskLevel.HIGH for dep in dependencies):
|
|
return 'HIGH'
|
|
elif any(dep.license_detected.risk_level == RiskLevel.MEDIUM for dep in dependencies):
|
|
return 'MEDIUM'
|
|
else:
|
|
return 'LOW'
|
|
|
|
def _calculate_license_risks(self, dependencies: List[DependencyLicense]) -> Dict[str, int]:
|
|
"""Calculate breakdown of license risks."""
|
|
risks = {'low': 0, 'medium': 0, 'high': 0, 'critical': 0}
|
|
|
|
for dep in dependencies:
|
|
risk_level = dep.license_detected.risk_level.value
|
|
risks[risk_level] += 1
|
|
|
|
return risks
|
|
|
|
def _assess_distribution_risks(self, dependencies: List[DependencyLicense]) -> List[str]:
|
|
"""Assess risks related to software distribution."""
|
|
risks = []
|
|
|
|
gpl_deps = [dep for dep in dependencies
|
|
if dep.license_detected.license_type == LicenseType.COPYLEFT_STRONG]
|
|
if gpl_deps:
|
|
risks.append(f"GPL dependencies require source code disclosure: {[d.name for d in gpl_deps]}")
|
|
|
|
proprietary_deps = [dep for dep in dependencies
|
|
if dep.license_detected.license_type == LicenseType.PROPRIETARY]
|
|
if proprietary_deps:
|
|
risks.append(f"Proprietary dependencies may require commercial licenses: {[d.name for d in proprietary_deps]}")
|
|
|
|
unknown_deps = [dep for dep in dependencies
|
|
if dep.license_detected.license_type == LicenseType.UNKNOWN]
|
|
if unknown_deps:
|
|
risks.append(f"Unknown licenses pose legal uncertainty: {[d.name for d in unknown_deps]}")
|
|
|
|
return risks
|
|
|
|
def _assess_commercial_risks(self, dependencies: List[DependencyLicense]) -> List[str]:
|
|
"""Assess risks for commercial usage."""
|
|
risks = []
|
|
|
|
agpl_deps = [dep for dep in dependencies
|
|
if dep.license_detected.spdx_id == 'AGPL-3.0']
|
|
if agpl_deps:
|
|
risks.append(f"AGPL dependencies trigger copyleft for network services: {[d.name for d in agpl_deps]}")
|
|
|
|
return risks
|
|
|
|
def _generate_compliance_recommendations(self, analysis_results: Dict[str, Any]) -> List[str]:
|
|
"""Generate actionable compliance recommendations."""
|
|
recommendations = []
|
|
|
|
# Address critical issues first
|
|
critical_conflicts = [c for c in analysis_results['conflicts']
|
|
if c.severity == RiskLevel.CRITICAL]
|
|
if critical_conflicts:
|
|
recommendations.append("CRITICAL: Address license conflicts immediately before any distribution")
|
|
for conflict in critical_conflicts[:3]: # Top 3
|
|
recommendations.append(f" • {conflict.description}")
|
|
|
|
# Unknown licenses
|
|
unknown_count = analysis_results['license_summary']['unknown_licenses']
|
|
if unknown_count > 0:
|
|
recommendations.append(f"Investigate and clarify licenses for {unknown_count} dependencies with unknown licensing")
|
|
|
|
# GPL contamination
|
|
gpl_deps = [dep for dep in analysis_results['dependencies']
|
|
if dep.license_detected.license_type == LicenseType.COPYLEFT_STRONG]
|
|
if gpl_deps and analysis_results.get('project_license') in ['MIT', 'Apache-2.0', 'BSD-3-Clause']:
|
|
recommendations.append("Consider removing GPL dependencies or changing project license for permissive project")
|
|
|
|
# Compliance score
|
|
if analysis_results['compliance_score'] < 70:
|
|
recommendations.append("Overall compliance score is low - prioritize license cleanup")
|
|
|
|
return recommendations
|
|
|
|
def generate_report(self, analysis_results: Dict[str, Any], format: str = 'text') -> str:
    """Generate compliance report in specified format.

    Args:
        analysis_results: Analysis dictionary. The text report reads
            ``'timestamp'``, ``'project_path'``, ``'project_license'``,
            ``'license_summary'``, ``'compliance_score'``,
            ``'risk_assessment'``, ``'conflicts'``, ``'dependencies'`` and
            ``'recommendations'``. The dictionary itself is not modified.
        format: ``'json'`` for a JSON document; any other value produces
            the plain-text report. (The name shadows the builtin but is
            kept because it is part of the public signature.)

    Returns:
        The rendered report as a single string.
    """
    if format == 'json':
        # Shallow copy so the caller's dict keeps its dataclass objects;
        # only the two keys holding dataclasses are replaced.
        serializable_results = analysis_results.copy()
        serializable_results['dependencies'] = [
            {
                'name': dep.name,
                'version': dep.version,
                'ecosystem': dep.ecosystem,
                'direct': dep.direct,
                'license_declared': dep.license_declared,
                'license_detected': asdict(dep.license_detected) if dep.license_detected else None,
                'confidence': dep.confidence,
            }
            for dep in analysis_results['dependencies']
        ]
        serializable_results['conflicts'] = [asdict(conflict) for conflict in analysis_results['conflicts']]
        # default=str stringifies any value json cannot encode natively
        # (for example enum members left in place by asdict).
        return json.dumps(serializable_results, indent=2, default=str)

    # Text format report
    summary = analysis_results['license_summary']
    conflicts = analysis_results['conflicts']

    report = [
        "=" * 60,
        "LICENSE COMPLIANCE REPORT",
        "=" * 60,
        f"Analysis Date: {analysis_results['timestamp']}",
        f"Project: {analysis_results['project_path']}",
        f"Project License: {analysis_results['project_license'] or 'Unknown'}",
        "",
        # Summary
        "SUMMARY:",
        f" Total Dependencies: {summary['total_dependencies']}",
        f" Compliance Score: {analysis_results['compliance_score']:.1f}/100",
        f" Overall Risk: {analysis_results['risk_assessment']['overall_risk']}",
        f" License Conflicts: {len(conflicts)}",
        "",
    ]

    # License distribution
    report.append("LICENSE DISTRIBUTION:")
    for license_type, count in summary['license_types'].items():
        report.append(f" {license_type.title()}: {count}")
    report.append("")

    # Risk breakdown
    report.append("RISK BREAKDOWN:")
    for risk_level, count in summary['risk_levels'].items():
        report.append(f" {risk_level.title()}: {count}")
    report.append("")

    # Conflicts
    if conflicts:
        report.append("LICENSE CONFLICTS:")
        report.append("-" * 30)
        for conflict in conflicts:
            report.append(f"Conflict: {conflict.dependency2} ({conflict.license2})")
            report.append(f" Issue: {conflict.description}")
            report.append(f" Severity: {conflict.severity.value.upper()}")
            report.append(f" Resolutions: {', '.join(conflict.resolution_options[:2])}")
            report.append("")

    # High-risk dependencies. license_detected is optional (the JSON branch
    # already treats it so); skip entries without one instead of raising.
    high_risk_deps = [
        dep for dep in analysis_results['dependencies']
        if dep.license_detected is not None
        and dep.license_detected.risk_level in [RiskLevel.HIGH, RiskLevel.CRITICAL]
    ]
    if high_risk_deps:
        report.append("HIGH-RISK DEPENDENCIES:")
        report.append("-" * 30)
        # Only the first ten are listed to keep the report readable.
        for dep in high_risk_deps[:10]:
            license_name = dep.license_detected.spdx_id or dep.license_detected.name
            report.append(f" {dep.name} v{dep.version}: {license_name} ({dep.license_detected.risk_level.value.upper()})")
        report.append("")

    # Recommendations
    if analysis_results['recommendations']:
        report.append("RECOMMENDATIONS:")
        report.append("-" * 20)
        for i, rec in enumerate(analysis_results['recommendations'], 1):
            report.append(f"{i}. {rec}")
        report.append("")

    report.append("=" * 60)
    return '\n'.join(report)
|
|
|
|
def main():
    """Main entry point for the license checker.

    Parses command-line arguments, runs the analysis, and writes the report
    to the ``--output`` file or to stdout.

    Exit status:
        0: analysis completed with no policy violation reported.
        1: an error occurred, or ``--policy strict`` was given and the
           compliance score is below 80.
        2: ``--warn-conflicts`` was given and license conflicts were found
           (only reached when the strict-policy check did not already exit).
    """
    parser = argparse.ArgumentParser(
        description='Analyze dependency licenses for compliance and conflicts',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python license_checker.py /path/to/project
  python license_checker.py . --format json --output compliance.json
  python license_checker.py /app --inventory deps.json --policy strict
"""
    )

    parser.add_argument('project_path',
                        help='Path to the project directory to analyze')
    parser.add_argument('--inventory',
                        help='Path to dependency inventory JSON file')
    parser.add_argument('--format', choices=['text', 'json'], default='text',
                        help='Output format (default: text)')
    parser.add_argument('--output', '-o',
                        help='Output file path (default: stdout)')
    parser.add_argument('--policy', choices=['permissive', 'strict'], default='permissive',
                        help='License policy strictness (default: permissive)')
    parser.add_argument('--warn-conflicts', action='store_true',
                        help='Show warnings for potential conflicts')

    args = parser.parse_args()

    try:
        checker = LicenseChecker()
        results = checker.analyze_project(args.project_path, args.inventory)
        report = checker.generate_report(results, args.format)

        if args.output:
            # The report can contain non-ASCII characters (e.g. bullet
            # points), so do not depend on the platform default encoding.
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(report)
            print(f"Compliance report saved to {args.output}")
        else:
            print(report)

        # Exit with error code for policy violations. sys.exit raises
        # SystemExit, which the `except Exception` below does not catch.
        if args.policy == 'strict' and results['compliance_score'] < 80:
            sys.exit(1)

        if args.warn_conflicts and results['conflicts']:
            # Sent to stderr so a report printed to stdout (notably JSON)
            # stays machine-parseable.
            print("\nWARNING: License conflicts detected!", file=sys.stderr)
            sys.exit(2)

    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()