#!/usr/bin/env python3
"""
Changelog Generator

Parses git log output in conventional commits format and generates structured changelogs
in multiple formats (Markdown, Keep a Changelog). Groups commits by type, extracts scope,
links to PRs/issues, and highlights breaking changes.

Input: git log text (piped from git log) or JSON array of commits
Output: formatted CHANGELOG.md section + release summary stats
"""
|
|
|
|
import argparse
|
|
import json
|
|
import re
|
|
import sys
|
|
from collections import defaultdict, Counter
|
|
from datetime import datetime
|
|
from typing import Dict, List, Optional, Tuple, Union
|
|
|
|
|
|
class ConventionalCommit:
    """Represents a parsed conventional commit.

    The raw message is parsed once, at construction time, into:

    * ``type`` / ``scope`` / ``description`` -- taken from a header of the
      form ``type(scope)!: description``. A header that does not match falls
      back to type ``"chore"`` with the whole header as the description.
    * ``body`` -- the non-blank lines that precede the first footer.
    * ``footers`` -- the first footer line and every non-blank line after it.
    * ``is_breaking`` / ``breaking_change_description`` -- set by a ``!`` in
      the header or by a ``BREAKING CHANGE:`` / ``BREAKING-CHANGE:`` footer.
    """

    # Header: type(scope)!: description -- scope and "!" are optional.
    _HEADER_RE = re.compile(r'^(\w+)(\([^)]+\))?(!)?:\s*(.+)$')

    # Generic footer: "KEY: value" or "KEY #123".
    _FOOTER_RE = re.compile(r'^([A-Z-]+):\s*(.+)$|^([A-Z-]+)\s+#(\d+)$')

    # The breaking-change footer is the one token that may contain a space,
    # so the generic footer pattern cannot match it and it needs its own.
    _BREAKING_FOOTER_RE = re.compile(r'^BREAKING[ -]CHANGE:\s*(.*)$')

    # Any "#123" style reference; this also covers "fixes #123" and
    # "closes owner/repo#123", which end in the same "#<digits>" token.
    _ISSUE_RE = re.compile(r'#(\d+)')

    # Commit type -> changelog section. None hides the commit from the
    # user-facing changelog.
    _CATEGORY_MAP = {
        'feat': 'Added',
        'add': 'Added',
        'fix': 'Fixed',
        'bugfix': 'Fixed',
        'security': 'Security',
        'perf': 'Fixed',  # Performance improvements go to Fixed
        'refactor': 'Changed',
        'style': 'Changed',
        'docs': 'Changed',
        'test': None,  # Tests don't appear in user-facing changelog
        'ci': None,
        'build': None,
        'chore': None,
        'revert': 'Fixed',
        'remove': 'Removed',
        'deprecate': 'Deprecated',
    }

    def __init__(self, raw_message: str, commit_hash: str = "", author: str = "",
                 date: str = "", merge_info: Optional[str] = None):
        """Store the commit metadata and parse ``raw_message`` immediately.

        Args:
            raw_message: Full commit message (header, optional body/footers).
            commit_hash: Commit hash, stored as given.
            author: Author string, stored as given.
            date: Date string, stored as given (not parsed).
            merge_info: Optional merge information, stored as given.
        """
        self.raw_message = raw_message
        self.commit_hash = commit_hash
        self.author = author
        self.date = date
        self.merge_info = merge_info

        # Fields filled in by _parse_commit_message()
        self.type = ""
        self.scope = ""
        self.description = ""
        self.body = ""
        self.footers = []
        self.is_breaking = False
        self.breaking_change_description = ""

        self._parse_commit_message()

    def _parse_commit_message(self):
        """Parse the header, body and footers of ``self.raw_message``."""
        lines = (self.raw_message or "").split('\n')
        header = lines[0] if lines else ""

        match = self._HEADER_RE.match(header)
        if match:
            self.type = match.group(1).lower()
            scope = match.group(2)
            self.scope = scope[1:-1] if scope else ""  # Remove parentheses
            self.is_breaking = bool(match.group(3))  # ! indicates breaking change
            self.description = match.group(4).strip()
        else:
            # Fallback for non-conventional commits
            self.type = "chore"
            self.description = header

        body_lines = []
        footer_lines = []
        for line in lines[1:]:
            if not line.strip():
                continue

            breaking = self._BREAKING_FOOTER_RE.match(line)
            if breaking:
                self.is_breaking = True
                self.breaking_change_description = breaking.group(1).strip()
                footer_lines.append(line)
            elif footer_lines or self._FOOTER_RE.match(line):
                # Once the first footer has been seen, every later line is
                # kept with the footers (continuation lines included).
                footer_lines.append(line)
            else:
                body_lines.append(line)

        self.body = '\n'.join(body_lines).strip()
        self.footers = footer_lines

    def extract_issue_references(self) -> List[str]:
        """Return the issue/PR numbers referenced by this commit.

        The description, body and footers are scanned for ``#<digits>``.

        Returns:
            The referenced numbers as strings (without ``#``), de-duplicated,
            in order of first appearance.
        """
        text = f"{self.description} {self.body} {' '.join(self.footers)}"
        # dict.fromkeys de-duplicates while keeping first-seen order.
        return list(dict.fromkeys(self._ISSUE_RE.findall(text)))

    def get_changelog_category(self) -> Optional[str]:
        """Map the commit type to a changelog category.

        Returns:
            The section name, ``None`` for internal types (test, ci, build,
            chore) that are left out of the changelog, or ``'Changed'`` for
            any type that is not in the map.
        """
        return self._CATEGORY_MAP.get(self.type, 'Changed')
|
|
|
|
|
|
class ChangelogGenerator:
    """Main changelog generator class.

    Commits are collected into ``self.commits`` by ``parse_git_log_output``
    or ``parse_json_commits`` and then rendered as Markdown
    (``generate_markdown_changelog``), JSON (``generate_json_output``) or
    summary statistics (``generate_release_summary``).

    Attributes set by the caller after construction:
        version: Release label; ``"Unreleased"`` by default.
        date: Release date string; defaults to today as ``YYYY-MM-DD``.
        base_url: Optional repository URL used to link commit hashes.
    """

    # Section order used in the Markdown output.
    _CATEGORY_ORDER = ['Added', 'Changed', 'Deprecated', 'Removed', 'Fixed', 'Security']

    # Commit types counted as "notable" in the release summary.
    _NOTABLE_TYPES = frozenset(
        {'feat', 'fix', 'security', 'perf', 'refactor', 'remove', 'deprecate'}
    )

    # Format 1: simple oneline format ("<hash> <message>")
    _ONELINE_RE = re.compile(r'^([a-f0-9]{7,40})\s+(.+)$')

    # Format 2: full format, each commit starts with "commit <hash>"
    _FULL_HEADER_RE = re.compile(r'^commit\s+([a-f0-9]+)')

    def __init__(self):
        self.commits: List[ConventionalCommit] = []
        self.version = "Unreleased"
        self.date = datetime.now().strftime("%Y-%m-%d")
        self.base_url = ""

    def parse_git_log_output(self, git_log_text: str):
        """Parse git log output and append the commits to ``self.commits``.

        Two layouts are recognised, line by line:

        * oneline: ``<7-40 hex chars> <message>``
        * full: a ``commit <hash>`` line, optional ``Author:`` / ``Date:`` /
          ``Merge:`` header lines, then the indented commit message.

        Blank lines are skipped; lines that fit neither layout are ignored.

        Args:
            git_log_text: Text produced by ``git log``.
        """
        if not git_log_text:
            return

        pending = None      # metadata of the full-format commit being read
        message_lines = []  # its message lines, with the git indent removed

        for raw_line in git_log_text.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            # Inside a full-format commit, an indented line belongs to the
            # message. This must be decided on the raw line: after strip()
            # the indentation is gone and the message would be lost.
            if pending is not None and raw_line[0] in ' \t':
                text = raw_line.rstrip()
                if text.startswith('    '):
                    message_lines.append(text[4:])  # Remove 4-space indent
                else:
                    message_lines.append(text.lstrip())
                continue

            full_match = self._FULL_HEADER_RE.match(line)
            if full_match:
                if pending is not None:
                    self._append_full_commit(pending, message_lines)
                pending = {
                    'hash': full_match.group(1),
                    'author': '',
                    'date': '',
                    'merge': None,
                }
                message_lines = []
                continue

            oneline_match = self._ONELINE_RE.match(line)
            if oneline_match:
                if pending is not None:
                    self._append_full_commit(pending, message_lines)
                    pending = None
                    message_lines = []
                self.commits.append(
                    ConventionalCommit(oneline_match.group(2), oneline_match.group(1))
                )
                continue

            # Metadata lines in full format
            if pending is not None:
                if line.startswith('Author:'):
                    pending['author'] = line[7:].strip()
                elif line.startswith('Date:'):
                    pending['date'] = line[5:].strip()
                elif line.startswith('Merge:'):
                    pending['merge'] = line[6:].strip()

        # Process final commit
        if pending is not None:
            self._append_full_commit(pending, message_lines)

    def _append_full_commit(self, meta: Dict, message_lines: List[str]):
        """Build a commit from collected full-format data and store it."""
        self.commits.append(
            ConventionalCommit(
                '\n'.join(message_lines).strip(),
                meta['hash'],
                meta['author'],
                meta['date'],
                meta['merge'],
            )
        )

    def parse_json_commits(self, json_data: Union[str, List[Dict]]):
        """Parse commits from JSON and append them to ``self.commits``.

        Each entry is an object whose optional keys ``message``, ``hash``,
        ``author`` and ``date`` are read; missing or null values become ``''``.

        Args:
            json_data: A JSON string holding an array of commit objects, or
                the already-decoded list.

        Raises:
            ValueError: If the payload is not an array of objects (this
                includes ``json.JSONDecodeError`` for malformed JSON text).
        """
        data = json.loads(json_data) if isinstance(json_data, str) else json_data

        if not isinstance(data, (list, tuple)):
            raise ValueError("JSON input must be an array of commit objects")

        for index, commit_data in enumerate(data):
            if not isinstance(commit_data, dict):
                raise ValueError(f"Commit entry {index} is not a JSON object")
            self.commits.append(
                ConventionalCommit(
                    raw_message=commit_data.get('message') or '',
                    commit_hash=commit_data.get('hash') or '',
                    author=commit_data.get('author') or '',
                    date=commit_data.get('date') or '',
                )
            )

    def group_commits_by_category(self) -> Dict[str, List[ConventionalCommit]]:
        """Group commits by changelog category.

        Returns:
            A mapping of category name to commits, in input order. Commits
            whose category is ``None`` (internal changes) are left out.
        """
        categories = defaultdict(list)
        for commit in self.commits:
            category = commit.get_changelog_category()
            if category:  # Skip None categories (internal changes)
                categories[category].append(commit)
        return dict(categories)

    def generate_markdown_changelog(self, include_unreleased: bool = True) -> str:
        """Generate Keep a Changelog format markdown.

        Args:
            include_unreleased: When true and the version is ``"Unreleased"``,
                the heading is written without a date.

        Returns:
            The Markdown section, or ``"No notable changes.\\n"`` when there
            is neither a categorised commit nor a breaking change.
        """
        grouped_commits = self.group_commits_by_category()
        breaking_changes = [commit for commit in self.commits if commit.is_breaking]

        # A breaking change must be reported even when its type (e.g.
        # "chore!") is otherwise hidden from the changelog.
        if not grouped_commits and not breaking_changes:
            return "No notable changes.\n"

        changelog = []
        if include_unreleased and self.version == "Unreleased":
            changelog.append(f"## [{self.version}]")
        else:
            changelog.append(f"## [{self.version}] - {self.date}")
        changelog.append("")

        # Breaking changes section comes first if any exist
        if breaking_changes:
            changelog.append("### Breaking Changes")
            for commit in breaking_changes:
                line = self._format_commit_line(commit, show_breaking=True)
                changelog.append(f"- {line}")
            changelog.append("")

        # Regular categories
        for category in self._CATEGORY_ORDER:
            if category not in grouped_commits:
                continue

            changelog.append(f"### {category}")

            # Group by scope for better organization
            scoped_commits = defaultdict(list)
            for commit in grouped_commits[category]:
                scoped_commits[commit.scope or "general"].append(commit)

            # Sort scopes, with 'general' last
            scopes = sorted(scoped_commits)
            if "general" in scopes:
                scopes.remove("general")
                scopes.append("general")

            for scope in scopes:
                # Scope sub-headings are only useful with several scopes.
                if len(scoped_commits) > 1 and scope != "general":
                    changelog.append(f"#### {scope.title()}")

                for commit in scoped_commits[scope]:
                    line = self._format_commit_line(commit)
                    changelog.append(f"- {line}")

            changelog.append("")

        return '\n'.join(changelog)

    def _format_commit_line(self, commit: ConventionalCommit, show_breaking: bool = False) -> str:
        """Format a single commit line for the changelog.

        Args:
            commit: The commit to render.
            show_breaking: True inside the "Breaking Changes" section, where
                the breaking-change description is appended; elsewhere a
                breaking commit gets a warning marker instead.

        Returns:
            The text of the bullet, without the leading ``"- "``.
        """
        # Upper-case only the first character: str.capitalize() would also
        # lower-case the rest and mangle names such as "OAuth" or "API".
        description = commit.description
        line = description[:1].upper() + description[1:]

        # Add scope if present and not already in description
        if commit.scope and commit.scope.lower() not in line.lower():
            line = f"{commit.scope}: {line}"

        # Add issue references
        issue_refs = commit.extract_issue_references()
        if issue_refs:
            refs_str = ', '.join(f"#{ref}" for ref in issue_refs)
            line += f" ({refs_str})"

        # Add commit hash if available
        if commit.commit_hash:
            short_hash = commit.commit_hash[:7]
            line += f" [{short_hash}]"

            if self.base_url:
                # Appended right after "[hash]" so that together they form a
                # Markdown link; a trailing slash on base_url is tolerated.
                base = self.base_url.rstrip('/')
                line += f"({base}/commit/{commit.commit_hash})"

        # Add breaking change indicator
        if show_breaking and commit.breaking_change_description:
            line += f" - {commit.breaking_change_description}"
        elif commit.is_breaking and not show_breaking:
            line += " ⚠️ BREAKING"

        return line

    def generate_release_summary(self) -> Dict:
        """Generate summary statistics for the release.

        Returns:
            A dict with the keys ``version``, ``date``, ``total_commits``,
            ``by_type``, ``by_author`` (ten most frequent), ``breaking_changes``,
            ``notable_changes``, ``scopes`` (sorted) and ``issue_references``
            (number of distinct references). The same keys are present when
            there are no commits.
        """
        type_counts = Counter(commit.type for commit in self.commits)
        author_counts = Counter(commit.author for commit in self.commits if commit.author)
        breaking_count = sum(1 for commit in self.commits if commit.is_breaking)

        # Count notable changes (excluding chore, ci, build, test)
        notable_count = sum(
            1 for commit in self.commits if commit.type in self._NOTABLE_TYPES
        )

        issue_refs = set()
        for commit in self.commits:
            issue_refs.update(commit.extract_issue_references())

        return {
            'version': self.version,
            'date': self.date,
            'total_commits': len(self.commits),
            'by_type': dict(type_counts.most_common()),
            'by_author': dict(author_counts.most_common(10)),  # Top 10 contributors
            'breaking_changes': breaking_count,
            'notable_changes': notable_count,
            # Sorted so that repeated runs produce identical output.
            'scopes': sorted({commit.scope for commit in self.commits if commit.scope}),
            'issue_references': len(issue_refs),
        }

    def generate_json_output(self) -> str:
        """Generate JSON representation of the changelog data.

        Returns:
            A JSON document (2-space indent) with ``version``, ``date``,
            ``summary`` and ``categories``; each category lists its commits
            as plain dicts.
        """
        json_data = {
            'version': self.version,
            'date': self.date,
            'summary': self.generate_release_summary(),
            'categories': {},
        }

        # Convert commits to serializable format
        for category, commits in self.group_commits_by_category().items():
            json_data['categories'][category] = [
                {
                    'type': commit.type,
                    'scope': commit.scope,
                    'description': commit.description,
                    'hash': commit.commit_hash,
                    'author': commit.author,
                    'date': commit.date,
                    'breaking': commit.is_breaking,
                    'breaking_description': commit.breaking_change_description,
                    'issue_references': commit.extract_issue_references(),
                }
                for commit in commits
            ]

        return json.dumps(json_data, indent=2)
|
|
|
|
|
|
def main():
    """Main entry point with CLI argument parsing.

    Reads commits from ``--input`` or stdin, parses them as git log text or
    JSON, and writes the changelog to ``--output`` or stdout. Exits with
    status 1 when the input cannot be read or parsed, is empty, contains no
    commits, or the output file cannot be written.
    """
    parser = argparse.ArgumentParser(description="Generate changelog from conventional commits")
    parser.add_argument('--input', '-i', type=str, help='Input file (default: stdin)')
    parser.add_argument('--format', '-f', choices=['markdown', 'json', 'both'],
                        default='markdown', help='Output format')
    parser.add_argument('--version', '-v', type=str, default='Unreleased',
                        help='Version for this release')
    parser.add_argument('--date', '-d', type=str,
                        default=datetime.now().strftime("%Y-%m-%d"),
                        help='Release date (YYYY-MM-DD format)')
    parser.add_argument('--base-url', '-u', type=str, default='',
                        help='Base URL for commit links')
    parser.add_argument('--input-format', choices=['git-log', 'json'],
                        default='git-log', help='Input format')
    parser.add_argument('--output', '-o', type=str, help='Output file (default: stdout)')
    parser.add_argument('--summary', '-s', action='store_true',
                        help='Include release summary statistics')

    args = parser.parse_args()

    # Read input; a missing or undecodable file is reported like every other
    # input error instead of ending in a traceback.
    try:
        if args.input:
            with open(args.input, 'r', encoding='utf-8') as f:
                input_data = f.read()
        else:
            input_data = sys.stdin.read()
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error reading input: {e}", file=sys.stderr)
        sys.exit(1)

    if not input_data.strip():
        print("No input data provided", file=sys.stderr)
        sys.exit(1)

    # Initialize generator
    generator = ChangelogGenerator()
    generator.version = args.version
    generator.date = args.date
    generator.base_url = args.base_url

    # Parse input. The broad catch is deliberate: this is the top-level
    # boundary and any parser failure becomes one error line.
    try:
        if args.input_format == 'json':
            generator.parse_json_commits(input_data)
        else:
            generator.parse_git_log_output(input_data)
    except Exception as e:
        print(f"Error parsing input: {e}", file=sys.stderr)
        sys.exit(1)

    if not generator.commits:
        print("No valid commits found in input", file=sys.stderr)
        sys.exit(1)

    # Generate output
    output_lines = []

    if args.format in ['markdown', 'both']:
        changelog_md = generator.generate_markdown_changelog()
        if args.format == 'both':
            output_lines.append("# Markdown Changelog\n")
        output_lines.append(changelog_md)

    if args.format in ['json', 'both']:
        changelog_json = generator.generate_json_output()
        if args.format == 'both':
            output_lines.append("\n# JSON Output\n")
        output_lines.append(changelog_json)

    # The JSON document already embeds the statistics under "summary";
    # appending Markdown text to pure JSON output would make it unparseable.
    if args.summary and args.format != 'json':
        summary = generator.generate_release_summary()
        output_lines.append("\n# Release Summary")
        output_lines.append(f"- **Version:** {summary['version']}")
        output_lines.append(f"- **Total Commits:** {summary['total_commits']}")
        output_lines.append(f"- **Notable Changes:** {summary['notable_changes']}")
        output_lines.append(f"- **Breaking Changes:** {summary['breaking_changes']}")
        output_lines.append(f"- **Issue References:** {summary['issue_references']}")

        if summary['by_type']:
            output_lines.append("- **By Type:**")
            for commit_type, count in summary['by_type'].items():
                output_lines.append(f"  - {commit_type}: {count}")

    # Write output
    final_output = '\n'.join(output_lines)

    if args.output:
        try:
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(final_output)
        except OSError as e:
            print(f"Error writing output: {e}", file=sys.stderr)
            sys.exit(1)
    else:
        print(final_output)
|
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()