add brain

This commit is contained in:
2026-03-12 15:17:52 +07:00
parent fd9f558fa1
commit e7821a7a9d
355 changed files with 93784 additions and 24 deletions

View File

@@ -0,0 +1,504 @@
#!/usr/bin/env python3
"""
Changelog Generator
Parses git log output in Conventional Commits format and generates structured changelogs
as Keep a Changelog-style Markdown and/or JSON. Groups commits by type, extracts scope,
collects PR/issue references, and highlights breaking changes.
Input: git log text (piped from git log) or JSON array of commits
Output: formatted CHANGELOG.md section + release summary stats
"""
import argparse
import json
import re
import sys
from collections import defaultdict, Counter
from datetime import datetime
from typing import Dict, List, Optional, Tuple, Union
class ConventionalCommit:
    """A single commit parsed according to the Conventional Commits format.

    The raw message is parsed once, at construction time, into:

    * ``type`` / ``scope`` / ``description`` -- taken from the header line
      ``type(scope)!: description``; headers that do not follow that shape
      fall back to type ``"chore"`` with the whole header as description.
    * ``body`` -- non-blank lines after the header, up to the first footer.
    * ``footers`` -- the first footer line and every non-blank line after it.
    * ``is_breaking`` / ``breaking_change_description`` -- set by a ``!`` in
      the header or by a ``BREAKING CHANGE:`` / ``BREAKING-CHANGE:`` footer.
    """

    # Header shape: type(scope)!: description -- scope and "!" are optional.
    _HEADER_RE = re.compile(r'^(\w+)(\([^)]+\))?(!)?:\s*(.+)$')

    # "BREAKING CHANGE" is the only footer token that contains a space, so it
    # cannot be recognised by the generic footer pattern and gets its own.
    _BREAKING_RE = re.compile(r'^BREAKING[ -]CHANGE:\s*(.*)$')

    # Generic footers.  The first two alternatives are the historical
    # upper-case forms; the last two accept git-trailer style tokens.
    _FOOTER_RE = re.compile(
        r'^[A-Z-]+:\s*.+$'                    # KEY: value
        r'|^[A-Z-]+\s+#\d+$'                  # KEY #123
        r'|^[A-Za-z][A-Za-z0-9-]*:\s+.+$'     # Reviewed-by: name
        r'|^[A-Za-z][A-Za-z0-9-]*\s+#\d+$'    # Fixes #123
    )

    # Commit type -> changelog section.  ``None`` marks internal changes that
    # are left out of the user-facing changelog.
    _CATEGORY_BY_TYPE = {
        'feat': 'Added',
        'add': 'Added',
        'fix': 'Fixed',
        'bugfix': 'Fixed',
        'security': 'Security',
        'perf': 'Fixed',  # Performance improvements go to Fixed
        'refactor': 'Changed',
        'style': 'Changed',
        'docs': 'Changed',
        'test': None,
        'ci': None,
        'build': None,
        'chore': None,
        'revert': 'Fixed',
        'remove': 'Removed',
        'deprecate': 'Deprecated',
    }

    def __init__(self, raw_message: str, commit_hash: str = "", author: str = "",
                 date: str = "", merge_info: Optional[str] = None):
        """Store the commit metadata and parse ``raw_message`` immediately."""
        self.raw_message = raw_message
        self.commit_hash = commit_hash
        self.author = author
        self.date = date
        self.merge_info = merge_info

        # Fields filled in by _parse_commit_message().
        self.type = ""
        self.scope = ""
        self.description = ""
        self.body = ""
        self.footers = []
        self.is_breaking = False
        self.breaking_change_description = ""

        self._parse_commit_message()

    def _parse_commit_message(self):
        """Split the raw message into header fields, body and footers."""
        # A missing (None) message is treated like an empty one.
        header, *rest = (self.raw_message or "").split('\n')

        match = self._HEADER_RE.match(header)
        if match:
            commit_type, scope, bang, description = match.groups()
            self.type = commit_type.lower()
            self.scope = scope[1:-1] if scope else ""  # drop the parentheses
            self.is_breaking = bool(bang)  # "!" marks a breaking change
            self.description = description.strip()
        else:
            # Non-conventional commit: keep the text, classify as internal.
            self.type = "chore"
            self.description = header

        body_lines = []
        footer_lines = []
        for line in rest:
            if not line.strip():
                continue

            breaking = self._BREAKING_RE.match(line)
            if breaking:
                self.is_breaking = True
                self.breaking_change_description = breaking.group(1).strip()
                footer_lines.append(line)
            elif footer_lines or self._FOOTER_RE.match(line):
                # Once the first footer has been seen, everything that
                # follows is a footer or a continuation of one.
                footer_lines.append(line)
            else:
                body_lines.append(line)

        self.body = '\n'.join(body_lines).strip()
        self.footers = footer_lines

    def extract_issue_references(self) -> List[str]:
        """Return the issue/PR numbers referenced as ``#123``.

        The description, body and footers are searched; numbers are returned
        as strings without the ``#``, de-duplicated, in order of first
        appearance.  Keyword forms such as ``fixes #456`` or
        ``closes org/repo#789`` are covered because they all end in ``#<n>``.
        """
        text = f"{self.description} {self.body} {' '.join(self.footers)}"
        # dict.fromkeys() removes duplicates while keeping first-seen order.
        return list(dict.fromkeys(re.findall(r'#(\d+)', text)))

    def get_changelog_category(self) -> Optional[str]:
        """Map the commit type to a changelog section name.

        Returns ``None`` for internal types (test, ci, build, chore) and
        ``'Changed'`` for any type that is not in the mapping.
        """
        return self._CATEGORY_BY_TYPE.get(self.type, 'Changed')
class ChangelogGenerator:
    """Collect conventional commits and render them as a changelog.

    Commits are added with :meth:`parse_git_log_output` or
    :meth:`parse_json_commits` and accumulate in ``self.commits``.  The
    ``version``, ``date`` and ``base_url`` attributes control the rendered
    header and the optional commit links.
    """

    # Section order used by the Markdown output.
    _CATEGORY_ORDER = ('Added', 'Changed', 'Deprecated', 'Removed', 'Fixed', 'Security')

    # "<hash> <message>" as printed by `git log --oneline`.
    _ONELINE_RE = re.compile(r'^([a-f0-9]{7,40})\s+(.+)$')

    # "commit <hash>" line that opens an entry of the default `git log` output.
    _COMMIT_HEADER_RE = re.compile(r'^commit\s+([a-f0-9]+)')

    def __init__(self):
        """Create an empty generator for an "Unreleased" version dated today."""
        self.commits: List[ConventionalCommit] = []
        self.version = "Unreleased"
        self.date = datetime.now().strftime("%Y-%m-%d")
        self.base_url = ""

    def parse_git_log_output(self, git_log_text: str):
        """Parse git log text and append the commits to ``self.commits``.

        Two layouts are recognised, line by line:

        * oneline: ``<hash> <message>``
        * full: a ``commit <hash>`` line, ``Author:`` / ``Date:`` /
          ``Merge:`` metadata lines, then the message on indented lines.

        Blank lines are ignored.  Lines matching neither layout are skipped.
        """
        pending = None  # full-format commit currently being assembled

        for raw_line in git_log_text.strip().split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            # Indentation has to be tested on the raw line: inside a
            # full-format entry it is what marks message text, and it also
            # stops a message line that happens to start with a hex word
            # from being mistaken for a new oneline commit.
            if pending is not None and raw_line[0] in ' \t':
                if raw_line.startswith('    '):
                    text = raw_line[4:]  # git's standard 4-space indent
                else:
                    text = raw_line.lstrip()
                pending['message'].append(text.rstrip())
                continue

            header = self._COMMIT_HEADER_RE.match(line)
            if header:
                self._flush_pending(pending)
                pending = {
                    'hash': header.group(1),
                    'author': '',
                    'date': '',
                    'merge_info': None,
                    'message': [],
                }
                continue

            oneline = self._ONELINE_RE.match(line)
            if oneline:
                self._flush_pending(pending)
                pending = None
                self.commits.append(
                    ConventionalCommit(oneline.group(2), oneline.group(1))
                )
                continue

            # Metadata lines of a full-format entry.
            if pending is not None:
                if line.startswith('Author:'):
                    pending['author'] = line[7:].strip()
                elif line.startswith('Date:'):
                    pending['date'] = line[5:].strip()
                elif line.startswith('Merge:'):
                    pending['merge_info'] = line[6:].strip()

        self._flush_pending(pending)

    def _flush_pending(self, pending: Optional[Dict]):
        """Turn an assembled full-format entry into a commit and store it."""
        if pending is None:
            return
        message = '\n'.join(pending['message']).strip()
        self.commits.append(ConventionalCommit(
            message,
            pending['hash'],
            pending['author'],
            pending['date'],
            pending['merge_info'],
        ))

    def parse_json_commits(self, json_data: Union[str, List[Dict]]):
        """Append commits described by a JSON array (or an already-decoded list).

        Each element is an object with optional ``message``, ``hash``,
        ``author`` and ``date`` keys; missing or null values become ``""``.

        Raises:
            ValueError: if the input is not valid JSON, is not an array, or
                contains an element that is not an object.
        """
        data = json.loads(json_data) if isinstance(json_data, str) else json_data
        if not isinstance(data, (list, tuple)):
            raise ValueError("JSON input must be an array of commit objects")

        for entry in data:
            if not isinstance(entry, dict):
                raise ValueError("Each commit in the JSON array must be an object")
            self.commits.append(ConventionalCommit(
                raw_message=entry.get('message') or '',
                commit_hash=entry.get('hash') or '',
                author=entry.get('author') or '',
                date=entry.get('date') or '',
            ))

    def group_commits_by_category(self) -> Dict[str, List[ConventionalCommit]]:
        """Group commits by changelog category, dropping internal ones.

        Commits whose category is ``None`` are left out.
        """
        grouped = defaultdict(list)
        for commit in self.commits:
            category = commit.get_changelog_category()
            if category:
                grouped[category].append(commit)
        return dict(grouped)

    def generate_markdown_changelog(self, include_unreleased: bool = True) -> str:
        """Render the commits as a Keep a Changelog style Markdown section.

        Args:
            include_unreleased: when true and the version is "Unreleased",
                the header is written without a date.

        Returns:
            The Markdown text, or ``"No notable changes.\\n"`` when there is
            neither a user-facing commit nor a breaking change.
        """
        grouped = self.group_commits_by_category()
        breaking = [commit for commit in self.commits if commit.is_breaking]

        # Breaking changes are reported whatever their commit type is, so a
        # release with only e.g. "chore!: ..." commits is still notable.
        if not grouped and not breaking:
            return "No notable changes.\n"

        out = []
        if include_unreleased and self.version == "Unreleased":
            out.append(f"## [{self.version}]")
        else:
            out.append(f"## [{self.version}] - {self.date}")
        out.append("")

        if breaking:
            out.append("### Breaking Changes")
            for commit in breaking:
                out.append(f"- {self._format_commit_line(commit, show_breaking=True)}")
            out.append("")

        for category in self._CATEGORY_ORDER:
            if category not in grouped:
                continue
            out.append(f"### {category}")

            # Sub-group by scope; unscoped commits go under "general".
            by_scope = defaultdict(list)
            for commit in grouped[category]:
                by_scope[commit.scope or "general"].append(commit)

            # Alphabetical scopes, with "general" moved to the end.
            scopes = sorted(by_scope)
            if "general" in scopes:
                scopes.remove("general")
                scopes.append("general")

            for scope in scopes:
                # Scope sub-headings only help when there are several groups.
                if len(by_scope) > 1 and scope != "general":
                    out.append(f"#### {scope.title()}")
                for commit in by_scope[scope]:
                    out.append(f"- {self._format_commit_line(commit)}")
            out.append("")

        return '\n'.join(out)

    def _format_commit_line(self, commit: ConventionalCommit, show_breaking: bool = False) -> str:
        """Format one commit as the text of a changelog bullet.

        Args:
            commit: the commit to render.
            show_breaking: true when rendering inside the "Breaking Changes"
                section; the breaking-change description is appended instead
                of the BREAKING marker.
        """
        # Upper-case only the first character: str.capitalize() would also
        # lower-case the rest and mangle names such as "OAuth" or "API".
        description = commit.description
        line = description[:1].upper() + description[1:]

        # Prefix the scope unless the description already mentions it.
        if commit.scope and commit.scope.lower() not in line.lower():
            line = f"{commit.scope}: {line}"

        # Append issue references that are not already visible in the line
        # (descriptions like "fix login (#12)" would otherwise repeat them).
        missing_refs = [
            ref for ref in commit.extract_issue_references()
            if not re.search(rf'#{ref}(?!\d)', line)
        ]
        if missing_refs:
            refs_str = ', '.join(f"#{ref}" for ref in missing_refs)
            line += f" ({refs_str})"

        # Short hash, turned into a Markdown link when a base URL is set.
        if commit.commit_hash:
            line += f" [{commit.commit_hash[:7]}]"
            if self.base_url:
                base = self.base_url.rstrip('/')  # avoid "//commit/" in links
                line += f"({base}/commit/{commit.commit_hash})"

        if show_breaking and commit.breaking_change_description:
            line += f" - {commit.breaking_change_description}"
        elif commit.is_breaking and not show_breaking:
            line += " ⚠️ BREAKING"

        return line

    def generate_release_summary(self) -> Dict:
        """Return summary statistics for the release.

        The result always has the same keys, including when there are no
        commits: ``version``, ``date``, ``total_commits``, ``by_type``,
        ``by_author`` (top 10), ``breaking_changes``, ``notable_changes``,
        ``scopes`` (sorted) and ``issue_references`` (count of distinct
        issue numbers).
        """
        type_counts = Counter(commit.type for commit in self.commits)
        author_counts = Counter(commit.author for commit in self.commits if commit.author)
        breaking_count = sum(1 for commit in self.commits if commit.is_breaking)

        # Types counted as notable (excludes chore, ci, build, test).
        notable_types = {'feat', 'fix', 'security', 'perf', 'refactor', 'remove', 'deprecate'}
        notable_count = sum(1 for commit in self.commits if commit.type in notable_types)

        distinct_refs = {
            ref
            for commit in self.commits
            for ref in commit.extract_issue_references()
        }

        return {
            'version': self.version,
            'date': self.date,
            'total_commits': len(self.commits),
            'by_type': dict(type_counts.most_common()),
            'by_author': dict(author_counts.most_common(10)),  # Top 10 contributors
            'breaking_changes': breaking_count,
            'notable_changes': notable_count,
            # Sorted so that repeated runs produce identical output.
            'scopes': sorted({commit.scope for commit in self.commits if commit.scope}),
            'issue_references': len(distinct_refs),
        }

    def generate_json_output(self) -> str:
        """Return the changelog (summary plus commits per category) as JSON text."""
        categories = {}
        for category, commits in self.group_commits_by_category().items():
            categories[category] = [
                {
                    'type': commit.type,
                    'scope': commit.scope,
                    'description': commit.description,
                    'hash': commit.commit_hash,
                    'author': commit.author,
                    'date': commit.date,
                    'breaking': commit.is_breaking,
                    'breaking_description': commit.breaking_change_description,
                    'issue_references': commit.extract_issue_references(),
                }
                for commit in commits
            ]

        json_data = {
            'version': self.version,
            'date': self.date,
            'summary': self.generate_release_summary(),
            'categories': categories,
        }
        return json.dumps(json_data, indent=2)
def main():
    """Command-line entry point: read commits, render them, write the result.

    Input comes from ``--input`` or stdin, in git-log or JSON form.  The
    rendered changelog goes to ``--output`` or stdout.  The process exits
    with status 1 when the input is empty, cannot be parsed, or contains no
    commits.
    """
    parser = argparse.ArgumentParser(description="Generate changelog from conventional commits")
    parser.add_argument('--input', '-i', type=str, help='Input file (default: stdin)')
    parser.add_argument('--format', '-f', choices=['markdown', 'json', 'both'],
                        default='markdown', help='Output format')
    parser.add_argument('--version', '-v', type=str, default='Unreleased',
                        help='Version for this release')
    parser.add_argument('--date', '-d', type=str,
                        default=datetime.now().strftime("%Y-%m-%d"),
                        help='Release date (YYYY-MM-DD format)')
    parser.add_argument('--base-url', '-u', type=str, default='',
                        help='Base URL for commit links')
    parser.add_argument('--input-format', choices=['git-log', 'json'],
                        default='git-log', help='Input format')
    parser.add_argument('--output', '-o', type=str, help='Output file (default: stdout)')
    parser.add_argument('--summary', '-s', action='store_true',
                        help='Include release summary statistics')
    args = parser.parse_args()

    # Load the raw text, from the named file or from stdin.
    if args.input:
        with open(args.input, 'r', encoding='utf-8') as source:
            raw_text = source.read()
    else:
        raw_text = sys.stdin.read()

    if not raw_text.strip():
        print("No input data provided", file=sys.stderr)
        sys.exit(1)

    # Configure the generator from the command line.
    generator = ChangelogGenerator()
    generator.version = args.version
    generator.date = args.date
    generator.base_url = args.base_url

    # Pick the parser that matches the declared input format.
    if args.input_format == 'json':
        parse = generator.parse_json_commits
    else:
        parse = generator.parse_git_log_output
    try:
        parse(raw_text)
    except Exception as e:
        print(f"Error parsing input: {e}", file=sys.stderr)
        sys.exit(1)

    if not generator.commits:
        print("No valid commits found in input", file=sys.stderr)
        sys.exit(1)

    # Assemble the requested sections; titles are added only for 'both'.
    labelled = args.format == 'both'
    sections = []

    if args.format in ('markdown', 'both'):
        markdown_text = generator.generate_markdown_changelog()
        if labelled:
            sections.append("# Markdown Changelog\n")
        sections.append(markdown_text)

    if args.format in ('json', 'both'):
        json_text = generator.generate_json_output()
        if labelled:
            sections.append("\n# JSON Output\n")
        sections.append(json_text)

    if args.summary:
        stats = generator.generate_release_summary()
        sections.extend([
            "\n# Release Summary",
            f"- **Version:** {stats['version']}",
            f"- **Total Commits:** {stats['total_commits']}",
            f"- **Notable Changes:** {stats['notable_changes']}",
            f"- **Breaking Changes:** {stats['breaking_changes']}",
            f"- **Issue References:** {stats['issue_references']}",
        ])
        if stats['by_type']:
            sections.append("- **By Type:**")
            sections.extend(
                f" - {commit_type}: {count}"
                for commit_type, count in stats['by_type'].items()
            )

    # Emit the result to the chosen destination.
    rendered = '\n'.join(sections)
    if not args.output:
        print(rendered)
    else:
        with open(args.output, 'w', encoding='utf-8') as sink:
            sink.write(rendered)


if __name__ == '__main__':
    main()