358 lines
11 KiB
Python
358 lines
11 KiB
Python
#!/usr/bin/env python3
|
||
"""
Postmortem onboarding script.

Analyzes historical fix commits and generates the initial set of postmortems.

Usage:
    python tools/postmortem_init.py [--since 2025-06-01] [--limit 50] [--dry-run]
"""
|
||
import argparse
|
||
import asyncio
|
||
import json
|
||
import re
|
||
import subprocess
|
||
import sys
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
from typing import Dict, List, Optional
|
||
|
||
import yaml
|
||
|
||
# Add the project root directory to the Python import path
sys.path.insert(0, str(Path(__file__).parent.parent))

# Directory two levels above this file's location, under "postmortem";
# postmortem YAML files are read from and written to this directory.
POSTMORTEM_DIR = Path(__file__).parent.parent / "postmortem"
|
||
|
||
|
||
def get_fix_commits(since: Optional[str] = None, limit: int = 100) -> List[Dict]:
    """Return the commits selected by ``git log --grep=^fix -i --all``.

    The command runs in the parent directory of ``POSTMORTEM_DIR``.

    Args:
        since: Optional value forwarded to ``git log --since``.
        limit: Maximum number of commits to return.

    Returns:
        A list of dicts with the keys ``hash``, ``subject`` and ``date``
        (the ``%aI`` author date), in the order git printed them and
        truncated to ``limit`` entries. An empty list is returned when git
        exits with a non-zero status or prints nothing.
    """
    cmd = [
        "git",
        "log",
        "--grep=^fix",
        "-i",
        "--all",
        "--format=%H|%s|%aI",
    ]
    if since:
        cmd.extend(["--since", since])

    # Decode explicitly as UTF-8 so non-ASCII subjects survive on systems
    # whose locale encoding is not UTF-8; undecodable bytes are replaced
    # instead of raising.
    result = subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
        cwd=POSTMORTEM_DIR.parent,
    )
    if result.returncode != 0:
        print(f"Warning: git log failed: {result.stderr.strip()}", file=sys.stderr)
        return []

    commits = []
    for line in result.stdout.strip().split("\n"):
        if not line:
            continue
        # The hash (first field) and the date (last field) never contain "|",
        # but the subject may. Split the hash off the front and the date off
        # the back so a "|" inside the subject stays in the subject.
        commit_hash, first_sep, remainder = line.partition("|")
        subject, last_sep, date = remainder.rpartition("|")
        if not (first_sep and last_sep):
            continue
        commits.append(
            {
                "hash": commit_hash,
                "subject": subject,
                "date": date,
            }
        )

    return commits[:limit]
|
||
|
||
|
||
def get_commit_details(commit_hash: str) -> Dict:
    """Collect the body, changed files and a truncated diff for one commit.

    All git commands run in the parent directory of ``POSTMORTEM_DIR``.

    Args:
        commit_hash: Revision passed to ``git log`` / ``git show``.

    Returns:
        A dict with the keys:
        ``body``  - output of ``git log -1 --format=%b``, stripped;
        ``files`` - non-empty lines of ``git show --name-only --format=``;
        ``diff``  - first 6000 characters of ``git show --stat -p -- *.py``.
    """
    cwd = POSTMORTEM_DIR.parent

    def run_git(*args: str) -> str:
        # Decode as UTF-8 with replacement: commit text is not tied to the
        # locale encoding, and a patch may contain bytes that are not valid
        # text at all, which would otherwise raise UnicodeDecodeError.
        completed = subprocess.run(
            ["git", *args],
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            cwd=cwd,
        )
        return completed.stdout

    # Commit message body
    body = run_git("log", "-1", "--format=%b", commit_hash).strip()

    # Paths touched by the commit
    names_output = run_git("show", commit_hash, "--name-only", "--format=")
    files = [name for name in names_output.strip().split("\n") if name]

    # Diff content (only *.py paths), capped at 6000 characters
    diff = run_git("show", commit_hash, "--stat", "-p", "--", "*.py")[:6000]

    return {"body": body, "files": files, "diff": diff}
|
||
|
||
|
||
def assess_commit_quality(commit: Dict, details: Dict) -> float:
    """Score how informative a commit is, to decide whether a postmortem is worthwhile.

    Args:
        commit: Dict whose ``subject`` entry is read (defaults to "").
        details: Dict whose ``body`` (defaults to "") and ``files``
            (defaults to []) entries are read.

    Returns:
        The sum of the weights of all satisfied criteria, capped at 1.0.
    """
    message_body = details.get("body", "")
    headline = commit.get("subject", "")
    changed_files = details.get("files", [])

    lowered_body = message_body.lower()
    mentions_problem = any(
        word in lowered_body
        for word in ("问题", "原因", "修复", "bug", "error", "issue", "cause", "fix")
    )
    looks_structured = any(
        token in message_body for token in ("##", "- ", "1.", "*")
    )
    touches_important_file = any(
        fragment in path
        for path in changed_files
        for fragment in ("recommender", "api/index", "llm", "config")
    )

    # (criterion satisfied, weight) pairs; the order fixes the order of the
    # floating-point additions below.
    criteria = [
        (len(message_body) > 50, 0.3),  # has a detailed description
        (len(message_body) > 150, 0.2),  # ... and a long one
        (mentions_problem, 0.2),  # problem-description keywords
        (looks_structured, 0.1),  # structured formatting
        ("(" in headline and ")" in headline, 0.1),  # explicit scope
        (touches_important_file, 0.1),  # important files modified
    ]

    total = 0.0
    for satisfied, weight in criteria:
        if satisfied:
            total += weight

    return min(1.0, total)
|
||
|
||
|
||
def parse_llm_response(response: str) -> Dict:
    """Parse an LLM reply into a dict, tolerating common output formats.

    Three attempts are made in order: the raw text, the text with markdown
    code fences removed, and the span from the first ``{`` to the last ``}``.
    Only a JSON *object* counts as success; valid JSON of any other type
    (list, string, number, ...) is treated like a parse failure so callers
    always receive a dict.

    Args:
        response: Raw text returned by the LLM.

    Returns:
        The parsed JSON object, or a placeholder dict (title, description
        holding the first 500 characters of ``response``, severity "low",
        tags ``["parse-failed"]``) when no object could be extracted.
    """

    def load_object(text: str) -> Optional[Dict]:
        # Return the decoded value only when it is a JSON object.
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            return None
        return parsed if isinstance(parsed, dict) else None

    # Attempt 1: parse the reply as-is
    parsed = load_object(response)
    if parsed is not None:
        return parsed

    # Attempt 2: strip markdown code fences
    cleaned = re.sub(r"^```(?:json)?\s*", "", response, flags=re.MULTILINE)
    cleaned = re.sub(r"\s*```$", "", cleaned, flags=re.MULTILINE)
    cleaned = cleaned.strip()

    parsed = load_object(cleaned)
    if parsed is not None:
        return parsed

    # Attempt 3: extract the outermost {...} span
    match = re.search(r"\{[\s\S]*\}", cleaned)
    if match:
        parsed = load_object(match.group())
        if parsed is not None:
            return parsed

    # Nothing usable: return a minimal placeholder structure
    return {
        "title": "解析失败",
        "description": response[:500],
        "severity": "low",
        "tags": ["parse-failed"],
    }
|
||
|
||
|
||
async def generate_postmortem_with_llm(commit: Dict, details: Dict) -> Dict:
    """Generate a postmortem entry for a commit using the LLM.

    Falls back to ``extract_from_commit(commit, details)`` when the LLM
    module cannot be imported, the LLM cannot be constructed, or the LLM
    call (or parsing of its reply) raises.

    Args:
        commit: Dict with at least ``subject`` and ``date``.
        details: Dict with optional ``body``, ``files`` and ``diff`` entries.

    Returns:
        The dict produced by ``parse_llm_response`` or, on any failure,
        by ``extract_from_commit``.
    """
    try:
        from app.llm import LLM

        llm = LLM()
    except ImportError as e:
        print(f" Warning: Cannot import LLM module: {e}")
        return extract_from_commit(commit, details)
    except Exception as e:
        print(f" Warning: LLM init failed: {e}")
        return extract_from_commit(commit, details)

    # Use the placeholder for an empty body too, not only a missing key:
    # `.get("body", default)` would yield "" and leave the field blank.
    body_text = (details.get("body") or "(无)")[:1000]
    # At most 15 file names, one per line.
    files_text = "\n".join(details.get("files", [])[:15])
    diff_text = details.get("diff", "")[:3000]

    prompt = f"""分析以下 git fix commit,生成一个 postmortem 条目。

Commit 信息:
- Subject: {commit['subject']}
- Date: {commit['date']}
- Body: {body_text}

修改的文件:
{files_text}

代码变更摘要:
{diff_text}

请生成 JSON 格式的 postmortem,包含以下字段:
1. title: 简短标题(中文,10-30字)
2. description: 问题描述(2-3句话,描述问题现象和影响)
3. root_cause: 根因分析(1-2句话)
4. severity: critical/high/medium/low(根据影响范围判断)
5. triggers: 对象,包含:
- files: 相关文件模式列表(如 "app/tool/*.py")
- functions: 相关函数名列表(从 diff 中提取)
- patterns: 正则匹配模式列表(用于匹配未来的 diff 内容)
- keywords: 关键词列表(中英文都可以)
6. fix_pattern: 对象,包含:
- approach: 修复方法描述
- key_changes: 关键变更点列表
7. verification: 验证检查点列表(未来修改相关代码时应检查的事项)
8. tags: 标签列表(用于分类,如 geocoding, ui, api 等)

只返回 JSON,不要其他文字。"""

    try:
        response = await llm.ask(
            messages=[{"role": "user", "content": prompt}],
            stream=False,
            temperature=0.2,
        )
        return parse_llm_response(response)
    except Exception as e:
        # Best effort: any failure of the call degrades to the non-LLM extraction.
        print(f" Warning: LLM call failed: {e}")
        return extract_from_commit(commit, details)
|
||
|
||
|
||
def extract_from_commit(commit: Dict, details: Dict) -> Dict:
    """Build a postmortem entry directly from commit data (non-LLM fallback).

    Args:
        commit: Dict whose ``subject`` entry is read (defaults to "").
        details: Dict whose ``body``, ``files`` and ``diff`` entries are
            read (defaulting to "", [] and "").

    Returns:
        A dict with the keys ``title``, ``description``, ``root_cause``,
        ``severity``, ``triggers``, ``fix_pattern``, ``verification`` and
        ``tags``. The tag is the ``fix(<scope>)`` scope of the subject, or
        "general" when the subject has none.
    """
    subject = commit.get("subject", "")
    body = details.get("body", "")
    files = details.get("files", [])
    diff = details.get("diff", "")

    # Take the tag from the scope. The character class matches the one used
    # for title cleaning below, so scopes such as "api-index" or "tool/geo"
    # are recognised as well.
    scope_match = re.search(r"fix\(([^)]+)\)", subject, re.IGNORECASE)
    tags = [scope_match.group(1)] if scope_match else ["general"]

    # Strip the leading "fix:" / "fix(scope):" prefix from the title
    title = re.sub(r"^fix(\([^)]+\))?:\s*", "", subject, flags=re.IGNORECASE)

    # Function names defined in the diff: de-duplicated in first-seen order
    # so the five that are kept are the same on every run.
    functions = list(dict.fromkeys(re.findall(r"def\s+(\w+)\s*\(", diff)))[:5]

    return {
        "title": title[:50] if title else "Fix commit",
        "description": body[:300] if body else subject,
        "root_cause": "See commit body for details",
        "severity": "medium",
        "triggers": {
            "files": files[:5],
            "functions": functions,
            "patterns": [],
            # Separate copies: one list object referenced twice would be
            # written by the YAML dumper as an anchor plus an alias.
            "keywords": list(tags),
        },
        "fix_pattern": {
            "approach": title,
            "key_changes": [title],
        },
        "verification": ["Review related code changes"],
        "tags": list(tags),
    }
|
||
|
||
|
||
def get_next_pm_id(year: int) -> str:
    """Return the next free postmortem ID for ``year``.

    Creates ``POSTMORTEM_DIR`` if needed, then scans it for
    ``PM-<year>-*.yaml`` files and uses the highest numeric suffix found.

    Args:
        year: Year component of the ID.

    Returns:
        ``PM-<year>-NNN`` where NNN is one more than the highest existing
        number, zero-padded to at least three digits (``001`` when no
        numbered file exists).
    """
    POSTMORTEM_DIR.mkdir(exist_ok=True)

    highest = 0
    for path in POSTMORTEM_DIR.glob(f"PM-{year}-*.yaml"):
        suffix = path.stem.split("-")[-1]
        try:
            number = int(suffix)
        except ValueError:
            # A file such as "PM-2025-draft.yaml" also matches the glob;
            # ignore it instead of aborting the whole run.
            continue
        highest = max(highest, number)

    return f"PM-{year}-{highest + 1:03d}"
|
||
|
||
|
||
def save_postmortem(pm_data: Dict, commit: Dict, details: Dict, pm_id: str) -> Path:
    """Write one postmortem to ``POSTMORTEM_DIR/<pm_id>.yaml``.

    Args:
        pm_data: Generated postmortem fields; missing fields get defaults.
            A value that is not a dict is treated as empty.
        commit: Dict with a ``hash`` entry; its first 7 characters are
            stored as ``source_commit``.
        details: Dict whose ``files`` entry (defaults to []) is stored under
            ``related.files_changed`` and, truncated to 5 entries, used as
            the default for ``triggers.files``.
        pm_id: Identifier stored in the file and used as the file name.

    Returns:
        Path of the written YAML file.
    """
    # Local import: the module header only imports `datetime` itself.
    from datetime import timezone

    POSTMORTEM_DIR.mkdir(exist_ok=True)

    # Tolerate malformed generator output the same way `triggers` is handled.
    if not isinstance(pm_data, dict):
        pm_data = {}

    # Make sure triggers has the complete structure
    triggers = pm_data.get("triggers", {})
    if not isinstance(triggers, dict):
        triggers = {}

    changed_files = details.get("files", [])

    # datetime.utcnow() is deprecated; take an aware UTC time and drop the
    # tzinfo so the text keeps the previous "<naive ISO timestamp>Z" shape.
    created_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    output = {
        "id": pm_id,
        "created_at": created_at,
        "source_commit": commit["hash"][:7],
        "severity": pm_data.get("severity", "medium"),
        "title": pm_data.get("title", "Untitled"),
        "description": pm_data.get("description", ""),
        "root_cause": pm_data.get("root_cause", ""),
        "triggers": {
            "files": triggers.get("files", changed_files[:5]),
            "functions": triggers.get("functions", []),
            "patterns": triggers.get("patterns", []),
            "keywords": triggers.get("keywords", []),
        },
        "fix_pattern": pm_data.get("fix_pattern", {}),
        "verification": pm_data.get("verification", []),
        "related": {
            "files_changed": changed_files,
        },
        "tags": pm_data.get("tags", []),
    }

    filepath = POSTMORTEM_DIR / f"{pm_id}.yaml"
    with open(filepath, "w", encoding="utf-8") as f:
        # sort_keys=False keeps the field order defined above.
        yaml.dump(
            output, f, allow_unicode=True, default_flow_style=False, sort_keys=False
        )

    return filepath
|
||
|
||
|
||
async def main():
    """Command-line entry point.

    Parses the CLI options, lists the fix commits, scores each one, and for
    every commit at or above ``--min-quality`` generates and saves a
    postmortem (unless ``--dry-run`` is given). Prints progress for each
    commit and a summary at the end.
    """
    cli = argparse.ArgumentParser(description="Postmortem Onboarding")
    cli.add_argument("--since", help="Start date (YYYY-MM-DD)")
    cli.add_argument("--limit", type=int, default=50, help="Max commits to process")
    cli.add_argument("--dry-run", action="store_true", help="Preview only, no generation")
    cli.add_argument(
        "--min-quality",
        type=float,
        default=0.2,
        help="Minimum quality score to generate postmortem",
    )
    options = cli.parse_args()

    print("Fetching fix commits...")
    fix_commits = get_fix_commits(since=options.since, limit=options.limit)
    total = len(fix_commits)
    print(f"Found {total} fix commits")

    if total == 0:
        print("No fix commits found.")
        return

    written = 0
    rejected = 0

    for position, entry in enumerate(fix_commits, start=1):
        short_hash = entry["hash"][:7]
        print(f"\n[{position}/{total}] {short_hash}: {entry['subject'][:60]}")

        info = get_commit_details(entry["hash"])
        score = assess_commit_quality(entry, info)
        print(f" Quality: {score:.2f}, Files: {len(info['files'])}")

        if score < options.min_quality:
            print(f" Skipped: quality below threshold ({options.min_quality})")
            rejected += 1
            continue

        # Dry runs stop here; they are counted neither as generated nor skipped.
        if options.dry_run:
            print(" [DRY-RUN] Would generate postmortem")
            continue

        # Generate the postmortem content
        pm_data = await generate_postmortem_with_llm(entry, info)

        # The ID uses the year taken from the first four characters of the commit date
        pm_id = get_next_pm_id(int(entry["date"][:4]))

        target = save_postmortem(pm_data, entry, info, pm_id)
        print(f" Saved: {target}")
        written += 1

    print(f"\n{'=' * 50}")
    print(f"Summary: Generated {written}, Skipped {rejected}")
    if written > 0:
        print(f"Postmortems saved to: {POSTMORTEM_DIR}/")
|
||
|
||
|
||
# When executed as a script, run the async main() coroutine to completion.
if __name__ == "__main__":
    asyncio.run(main())
|