Files
WoMenQuNaJu/MeetSpot/tools/postmortem_init.py
2026-02-04 16:11:55 +08:00

358 lines
11 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Postmortem Onboarding 脚本
分析历史 fix commits生成初始 postmortem 集合
使用方法:
python tools/postmortem_init.py [--since 2025-06-01] [--limit 50] [--dry-run]
"""
import argparse
import asyncio
import json
import re
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional
import yaml
# Add the project root directory (the parent of this script's directory) to the
# Python import path so that project packages such as ``app`` can be imported
# when this script is run directly.
sys.path.insert(0, str(Path(__file__).parent.parent))
# Directory holding the postmortem YAML files: <project root>/postmortem.
POSTMORTEM_DIR = Path(__file__).parent.parent / "postmortem"
def get_fix_commits(since: Optional[str] = None, limit: int = 100) -> List[Dict]:
    """Return the fix commits found in the repository history.

    Runs ``git log`` over all refs and keeps commits whose message contains a
    line starting with "fix" (case-insensitive), optionally restricted with
    ``--since``.

    Args:
        since: Optional value forwarded to ``git log --since``.
        limit: Maximum number of commits to return.

    Returns:
        A list of dicts with the keys ``hash``, ``subject`` and ``date``
        (author date in strict ISO 8601 format), in the order printed by
        ``git log``. The list is empty when nothing matches.
    """
    cmd = [
        "git",
        "log",
        "--grep=^fix",
        "-i",
        "--all",
        "--format=%H|%s|%aI",
    ]
    if since:
        cmd.extend(["--since", since])

    result = subprocess.run(cmd, capture_output=True, text=True, cwd=POSTMORTEM_DIR.parent)
    if result.returncode != 0:
        # Keep the best-effort behaviour (no crash), but make the failure visible.
        print(f"Warning: git log failed: {result.stderr.strip()}", file=sys.stderr)

    commits = []
    for line in result.stdout.strip().split("\n"):
        # The subject (%s) may itself contain "|". The hash (first field) and
        # the ISO date (last field) never do, so peel the hash off the left
        # and the date off the right; whatever remains is the subject.
        commit_hash, first_sep, rest = line.partition("|")
        subject, last_sep, date = rest.rpartition("|")
        if not (first_sep and last_sep):
            # Blank line or a line without all three fields.
            continue
        commits.append(
            {
                "hash": commit_hash,
                "subject": subject,
                "date": date,
            }
        )
    return commits[:limit]
def get_commit_details(commit_hash: str) -> Dict:
    """Collect the message body, changed files and a truncated diff of a commit.

    Args:
        commit_hash: Revision to inspect, passed to ``git log`` / ``git show``.

    Returns:
        A dict with:
        - ``body``: the commit message body (``%b``), stripped;
        - ``files``: the non-empty lines of ``git show --name-only``;
        - ``diff``: the first 6000 characters of the stat + patch output,
          restricted to ``*.py`` paths.
    """
    repo_root = POSTMORTEM_DIR.parent

    def run_git(*git_args: str) -> str:
        # Every query runs from the repository root and only stdout is used.
        completed = subprocess.run(
            ["git", *git_args], capture_output=True, text=True, cwd=repo_root
        )
        return completed.stdout

    # Commit message body.
    message_body = run_git("log", "-1", "--format=%b", commit_hash).strip()

    # Paths touched by the commit.
    name_lines = run_git("show", commit_hash, "--name-only", "--format=").strip().split("\n")
    changed_files = [name for name in name_lines if name]

    # Diff content, Python files only, capped to keep it small.
    patch_text = run_git("show", commit_hash, "--stat", "-p", "--", "*.py")[:6000]

    return {"body": message_body, "files": changed_files, "diff": patch_text}
def assess_commit_quality(commit: Dict, details: Dict) -> float:
    """Score how informative a commit is, to decide if a postmortem is worthwhile.

    The score is the sum of the weights of the satisfied criteria, capped at 1.0:
    - body longer than 50 characters (0.3) and longer than 150 (another 0.2);
    - body mentions a problem-related keyword (0.2);
    - body contains a structure marker such as a heading or list item (0.1);
    - subject contains both "(" and ")", i.e. looks scoped (0.1);
    - any changed file path contains an "important" substring (0.1).

    Args:
        commit: Commit dict; only ``subject`` is read.
        details: Commit details dict; ``body`` and ``files`` are read.

    Returns:
        A float between 0.0 and 1.0.
    """
    body = details.get("body", "")
    subject = commit.get("subject", "")
    changed_files = details.get("files", [])

    problem_keywords = ["问题", "原因", "修复", "bug", "error", "issue", "cause", "fix"]
    structure_markers = ["##", "- ", "1.", "*"]
    important_patterns = ["recommender", "api/index", "llm", "config"]

    # (weight, predicate) pairs, evaluated lazily and strictly in this order.
    criteria = (
        (0.3, lambda: len(body) > 50),
        (0.2, lambda: len(body) > 150),
        (0.2, lambda: any(kw in body.lower() for kw in problem_keywords)),
        (0.1, lambda: any(marker in body for marker in structure_markers)),
        (0.1, lambda: "(" in subject and ")" in subject),
        (
            0.1,
            lambda: any(
                pat in path for path in changed_files for pat in important_patterns
            ),
        ),
    )

    total = 0.0
    for weight, is_met in criteria:
        if is_met():
            total += weight
    return min(1.0, total)
def parse_llm_response(response: str) -> Dict:
    """Parse an LLM reply into a postmortem dict, tolerating common formats.

    Candidates are tried in order: the raw text, the text with markdown code
    fences removed, and the outermost ``{...}`` span found in the text. Only a
    JSON *object* is accepted; valid JSON of another type (list, string,
    number) is treated as a miss so that callers always receive a dict.

    Args:
        response: Raw text returned by the LLM.

    Returns:
        The parsed JSON object, or a minimal placeholder entry (tagged
        ``parse-failed``, with the first 500 characters of the reply as its
        description) when no JSON object can be recovered.
    """

    def _load_object(text: str) -> Optional[Dict]:
        # Return the decoded JSON object, or None if ``text`` is not one.
        try:
            parsed = json.loads(text)
        except (TypeError, ValueError):  # JSONDecodeError is a ValueError
            return None
        return parsed if isinstance(parsed, dict) else None

    # Be defensive about a missing or non-string reply.
    if response is None:
        text = ""
    elif isinstance(response, str):
        text = response
    else:
        text = str(response)

    # 1. Direct parse.
    parsed = _load_object(text)
    if parsed is not None:
        return parsed

    # 2. Strip markdown code fences (``` or ```json) and retry.
    cleaned = re.sub(r"^```(?:json)?\s*", "", text, flags=re.MULTILINE)
    cleaned = re.sub(r"\s*```$", "", cleaned, flags=re.MULTILINE)
    cleaned = cleaned.strip()
    parsed = _load_object(cleaned)
    if parsed is not None:
        return parsed

    # 3. Extract the outermost {...} span surrounded by other text.
    match = re.search(r"\{[\s\S]*\}", cleaned)
    if match:
        parsed = _load_object(match.group())
        if parsed is not None:
            return parsed

    # 4. Nothing usable: return a basic structure that records the raw reply.
    return {
        "title": "解析失败",
        "description": text[:500],
        "severity": "low",
        "tags": ["parse-failed"],
    }
async def generate_postmortem_with_llm(commit: Dict, details: Dict) -> Dict:
    """Generate a postmortem entry for a commit by prompting the project LLM.

    Falls back to ``extract_from_commit`` when the LLM module cannot be
    imported, when constructing the client fails, or when the request or the
    parsing of its reply raises.

    Args:
        commit: Commit dict; ``subject`` and ``date`` are read.
        details: Commit details dict; ``body``, ``files`` and ``diff`` are read.

    Returns:
        The dict produced by ``parse_llm_response`` or, on any failure, by
        ``extract_from_commit``.
    """
    # Imported lazily so the script still works without the LLM module.
    try:
        from app.llm import LLM

        llm = LLM()
    except ImportError as e:
        print(f" Warning: Cannot import LLM module: {e}")
        return extract_from_commit(commit, details)
    except Exception as e:
        print(f" Warning: LLM init failed: {e}")
        return extract_from_commit(commit, details)

    # The prompt embeds at most 1000 characters of the body, 15 file paths and
    # 3000 characters of the diff.
    prompt = f"""分析以下 git fix commit生成一个 postmortem 条目。
Commit 信息:
- Subject: {commit['subject']}
- Date: {commit['date']}
- Body: {details.get('body', '(无)')[:1000]}
修改的文件:
{chr(10).join(details['files'][:15])}
代码变更摘要:
{details['diff'][:3000]}
请生成 JSON 格式的 postmortem包含以下字段
1. title: 简短标题中文10-30字
2. description: 问题描述2-3句话描述问题现象和影响
3. root_cause: 根因分析1-2句话
4. severity: critical/high/medium/low根据影响范围判断
5. triggers: 对象,包含:
- files: 相关文件模式列表(如 "app/tool/*.py"
- functions: 相关函数名列表(从 diff 中提取)
- patterns: 正则匹配模式列表(用于匹配未来的 diff 内容)
- keywords: 关键词列表(中英文都可以)
6. fix_pattern: 对象,包含:
- approach: 修复方法描述
- key_changes: 关键变更点列表
7. verification: 验证检查点列表(未来修改相关代码时应检查的事项)
8. tags: 标签列表(用于分类,如 geocoding, ui, api 等)
只返回 JSON不要其他文字。"""

    chat_messages = [{"role": "user", "content": prompt}]
    try:
        # Parsing stays inside the try so that a parse error also triggers the
        # commit-based fallback.
        return parse_llm_response(
            await llm.ask(messages=chat_messages, stream=False, temperature=0.2)
        )
    except Exception as e:
        print(f" Warning: LLM call failed: {e}")
        return extract_from_commit(commit, details)
def extract_from_commit(commit: Dict, details: Dict) -> Dict:
    """Build a postmortem entry directly from commit metadata (no-LLM fallback).

    Args:
        commit: Commit dict; only ``subject`` is read.
        details: Commit details dict; ``body``, ``files`` and ``diff`` are read.

    Returns:
        A postmortem dict with ``title``, ``description``, ``root_cause``,
        ``severity``, ``triggers``, ``fix_pattern``, ``verification`` and
        ``tags``. The result is deterministic for a given input.
    """
    subject = commit.get("subject", "")
    body = details.get("body", "")
    files = details.get("files", [])
    diff = details.get("diff", "")

    # Derive the tag from the scope, e.g. "fix(api-index): ..." -> "api-index".
    # The scope pattern matches the one used below to strip the prefix, so any
    # scope that is removed from the title is also kept as a tag.
    scope_match = re.search(r"fix\(([^)]+)\)", subject, re.IGNORECASE)
    scope = scope_match.group(1).strip() if scope_match else ""
    tags = [scope] if scope else ["general"]

    # Clean the title: drop the leading "fix:" / "fix(scope):" prefix.
    title = re.sub(r"^fix(\([^)]+\))?:\s*", "", subject, flags=re.IGNORECASE)

    # Function names defined in the diff: de-duplicated in order of first
    # appearance (a set would make the order, and the chosen five, vary from
    # run to run), limited to five.
    functions = list(dict.fromkeys(re.findall(r"def\s+(\w+)\s*\(", diff)))[:5]

    return {
        "title": title[:50] if title else "Fix commit",
        "description": body[:300] if body else subject,
        "root_cause": "See commit body for details",
        "severity": "medium",
        "triggers": {
            "files": files[:5],
            "functions": functions,
            "patterns": [],
            # Separate list objects: a shared list would be serialized by
            # yaml.dump as an anchor/alias pair (&id001 / *id001).
            "keywords": list(tags),
        },
        "fix_pattern": {
            "approach": title,
            "key_changes": [title],
        },
        "verification": ["Review related code changes"],
        "tags": list(tags),
    }
def get_next_pm_id(year: int, directory: Optional[Path] = None) -> str:
    """Return the next free postmortem ID for ``year``.

    IDs have the form ``PM-<year>-<NNN>``. The next number is one more than
    the highest numeric suffix among the existing ``PM-<year>-*.yaml`` files.
    Files whose suffix is not a plain number (e.g. ``PM-2025-draft.yaml``) are
    ignored rather than aborting the run.

    Args:
        year: Year component of the ID.
        directory: Directory to scan; defaults to ``POSTMORTEM_DIR``. It is
            created if it does not exist.

    Returns:
        The new ID, zero-padded to at least three digits, e.g. ``PM-2025-008``.
    """
    target_dir = POSTMORTEM_DIR if directory is None else directory
    target_dir.mkdir(parents=True, exist_ok=True)

    numbers = []
    for path in target_dir.glob(f"PM-{year}-*.yaml"):
        suffix = path.stem.rsplit("-", 1)[-1]
        # Only plain ASCII digit suffixes are counters; skip anything else.
        if suffix.isascii() and suffix.isdigit():
            numbers.append(int(suffix))

    next_number = max(numbers, default=0) + 1
    return f"PM-{year}-{next_number:03d}"
def save_postmortem(pm_data: Dict, commit: Dict, details: Dict, pm_id: str) -> Path:
    """Write a postmortem entry to ``POSTMORTEM_DIR/<pm_id>.yaml``.

    Args:
        pm_data: Generated postmortem fields (from the LLM or the commit-based
            fallback). Missing fields get defaults; a non-dict value is
            treated as empty.
        commit: Commit dict; the first 7 characters of ``hash`` are stored as
            ``source_commit``.
        details: Commit details dict; ``files`` is stored under
            ``related.files_changed`` and its first five entries are used when
            ``triggers.files`` is missing.
        pm_id: Postmortem ID, also used as the file name stem.

    Returns:
        Path of the written YAML file.
    """
    # Local import: the file-level import only brings in ``datetime``.
    from datetime import timezone

    POSTMORTEM_DIR.mkdir(exist_ok=True)

    # LLM output is not guaranteed to be a JSON object.
    if not isinstance(pm_data, dict):
        pm_data = {}

    # Make sure ``triggers`` has the complete structure.
    triggers = pm_data.get("triggers", {})
    if not isinstance(triggers, dict):
        triggers = {}

    def trigger_value(key, default):
        # A missing key and an explicit null both fall back to the default.
        value = triggers.get(key)
        return default if value is None else value

    # datetime.utcnow() is deprecated since Python 3.12; take an aware UTC
    # time and drop tzinfo so the stored "<naive ISO time>Z" format is kept.
    created_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    output = {
        "id": pm_id,
        "created_at": created_at,
        "source_commit": commit["hash"][:7],
        "severity": pm_data.get("severity", "medium"),
        "title": pm_data.get("title", "Untitled"),
        "description": pm_data.get("description", ""),
        "root_cause": pm_data.get("root_cause", ""),
        "triggers": {
            "files": trigger_value("files", details.get("files", [])[:5]),
            "functions": trigger_value("functions", []),
            "patterns": trigger_value("patterns", []),
            "keywords": trigger_value("keywords", []),
        },
        "fix_pattern": pm_data.get("fix_pattern", {}),
        "verification": pm_data.get("verification", []),
        "related": {
            "files_changed": details.get("files", []),
        },
        "tags": pm_data.get("tags", []),
    }

    filepath = POSTMORTEM_DIR / f"{pm_id}.yaml"
    with open(filepath, "w", encoding="utf-8") as f:
        # Keep non-ASCII text readable and preserve the key order above.
        yaml.dump(
            output, f, allow_unicode=True, default_flow_style=False, sort_keys=False
        )
    return filepath
async def main():
    """Command-line entry point: scan fix commits and write postmortem files.

    For each fix commit the details are fetched and scored. Commits scoring
    below ``--min-quality`` are skipped. With ``--dry-run`` nothing is
    generated. Otherwise a postmortem is generated and saved under an ID based
    on the year of the commit date. A summary is printed at the end.
    """
    parser = argparse.ArgumentParser(description="Postmortem Onboarding")
    parser.add_argument("--since", help="Start date (YYYY-MM-DD)")
    parser.add_argument("--limit", type=int, default=50, help="Max commits to process")
    parser.add_argument("--dry-run", action="store_true", help="Preview only, no generation")
    parser.add_argument(
        "--min-quality",
        type=float,
        default=0.2,
        help="Minimum quality score to generate postmortem",
    )
    args = parser.parse_args()

    print("Fetching fix commits...")
    commits = get_fix_commits(since=args.since, limit=args.limit)
    print(f"Found {len(commits)} fix commits")
    if not commits:
        print("No fix commits found.")
        return

    generated = skipped = 0
    total = len(commits)
    for position, commit in enumerate(commits, start=1):
        print(f"\n[{position}/{total}] {commit['hash'][:7]}: {commit['subject'][:60]}")
        details = get_commit_details(commit["hash"])
        quality = assess_commit_quality(commit, details)
        print(f" Quality: {quality:.2f}, Files: {len(details['files'])}")

        if quality < args.min_quality:
            print(f" Skipped: quality below threshold ({args.min_quality})")
            skipped += 1
        elif args.dry_run:
            print(" [DRY-RUN] Would generate postmortem")
        else:
            # Generate the postmortem content.
            pm_data = await generate_postmortem_with_llm(commit, details)
            # Build the ID from the year of the commit date.
            commit_year = int(commit["date"][:4])
            pm_id = get_next_pm_id(commit_year)
            filepath = save_postmortem(pm_data, commit, details, pm_id)
            print(f" Saved: {filepath}")
            generated += 1

    print(f"\n{'=' * 50}")
    print(f"Summary: Generated {generated}, Skipped {skipped}")
    if generated > 0:
        print(f"Postmortems saved to: {POSTMORTEM_DIR}/")


if __name__ == "__main__":
    asyncio.run(main())