first commit

This commit is contained in:
ytc1012
2026-02-04 16:11:55 +08:00
commit 0f3ee050dc
165 changed files with 25795 additions and 0 deletions

View File

@@ -0,0 +1,357 @@
#!/usr/bin/env python3
"""
Postmortem Onboarding 脚本
分析历史 fix commits生成初始 postmortem 集合
使用方法:
python tools/postmortem_init.py [--since 2025-06-01] [--limit 50] [--dry-run]
"""
import argparse
import asyncio
import json
import re
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional
import yaml
# 添加项目根目录到 Python 路径
sys.path.insert(0, str(Path(__file__).parent.parent))
POSTMORTEM_DIR = Path(__file__).parent.parent / "postmortem"
def get_fix_commits(since: Optional[str] = None, limit: int = 100) -> List[Dict]:
"""获取 fix commits 列表"""
cmd = [
"git",
"log",
"--grep=^fix",
"-i",
"--all",
"--format=%H|%s|%aI",
]
if since:
cmd.extend(["--since", since])
result = subprocess.run(cmd, capture_output=True, text=True, cwd=POSTMORTEM_DIR.parent)
commits = []
for line in result.stdout.strip().split("\n"):
if not line:
continue
parts = line.split("|", 2)
if len(parts) >= 3:
commits.append(
{
"hash": parts[0],
"subject": parts[1],
"date": parts[2],
}
)
return commits[:limit]
def get_commit_details(commit_hash: str) -> Dict:
"""获取 commit 的详细信息"""
cwd = POSTMORTEM_DIR.parent
# 获取 body
body_cmd = ["git", "log", "-1", "--format=%b", commit_hash]
body_result = subprocess.run(body_cmd, capture_output=True, text=True, cwd=cwd)
body = body_result.stdout.strip()
# 获取修改的文件
files_cmd = ["git", "show", commit_hash, "--name-only", "--format="]
files_result = subprocess.run(files_cmd, capture_output=True, text=True, cwd=cwd)
files = [f for f in files_result.stdout.strip().split("\n") if f]
# 获取 diff 内容(限制大小,只看 .py 文件)
diff_cmd = ["git", "show", commit_hash, "--stat", "-p", "--", "*.py"]
diff_result = subprocess.run(diff_cmd, capture_output=True, text=True, cwd=cwd)
diff = diff_result.stdout[:6000] # 限制 6KB
return {"body": body, "files": files, "diff": diff}
def assess_commit_quality(commit: Dict, details: Dict) -> float:
"""评估 commit 消息质量,决定是否值得生成 postmortem"""
score = 0.0
body = details.get("body", "")
subject = commit.get("subject", "")
# 有详细描述
if len(body) > 50:
score += 0.3
if len(body) > 150:
score += 0.2
# 有问题描述关键词
problem_keywords = ["问题", "原因", "修复", "bug", "error", "issue", "cause", "fix"]
if any(kw in body.lower() for kw in problem_keywords):
score += 0.2
# 有结构化格式
if any(marker in body for marker in ["##", "- ", "1.", "*"]):
score += 0.1
# scope 清晰
if "(" in subject and ")" in subject:
score += 0.1
# 修改了重要文件
important_patterns = ["recommender", "api/index", "llm", "config"]
if any(
any(pat in f for pat in important_patterns) for f in details.get("files", [])
):
score += 0.1
return min(1.0, score)
def parse_llm_response(response: str) -> Dict:
"""健壮的 JSON 解析,处理 LLM 输出的各种格式"""
# 尝试直接解析
try:
return json.loads(response)
except json.JSONDecodeError:
pass
# 去除 markdown 代码块
cleaned = re.sub(r"^```(?:json)?\s*", "", response, flags=re.MULTILINE)
cleaned = re.sub(r"\s*```$", "", cleaned, flags=re.MULTILINE)
cleaned = cleaned.strip()
try:
return json.loads(cleaned)
except json.JSONDecodeError:
pass
# 尝试提取 JSON 对象
match = re.search(r"\{[\s\S]*\}", cleaned)
if match:
try:
return json.loads(match.group())
except json.JSONDecodeError:
pass
# 返回基础结构
return {
"title": "解析失败",
"description": response[:500],
"severity": "low",
"tags": ["parse-failed"],
}
async def generate_postmortem_with_llm(commit: Dict, details: Dict) -> Dict:
"""使用 LLM 生成 postmortem"""
try:
from app.llm import LLM
llm = LLM()
except ImportError as e:
print(f" Warning: Cannot import LLM module: {e}")
return extract_from_commit(commit, details)
except Exception as e:
print(f" Warning: LLM init failed: {e}")
return extract_from_commit(commit, details)
prompt = f"""分析以下 git fix commit生成一个 postmortem 条目。
Commit 信息:
- Subject: {commit['subject']}
- Date: {commit['date']}
- Body: {details.get('body', '(无)')[:1000]}
修改的文件:
{chr(10).join(details['files'][:15])}
代码变更摘要:
{details['diff'][:3000]}
请生成 JSON 格式的 postmortem包含以下字段
1. title: 简短标题中文10-30字
2. description: 问题描述2-3句话描述问题现象和影响
3. root_cause: 根因分析1-2句话
4. severity: critical/high/medium/low根据影响范围判断
5. triggers: 对象,包含:
- files: 相关文件模式列表(如 "app/tool/*.py"
- functions: 相关函数名列表(从 diff 中提取)
- patterns: 正则匹配模式列表(用于匹配未来的 diff 内容)
- keywords: 关键词列表(中英文都可以)
6. fix_pattern: 对象,包含:
- approach: 修复方法描述
- key_changes: 关键变更点列表
7. verification: 验证检查点列表(未来修改相关代码时应检查的事项)
8. tags: 标签列表(用于分类,如 geocoding, ui, api 等)
只返回 JSON不要其他文字。"""
try:
response = await llm.ask(
messages=[{"role": "user", "content": prompt}],
stream=False,
temperature=0.2,
)
return parse_llm_response(response)
except Exception as e:
print(f" Warning: LLM call failed: {e}")
return extract_from_commit(commit, details)
def extract_from_commit(commit: Dict, details: Dict) -> Dict:
"""从 commit 消息直接提取(无 LLM fallback"""
subject = commit.get("subject", "")
body = details.get("body", "")
files = details.get("files", [])
# 从 scope 提取 tags
scope_match = re.search(r"fix\((\w+)\)", subject, re.IGNORECASE)
tags = [scope_match.group(1)] if scope_match else []
# 清理标题
title = subject
title = re.sub(r"^fix(\([^)]+\))?:\s*", "", title, flags=re.IGNORECASE)
# 提取函数名
functions = []
diff = details.get("diff", "")
func_matches = re.findall(r"def\s+(\w+)\s*\(", diff)
functions = list(set(func_matches))[:5]
return {
"title": title[:50] if title else "Fix commit",
"description": body[:300] if body else subject,
"root_cause": "See commit body for details",
"severity": "medium",
"triggers": {
"files": files[:5],
"functions": functions,
"patterns": [],
"keywords": tags or ["general"],
},
"fix_pattern": {
"approach": title,
"key_changes": [title],
},
"verification": ["Review related code changes"],
"tags": tags or ["general"],
}
def get_next_pm_id(year: int) -> str:
"""获取下一个 postmortem ID"""
POSTMORTEM_DIR.mkdir(exist_ok=True)
existing = list(POSTMORTEM_DIR.glob(f"PM-{year}-*.yaml"))
if not existing:
return f"PM-{year}-001"
max_num = max(int(f.stem.split("-")[-1]) for f in existing)
return f"PM-{year}-{max_num + 1:03d}"
def save_postmortem(pm_data: Dict, commit: Dict, details: Dict, pm_id: str) -> Path:
"""保存 postmortem 到 YAML 文件"""
POSTMORTEM_DIR.mkdir(exist_ok=True)
# 确保 triggers 有完整结构
triggers = pm_data.get("triggers", {})
if not isinstance(triggers, dict):
triggers = {}
output = {
"id": pm_id,
"created_at": datetime.utcnow().isoformat() + "Z",
"source_commit": commit["hash"][:7],
"severity": pm_data.get("severity", "medium"),
"title": pm_data.get("title", "Untitled"),
"description": pm_data.get("description", ""),
"root_cause": pm_data.get("root_cause", ""),
"triggers": {
"files": triggers.get("files", details.get("files", [])[:5]),
"functions": triggers.get("functions", []),
"patterns": triggers.get("patterns", []),
"keywords": triggers.get("keywords", []),
},
"fix_pattern": pm_data.get("fix_pattern", {}),
"verification": pm_data.get("verification", []),
"related": {
"files_changed": details.get("files", []),
},
"tags": pm_data.get("tags", []),
}
filepath = POSTMORTEM_DIR / f"{pm_id}.yaml"
with open(filepath, "w", encoding="utf-8") as f:
yaml.dump(
output, f, allow_unicode=True, default_flow_style=False, sort_keys=False
)
return filepath
async def main():
parser = argparse.ArgumentParser(description="Postmortem Onboarding")
parser.add_argument("--since", help="Start date (YYYY-MM-DD)")
parser.add_argument("--limit", type=int, default=50, help="Max commits to process")
parser.add_argument("--dry-run", action="store_true", help="Preview only, no generation")
parser.add_argument(
"--min-quality",
type=float,
default=0.2,
help="Minimum quality score to generate postmortem",
)
args = parser.parse_args()
print("Fetching fix commits...")
commits = get_fix_commits(since=args.since, limit=args.limit)
print(f"Found {len(commits)} fix commits")
if not commits:
print("No fix commits found.")
return
generated = 0
skipped = 0
for i, commit in enumerate(commits):
print(f"\n[{i + 1}/{len(commits)}] {commit['hash'][:7]}: {commit['subject'][:60]}")
details = get_commit_details(commit["hash"])
quality = assess_commit_quality(commit, details)
print(f" Quality: {quality:.2f}, Files: {len(details['files'])}")
if quality < args.min_quality:
print(f" Skipped: quality below threshold ({args.min_quality})")
skipped += 1
continue
if args.dry_run:
print(" [DRY-RUN] Would generate postmortem")
continue
# 生成 postmortem
pm_data = await generate_postmortem_with_llm(commit, details)
# 生成 ID使用 commit 日期的年份)
year = int(commit["date"][:4])
pm_id = get_next_pm_id(year)
filepath = save_postmortem(pm_data, commit, details, pm_id)
print(f" Saved: {filepath}")
generated += 1
print(f"\n{'=' * 50}")
print(f"Summary: Generated {generated}, Skipped {skipped}")
if generated > 0:
print(f"Postmortems saved to: {POSTMORTEM_DIR}/")
if __name__ == "__main__":
asyncio.run(main())