first commit
This commit is contained in:
357
MeetSpot/tools/postmortem_init.py
Normal file
357
MeetSpot/tools/postmortem_init.py
Normal file
@@ -0,0 +1,357 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Postmortem Onboarding 脚本
|
||||
分析历史 fix commits,生成初始 postmortem 集合
|
||||
|
||||
使用方法:
|
||||
python tools/postmortem_init.py [--since 2025-06-01] [--limit 50] [--dry-run]
|
||||
"""
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import yaml
|
||||
|
||||
# 添加项目根目录到 Python 路径
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
POSTMORTEM_DIR = Path(__file__).parent.parent / "postmortem"
|
||||
|
||||
|
||||
def get_fix_commits(since: Optional[str] = None, limit: int = 100) -> List[Dict]:
|
||||
"""获取 fix commits 列表"""
|
||||
cmd = [
|
||||
"git",
|
||||
"log",
|
||||
"--grep=^fix",
|
||||
"-i",
|
||||
"--all",
|
||||
"--format=%H|%s|%aI",
|
||||
]
|
||||
if since:
|
||||
cmd.extend(["--since", since])
|
||||
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, cwd=POSTMORTEM_DIR.parent)
|
||||
commits = []
|
||||
|
||||
for line in result.stdout.strip().split("\n"):
|
||||
if not line:
|
||||
continue
|
||||
parts = line.split("|", 2)
|
||||
if len(parts) >= 3:
|
||||
commits.append(
|
||||
{
|
||||
"hash": parts[0],
|
||||
"subject": parts[1],
|
||||
"date": parts[2],
|
||||
}
|
||||
)
|
||||
|
||||
return commits[:limit]
|
||||
|
||||
|
||||
def get_commit_details(commit_hash: str) -> Dict:
|
||||
"""获取 commit 的详细信息"""
|
||||
cwd = POSTMORTEM_DIR.parent
|
||||
|
||||
# 获取 body
|
||||
body_cmd = ["git", "log", "-1", "--format=%b", commit_hash]
|
||||
body_result = subprocess.run(body_cmd, capture_output=True, text=True, cwd=cwd)
|
||||
body = body_result.stdout.strip()
|
||||
|
||||
# 获取修改的文件
|
||||
files_cmd = ["git", "show", commit_hash, "--name-only", "--format="]
|
||||
files_result = subprocess.run(files_cmd, capture_output=True, text=True, cwd=cwd)
|
||||
files = [f for f in files_result.stdout.strip().split("\n") if f]
|
||||
|
||||
# 获取 diff 内容(限制大小,只看 .py 文件)
|
||||
diff_cmd = ["git", "show", commit_hash, "--stat", "-p", "--", "*.py"]
|
||||
diff_result = subprocess.run(diff_cmd, capture_output=True, text=True, cwd=cwd)
|
||||
diff = diff_result.stdout[:6000] # 限制 6KB
|
||||
|
||||
return {"body": body, "files": files, "diff": diff}
|
||||
|
||||
|
||||
def assess_commit_quality(commit: Dict, details: Dict) -> float:
|
||||
"""评估 commit 消息质量,决定是否值得生成 postmortem"""
|
||||
score = 0.0
|
||||
body = details.get("body", "")
|
||||
subject = commit.get("subject", "")
|
||||
|
||||
# 有详细描述
|
||||
if len(body) > 50:
|
||||
score += 0.3
|
||||
if len(body) > 150:
|
||||
score += 0.2
|
||||
|
||||
# 有问题描述关键词
|
||||
problem_keywords = ["问题", "原因", "修复", "bug", "error", "issue", "cause", "fix"]
|
||||
if any(kw in body.lower() for kw in problem_keywords):
|
||||
score += 0.2
|
||||
|
||||
# 有结构化格式
|
||||
if any(marker in body for marker in ["##", "- ", "1.", "*"]):
|
||||
score += 0.1
|
||||
|
||||
# scope 清晰
|
||||
if "(" in subject and ")" in subject:
|
||||
score += 0.1
|
||||
|
||||
# 修改了重要文件
|
||||
important_patterns = ["recommender", "api/index", "llm", "config"]
|
||||
if any(
|
||||
any(pat in f for pat in important_patterns) for f in details.get("files", [])
|
||||
):
|
||||
score += 0.1
|
||||
|
||||
return min(1.0, score)
|
||||
|
||||
|
||||
def parse_llm_response(response: str) -> Dict:
|
||||
"""健壮的 JSON 解析,处理 LLM 输出的各种格式"""
|
||||
# 尝试直接解析
|
||||
try:
|
||||
return json.loads(response)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# 去除 markdown 代码块
|
||||
cleaned = re.sub(r"^```(?:json)?\s*", "", response, flags=re.MULTILINE)
|
||||
cleaned = re.sub(r"\s*```$", "", cleaned, flags=re.MULTILINE)
|
||||
cleaned = cleaned.strip()
|
||||
|
||||
try:
|
||||
return json.loads(cleaned)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# 尝试提取 JSON 对象
|
||||
match = re.search(r"\{[\s\S]*\}", cleaned)
|
||||
if match:
|
||||
try:
|
||||
return json.loads(match.group())
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# 返回基础结构
|
||||
return {
|
||||
"title": "解析失败",
|
||||
"description": response[:500],
|
||||
"severity": "low",
|
||||
"tags": ["parse-failed"],
|
||||
}
|
||||
|
||||
|
||||
async def generate_postmortem_with_llm(commit: Dict, details: Dict) -> Dict:
|
||||
"""使用 LLM 生成 postmortem"""
|
||||
try:
|
||||
from app.llm import LLM
|
||||
|
||||
llm = LLM()
|
||||
except ImportError as e:
|
||||
print(f" Warning: Cannot import LLM module: {e}")
|
||||
return extract_from_commit(commit, details)
|
||||
except Exception as e:
|
||||
print(f" Warning: LLM init failed: {e}")
|
||||
return extract_from_commit(commit, details)
|
||||
|
||||
prompt = f"""分析以下 git fix commit,生成一个 postmortem 条目。
|
||||
|
||||
Commit 信息:
|
||||
- Subject: {commit['subject']}
|
||||
- Date: {commit['date']}
|
||||
- Body: {details.get('body', '(无)')[:1000]}
|
||||
|
||||
修改的文件:
|
||||
{chr(10).join(details['files'][:15])}
|
||||
|
||||
代码变更摘要:
|
||||
{details['diff'][:3000]}
|
||||
|
||||
请生成 JSON 格式的 postmortem,包含以下字段:
|
||||
1. title: 简短标题(中文,10-30字)
|
||||
2. description: 问题描述(2-3句话,描述问题现象和影响)
|
||||
3. root_cause: 根因分析(1-2句话)
|
||||
4. severity: critical/high/medium/low(根据影响范围判断)
|
||||
5. triggers: 对象,包含:
|
||||
- files: 相关文件模式列表(如 "app/tool/*.py")
|
||||
- functions: 相关函数名列表(从 diff 中提取)
|
||||
- patterns: 正则匹配模式列表(用于匹配未来的 diff 内容)
|
||||
- keywords: 关键词列表(中英文都可以)
|
||||
6. fix_pattern: 对象,包含:
|
||||
- approach: 修复方法描述
|
||||
- key_changes: 关键变更点列表
|
||||
7. verification: 验证检查点列表(未来修改相关代码时应检查的事项)
|
||||
8. tags: 标签列表(用于分类,如 geocoding, ui, api 等)
|
||||
|
||||
只返回 JSON,不要其他文字。"""
|
||||
|
||||
try:
|
||||
response = await llm.ask(
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
stream=False,
|
||||
temperature=0.2,
|
||||
)
|
||||
return parse_llm_response(response)
|
||||
except Exception as e:
|
||||
print(f" Warning: LLM call failed: {e}")
|
||||
return extract_from_commit(commit, details)
|
||||
|
||||
|
||||
def extract_from_commit(commit: Dict, details: Dict) -> Dict:
|
||||
"""从 commit 消息直接提取(无 LLM fallback)"""
|
||||
subject = commit.get("subject", "")
|
||||
body = details.get("body", "")
|
||||
files = details.get("files", [])
|
||||
|
||||
# 从 scope 提取 tags
|
||||
scope_match = re.search(r"fix\((\w+)\)", subject, re.IGNORECASE)
|
||||
tags = [scope_match.group(1)] if scope_match else []
|
||||
|
||||
# 清理标题
|
||||
title = subject
|
||||
title = re.sub(r"^fix(\([^)]+\))?:\s*", "", title, flags=re.IGNORECASE)
|
||||
|
||||
# 提取函数名
|
||||
functions = []
|
||||
diff = details.get("diff", "")
|
||||
func_matches = re.findall(r"def\s+(\w+)\s*\(", diff)
|
||||
functions = list(set(func_matches))[:5]
|
||||
|
||||
return {
|
||||
"title": title[:50] if title else "Fix commit",
|
||||
"description": body[:300] if body else subject,
|
||||
"root_cause": "See commit body for details",
|
||||
"severity": "medium",
|
||||
"triggers": {
|
||||
"files": files[:5],
|
||||
"functions": functions,
|
||||
"patterns": [],
|
||||
"keywords": tags or ["general"],
|
||||
},
|
||||
"fix_pattern": {
|
||||
"approach": title,
|
||||
"key_changes": [title],
|
||||
},
|
||||
"verification": ["Review related code changes"],
|
||||
"tags": tags or ["general"],
|
||||
}
|
||||
|
||||
|
||||
def get_next_pm_id(year: int) -> str:
|
||||
"""获取下一个 postmortem ID"""
|
||||
POSTMORTEM_DIR.mkdir(exist_ok=True)
|
||||
existing = list(POSTMORTEM_DIR.glob(f"PM-{year}-*.yaml"))
|
||||
if not existing:
|
||||
return f"PM-{year}-001"
|
||||
|
||||
max_num = max(int(f.stem.split("-")[-1]) for f in existing)
|
||||
return f"PM-{year}-{max_num + 1:03d}"
|
||||
|
||||
|
||||
def save_postmortem(pm_data: Dict, commit: Dict, details: Dict, pm_id: str) -> Path:
|
||||
"""保存 postmortem 到 YAML 文件"""
|
||||
POSTMORTEM_DIR.mkdir(exist_ok=True)
|
||||
|
||||
# 确保 triggers 有完整结构
|
||||
triggers = pm_data.get("triggers", {})
|
||||
if not isinstance(triggers, dict):
|
||||
triggers = {}
|
||||
|
||||
output = {
|
||||
"id": pm_id,
|
||||
"created_at": datetime.utcnow().isoformat() + "Z",
|
||||
"source_commit": commit["hash"][:7],
|
||||
"severity": pm_data.get("severity", "medium"),
|
||||
"title": pm_data.get("title", "Untitled"),
|
||||
"description": pm_data.get("description", ""),
|
||||
"root_cause": pm_data.get("root_cause", ""),
|
||||
"triggers": {
|
||||
"files": triggers.get("files", details.get("files", [])[:5]),
|
||||
"functions": triggers.get("functions", []),
|
||||
"patterns": triggers.get("patterns", []),
|
||||
"keywords": triggers.get("keywords", []),
|
||||
},
|
||||
"fix_pattern": pm_data.get("fix_pattern", {}),
|
||||
"verification": pm_data.get("verification", []),
|
||||
"related": {
|
||||
"files_changed": details.get("files", []),
|
||||
},
|
||||
"tags": pm_data.get("tags", []),
|
||||
}
|
||||
|
||||
filepath = POSTMORTEM_DIR / f"{pm_id}.yaml"
|
||||
with open(filepath, "w", encoding="utf-8") as f:
|
||||
yaml.dump(
|
||||
output, f, allow_unicode=True, default_flow_style=False, sort_keys=False
|
||||
)
|
||||
|
||||
return filepath
|
||||
|
||||
|
||||
async def main():
|
||||
parser = argparse.ArgumentParser(description="Postmortem Onboarding")
|
||||
parser.add_argument("--since", help="Start date (YYYY-MM-DD)")
|
||||
parser.add_argument("--limit", type=int, default=50, help="Max commits to process")
|
||||
parser.add_argument("--dry-run", action="store_true", help="Preview only, no generation")
|
||||
parser.add_argument(
|
||||
"--min-quality",
|
||||
type=float,
|
||||
default=0.2,
|
||||
help="Minimum quality score to generate postmortem",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
print("Fetching fix commits...")
|
||||
commits = get_fix_commits(since=args.since, limit=args.limit)
|
||||
print(f"Found {len(commits)} fix commits")
|
||||
|
||||
if not commits:
|
||||
print("No fix commits found.")
|
||||
return
|
||||
|
||||
generated = 0
|
||||
skipped = 0
|
||||
|
||||
for i, commit in enumerate(commits):
|
||||
print(f"\n[{i + 1}/{len(commits)}] {commit['hash'][:7]}: {commit['subject'][:60]}")
|
||||
|
||||
details = get_commit_details(commit["hash"])
|
||||
quality = assess_commit_quality(commit, details)
|
||||
|
||||
print(f" Quality: {quality:.2f}, Files: {len(details['files'])}")
|
||||
|
||||
if quality < args.min_quality:
|
||||
print(f" Skipped: quality below threshold ({args.min_quality})")
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
if args.dry_run:
|
||||
print(" [DRY-RUN] Would generate postmortem")
|
||||
continue
|
||||
|
||||
# 生成 postmortem
|
||||
pm_data = await generate_postmortem_with_llm(commit, details)
|
||||
|
||||
# 生成 ID(使用 commit 日期的年份)
|
||||
year = int(commit["date"][:4])
|
||||
pm_id = get_next_pm_id(year)
|
||||
|
||||
filepath = save_postmortem(pm_data, commit, details, pm_id)
|
||||
print(f" Saved: {filepath}")
|
||||
generated += 1
|
||||
|
||||
print(f"\n{'=' * 50}")
|
||||
print(f"Summary: Generated {generated}, Skipped {skipped}")
|
||||
if generated > 0:
|
||||
print(f"Postmortems saved to: {POSTMORTEM_DIR}/")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
Reference in New Issue
Block a user