#!/usr/bin/env python3
"""
Generate a postmortem for a single fix commit.

Usage:
    python tools/postmortem_generate.py --commit abc1234 [--output postmortem/]

Intended to run in GitHub Actions to automatically generate a postmortem
for each new fix commit.
"""

import argparse
import asyncio
import json
import re
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional

import yaml

# Add the project root to the Python path so that `app.*` can be imported.
sys.path.insert(0, str(Path(__file__).parent.parent))

POSTMORTEM_DIR = Path(__file__).parent.parent / "postmortem"


def _run_git(*args: str) -> subprocess.CompletedProcess:
    """Run ``git <args>`` in the repository root and return the completed process.

    Output is decoded as UTF-8 with replacement characters so that a
    non-UTF-8 locale or binary-ish diff content cannot raise
    ``UnicodeDecodeError``.
    """
    return subprocess.run(
        ["git", *args],
        capture_output=True,
        encoding="utf-8",
        errors="replace",
        cwd=POSTMORTEM_DIR.parent,
    )


def _as_list(value: Any) -> List:
    """Return *value* as a fresh list.

    ``None`` becomes ``[]``, lists/tuples are copied, any other value is
    wrapped in a single-element list.  Copying guarantees that no list
    object is shared between two fields of the YAML document (PyYAML emits
    ``&id``/``*id`` anchors for shared objects).
    """
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def is_fix_commit(commit_hash: str) -> bool:
    """Return True when the commit subject starts with "fix" (case-insensitive).

    If git cannot resolve the commit the subject is empty and the result
    is False.
    """
    result = _run_git("log", "-1", "--format=%s", commit_hash)
    subject = result.stdout.strip().lower()
    return subject.startswith("fix")


def get_commit_info(commit_hash: str) -> Dict:
    """Collect metadata, changed files and a Python-only diff for a commit.

    Args:
        commit_hash: Any revision git understands (full/short hash, ``HEAD``,
            branch name, ...).

    Returns:
        A dict with keys ``hash`` (7-char abbreviation of the resolved
        hash), ``full_hash``, ``subject``, ``body``, ``date`` (ISO 8601
        author date), ``files`` (changed paths) and ``diff`` (at most 6000
        characters of ``git show -p -- *.py`` output).

    Raises:
        ValueError: If git cannot resolve *commit_hash*.
    """
    # One call for all metadata; NUL separators cannot occur in commit text.
    meta = _run_git("log", "-1", "--format=%H%x00%aI%x00%s%x00%b", commit_hash)
    parts = meta.stdout.split("\x00", 3)
    if meta.returncode != 0 or len(parts) != 4:
        detail = meta.stderr.strip() or "no output from git"
        raise ValueError(f"Cannot resolve commit {commit_hash!r}: {detail}")
    full_hash, date, subject, body = (part.strip() for part in parts)

    # Files changed
    result = _run_git("show", full_hash, "--name-only", "--format=")
    files = [f for f in result.stdout.strip().split("\n") if f]

    # Diff (Python files only), truncated to keep the LLM prompt small
    result = _run_git("show", full_hash, "-p", "--", "*.py")
    diff = result.stdout[:6000]

    return {
        # Derived from the resolved hash so that "HEAD", a branch name or a
        # short hash all map to the same value used for duplicate detection.
        "hash": full_hash[:7],
        "full_hash": full_hash,
        "subject": subject,
        "body": body,
        "date": date,
        "files": files,
        "diff": diff,
    }


def get_next_pm_id(output_dir: Path) -> str:
    """Return the next free postmortem ID (``PM-<year>-<NNN>``) for this year.

    Creates *output_dir* (including parents) if it does not exist.  Files
    whose trailing segment is not a number are ignored instead of aborting
    the run.
    """
    year = datetime.now().year
    output_dir.mkdir(parents=True, exist_ok=True)

    numbers = []
    for path in output_dir.glob(f"PM-{year}-*.yaml"):
        try:
            numbers.append(int(path.stem.split("-")[-1]))
        except ValueError:
            continue

    return f"PM-{year}-{max(numbers, default=0) + 1:03d}"


def _parse_json_object(response: str) -> Optional[Dict]:
    """Try to extract a JSON object from an LLM reply; return None on failure.

    Candidates are tried in order: the raw reply, the reply with markdown
    code fences removed, and finally the outermost ``{...}`` span.  Only
    JSON objects (dicts) are accepted.
    """

    def candidates() -> Iterator[str]:
        yield response
        # Strip markdown code fences
        cleaned = re.sub(r"^```(?:json)?\s*", "", response, flags=re.MULTILINE)
        cleaned = re.sub(r"\s*```$", "", cleaned, flags=re.MULTILINE).strip()
        yield cleaned
        # Fall back to the outermost brace-delimited span
        match = re.search(r"\{[\s\S]*\}", cleaned)
        if match:
            yield match.group()

    for candidate in candidates():
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed
    return None


def parse_llm_response(response: str) -> Dict:
    """Parse an LLM reply into a dict.

    Returns the parsed JSON object, or a placeholder dict tagged
    ``parse-error`` when no JSON object can be extracted.
    """
    parsed = _parse_json_object(response)
    if parsed is not None:
        return parsed
    return {"title": "Parse failed", "severity": "low", "tags": ["parse-error"]}


def extract_from_commit(info: Dict) -> Dict:
    """Build postmortem data from the commit message alone (no-LLM fallback)."""
    subject = info.get("subject", "")
    body = info.get("body", "")
    files = info.get("files", [])
    diff = info.get("diff", "")

    # Derive tags from the conventional-commit scope, e.g. "fix(api-client): ..."
    # or "fix(api,ui): ...".
    scope_match = re.search(r"fix\(([^)]+)\)", subject, re.IGNORECASE)
    tags = []
    if scope_match:
        tags = [part.strip() for part in scope_match.group(1).split(",") if part.strip()]
    labels = tags or ["general"]

    # Strip the "fix(scope)!:" prefix from the title
    title = re.sub(r"^fix(\([^)]+\))?!?:\s*", "", subject, flags=re.IGNORECASE)

    # Function names seen in the diff: de-duplicated, first-seen order
    func_matches = re.findall(r"def\s+(\w+)\s*\(", diff)
    functions = list(dict.fromkeys(func_matches))[:5]

    return {
        "title": title[:50] if title else "Fix commit",
        "description": body[:300] if body else subject,
        "root_cause": "See commit body for details",
        "severity": "medium",
        "triggers": {
            "files": files[:5],
            "functions": functions,
            "patterns": [],
            # Separate list objects: a shared list would be serialized by
            # PyYAML as an anchor/alias pair.
            "keywords": list(labels),
        },
        "fix_pattern": {
            "approach": title,
            "key_changes": [title],
        },
        "verification": ["Review related code changes"],
        "tags": list(labels),
    }


async def generate_with_llm(info: Dict) -> Dict:
    """Generate postmortem data with the project LLM.

    Falls back to :func:`extract_from_commit` when the LLM module cannot be
    imported or initialised, when the call fails, or when the reply does
    not contain a JSON object.
    """
    try:
        from app.llm import LLM

        llm = LLM()
    except ImportError as e:
        print(f"Warning: Cannot import LLM module: {e}")
        return extract_from_commit(info)
    except Exception as e:
        print(f"Warning: LLM init failed: {e}")
        return extract_from_commit(info)

    prompt = f"""分析这个 fix commit,生成 postmortem JSON:

Commit: {info['subject']}
Body: {info['body'][:1000]}
Files: {', '.join(info['files'][:10])}

Diff preview:
{info['diff'][:2500]}

返回 JSON 格式:
{{
    "title": "简短标题(中文)",
    "description": "问题描述(2-3句话)",
    "root_cause": "根因分析",
    "severity": "medium",
    "triggers": {{
        "files": ["相关文件模式"],
        "functions": ["相关函数名"],
        "patterns": ["正则模式"],
        "keywords": ["关键词"]
    }},
    "fix_pattern": {{
        "approach": "修复方法",
        "key_changes": ["关键变更"]
    }},
    "verification": ["验证点"],
    "tags": ["标签"]
}}

只返回 JSON,不要其他文字。"""

    try:
        response = await llm.ask(
            messages=[{"role": "user", "content": prompt}],
            stream=False,
            temperature=0.2,
        )
        parsed = _parse_json_object(response)
    except Exception as e:
        print(f"Warning: LLM call failed: {e}")
        return extract_from_commit(info)

    if parsed is None:
        # Same policy as every other LLM failure: use the rule-based result
        # rather than persisting an empty "Parse failed" record.
        print("Warning: LLM response is not a JSON object; using rule-based extraction")
        return extract_from_commit(info)
    return parsed


def save_postmortem(data: Dict, info: Dict, pm_id: str, output_dir: Path) -> Path:
    """Write the postmortem as ``<output_dir>/<pm_id>.yaml`` and return its path.

    Args:
        data: Postmortem fields (from the LLM or from rule-based extraction).
        info: Commit information as returned by :func:`get_commit_info`.
        pm_id: Postmortem identifier, also used as the file stem.
        output_dir: Target directory; created (with parents) if missing.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # Make sure `triggers` has the complete structure
    triggers = data.get("triggers", {})
    if not isinstance(triggers, dict):
        triggers = {}

    trigger_files = triggers.get("files")
    if trigger_files is None:
        trigger_files = info.get("files", [])[:5]

    pm = {
        "id": pm_id,
        "created_at": datetime.now(timezone.utc).isoformat(),
        "source_commit": info["hash"],
        "severity": data.get("severity", "medium"),
        "title": data.get("title", "Untitled"),
        "description": data.get("description", ""),
        "root_cause": data.get("root_cause", ""),
        # Every list below is a fresh copy so that PyYAML never emits
        # anchors/aliases for objects shared between fields.
        "triggers": {
            "files": _as_list(trigger_files),
            "functions": _as_list(triggers.get("functions")),
            "patterns": _as_list(triggers.get("patterns")),
            "keywords": _as_list(triggers.get("keywords")),
        },
        "fix_pattern": data.get("fix_pattern", {}),
        "verification": _as_list(data.get("verification")),
        "related": {
            "files_changed": _as_list(info.get("files")),
        },
        "tags": _as_list(data.get("tags")),
    }

    filepath = output_dir / f"{pm_id}.yaml"
    with open(filepath, "w", encoding="utf-8") as f:
        yaml.dump(pm, f, allow_unicode=True, default_flow_style=False, sort_keys=False)

    return filepath


def check_duplicate(info: Dict, output_dir: Path) -> Optional[str]:
    """Return the file name of an existing postmortem for this commit, if any.

    Files that cannot be read or parsed are skipped with a warning; files
    whose top-level value is not a mapping are ignored.
    """
    if not output_dir.exists():
        return None

    for path in sorted(output_dir.glob("PM-*.yaml")):
        try:
            with open(path, encoding="utf-8") as fp:
                pm = yaml.safe_load(fp)
        except (OSError, ValueError, yaml.YAMLError) as e:
            # ValueError covers UnicodeDecodeError and malformed scalars
            # (e.g. impossible dates) rejected by the YAML constructor.
            print(f"Warning: Skipping unreadable postmortem {path.name}: {e}")
            continue
        # str() tolerates hand-edited files with an unquoted numeric hash.
        if isinstance(pm, dict) and str(pm.get("source_commit")) == info["hash"]:
            return path.name

    return None


async def main():
    """CLI entry point: parse arguments and generate one postmortem file."""
    parser = argparse.ArgumentParser(description="Generate postmortem for a fix commit")
    parser.add_argument("--commit", required=True, help="Commit hash")
    parser.add_argument("--output", default="postmortem", help="Output directory")
    parser.add_argument(
        "--force",
        action="store_true",
        help="Force generation even if not a fix commit",
    )
    parser.add_argument(
        "--no-llm",
        action="store_true",
        help="Skip LLM, use rule-based extraction only",
    )
    args = parser.parse_args()

    # Relative output paths are resolved against the repository root
    output_dir = Path(args.output)
    if not output_dir.is_absolute():
        output_dir = POSTMORTEM_DIR.parent / args.output

    # Only fix commits get a postmortem unless --force is given
    if not args.force and not is_fix_commit(args.commit):
        print(f"Commit {args.commit} is not a fix commit. Skipping.")
        print("Use --force to generate anyway.")
        return

    # Collect commit information
    try:
        info = get_commit_info(args.commit)
    except ValueError as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
    print(f"Processing: {info['subject'][:60]}")

    # Skip commits that already have a postmortem
    existing = check_duplicate(info, output_dir)
    if existing:
        print(f"Postmortem already exists: {existing}")
        return

    # Generate the postmortem data
    if args.no_llm:
        data = extract_from_commit(info)
    else:
        data = await generate_with_llm(info)

    # Save
    pm_id = get_next_pm_id(output_dir)
    filepath = save_postmortem(data, info, pm_id, output_dir)
    print(f"Generated: {filepath}")


if __name__ == "__main__":
    asyncio.run(main())