From 355d7a18ae8977a3b5b8fd97e5130642c4fc421d Mon Sep 17 00:00:00 2001
From: xfy
Date: Tue, 7 Apr 2026 17:05:42 +0800
Subject: [PATCH] =?UTF-8?q?feat(makefile,scripts,ci):=20=E6=96=B0=E5=A2=9E?=
 =?UTF-8?q?=E5=9F=BA=E5=87=86=E6=B5=8B=E8=AF=95=E5=9F=BA=E7=A1=80=E8=AE=BE?=
 =?UTF-8?q?=E6=96=BD=E4=B8=8E=E5=9B=9E=E5=BD=92=E6=A3=80=E6=B5=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Makefile 添加 bench-stat/bench-compare/bench-save/bench-check 命令
- 新增 Python 回归检测脚本 check_regression.py
- 新增 GitHub Actions 基准测试工作流

Co-Authored-By: Claude
---
 .github/workflows/benchmark.yml | 105 ++++++++++
 Makefile                        |  50 +++++
 scripts/check_regression.py     | 360 ++++++++++++++++++++++++++++++++
 3 files changed, 515 insertions(+)
 create mode 100644 .github/workflows/benchmark.yml
 create mode 100755 scripts/check_regression.py

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 0000000..d7a9d5b
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,105 @@
+# Benchmark CI Workflow
+# Runs the Go benchmarks automatically and checks for performance regressions
+
+name: Benchmark
+
+on:
+  push:
+    branches: [main, master]
+  pull_request:
+    branches: [main, master]
+
+env:
+  GO_VERSION: '1.23'
+  BENCH_COUNT: 10
+
+jobs:
+  benchmark:
+    name: Run Benchmarks
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # full history is needed for the baseline comparison
+
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: ${{ env.GO_VERSION }}
+
+      - name: Install benchstat
+        run: go install golang.org/x/perf/cmd/benchstat@latest
+
+      - name: Run benchmarks (current)
+        run: |
+          go test -bench=. -benchmem -count=${{ env.BENCH_COUNT }} ./... > benchmark-current.txt
+          cat benchmark-current.txt
+
+      - name: Upload current benchmark results
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-current
+          path: benchmark-current.txt
+          retention-days: 30
+
+      - name: Checkout main branch (for comparison)
+        if: github.ref != 'refs/heads/main' && github.ref != 'refs/heads/master'
+        run: |
+          git stash
+          git checkout main || git checkout master || echo "No main/master branch"
+          git stash pop || true
+
+      - name: Run benchmarks (baseline)
+        if: github.ref != 'refs/heads/main' && github.ref != 'refs/heads/master'
+        run: |
+          go test -bench=. -benchmem -count=${{ env.BENCH_COUNT }} ./... > benchmark-baseline.txt || echo "Baseline failed" > benchmark-baseline.txt
+
+      - name: Compare benchmarks
+        if: github.ref != 'refs/heads/main' && github.ref != 'refs/heads/master'
+        run: |
+          if [ -f benchmark-baseline.txt ] && [ -s benchmark-baseline.txt ]; then
+            benchstat benchmark-baseline.txt benchmark-current.txt > benchmark-comparison.txt
+            cat benchmark-comparison.txt
+          else
+            echo "No baseline for comparison" > benchmark-comparison.txt
+          fi
+
+      - name: Upload comparison results
+        if: github.ref != 'refs/heads/main' && github.ref != 'refs/heads/master'
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-comparison
+          path: benchmark-comparison.txt
+          retention-days: 7
+
+      - name: Check regression
+        if: github.ref != 'refs/heads/main' && github.ref != 'refs/heads/master'
+        run: |
+          if [ -f benchmark-comparison.txt ]; then
+            python3 scripts/check_regression.py benchmark-comparison.txt || true
+          fi
+
+  benchmark-save:
+    name: Save Benchmark Baseline
+    runs-on: ubuntu-latest
+    if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master'
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: ${{ env.GO_VERSION }}
+
+      - name: Run benchmarks
+        run: |
+          go test -bench=. -benchmem -count=${{ env.BENCH_COUNT }} ./... > benchmark-main.txt
+
+      - name: Upload baseline
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-baseline-main
+          path: benchmark-main.txt
+          retention-days: 90
diff --git a/Makefile b/Makefile
index 9889632..68aa48c 100644
--- a/Makefile
+++ b/Makefile
@@ -110,6 +110,52 @@ bench:
 	@echo "Running benchmarks..."
 	go test -bench=. -benchmem ./...
 
+# Run benchmarks (statistical mode, 10 samples)
+bench-stat:
+	@echo "Running benchmarks with statistical sampling..."
+	go test -bench=. -benchmem -count=10 ./... | tee benchmark-current.txt
+
+# Compare benchmark results (requires benchstat)
+bench-compare:
+	@echo "Comparing benchmarks..."
+	@if command -v benchstat >/dev/null 2>&1; then \
+		if [ -f benchmark-baseline.txt ]; then \
+			benchstat benchmark-baseline.txt benchmark-current.txt; \
+		else \
+			echo "基准线文件 benchmark-baseline.txt 不存在,运行当前基准测试..."; \
+			$(MAKE) bench-stat; \
+		fi \
+	else \
+		echo "benchstat 未安装,运行: go install golang.org/x/perf/cmd/benchstat@latest"; \
+		exit 1; \
+	fi
+
+# Save the current benchmark results as the baseline
+bench-save:
+	@echo "Saving benchmark baseline..."
+	@if [ -f benchmark-current.txt ]; then \
+		cp benchmark-current.txt benchmark-baseline.txt; \
+		echo "基准线已保存到 benchmark-baseline.txt"; \
+	else \
+		echo "运行基准测试并保存..."; \
+		$(MAKE) bench-stat; \
+		cp benchmark-current.txt benchmark-baseline.txt; \
+	fi
+
+# Check for performance regressions (requires Python 3)
+bench-check:
+	@echo "Checking for performance regressions..."
+	@if [ -f benchmark-comparison.txt ]; then \
+		python3 scripts/check_regression.py benchmark-comparison.txt; \
+	elif command -v benchstat >/dev/null 2>&1 && [ -f benchmark-baseline.txt ] && [ -f benchmark-current.txt ]; then \
+		benchstat benchmark-baseline.txt benchmark-current.txt > benchmark-comparison.txt; \
+		python3 scripts/check_regression.py benchmark-comparison.txt; \
+	else \
+		echo "需要 benchstat 和基准线/当前结果文件"; \
+		echo "运行: make bench-save && make bench-stat && make bench-check"; \
+		exit 1; \
+	fi
+
 # ============================================
 # 代码质量
 # ============================================
@@ -194,6 +240,10 @@ help:
 	@echo "  make test          - Run all tests"
 	@echo "  make test-cover    - Run tests with coverage"
 	@echo "  make bench         - Run benchmarks"
+	@echo "  make bench-stat    - Run benchmarks with statistical sampling (10x)"
+	@echo "  make bench-compare - Compare against baseline (needs benchstat)"
+	@echo "  make bench-save    - Save current results as baseline"
+	@echo "  make bench-check   - Check for performance regressions"
 	@echo ""
 	@echo "Quality:"
 	@echo "  make fmt           - Format code"
diff --git a/scripts/check_regression.py b/scripts/check_regression.py
new file mode 100755
index 0000000..c941e16
--- /dev/null
+++ b/scripts/check_regression.py
@@ -0,0 +1,360 @@
+#!/usr/bin/env python3
+"""
+Regression check script - parse benchstat output and detect performance regressions.
+
+Usage:
+    python check_regression.py <benchstat-output>
+    python check_regression.py --help
+
+Exit codes:
+    0 - no regression, or only minor changes
+    1 - WARNING-level regression detected (-5% by default)
+    2 - BLOCK-level regression detected (-15% by default)
+"""
+
+import argparse
+import re
+import sys
+from dataclasses import dataclass
+from typing import Dict, List, Optional, Tuple
+
+# Factors converting a printed time to nanoseconds, keyed by SI prefix
+# ('' is plain seconds).
+_TIME_TO_NS = {'n': 1.0, 'µ': 1e3, 'μ': 1e3, 'u': 1e3, 'm': 1e6, '': 1e9}
+
+# Factors for byte and allocation counts, keyed by SI or binary prefix.
+_COUNT_SCALE = {
+    '': 1.0, 'k': 1e3, 'K': 1e3, 'M': 1e6, 'G': 1e9,
+    'Ki': 1024.0, 'Mi': 1024.0 ** 2, 'Gi': 1024.0 ** 3,
+}
+
+# One comparison row: name, old value, new value. A value may carry a unit
+# ("1.718µ", "1.72µs", "1000 ns/op"), a "± N%" spread and a footnote mark.
+_NUMBER = r'\d+(?:\.\d+)?'
+_UNIT = r'[A-Za-zµμ/]*'
+_SPREAD = r'(?:\s*±\s*\S+(?:\s+[⁰¹²³⁴⁵⁶⁷⁸⁹]+)?)?'
+_ROW_RE = re.compile(
+    r'^(?P<name>\S+)\s+'
+    r'(?P<old>' + _NUMBER + r')\s*(?P<old_unit>' + _UNIT + r')' + _SPREAD + r'\s+'
+    r'(?:~\s+)?'
+    r'(?P<new>' + _NUMBER + r')\s*(?P<new_unit>' + _UNIT + r')' + _SPREAD
+)
+_P_VALUE_RE = re.compile(r'p=(\d+(?:\.\d+)?)')
+
+
+@dataclass
+class BenchmarkResult:
+    """Old and new measurements of one benchmark.
+
+    Times are held in nanoseconds and sizes in bytes. A field is None when
+    the input had no such value.
+    """
+    name: str
+    old_time: Optional[float]
+    new_time: Optional[float]
+    old_bytes: Optional[float]
+    new_bytes: Optional[float]
+    old_allocs: Optional[float]
+    new_allocs: Optional[float]
+    p_value: Optional[float]
+
+    @property
+    def time_change_pct(self) -> Optional[float]:
+        """Time change in percent; a negative value means slower."""
+        return _change_pct(self.old_time, self.new_time)
+
+    @property
+    def bytes_change_pct(self) -> Optional[float]:
+        """Allocated-bytes change in percent; negative means more bytes."""
+        return _change_pct(self.old_bytes, self.new_bytes)
+
+
+def _change_pct(old: Optional[float], new: Optional[float]) -> Optional[float]:
+    """Return (old - new) / old in percent, or None if it is undefined."""
+    # Test against None: a measured 0.0 is a value, not a missing one.
+    if old is None or new is None or old <= 0:
+        return None
+    return (old - new) / old * 100
+
+
+def _scale_value(number: str, unit: str, metric: str) -> Optional[float]:
+    """Convert a printed value to nanoseconds (time) or a plain count.
+
+    Returns None when the unit cannot belong to the given metric.
+    """
+    prefix = unit[:-3] if unit.endswith('/op') else unit
+    if metric == 'time':
+        table = _TIME_TO_NS
+        if prefix.endswith('s'):
+            prefix = prefix[:-1]
+    else:
+        table = _COUNT_SCALE
+        if prefix == 'allocs':
+            prefix = ''
+        elif prefix.endswith('B'):
+            prefix = prefix[:-1]
+    scale = table.get(prefix)
+    return None if scale is None else float(number) * scale
+
+
+def parse_benchstat_line(line: str, metric: str = 'time') -> Optional[BenchmarkResult]:
+    """
+    Parse one comparison row of benchstat output.
+
+    Accepted row shapes:
+        Foo-8  1.718µ ± 1%  1.423µ ± 1%  -17.20% (p=0.000 n=10)
+        Foo-8  1.72µs ± 1%  1.42µs ± 1%  -17.20%  (p=0.000 n=10+10)
+        BenchmarkFoo-8  1000 ns/op  ~  950 ns/op  5.00%
+
+    Args:
+        line: a single line of benchstat output.
+        metric: table the row belongs to: 'time', 'bytes' or 'allocs'.
+
+    Returns:
+        A BenchmarkResult whose old/new fields for `metric` are set, or None
+        for headers, metadata, the geomean row and any other line that is
+        not an old/new pair.
+
+    Raises:
+        ValueError: if `metric` is not one of the three names above.
+    """
+    if metric not in ('time', 'bytes', 'allocs'):
+        raise ValueError(f"unknown metric: {metric!r}")
+
+    match = _ROW_RE.match(line.strip())
+    if not match or match.group('name') == 'geomean':
+        return None
+
+    old_unit = match.group('old_unit')
+    new_unit = match.group('new_unit')
+    # Raw `go test -bench` lines read "<name> <iterations> <n> ns/op"; a
+    # "/op" unit on one side only means this is not an old/new pair.
+    if ('/op' in old_unit) != ('/op' in new_unit):
+        return None
+
+    old_value = _scale_value(match.group('old'), old_unit, metric)
+    new_value = _scale_value(match.group('new'), new_unit, metric)
+    if old_value is None or new_value is None:
+        return None
+
+    p_match = _P_VALUE_RE.search(line)
+    result = BenchmarkResult(
+        name=match.group('name'),
+        old_time=None,
+        new_time=None,
+        old_bytes=None,
+        new_bytes=None,
+        old_allocs=None,
+        new_allocs=None,
+        p_value=float(p_match.group(1)) if p_match else None,
+    )
+    setattr(result, 'old_' + metric, old_value)
+    setattr(result, 'new_' + metric, new_value)
+    return result
+
+
+def _detect_metric(header: str) -> Optional[str]:
+    """Map a table header to 'time', 'bytes' or 'allocs'; None for other units."""
+    if 'allocs/op' in header:
+        return 'allocs'
+    if 'B/op' in header or 'alloc/op' in header:
+        return 'bytes'
+    if 'sec/op' in header or 'time/op' in header:
+        return 'time'
+    return None
+
+
+def parse_benchstat_output(content: str) -> List[BenchmarkResult]:
+    """
+    Parse complete benchstat output.
+
+    Rows of the time, bytes and allocs tables that share a benchmark name
+    are merged into one BenchmarkResult; tables of any other unit are
+    skipped. Rows that appear before any table header count as time rows.
+    """
+    results: List[BenchmarkResult] = []
+    latest: Dict[str, BenchmarkResult] = {}
+    metric: Optional[str] = 'time'
+
+    for raw_line in content.split('\n'):
+        line = raw_line.strip()
+        if not line or line.startswith('---'):
+            continue
+
+        old_style_header = line.split()[0] == 'name'
+        if old_style_header or '│' in line:
+            # Only the header line that names the unit switches tables.
+            if old_style_header or 'vs base' in line:
+                metric = _detect_metric(line)
+            continue
+        if metric is None:
+            continue
+
+        row = parse_benchstat_line(line, metric)
+        if row is None:
+            continue
+
+        target = latest.get(row.name)
+        if target is None or getattr(target, 'old_' + metric) is not None:
+            # First sighting, or the same name again in a later table of
+            # this metric: start a new result.
+            results.append(row)
+            latest[row.name] = row
+            continue
+        setattr(target, 'old_' + metric, getattr(row, 'old_' + metric))
+        setattr(target, 'new_' + metric, getattr(row, 'new_' + metric))
+        if metric == 'time':
+            target.p_value = row.p_value
+
+    return results
+
+
+def classify_regression(
+    result: BenchmarkResult,
+    warning_threshold: float = 5.0,
+    block_threshold: float = 15.0,
+    p_threshold: Optional[float] = None,
+) -> Tuple[str, float, Optional[float]]:
+    """
+    Classify the time change of one benchmark.
+
+    Args:
+        result: the benchmark to classify.
+        warning_threshold: slowdown in percent that yields "WARNING".
+        block_threshold: slowdown in percent that yields "BLOCK".
+        p_threshold: when given, a change whose p-value is above it is
+            treated as noise and reported as "OK".
+
+    Returns:
+        (level, change_pct, p_value); level is "OK", "WARNING" or "BLOCK"
+        and change_pct is 0.0 when the change is unknown.
+    """
+    change = result.time_change_pct
+    p_value = result.p_value
+    if change is None:
+        return "OK", 0.0, p_value
+    if p_threshold is not None and p_value is not None and p_value > p_threshold:
+        return "OK", change, p_value
+
+    # A positive change is a speed-up, a negative one a slowdown.
+    if change <= -abs(block_threshold):
+        return "BLOCK", change, p_value
+    if change <= -abs(warning_threshold):
+        return "WARNING", change, p_value
+    return "OK", change, p_value
+
+
+def check_regressions(
+    results: List[BenchmarkResult],
+    warning_threshold: float = 5.0,
+    block_threshold: float = 15.0,
+    p_threshold: Optional[float] = None,
+) -> Tuple[int, int, int]:
+    """
+    Classify every benchmark and print a report table.
+
+    The threshold arguments are handed to classify_regression unchanged.
+
+    Returns: (ok_count, warning_count, block_count)
+    """
+    icons = {"OK": "✓", "WARNING": "⚠", "BLOCK": "✗"}
+    counts = {"OK": 0, "WARNING": 0, "BLOCK": 0}
+
+    print("=" * 80)
+    print("性能回归检测结果")
+    print("=" * 80)
+    print(f"{'基准测试':<40} {'变化':<12} {'P值':<12} {'级别':<10}")
+    print("-" * 80)
+
+    for result in results:
+        level, change, p_value = classify_regression(
+            result, warning_threshold, block_threshold, p_threshold
+        )
+        counts[level] += 1
+        # Test against None so that a genuine 0.0 is still printed.
+        p_str = f"{p_value:.4f}" if p_value is not None else "N/A"
+        measured = result.time_change_pct is not None
+        change_str = f"{change:+.2f}%" if measured else "N/A"
+        print(f"{result.name:<40} {change_str:<12} {p_str:<12} {icons[level]} {level}")
+
+    ok_count = counts["OK"]
+    warning_count = counts["WARNING"]
+    block_count = counts["BLOCK"]
+    print("-" * 80)
+    print(f"总结: {ok_count} 正常, {warning_count} 警告, {block_count} 阻断")
+    print("=" * 80)
+
+    return ok_count, warning_count, block_count
+
+
+def main():
+    """Command-line entry point; exits with 0, 1 or 2 (see module docstring)."""
+    parser = argparse.ArgumentParser(
+        description='解析 benchstat 输出并检测性能回归',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog='''
+阈值说明:
+  -5%% ~ WARNING - 性能下降超过5%,需要关注
+  -15%% ~ BLOCK - 性能下降超过15%,阻止合并
+
+示例:
+  python check_regression.py benchmark-comparison.txt
+  benchstat old.txt new.txt | python check_regression.py -
+'''
+    )
+    parser.add_argument('file', help='benchstat 输出文件路径,或 "-" 从 stdin 读取')
+    parser.add_argument('--warning-threshold', type=float, default=5.0,
+                        help='警告阈值百分比(默认: 5)')
+    parser.add_argument('--block-threshold', type=float, default=15.0,
+                        help='阻断阈值百分比(默认: 15)')
+    parser.add_argument('--p-value', type=float, default=0.05,
+                        help='统计显著性 P 值阈值(默认: 0.05)')
+
+    args = parser.parse_args()
+
+    # Read the input
+    if args.file == '-':
+        content = sys.stdin.read()
+    else:
+        try:
+            # benchstat prints "µ", "±" and box-drawing characters, so do
+            # not let the locale pick the encoding.
+            with open(args.file, 'r', encoding='utf-8', errors='replace') as f:
+                content = f.read()
+        except FileNotFoundError:
+            print(f"错误: 文件 '{args.file}' 不存在", file=sys.stderr)
+            sys.exit(1)
+        except IOError as e:
+            print(f"错误: 无法读取文件: {e}", file=sys.stderr)
+            sys.exit(1)
+
+    if not content.strip():
+        print("警告: 输入为空", file=sys.stderr)
+        sys.exit(0)
+
+    # Parse the results
+    results = parse_benchstat_output(content)
+
+    if not results:
+        print("警告: 未找到有效的基准测试结果", file=sys.stderr)
+        sys.exit(0)
+
+    # Check for regressions using the thresholds from the command line
+    ok_count, warning_count, block_count = check_regressions(
+        results, args.warning_threshold, args.block_threshold, args.p_value
+    )
+
+    # Set the exit code
+    if block_count > 0:
+        print(f"\n检测到 {block_count} 个 BLOCK 级别回归,建议阻止合并")
+        sys.exit(2)
+    elif warning_count > 0:
+        print(f"\n检测到 {warning_count} 个 WARNING 级别回归,建议检查")
+        sys.exit(1)
+    else:
+        print("\n未发现性能回归")
+        sys.exit(0)
+
+
+if __name__ == '__main__':
+    main()