feat(makefile,scripts,ci): 新增基准测试基础设施与回归检测

- Makefile 添加 bench-stat/bench-compare/bench-save/bench-check 命令
- 新增 Python 回归检测脚本 check_regression.py
- 新增 GitHub Actions 基准测试工作流

Co-Authored-By: Claude <noreply@anthropic.com>
2026-04-07 17:05:42 +08:00 · 2026-04-07 17:05:42 +08:00 · 355d7a18ae
commit 355d7a18ae
parent 83e1fe38ba
3 changed files with 387 additions and 0 deletions
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@ -0,0 +1,105 @@
+# Benchmark CI workflow
+# Runs the Go benchmarks automatically and checks them for performance regressions.
+
+name: Benchmark
+
+# Triggered by pushes to, and pull requests targeting, main or master.
+on:
+  push:
+    branches:
+      - main
+      - master
+  pull_request:
+    branches:
+      - main
+      - master
+
+# Shared settings read by the jobs below through the `env` context.
+env:
+  GO_VERSION: '1.23'
+  BENCH_COUNT: 10
+
+jobs:
+  # Benchmarks the checked-out code and, for non-main/master refs, benchmarks
+  # the base branch as well, compares both with benchstat and runs the
+  # regression check on the comparison.
+  benchmark:
+    name: Run Benchmarks
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # full history so the base branch can be checked out for the baseline
+
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: ${{ env.GO_VERSION }}
+
+      - name: Install benchstat
+        run: go install golang.org/x/perf/cmd/benchstat@latest
+
+      - name: Run benchmarks (current)
+        run: |
+          go test -bench=. -benchmem -count=${{ env.BENCH_COUNT }} ./... > benchmark-current.txt
+          cat benchmark-current.txt
+
+      - name: Upload current benchmark results
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-current
+          path: benchmark-current.txt
+          retention-days: 30
+
+      # Check out the base branch explicitly. If that fails the step fails,
+      # instead of silently benchmarking the PR code a second time and calling
+      # it the baseline. benchmark-current.txt is untracked, so it survives
+      # the checkout.
+      - name: Checkout main branch (for comparison)
+        if: github.ref != 'refs/heads/main' && github.ref != 'refs/heads/master'
+        env:
+          BASE_REF: ${{ github.base_ref }}
+        run: |
+          git checkout --detach "origin/${BASE_REF:-main}"
+
+      # A failing baseline run must not leave a junk file behind: the compare
+      # step treats any non-empty benchmark-baseline.txt as valid benchstat input.
+      - name: Run benchmarks (baseline)
+        if: github.ref != 'refs/heads/main' && github.ref != 'refs/heads/master'
+        run: |
+          if ! go test -bench=. -benchmem -count=${{ env.BENCH_COUNT }} ./... > benchmark-baseline.txt; then
+            echo "::warning::Baseline benchmarks failed; skipping comparison"
+            rm -f benchmark-baseline.txt
+          fi
+
+      # Return to the commit this run was triggered for so the later steps use
+      # its version of scripts/check_regression.py, not the base branch's.
+      - name: Restore checkout of the tested commit
+        if: github.ref != 'refs/heads/main' && github.ref != 'refs/heads/master'
+        run: git checkout --detach "${GITHUB_SHA}"
+
+      - name: Compare benchmarks
+        if: github.ref != 'refs/heads/main' && github.ref != 'refs/heads/master'
+        run: |
+          if [ -s benchmark-baseline.txt ]; then
+            benchstat benchmark-baseline.txt benchmark-current.txt > benchmark-comparison.txt
+            cat benchmark-comparison.txt
+          else
+            echo "No baseline for comparison" > benchmark-comparison.txt
+          fi
+
+      - name: Upload comparison results
+        if: github.ref != 'refs/heads/main' && github.ref != 'refs/heads/master'
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-comparison
+          path: benchmark-comparison.txt
+          retention-days: 7
+
+      # check_regression.py exits 0 (ok), 1 (WARNING) or 2 (BLOCK). WARNING is
+      # surfaced as an annotation only; BLOCK fails the job instead of being
+      # discarded by `|| true`.
+      - name: Check regression
+        if: github.ref != 'refs/heads/main' && github.ref != 'refs/heads/master'
+        run: |
+          if [ -f benchmark-comparison.txt ]; then
+            status=0
+            python3 scripts/check_regression.py benchmark-comparison.txt || status=$?
+            case "$status" in
+              0) ;;
+              1) echo "::warning::Benchmark regression check reported WARNING-level regressions" ;;
+              *) echo "::error::Benchmark regression check failed (exit code $status)"; exit "$status" ;;
+            esac
+          fi
+
+  # Runs only when the ref is main or master: benchmarks that code and uploads
+  # the raw output as the `benchmark-baseline-main` artifact (kept 90 days).
+  benchmark-save:
+    name: Save Benchmark Baseline
+    if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: ${{ env.GO_VERSION }}
+
+      - name: Run benchmarks
+        run: go test -bench=. -benchmem -count=${{ env.BENCH_COUNT }} ./... > benchmark-main.txt
+
+      - name: Upload baseline
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-baseline-main
+          path: benchmark-main.txt
+          retention-days: 90
--- a/Makefile
+++ b/Makefile
@ -110,6 +110,52 @@ bench:
 	@echo "Running benchmarks..."
 	go test -bench=. -benchmem ./...

+# Run the benchmarks in statistical mode (10 samples per benchmark), streaming
+# the output to the terminal and saving it in benchmark-current.txt.
+# A plain `go test | tee` reports tee's exit status, so a failing `go test`
+# would look like success. The failure is recorded in a marker file inside the
+# pipeline and turned back into a non-zero exit afterwards (portable to POSIX
+# sh, which has no pipefail).
+.PHONY: bench-stat
+bench-stat:
+	@echo "Running benchmarks with statistical sampling..."
+	@rm -f benchmark-current.failed; \
+	{ go test -bench=. -benchmem -count=10 ./... || touch benchmark-current.failed; } | tee benchmark-current.txt; \
+	if [ -f benchmark-current.failed ]; then \
+		rm -f benchmark-current.failed; \
+		echo "go test failed; benchmark-current.txt is incomplete" >&2; \
+		exit 1; \
+	fi
+
+# Compare benchmark-current.txt against benchmark-baseline.txt with benchstat.
+# - Fails when benchstat is not installed.
+# - Without a baseline there is nothing to compare, so only the current
+#   benchmarks are run.
+# - When the baseline exists but the current results do not, they are generated
+#   first so benchstat always receives both input files.
+.PHONY: bench-compare
+bench-compare:
+	@echo "Comparing benchmarks..."
+	@if ! command -v benchstat >/dev/null 2>&1; then \
+		echo "benchstat 未安装，运行: go install golang.org/x/perf/cmd/benchstat@latest"; \
+		exit 1; \
+	fi; \
+	if [ ! -f benchmark-baseline.txt ]; then \
+		echo "基准线文件 benchmark-baseline.txt 不存在，运行当前基准测试..."; \
+		$(MAKE) bench-stat; \
+		exit $$?; \
+	fi; \
+	if [ ! -f benchmark-current.txt ]; then \
+		$(MAKE) bench-stat || exit $$?; \
+	fi; \
+	benchstat benchmark-baseline.txt benchmark-current.txt
+
+# Save the current benchmark results as the baseline (benchmark-baseline.txt).
+# If benchmark-current.txt does not exist yet it is produced with bench-stat
+# first. Steps are chained with && so a failed benchmark run or copy is never
+# recorded as a baseline.
+.PHONY: bench-save
+bench-save:
+	@echo "Saving benchmark baseline..."
+	@if [ -f benchmark-current.txt ]; then \
+		cp benchmark-current.txt benchmark-baseline.txt && \
+		echo "基准线已保存到 benchmark-baseline.txt"; \
+	else \
+		echo "运行基准测试并保存..."; \
+		$(MAKE) bench-stat && \
+		cp benchmark-current.txt benchmark-baseline.txt && \
+		echo "基准线已保存到 benchmark-baseline.txt"; \
+	fi
+
+# Check for performance regressions (needs Python 3; benchstat when the
+# comparison has to be generated).
+# When benchstat and both result files are available, the comparison is always
+# regenerated so a stale benchmark-comparison.txt is never checked. Otherwise
+# an existing benchmark-comparison.txt is used as-is.
+# Uses python3, matching the script's shebang and the CI workflow.
+.PHONY: bench-check
+bench-check:
+	@echo "Checking for performance regressions..."
+	@if command -v benchstat >/dev/null 2>&1 && [ -f benchmark-baseline.txt ] && [ -f benchmark-current.txt ]; then \
+		benchstat benchmark-baseline.txt benchmark-current.txt > benchmark-comparison.txt || { \
+			rm -f benchmark-comparison.txt; \
+			exit 1; \
+		}; \
+	fi; \
+	if [ -f benchmark-comparison.txt ]; then \
+		python3 scripts/check_regression.py benchmark-comparison.txt; \
+	else \
+		echo "需要 benchstat 和基准线/当前结果文件"; \
+		echo "运行: make bench-save && make bench-stat && make bench-check"; \
+		exit 1; \
+	fi
+
 # ============================================
 # 代码质量
 # ============================================
@ -194,6 +240,10 @@ help:
 	@echo "  make test           - Run all tests"
 	@echo "  make test-cover     - Run tests with coverage"
 	@echo "  make bench          - Run benchmarks"
+	@echo "  make bench-stat     - Run benchmarks with statistical sampling (10x)"
+	@echo "  make bench-compare  - Compare against baseline (needs benchstat)"
+	@echo "  make bench-save     - Save current results as baseline"
+	@echo "  make bench-check    - Check for performance regressions"
 	@echo ""
 	@echo "Quality:"
 	@echo "  make fmt            - Format code"
--- a/scripts/check_regression.py
+++ b/scripts/check_regression.py
@ -0,0 +1,232 @@
+#!/usr/bin/env python3
+"""
+回归检测脚本 - 解析 benchstat 输出并检测性能回归
+
+用法:
+    python check_regression.py <benchstat_output_file>
+    python check_regression.py --help
+
+退出码:
+    0 - 无回归或轻微变化
+    1 - 检测到 WARNING 级别回归 (-5%)
+    2 - 检测到 BLOCK 级别回归 (-15%)
+"""
+
+import argparse
+import re
+import sys
+from dataclasses import dataclass
+from typing import List, Optional, Tuple
+
+
+@dataclass
+class BenchmarkResult:
+    """单个基准测试结果"""
+    name: str
+    old_time: Optional[float]
+    new_time: Optional[float]
+    old_bytes: Optional[float]
+    new_bytes: Optional[float]
+    old_allocs: Optional[float]
+    new_allocs: Optional[float]
+    p_value: Optional[float]
+
+    @property
+    def time_change_pct(self) -> Optional[float]:
+        """计算时间变化百分比 (负值表示性能下降)"""
+        if self.old_time and self.new_time and self.old_time > 0:
+            return (self.old_time - self.new_time) / self.old_time * 100
+        return None
+
+    @property
+    def bytes_change_pct(self) -> Optional[float]:
+        """计算内存分配变化百分比"""
+        if self.old_bytes and self.new_bytes and self.old_bytes > 0:
+            return (self.old_bytes - self.new_bytes) / self.old_bytes * 100
+        return None
+
+
+def parse_benchstat_line(line: str) -> Optional[BenchmarkResult]:
+    """
+    解析 benchstat 输出的一行
+
+    格式示例:
+        BenchmarkFoo-8    1000000    1000 ns/op    ~    950 ns/op     5.00%
+    """
+    # 匹配时间基准测试行
+    # 格式: Name  old-ns/op  new-ns/op  delta
+    time_pattern = r'^(\S+)\s+'  # 基准名称
+    time_pattern += r'(?:(\d+(?:\.\d+)?)\s+ns/op\s+)?'  # 旧值
+    time_pattern += r'(?:~\s+)?'  # 分隔符
+    time_pattern += r'(?:(\d+(?:\.\d+)?)\s+ns/op\s+)?'  # 新值
+    time_pattern += r'(?:([+-]?\d+\.\d+)%\s+)?'  # 变化百分比
+
+    match = re.match(time_pattern, line.strip())
+    if not match:
+        return None
+
+    name = match.group(1)
+    old_time = float(match.group(2)) if match.group(2) else None
+    new_time = float(match.group(3)) if match.group(3) else None
+
+    # 尝试提取 p-value（如果有）
+    p_value = None
+    p_match = re.search(r'p=([\d.]+)', line)
+    if p_match:
+        p_value = float(p_match.group(1))
+
+    return BenchmarkResult(
+        name=name,
+        old_time=old_time,
+        new_time=new_time,
+        old_bytes=None,
+        new_bytes=None,
+        old_allocs=None,
+        new_allocs=None,
+        p_value=p_value
+    )
+
+
+def parse_benchstat_output(content: str) -> List[BenchmarkResult]:
+    """解析完整的 benchstat 输出"""
+    results = []
+    lines = content.split('\n')
+
+    for line in lines:
+        line = line.strip()
+        if not line or line.startswith('name') or line.startswith('---'):
+            continue
+
+        result = parse_benchstat_line(line)
+        if result:
+            results.append(result)
+
+    return results
+
+
+def classify_regression(result: BenchmarkResult) -> Tuple[str, float, Optional[float]]:
+    """
+    分类回归级别
+
+    返回值: (level, change_pct, p_value)
+        level: "OK", "WARNING", "BLOCK"
+    """
+    change = result.time_change_pct
+    if change is None:
+        return "OK", 0.0, result.p_value
+
+    # 正值表示性能提升，负值表示性能下降
+    if change <= -15:
+        return "BLOCK", change, result.p_value
+    elif change <= -5:
+        return "WARNING", change, result.p_value
+    else:
+        return "OK", change, result.p_value
+
+
+def check_regressions(results: List[BenchmarkResult]) -> Tuple[int, int, int]:
+    """
+    检查所有基准测试的回归情况
+
+    返回: (ok_count, warning_count, block_count)
+    """
+    ok_count = 0
+    warning_count = 0
+    block_count = 0
+
+    print("=" * 80)
+    print("性能回归检测结果")
+    print("=" * 80)
+    print(f"{'基准测试':<40} {'变化':<12} {'P值':<12} {'级别':<10}")
+    print("-" * 80)
+
+    for result in results:
+        level, change, p_value = classify_regression(result)
+        p_str = f"{p_value:.4f}" if p_value else "N/A"
+        change_str = f"{change:+.2f}%" if change else "N/A"
+
+        if level == "OK":
+            ok_count += 1
+            icon = "✓"
+        elif level == "WARNING":
+            warning_count += 1
+            icon = "⚠"
+        else:
+            block_count += 1
+            icon = "✗"
+
+        print(f"{result.name:<40} {change_str:<12} {p_str:<12} {icon} {level}")
+
+    print("-" * 80)
+    print(f"总结: {ok_count} 正常, {warning_count} 警告, {block_count} 阻断")
+    print("=" * 80)
+
+    return ok_count, warning_count, block_count
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='解析 benchstat 输出并检测性能回归',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog='''
+阈值说明:
+  -5%%  ~  WARNING  - 性能下降超过5%，需要关注
+  -15%% ~  BLOCK    - 性能下降超过15%，阻止合并
+
+示例:
+  python check_regression.py benchmark-comparison.txt
+  benchstat old.txt new.txt | python check_regression.py -
+'''
+    )
+    parser.add_argument('file', help='benchstat 输出文件路径，或 "-" 从 stdin 读取')
+    parser.add_argument('--warning-threshold', type=float, default=5.0,
+                        help='警告阈值百分比（默认: 5）')
+    parser.add_argument('--block-threshold', type=float, default=15.0,
+                        help='阻断阈值百分比（默认: 15）')
+    parser.add_argument('--p-value', type=float, default=0.05,
+                        help='统计显著性 P 值阈值（默认: 0.05）')
+
+    args = parser.parse_args()
+
+    # 读取输入
+    if args.file == '-':
+        content = sys.stdin.read()
+    else:
+        try:
+            with open(args.file, 'r') as f:
+                content = f.read()
+        except FileNotFoundError:
+            print(f"错误: 文件 '{args.file}' 不存在", file=sys.stderr)
+            sys.exit(1)
+        except IOError as e:
+            print(f"错误: 无法读取文件: {e}", file=sys.stderr)
+            sys.exit(1)
+
+    if not content.strip():
+        print("警告: 输入为空", file=sys.stderr)
+        sys.exit(0)
+
+    # 解析结果
+    results = parse_benchstat_output(content)
+
+    if not results:
+        print("警告: 未找到有效的基准测试结果", file=sys.stderr)
+        sys.exit(0)
+
+    # 检查回归
+    ok_count, warning_count, block_count = check_regressions(results)
+
+    # 设置退出码
+    if block_count > 0:
+        print(f"\n检测到 {block_count} 个 BLOCK 级别回归，建议阻止合并")
+        sys.exit(2)
+    elif warning_count > 0:
+        print(f"\n检测到 {warning_count} 个 WARNING 级别回归，建议检查")
+        sys.exit(1)
+    else:
+        print("\n未发现性能回归")
+        sys.exit(0)
+
+
+if __name__ == '__main__':
+    main()