feat(makefile,scripts,ci): 新增基准测试基础设施与回归检测
- Makefile 添加 bench-stat/bench-compare/bench-save/bench-check 命令
- 新增 Python 回归检测脚本 check_regression.py
- 新增 GitHub Actions 基准测试工作流

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
83e1fe38ba
commit
355d7a18ae
105
.github/workflows/benchmark.yml
vendored
Normal file
105
.github/workflows/benchmark.yml
vendored
Normal file
@ -0,0 +1,105 @@
|
||||
# Benchmark CI Workflow
# Runs the Go benchmarks automatically and checks for performance regressions.

name: Benchmark

# Triggered by pushes to, and pull requests targeting, main or master.
on:
  push:
    branches: [main, master]
  pull_request:
    branches: [main, master]

# Workflow-wide settings read by the jobs through the `env` context.
env:
  # Go toolchain version handed to actions/setup-go.
  GO_VERSION: '1.23'
  # Value passed to `go test -count`, i.e. how many times each benchmark runs.
  BENCH_COUNT: 10
|
||||
|
||||
jobs:
  # Benchmarks the checked-out revision. For refs other than main/master it
  # also benchmarks the base branch, compares both runs with benchstat and
  # runs the regression gate.
  benchmark:
    name: Run Benchmarks
    runs-on: ubuntu-latest
    permissions:
      contents: read
    defaults:
      run:
        # Naming the shell explicitly makes Actions run steps with
        # `bash -eo pipefail`, so a failing `go test` is not masked by `tee`.
        shell: bash
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # full history is needed to reach the baseline branch

      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}

      - name: Install benchstat
        run: go install golang.org/x/perf/cmd/benchstat@latest

      - name: Run benchmarks (current)
        run: |
          # tee streams the output live and still records it when go test fails.
          go test -bench=. -benchmem -count=${{ env.BENCH_COUNT }} ./... | tee benchmark-current.txt

      - name: Upload current benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-current
          path: benchmark-current.txt
          retention-days: 30

      - name: Checkout main branch (for comparison)
        if: github.ref != 'refs/heads/main' && github.ref != 'refs/heads/master'
        env:
          # Passed through the environment instead of being interpolated into
          # the script text. Empty for events that have no base branch.
          BASE_REF: ${{ github.base_ref }}
        run: |
          # The baseline goes into a separate worktree so the checkout under
          # test (including scripts/check_regression.py) stays in place for
          # the steps that follow.
          for ref in "origin/${BASE_REF:-main}" origin/main origin/master; do
            if git rev-parse --verify --quiet "${ref}^{commit}" >/dev/null; then
              git worktree add --detach "${RUNNER_TEMP}/baseline" "${ref}"
              exit 0
            fi
          done
          echo "::warning::No main/master branch found; baseline comparison will be skipped"

      - name: Run benchmarks (baseline)
        if: github.ref != 'refs/heads/main' && github.ref != 'refs/heads/master'
        run: |
          if [ ! -d "${RUNNER_TEMP}/baseline" ]; then
            echo "No baseline checkout available"
            exit 0
          fi
          out="${GITHUB_WORKSPACE}/benchmark-baseline.txt"
          # A failed baseline run must not leave a file behind: the compare
          # step treats any non-empty baseline file as valid benchmark data.
          if ! (cd "${RUNNER_TEMP}/baseline" && go test -bench=. -benchmem -count=${{ env.BENCH_COUNT }} ./... > "${out}"); then
            echo "::warning::Baseline benchmarks failed; comparison will be skipped"
            rm -f "${out}"
          fi

      - name: Compare benchmarks
        if: github.ref != 'refs/heads/main' && github.ref != 'refs/heads/master'
        run: |
          # -s is true only for an existing, non-empty file.
          if [ -s benchmark-baseline.txt ]; then
            benchstat benchmark-baseline.txt benchmark-current.txt | tee benchmark-comparison.txt
          else
            echo "No baseline for comparison" | tee benchmark-comparison.txt
          fi

      - name: Upload comparison results
        if: github.ref != 'refs/heads/main' && github.ref != 'refs/heads/master'
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-comparison
          path: benchmark-comparison.txt
          retention-days: 7

      - name: Check regression
        if: github.ref != 'refs/heads/main' && github.ref != 'refs/heads/master'
        run: |
          if [ ! -f benchmark-comparison.txt ]; then
            exit 0
          fi
          # check_regression.py exits 0 (clean), 1 (WARNING) or 2 (BLOCK).
          # Warnings are surfaced as annotations; anything higher fails the job
          # instead of being discarded.
          status=0
          python3 scripts/check_regression.py benchmark-comparison.txt || status=$?
          case "${status}" in
            0) ;;
            1) echo "::warning::Benchmark regression above the WARNING threshold" ;;
            *)
              echo "::error::Benchmark regression check failed (exit code ${status})"
              exit "${status}"
              ;;
          esac

  # Records benchmark results for main/master as a long-lived artifact.
  benchmark-save:
    name: Save Benchmark Baseline
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master'
    permissions:
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}

      - name: Run benchmarks
        run: |
          go test -bench=. -benchmem -count=${{ env.BENCH_COUNT }} ./... > benchmark-main.txt

      - name: Upload baseline
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-baseline-main
          path: benchmark-main.txt
          retention-days: 90
|
||||
50
Makefile
50
Makefile
@ -110,6 +110,52 @@ bench:
|
||||
@echo "Running benchmarks..."
|
||||
go test -bench=. -benchmem ./...
|
||||
|
||||
# Run the benchmarks in statistical mode (10 samples each) and record the
# output in benchmark-current.txt.
#
# The results go to a temporary file first and only replace
# benchmark-current.txt when `go test` succeeds. Piping straight into `tee`
# would report tee's exit status, so failed benchmarks would look successful
# and leave a partial results file behind.
.PHONY: bench-stat
bench-stat:
	@echo "Running benchmarks with statistical sampling..."
	status=0; \
	go test -bench=. -benchmem -count=10 ./... > benchmark-current.txt.tmp || status=$$?; \
	cat benchmark-current.txt.tmp; \
	if [ $$status -eq 0 ]; then \
		mv -f benchmark-current.txt.tmp benchmark-current.txt; \
	else \
		rm -f benchmark-current.txt.tmp; \
	fi; \
	exit $$status
|
||||
|
||||
# Compare benchmark-current.txt against benchmark-baseline.txt (needs benchstat).
#
# - benchstat missing: print the install hint and fail.
# - baseline missing:  nothing to compare against; just produce current results.
# - current missing:   produce it first, so benchstat is never handed a
#                      non-existent file.
.PHONY: bench-compare
bench-compare:
	@echo "Comparing benchmarks..."
	@if ! command -v benchstat >/dev/null 2>&1; then \
		echo "benchstat 未安装,运行: go install golang.org/x/perf/cmd/benchstat@latest"; \
		exit 1; \
	fi; \
	if [ ! -f benchmark-baseline.txt ]; then \
		echo "基准线文件 benchmark-baseline.txt 不存在,运行当前基准测试..."; \
		$(MAKE) bench-stat; \
	else \
		if [ ! -f benchmark-current.txt ]; then \
			echo "benchmark-current.txt 不存在,先运行当前基准测试..."; \
			$(MAKE) bench-stat || exit 1; \
		fi; \
		benchstat benchmark-baseline.txt benchmark-current.txt; \
	fi
|
||||
|
||||
# Save the current benchmark results as the baseline.
#
# Reuses benchmark-current.txt when it exists; otherwise runs bench-stat first.
# The copy only happens when that run succeeds, so a failed benchmark run is
# never stored as the baseline.
.PHONY: bench-save
bench-save:
	@echo "Saving benchmark baseline..."
	@if [ ! -f benchmark-current.txt ]; then \
		echo "运行基准测试并保存..."; \
		$(MAKE) bench-stat || exit 1; \
	fi; \
	cp benchmark-current.txt benchmark-baseline.txt && \
	echo "基准线已保存到 benchmark-baseline.txt"
|
||||
|
||||
# Check for performance regressions (needs Python 3).
#
# Interpreter used for scripts/check_regression.py; override with
# `make bench-check PYTHON=...`. The default matches the script's own
# `python3` shebang, since a bare `python` is absent on many systems.
PYTHON ?= python3

# The comparison is regenerated whenever benchstat and both result files are
# available, so a stale benchmark-comparison.txt is never preferred over fresh
# data. An existing comparison file is only used as a fallback.
.PHONY: bench-check
bench-check:
	@echo "Checking for performance regressions..."
	@if command -v benchstat >/dev/null 2>&1 && [ -f benchmark-baseline.txt ] && [ -f benchmark-current.txt ]; then \
		benchstat benchmark-baseline.txt benchmark-current.txt > benchmark-comparison.txt \
			|| { rm -f benchmark-comparison.txt; exit 1; }; \
	elif [ ! -f benchmark-comparison.txt ]; then \
		echo "需要 benchstat 和基准线/当前结果文件"; \
		echo "运行: make bench-save && make bench-stat && make bench-check"; \
		exit 1; \
	fi; \
	$(PYTHON) scripts/check_regression.py benchmark-comparison.txt
|
||||
|
||||
# ============================================
|
||||
# 代码质量
|
||||
# ============================================
|
||||
@ -194,6 +240,10 @@ help:
|
||||
@echo " make test - Run all tests"
|
||||
@echo " make test-cover - Run tests with coverage"
|
||||
@echo " make bench - Run benchmarks"
|
||||
@echo " make bench-stat - Run benchmarks with statistical sampling (10x)"
|
||||
@echo " make bench-compare - Compare against baseline (needs benchstat)"
|
||||
@echo " make bench-save - Save current results as baseline"
|
||||
@echo " make bench-check - Check for performance regressions"
|
||||
@echo ""
|
||||
@echo "Quality:"
|
||||
@echo " make fmt - Format code"
|
||||
|
||||
232
scripts/check_regression.py
Executable file
232
scripts/check_regression.py
Executable file
@ -0,0 +1,232 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
回归检测脚本 - 解析 benchstat 输出并检测性能回归
|
||||
|
||||
用法:
|
||||
python check_regression.py <benchstat_output_file>
|
||||
python check_regression.py --help
|
||||
|
||||
退出码:
|
||||
0 - 无回归或轻微变化
|
||||
1 - 检测到 WARNING 级别回归 (-5%)
|
||||
2 - 检测到 BLOCK 级别回归 (-15%)
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
|
||||
@dataclass
class BenchmarkResult:
    """Old and new measurements for a single benchmark.

    Any measurement may be ``None`` when it was not present in the input.
    """
    name: str
    old_time: Optional[float]
    new_time: Optional[float]
    old_bytes: Optional[float]
    new_bytes: Optional[float]
    old_allocs: Optional[float]
    new_allocs: Optional[float]
    p_value: Optional[float]

    @staticmethod
    def _improvement_pct(old: Optional[float], new: Optional[float]) -> Optional[float]:
        """Return the relative improvement from ``old`` to ``new`` in percent.

        Positive means the new value is smaller (better); negative means it
        grew. Returns ``None`` when either value is missing or ``old`` is not
        positive, because no ratio can be formed.
        """
        # Compare against None explicitly: a new value of 0 (e.g. 0 B/op) is a
        # real measurement and must not be treated as "missing".
        if old is None or new is None or old <= 0:
            return None
        return (old - new) / old * 100

    @property
    def time_change_pct(self) -> Optional[float]:
        """Time change in percent (negative values mean a slowdown)."""
        return self._improvement_pct(self.old_time, self.new_time)

    @property
    def bytes_change_pct(self) -> Optional[float]:
        """Allocated-bytes change in percent (negative values mean more bytes)."""
        return self._improvement_pct(self.old_bytes, self.new_bytes)
|
||||
|
||||
|
||||
def parse_benchstat_line(line: str) -> Optional[BenchmarkResult]:
    """Parse one benchstat row into a BenchmarkResult.

    A row is accepted only when it carries a benchmark name followed by at
    least two time measurements (old, then new). Accepted spellings include::

        BenchmarkFoo-8   1000 ns/op  ~  950 ns/op  5.00%
        Foo-8            1.00µs ± 2%   1.10µs ± 1%  +10.00%  (p=0.000 n=10+10)
        Foo-8            1.000µ ± 2%   1.100µ ± 1%  +10.00% (p=0.000 n=10)

    Args:
        line: A single line of benchstat output.

    Returns:
        A BenchmarkResult whose ``old_time``/``new_time`` are in nanoseconds
        and whose ``p_value`` is set when the row has a ``p=`` annotation.
        Returns ``None`` for anything that is not a time-comparison row:
        headers, metadata, ``geomean`` summaries, rows with fewer than two
        timings, and rows whose values carry no time unit (B/op, allocs/op).

    Note:
        A value with no unit suffix at all cannot be told apart from a
        non-time metric, so such rows are skipped.
    """
    text = line.strip()
    fields = text.split(None, 1)
    if len(fields) < 2:
        return None
    name, rest = fields
    if name == 'geomean':
        # Aggregate row, not an individual benchmark.
        return None

    # Remove "(p=... n=...)" so its numbers are never mistaken for timings.
    measurements = re.sub(r'\([^)]*\)', ' ', rest)

    # Multipliers that normalise every supported unit spelling to nanoseconds.
    # Both the micro sign (U+00B5) and Greek mu (U+03BC) are accepted.
    unit_to_ns = {
        'ns': 1.0, 'n': 1.0,
        'µs': 1e3, 'μs': 1e3, 'us': 1e3, 'µ': 1e3, 'μ': 1e3, 'u': 1e3,
        'ms': 1e6, 'm': 1e6,
        's': 1e9,
    }
    # A number that does not continue an earlier token, followed by a time unit
    # and an optional "/op". The unit is mandatory, which keeps spreads
    # ("± 2%"), deltas ("+10.00%") and iteration counts from matching.
    value_pattern = (
        r'(?<![\w.])(\d+(?:\.\d+)?)\s*'
        r'(ns|µs|μs|us|ms|s|n|µ|μ|u|m)(?:/op)?(?=\s|$)'
    )
    timings = [
        float(number) * unit_to_ns[unit]
        for number, unit in re.findall(value_pattern, measurements)
    ]
    if len(timings) < 2:
        return None

    # Extract the p-value if present; the pattern only accepts a well-formed
    # number so float() cannot fail on a stray ".".
    p_match = re.search(r'p=(\d+(?:\.\d+)?)', text)
    p_value = float(p_match.group(1)) if p_match else None

    return BenchmarkResult(
        name=name,
        old_time=timings[0],
        new_time=timings[1],
        old_bytes=None,
        new_bytes=None,
        old_allocs=None,
        new_allocs=None,
        p_value=p_value,
    )
|
||||
|
||||
|
||||
def parse_benchstat_output(content: str) -> List[BenchmarkResult]:
    """Parse complete benchstat output into a list of benchmark results.

    Blank lines, table headers and rules, environment metadata written by
    ``go test`` (``goos:``, ``goarch:``, ``pkg:``, ``cpu:``) and the
    ``geomean`` summary row are skipped. Every remaining line is handed to
    ``parse_benchstat_line``; lines it rejects are dropped.

    Args:
        content: The full text produced by benchstat.

    Returns:
        One BenchmarkResult per recognised benchmark row, in input order.
    """
    # Line prefixes that can never start a benchmark row.
    skipped_prefixes = (
        'name', '---',                       # header / rule of the older table layout
        '│',                                 # box-drawn header of the newer layout
        'goos:', 'goarch:', 'pkg:', 'cpu:',  # go test environment metadata
        'geomean',                           # aggregate row, not a benchmark
    )

    results = []
    for raw_line in content.splitlines():
        line = raw_line.strip()
        if not line or line.startswith(skipped_prefixes):
            continue
        parsed = parse_benchstat_line(line)
        if parsed is not None:
            results.append(parsed)
    return results
|
||||
|
||||
|
||||
def classify_regression(
    result: "BenchmarkResult",
    warning_threshold: float = 5.0,
    block_threshold: float = 15.0,
    p_threshold: Optional[float] = None,
) -> Tuple[str, float, Optional[float]]:
    """Classify one benchmark result as "OK", "WARNING" or "BLOCK".

    Args:
        result: The benchmark to classify; ``time_change_pct`` and ``p_value``
            are read from it.
        warning_threshold: Slowdown in percent at which a result becomes a
            WARNING. The sign is ignored, so 5 and -5 mean the same.
        block_threshold: Slowdown in percent at which a result becomes a BLOCK.
            The sign is ignored.
        p_threshold: When given, a result whose p-value is known and greater
            than this is treated as statistically insignificant and reported
            as OK. ``None`` (the default) disables the significance filter.

    Returns:
        ``(level, change_pct, p_value)``. ``change_pct`` is 0.0 when the
        result has no computable time change.
    """
    change = result.time_change_pct
    p_value = result.p_value
    if change is None:
        return "OK", 0.0, p_value

    # A change that is not statistically significant is noise, not a regression.
    if p_threshold is not None and p_value is not None and p_value > p_threshold:
        return "OK", change, p_value

    # Positive change = faster, negative change = slower.
    if change <= -abs(block_threshold):
        return "BLOCK", change, p_value
    if change <= -abs(warning_threshold):
        return "WARNING", change, p_value
    return "OK", change, p_value


def check_regressions(
    results: "List[BenchmarkResult]",
    warning_threshold: float = 5.0,
    block_threshold: float = 15.0,
    p_threshold: Optional[float] = None,
) -> Tuple[int, int, int]:
    """Classify every result, print a report table and count each level.

    Args:
        results: Benchmarks to check.
        warning_threshold: Forwarded to ``classify_regression``.
        block_threshold: Forwarded to ``classify_regression``.
        p_threshold: Forwarded to ``classify_regression``.

    Returns:
        ``(ok_count, warning_count, block_count)``.
    """
    icons = {"OK": "✓", "WARNING": "⚠", "BLOCK": "✗"}
    counts = {"OK": 0, "WARNING": 0, "BLOCK": 0}

    print("=" * 80)
    print("性能回归检测结果")
    print("=" * 80)
    print(f"{'基准测试':<40} {'变化':<12} {'P值':<12} {'级别':<10}")
    print("-" * 80)

    for result in results:
        level, change, p_value = classify_regression(
            result, warning_threshold, block_threshold, p_threshold
        )
        counts[level] += 1

        # Test against None, not truthiness: p=0.000 and a 0.00% change are
        # real values and must be printed rather than shown as "N/A".
        p_str = f"{p_value:.4f}" if p_value is not None else "N/A"
        change_str = f"{change:+.2f}%" if result.time_change_pct is not None else "N/A"

        print(f"{result.name:<40} {change_str:<12} {p_str:<12} {icons[level]} {level}")

    ok_count, warning_count, block_count = counts["OK"], counts["WARNING"], counts["BLOCK"]

    print("-" * 80)
    print(f"总结: {ok_count} 正常, {warning_count} 警告, {block_count} 阻断")
    print("=" * 80)

    return ok_count, warning_count, block_count


def main():
    """Command-line entry point.

    Reads benchstat output from a file (or stdin when the path is "-"),
    reports regressions and exits with 0 (none), 1 (WARNING) or 2 (BLOCK).
    Empty or unparseable input is reported on stderr and exits 0.
    """
    parser = argparse.ArgumentParser(
        description='解析 benchstat 输出并检测性能回归',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # argparse only %-formats an epilog that contains "%(prog)", so a
        # literal percent sign is written as a single "%" here.
        epilog='''
阈值说明:
  -5% ~ WARNING - 性能下降超过5%,需要关注
  -15% ~ BLOCK - 性能下降超过15%,阻止合并

示例:
  python check_regression.py benchmark-comparison.txt
  benchstat old.txt new.txt | python check_regression.py -
'''
    )
    parser.add_argument('file', help='benchstat 输出文件路径,或 "-" 从 stdin 读取')
    parser.add_argument('--warning-threshold', type=float, default=5.0,
                        help='警告阈值百分比(默认: 5)')
    parser.add_argument('--block-threshold', type=float, default=15.0,
                        help='阻断阈值百分比(默认: 15)')
    parser.add_argument('--p-value', type=float, default=0.05,
                        help='统计显著性 P 值阈值(默认: 0.05)')

    args = parser.parse_args()

    # Read the input.
    if args.file == '-':
        content = sys.stdin.read()
    else:
        try:
            # benchstat output contains non-ASCII characters (±, µ, │), so the
            # encoding is fixed rather than taken from the locale.
            with open(args.file, 'r', encoding='utf-8') as f:
                content = f.read()
        except FileNotFoundError:
            print(f"错误: 文件 '{args.file}' 不存在", file=sys.stderr)
            sys.exit(1)
        except IOError as e:
            print(f"错误: 无法读取文件: {e}", file=sys.stderr)
            sys.exit(1)

    if not content.strip():
        print("警告: 输入为空", file=sys.stderr)
        sys.exit(0)

    # Parse the results.
    results = parse_benchstat_output(content)

    if not results:
        print("警告: 未找到有效的基准测试结果", file=sys.stderr)
        sys.exit(0)

    # Check for regressions using the thresholds given on the command line.
    ok_count, warning_count, block_count = check_regressions(
        results,
        warning_threshold=args.warning_threshold,
        block_threshold=args.block_threshold,
        p_threshold=args.p_value,
    )

    # Map the worst level found onto the exit code.
    if block_count > 0:
        print(f"\n检测到 {block_count} 个 BLOCK 级别回归,建议阻止合并")
        sys.exit(2)
    elif warning_count > 0:
        print(f"\n检测到 {warning_count} 个 WARNING 级别回归,建议检查")
        sys.exit(1)
    else:
        print("\n未发现性能回归")
        sys.exit(0)


if __name__ == '__main__':
    main()
|
||||
Loading…
x
Reference in New Issue
Block a user