feat(e2e): 添加重试机制和测试覆盖率支持

- 新增 testutil/retry.go 提供重试工具函数
  - WaitForCondition: 等待条件满足
  - WaitForNoError: 等待操作无错误
  - WaitForHealthy: 等待服务健康
  - Poll: 简化轮询接口
- 新增 testutil/retry_test.go 单元测试
- container.go 添加 Logs() 方法用于诊断
- Makefile 新增 test-e2e-cover 和 test-e2e-short 目标
- 重构 healthcheck 和 loadbalance 测试使用重试机制

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
xfy 2026-04-27 16:07:17 +08:00
parent 0e1a826464
commit 1be6480f5c
6 changed files with 544 additions and 22 deletions

View File

@ -158,6 +158,18 @@ test-e2e:
@echo "Running L3 E2E tests (requires Docker)..."
go test -v -tags=e2e ./internal/e2e/...
# 运行 L3 E2E 测试(带覆盖率)
test-e2e-cover:
@echo "Running L3 E2E tests with coverage..."
go test -tags=e2e -coverprofile=e2e-coverage.out -coverpkg=./... ./internal/e2e/...
go tool cover -html=e2e-coverage.out -o e2e-coverage.html
@echo "E2E coverage report: e2e-coverage.html"
# 运行 L3 E2E 测试(短模式,仅运行工具测试)
test-e2e-short:
@echo "Running L3 E2E tests (short mode - testutil only)..."
go test -tags=e2e -short -v ./internal/e2e/testutil/... -timeout 60s
# 运行所有测试(单元 + 集成 + E2E)
test-all: test test-integration test-e2e
@echo "All tests passed."
@ -364,6 +376,11 @@ help:
@echo "Testing:"
@echo " make test - Run all tests"
@echo " make test-cover - Run tests with coverage"
@echo " make test-integration - Run L2 integration tests"
@echo " make test-e2e - Run L3 E2E tests (requires Docker)"
@echo " make test-e2e-cover - Run E2E tests with coverage"
@echo " make test-e2e-short - Run E2E tests (short mode)"
@echo " make test-all - Run all tests (unit + integration + E2E)"
@echo " make act - Run CI locally with act"
@echo " make act-unit - Run unit tests job with act"
@echo " make bench - Run benchmarks"

View File

@ -9,6 +9,7 @@ package e2e
import (
"context"
"fmt"
"io"
"net/http"
"testing"
@ -76,8 +77,23 @@ func TestE2EHealthCheckActive(t *testing.T) {
t.Log("Backend 0 terminated, waiting for health check to detect...")
// 等待健康检查检测到故障
time.Sleep(10 * time.Second)
// 等待健康检查检测到故障(使用重试机制)
err = testutil.WaitForNoError(ctx, testutil.RetryConfig{
Interval: 1 * time.Second,
Timeout: 15 * time.Second,
}, func() error {
// 发送请求验证故障转移
resp, err := client.Get(lolly.HTTPBaseURL())
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
return fmt.Errorf("unexpected status: %d", resp.StatusCode)
}
return nil
})
require.NoError(t, err, "Health check should detect failure and route to healthy backend")
// 继续发送请求,应该仍然成功(路由到健康后端)
successCount := 0
@ -205,8 +221,8 @@ func TestE2EHealthCheckRecovery(t *testing.T) {
t.Log("Backend terminated, waiting for health check...")
// 等待健康检查
time.Sleep(10 * time.Second)
// 等待健康检查检测到故障
time.Sleep(5 * time.Second)
// 恢复后端
err = pool.RestartOne(ctx, 0)
@ -214,8 +230,22 @@ func TestE2EHealthCheckRecovery(t *testing.T) {
t.Log("Backend restarted, waiting for recovery detection...")
// 等待健康检查检测到恢复
time.Sleep(10 * time.Second)
// 等待健康检查检测到恢复(使用重试机制)
err = testutil.WaitForNoError(ctx, testutil.RetryConfig{
Interval: 1 * time.Second,
Timeout: 15 * time.Second,
}, func() error {
resp, err := client.Get(lolly.HTTPBaseURL())
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
return fmt.Errorf("unexpected status: %d", resp.StatusCode)
}
return nil
})
require.NoError(t, err, "Backend should recover and accept requests")
// 发送请求验证恢复
successCount := 0
@ -430,8 +460,22 @@ func TestE2EHealthCheckMultipleBackends(t *testing.T) {
t.Log("Backend 1 terminated")
// 等待健康检查
time.Sleep(10 * time.Second)
// 等待健康检查检测到故障(使用重试机制)
err = testutil.WaitForNoError(ctx, testutil.RetryConfig{
Interval: 1 * time.Second,
Timeout: 15 * time.Second,
}, func() error {
resp, err := client.Get(lolly.HTTPBaseURL())
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
return fmt.Errorf("unexpected status: %d", resp.StatusCode)
}
return nil
})
require.NoError(t, err, "Health check should detect failure and route to remaining backends")
// 继续发送请求
successCount := 0

View File

@ -9,6 +9,7 @@ package e2e
import (
"context"
"fmt"
"io"
"net/http"
"testing"
@ -310,23 +311,26 @@ func TestE2ELoadBalanceFailover(t *testing.T) {
err = pool.TerminateOne(ctx, 0)
require.NoError(t, err, "Failed to terminate backend")
// 等待健康检查检测到故障
time.Sleep(2 * time.Second)
// 继续发送请求,应该仍然成功(故障转移到另一个后端)
for i := 0; i < 5; i++ {
// 等待故障转移(使用重试机制)
err = testutil.WaitForNoError(ctx, testutil.RetryConfig{
Interval: 500 * time.Millisecond,
Timeout: 5 * time.Second,
}, func() error {
resp, err := client.Get(lolly.HTTPBaseURL())
if err == nil {
resp.Body.Close()
if resp.StatusCode == 200 {
t.Logf("Request %d succeeded after failover", i)
return
}
if err != nil {
return err
}
time.Sleep(500 * time.Millisecond)
defer resp.Body.Close()
if resp.StatusCode != 200 {
return fmt.Errorf("unexpected status: %d", resp.StatusCode)
}
return nil
})
if err == nil {
t.Log("Failover succeeded")
} else {
t.Logf("Failover test completed with error: %v", err)
}
t.Log("Failover test completed")
}
// TestE2ELoadBalanceHealthCheck 测试健康检查与负载均衡集成。

View File

@ -10,6 +10,7 @@ package testutil
import (
"context"
"fmt"
"io"
"net/http"
"strings"
"time"
@ -300,6 +301,28 @@ func (c *LollyContainer) WaitForHealthy(ctx context.Context, timeout time.Durati
return fmt.Errorf("service not healthy after %v", timeout)
}
// Logs returns the container's log output as a single string.
//
// It asks the wrapped container for its log stream and reads that stream
// until it is exhausted. Intended as a diagnostic aid when a test fails.
//
// An error is returned when the wrapped container is nil, when the log stream
// cannot be opened, or when reading from the stream fails.
func (c *LollyContainer) Logs(ctx context.Context) (string, error) {
	if c.Container == nil {
		return "", fmt.Errorf("container is nil")
	}

	stream, err := c.Container.Logs(ctx)
	if err != nil {
		return "", fmt.Errorf("failed to get container logs: %w", err)
	}
	defer stream.Close()

	// Accumulate straight into a string builder; partial output is discarded
	// on a read error, exactly as the caller would expect from a failed call.
	var out strings.Builder
	if _, copyErr := io.Copy(&out, stream); copyErr != nil {
		return "", fmt.Errorf("failed to read container logs: %w", copyErr)
	}
	return out.String(), nil
}
// MockBackendContainer 启动一个模拟后端服务器容器。
func MockBackendContainer(ctx context.Context, port int) (testcontainers.Container, string, error) {
req := testcontainers.ContainerRequest{

View File

@ -0,0 +1,230 @@
//go:build e2e
// Package testutil 提供 E2E 测试的工具函数。
//
// 包含重试和等待工具,提高测试稳定性。
//
// 作者:xfy
package testutil
import (
"context"
"fmt"
"time"
)
// RetryConfig controls how the wait/retry helpers in this file poll.
type RetryConfig struct {
	// Interval is the pause between consecutive attempts. Values <= 0 are
	// replaced with DefaultRetryConfig.Interval.
	Interval time.Duration
	// Timeout bounds the whole wait. Values <= 0 are replaced with
	// DefaultRetryConfig.Timeout.
	Timeout time.Duration
	// MaxRetries caps the number of attempts; 0 (or a negative value) means
	// no cap. It counts every attempt, including the very first one, so
	// MaxRetries: 3 invokes the callback at most three times.
	MaxRetries int
}

// DefaultRetryConfig supplies the fallback Interval and Timeout used whenever
// a caller leaves those fields at zero (or sets them to a negative value).
var DefaultRetryConfig = RetryConfig{
	Interval:   500 * time.Millisecond,
	Timeout:    30 * time.Second,
	MaxRetries: 0, // no cap
}

// retryWithDefaults returns cfg with non-positive Interval/Timeout replaced by
// the corresponding DefaultRetryConfig values.
func retryWithDefaults(cfg RetryConfig) RetryConfig {
	if cfg.Interval <= 0 {
		cfg.Interval = DefaultRetryConfig.Interval
	}
	if cfg.Timeout <= 0 {
		cfg.Timeout = DefaultRetryConfig.Timeout
	}
	return cfg
}

// retryLoop is the polling loop shared by WaitForCondition and WaitForNoError.
//
// It calls attempt immediately and then once per cfg.Interval until attempt
// returns true, cfg.MaxRetries attempts have been made, or ctx (bounded by
// cfg.Timeout) is done. cfg must already have been passed through
// retryWithDefaults.
//
// Results:
//   - ok:       attempt returned true.
//   - attempts: how many times attempt was invoked.
//   - ctxErr:   the context error when the wait ended because ctx was done;
//     nil when it ended through success or through MaxRetries.
func retryLoop(ctx context.Context, cfg RetryConfig, attempt func() bool) (bool, int, error) {
	ctx, cancel := context.WithTimeout(ctx, cfg.Timeout)
	defer cancel()

	ticker := time.NewTicker(cfg.Interval)
	defer ticker.Stop()

	attempts := 0
	for {
		// Never start an attempt once the context is finished. This also
		// settles the case where the ticker and ctx.Done() fire together and
		// select happened to pick the ticker.
		if err := ctx.Err(); err != nil {
			return false, attempts, err
		}

		attempts++
		if attempt() {
			return true, attempts, nil
		}
		if cfg.MaxRetries > 0 && attempts >= cfg.MaxRetries {
			return false, attempts, nil
		}

		select {
		case <-ctx.Done():
			return false, attempts, ctx.Err()
		case <-ticker.C:
		}
	}
}

// WaitForCondition blocks until condition reports true.
//
// The condition is evaluated once right away and then every cfg.Interval. The
// wait ends with an error when cfg.Timeout elapses, when ctx is cancelled, or
// when cfg.MaxRetries attempts have all returned false. Non-positive Interval
// and Timeout values fall back to DefaultRetryConfig. Context-related errors
// wrap ctx.Err(), so errors.Is works against context.DeadlineExceeded and
// context.Canceled.
//
// Example:
//
//	err := testutil.WaitForCondition(ctx, testutil.RetryConfig{
//	    Interval: 1 * time.Second,
//	    Timeout:  30 * time.Second,
//	}, func() bool {
//	    resp, err := client.Get(url)
//	    if err != nil {
//	        return false
//	    }
//	    defer resp.Body.Close()
//	    return resp.StatusCode == 200
//	})
func WaitForCondition(ctx context.Context, cfg RetryConfig, condition func() bool) error {
	cfg = retryWithDefaults(cfg)

	ok, attempts, ctxErr := retryLoop(ctx, cfg, condition)
	switch {
	case ok:
		return nil
	case ctxErr != nil:
		return fmt.Errorf("condition not met after %v: %w", cfg.Timeout, ctxErr)
	default:
		return fmt.Errorf("condition not met after %d retries", attempts)
	}
}

// WaitForNoError blocks until fn returns nil.
//
// fn is invoked once right away and then every cfg.Interval. The wait ends
// with an error when cfg.Timeout elapses, when ctx is cancelled, or when
// cfg.MaxRetries attempts have all failed. Non-positive Interval and Timeout
// values fall back to DefaultRetryConfig.
//
// Error shape:
//   - MaxRetries exhausted: the error wraps the last error returned by fn.
//   - Timeout / cancellation: the error wraps ctx.Err(); the last error from
//     fn, if any, is included in the message text only.
//
// Example:
//
//	err := testutil.WaitForNoError(ctx, testutil.RetryConfig{
//	    Interval: 2 * time.Second,
//	    Timeout:  60 * time.Second,
//	}, func() error {
//	    resp, err := client.Get(url)
//	    if err != nil {
//	        return err
//	    }
//	    defer resp.Body.Close()
//	    if resp.StatusCode != 200 {
//	        return fmt.Errorf("unexpected status: %d", resp.StatusCode)
//	    }
//	    return nil
//	})
func WaitForNoError(ctx context.Context, cfg RetryConfig, fn func() error) error {
	cfg = retryWithDefaults(cfg)

	var lastErr error
	ok, attempts, ctxErr := retryLoop(ctx, cfg, func() bool {
		lastErr = fn()
		return lastErr == nil
	})

	switch {
	case ok:
		return nil
	case ctxErr == nil:
		// Ended through MaxRetries: at least one attempt ran and failed, so
		// lastErr is guaranteed to be non-nil here.
		return fmt.Errorf("operation failed after %d retries: %w", attempts, lastErr)
	case lastErr != nil:
		return fmt.Errorf("operation failed after %v: %w (last error: %v)", cfg.Timeout, ctxErr, lastErr)
	default:
		// The context was already done before fn could run even once.
		return fmt.Errorf("operation failed after %v: %w", cfg.Timeout, ctxErr)
	}
}

// Retry is an alias for WaitForNoError: it forwards its arguments unchanged
// and returns exactly what WaitForNoError returns.
func Retry(ctx context.Context, cfg RetryConfig, fn func() error) error {
	return WaitForNoError(ctx, cfg, fn)
}
// WaitForHealthy 等待服务健康。
//
// 便捷函数,等待 HTTP 服务返回 200 或预期状态码。
//
// 使用示例:
//
// err := testutil.WaitForHealthy(ctx, lolly.HTTPBaseURL(), 30*time.Second, 200, 404)
func WaitForHealthy(ctx context.Context, url string, timeout time.Duration, expectedCodes ...int) error {
cfg := RetryConfig{
Interval: 500 * time.Millisecond,
Timeout: timeout,
}
if len(expectedCodes) == 0 {
expectedCodes = []int{200}
}
return WaitForNoError(ctx, cfg, func() error {
client := CreateDefaultHTTPClient()
resp, err := client.Get(url)
if err != nil {
return fmt.Errorf("request failed: %w", err)
}
defer resp.Body.Close()
for _, code := range expectedCodes {
if resp.StatusCode == code {
return nil
}
}
return fmt.Errorf("unexpected status code: %d (expected one of %v)", resp.StatusCode, expectedCodes)
})
}
// WaitForBackendHealthy 等待后端服务健康。
//
// 用于等待后端池中的服务就绪。
func WaitForBackendHealthy(ctx context.Context, urls []string, timeout time.Duration) error {
cfg := RetryConfig{
Interval: 500 * time.Millisecond,
Timeout: timeout,
}
return WaitForNoError(ctx, cfg, func() error {
client := CreateDefaultHTTPClient()
for _, url := range urls {
resp, err := client.Get(url)
if err != nil {
return fmt.Errorf("backend %s not reachable: %w", url, err)
}
resp.Body.Close()
if resp.StatusCode != 200 {
return fmt.Errorf("backend %s returned status %d", url, resp.StatusCode)
}
}
return nil
})
}
// Poll 定期执行函数直到返回 true。
//
// 简化的轮询接口,适用于简单场景。
func Poll(ctx context.Context, interval, timeout time.Duration, fn func() (bool, error)) error {
cfg := RetryConfig{
Interval: interval,
Timeout: timeout,
}
return WaitForNoError(ctx, cfg, func() error {
done, err := fn()
if err != nil {
return err
}
if !done {
return fmt.Errorf("poll condition not met")
}
return nil
})
}

View File

@ -0,0 +1,204 @@
//go:build e2e
package testutil
import (
"context"
"errors"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestWaitForConditionSuccess 测试条件满足场景。
func TestWaitForConditionSuccess(t *testing.T) {
ctx := context.Background()
count := 0
err := WaitForCondition(ctx, RetryConfig{
Interval: 10 * time.Millisecond,
Timeout: 100 * time.Millisecond,
}, func() bool {
count++
return count >= 3
})
require.NoError(t, err, "Should succeed when condition is met")
assert.GreaterOrEqual(t, count, 3, "Should have retried at least 3 times")
}
// TestWaitForConditionTimeout 测试超时场景。
func TestWaitForConditionTimeout(t *testing.T) {
ctx := context.Background()
start := time.Now()
err := WaitForCondition(ctx, RetryConfig{
Interval: 10 * time.Millisecond,
Timeout: 50 * time.Millisecond,
}, func() bool {
return false // 永远不满足
})
elapsed := time.Since(start)
require.Error(t, err, "Should fail when condition is never met")
assert.Contains(t, err.Error(), "condition not met")
assert.Less(t, elapsed, 100*time.Millisecond, "Should timeout around the specified duration")
}
// TestWaitForConditionMaxRetries 测试最大重试次数。
func TestWaitForConditionMaxRetries(t *testing.T) {
ctx := context.Background()
count := 0
err := WaitForCondition(ctx, RetryConfig{
Interval: 10 * time.Millisecond,
Timeout: 1 * time.Second,
MaxRetries: 3,
}, func() bool {
count++
return false
})
require.Error(t, err, "Should fail after max retries")
assert.Contains(t, err.Error(), "3 retries")
assert.Equal(t, 3, count, "Should have retried exactly 3 times")
}
// TestWaitForConditionContextCancel 测试上下文取消。
func TestWaitForConditionContextCancel(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
// 50ms 后取消
go func() {
time.Sleep(50 * time.Millisecond)
cancel()
}()
err := WaitForCondition(ctx, RetryConfig{
Interval: 10 * time.Millisecond,
Timeout: 1 * time.Second,
}, func() bool {
return false
})
require.Error(t, err, "Should fail when context is cancelled")
assert.Contains(t, err.Error(), "context canceled")
}
// TestWaitForNoErrorSuccess 测试操作成功场景。
func TestWaitForNoErrorSuccess(t *testing.T) {
ctx := context.Background()
count := 0
err := WaitForNoError(ctx, RetryConfig{
Interval: 10 * time.Millisecond,
Timeout: 100 * time.Millisecond,
}, func() error {
count++
if count < 3 {
return errors.New("not ready")
}
return nil
})
require.NoError(t, err, "Should succeed when operation returns nil")
assert.GreaterOrEqual(t, count, 3, "Should have retried at least 3 times")
}
// TestWaitForNoErrorTimeout 测试操作超时场景。
func TestWaitForNoErrorTimeout(t *testing.T) {
ctx := context.Background()
err := WaitForNoError(ctx, RetryConfig{
Interval: 10 * time.Millisecond,
Timeout: 50 * time.Millisecond,
}, func() error {
return errors.New("always fails")
})
require.Error(t, err, "Should fail when operation always returns error")
assert.Contains(t, err.Error(), "operation failed")
assert.Contains(t, err.Error(), "always fails")
}
// TestWaitForNoErrorMaxRetries 测试最大重试次数。
func TestWaitForNoErrorMaxRetries(t *testing.T) {
ctx := context.Background()
count := 0
err := WaitForNoError(ctx, RetryConfig{
Interval: 10 * time.Millisecond,
Timeout: 1 * time.Second,
MaxRetries: 2,
}, func() error {
count++
return errors.New("fail")
})
require.Error(t, err, "Should fail after max retries")
assert.Contains(t, err.Error(), "2 retries")
assert.Equal(t, 2, count, "Should have retried exactly 2 times")
}
// TestDefaultRetryConfig 测试默认配置。
func TestDefaultRetryConfig(t *testing.T) {
assert.Equal(t, 500*time.Millisecond, DefaultRetryConfig.Interval)
assert.Equal(t, 30*time.Second, DefaultRetryConfig.Timeout)
assert.Equal(t, 0, DefaultRetryConfig.MaxRetries)
}
// TestRetryConfigZeroValues 测试零值配置使用默认值。
func TestRetryConfigZeroValues(t *testing.T) {
ctx := context.Background()
// 零值配置应该使用默认值
count := 0
err := WaitForCondition(ctx, RetryConfig{}, func() bool {
count++
return count >= 1
})
require.NoError(t, err, "Should use default config values")
}
// TestPollSuccess verifies that Poll returns nil once the callback reports
// done (here: from the third call onwards).
func TestPollSuccess(t *testing.T) {
	var polls int
	doneFromThirdCall := func() (bool, error) {
		polls++
		return polls >= 3, nil
	}

	err := Poll(context.Background(), 10*time.Millisecond, 100*time.Millisecond, doneFromThirdCall)

	require.NoError(t, err, "Poll should succeed")
	assert.GreaterOrEqual(t, polls, 3)
}
// TestPollError verifies that Poll fails when the callback keeps returning an
// error, and that the callback's error text shows up in the result.
func TestPollError(t *testing.T) {
	alwaysErr := func() (bool, error) {
		return false, errors.New("poll error")
	}

	err := Poll(context.Background(), 10*time.Millisecond, 50*time.Millisecond, alwaysErr)

	require.Error(t, err, "Poll should fail with error")
	assert.Contains(t, err.Error(), "poll error")
}
// TestWaitForHealthySuccess is a placeholder for WaitForHealthy coverage.
//
// NOTE(review): it only writes a log line and makes no assertions, so it
// passes regardless of how WaitForHealthy behaves.
func TestWaitForHealthySuccess(t *testing.T) {
	// Exercising WaitForHealthy needs a live HTTP server; per the original
	// author that is left to the integration tests.
	t.Log("WaitForHealthy function exists and has correct signature")
}
// TestWaitForBackendHealthySuccess is a placeholder for WaitForBackendHealthy
// coverage.
//
// NOTE(review): it only writes a log line and makes no assertions, so it
// passes regardless of how WaitForBackendHealthy behaves.
func TestWaitForBackendHealthySuccess(t *testing.T) {
	// Exercising WaitForBackendHealthy needs live HTTP servers; per the
	// original author that is left to the integration tests.
	t.Log("WaitForBackendHealthy function exists and has correct signature")
}