Amended patch: E2E retry helpers.

Review changes relative to the submitted patch (only added lines were touched):
* retry.go: shared default handling; removed the unreachable branches and the
  else-after-return in WaitForNoError; timeout/cancel errors report the real
  elapsed time; the HTTP client is built once per wait; doc comments for
  Retry and WaitForCondition corrected.
* retry_test.go: placeholder tests now t.Skip instead of passing vacuously;
  the wall-clock bound in TestWaitForConditionTimeout is loosened.
* Comments on added lines are written in English.
* NOTE(review): the health-check waits return on the first 200 response, which
  may arrive before the dead backend is ejected. Confirm that this settle time
  is enough for the request loops that follow.
* The blob hashes on the "index" lines are those of the submitted patch and do
  not reflect the edits above.

diff --git a/Makefile b/Makefile
index 7daf55a..92cb9c9 100644
--- a/Makefile
+++ b/Makefile
@@ -158,6 +158,18 @@ test-e2e:
 	@echo "Running L3 E2E tests (requires Docker)..."
 	go test -v -tags=e2e ./internal/e2e/...
 
+# Run L3 E2E tests with coverage
+test-e2e-cover:
+	@echo "Running L3 E2E tests with coverage..."
+	go test -tags=e2e -coverprofile=e2e-coverage.out -coverpkg=./... ./internal/e2e/...
+	go tool cover -html=e2e-coverage.out -o e2e-coverage.html
+	@echo "E2E coverage report: e2e-coverage.html"
+
+# Run L3 E2E tests in short mode (testutil tests only)
+test-e2e-short:
+	@echo "Running L3 E2E tests (short mode - testutil only)..."
+	go test -tags=e2e -short -v ./internal/e2e/testutil/... -timeout 60s
+
 # 运行所有测试(单元 + 集成 + E2E)
 test-all: test test-integration test-e2e
 	@echo "All tests passed."
@@ -364,6 +376,11 @@ help:
 	@echo "Testing:"
 	@echo " make test - Run all tests"
 	@echo " make test-cover - Run tests with coverage"
+	@echo " make test-integration - Run L2 integration tests"
+	@echo " make test-e2e - Run L3 E2E tests (requires Docker)"
+	@echo " make test-e2e-cover - Run E2E tests with coverage"
+	@echo " make test-e2e-short - Run E2E tests (short mode)"
+	@echo " make test-all - Run all tests (unit + integration + E2E)"
 	@echo " make act - Run CI locally with act"
 	@echo " make act-unit - Run unit tests job with act"
 	@echo " make bench - Run benchmarks"
diff --git a/internal/e2e/healthcheck_e2e_test.go b/internal/e2e/healthcheck_e2e_test.go
index 9c306fc..de370a4 100644
--- a/internal/e2e/healthcheck_e2e_test.go
+++ b/internal/e2e/healthcheck_e2e_test.go
@@ -9,6 +9,7 @@ package e2e
 
 import (
 	"context"
+	"fmt"
 	"io"
 	"net/http"
 	"testing"
@@ -76,8 +77,23 @@ func TestE2EHealthCheckActive(t *testing.T) {
 
 	t.Log("Backend 0 terminated, waiting for health check to detect...")
 
-	// 等待健康检查检测到故障
-	time.Sleep(10 * time.Second)
+	// Wait for the health check to detect the failure (poll with retries).
+	err = testutil.WaitForNoError(ctx, testutil.RetryConfig{
+		Interval: 1 * time.Second,
+		Timeout:  15 * time.Second,
+	}, func() error {
+		// Send a request to verify failover.
+		resp, err := client.Get(lolly.HTTPBaseURL())
+		if err != nil {
+			return err
+		}
+		defer resp.Body.Close()
+		if resp.StatusCode != 200 {
+			return fmt.Errorf("unexpected status: %d", resp.StatusCode)
+		}
+		return nil
+	})
+	require.NoError(t, err, "Health check should detect failure and route to healthy backend")
 
 	// 继续发送请求,应该仍然成功(路由到健康后端)
 	successCount := 0
@@ -205,8 +221,8 @@ func TestE2EHealthCheckRecovery(t *testing.T) {
 
 	t.Log("Backend terminated, waiting for health check...")
 
-	// 等待健康检查
-	time.Sleep(10 * time.Second)
+	// Wait for the health check to detect the failure.
+	time.Sleep(5 * time.Second)
 
 	// 恢复后端
 	err = pool.RestartOne(ctx, 0)
@@ -214,8 +230,22 @@ func TestE2EHealthCheckRecovery(t *testing.T) {
 
 	t.Log("Backend restarted, waiting for recovery detection...")
 
-	// 等待健康检查检测到恢复
-	time.Sleep(10 * time.Second)
+	// Wait for the health check to detect the recovery (poll with retries).
+	err = testutil.WaitForNoError(ctx, testutil.RetryConfig{
+		Interval: 1 * time.Second,
+		Timeout:  15 * time.Second,
+	}, func() error {
+		resp, err := client.Get(lolly.HTTPBaseURL())
+		if err != nil {
+			return err
+		}
+		defer resp.Body.Close()
+		if resp.StatusCode != 200 {
+			return fmt.Errorf("unexpected status: %d", resp.StatusCode)
+		}
+		return nil
+	})
+	require.NoError(t, err, "Backend should recover and accept requests")
 
 	// 发送请求验证恢复
 	successCount := 0
@@ -430,8 +460,22 @@ func TestE2EHealthCheckMultipleBackends(t *testing.T) {
 
 	t.Log("Backend 1 terminated")
 
-	// 等待健康检查
-	time.Sleep(10 * time.Second)
+	// Wait for the health check to detect the failure (poll with retries).
+	err = testutil.WaitForNoError(ctx, testutil.RetryConfig{
+		Interval: 1 * time.Second,
+		Timeout:  15 * time.Second,
+	}, func() error {
+		resp, err := client.Get(lolly.HTTPBaseURL())
+		if err != nil {
+			return err
+		}
+		defer resp.Body.Close()
+		if resp.StatusCode != 200 {
+			return fmt.Errorf("unexpected status: %d", resp.StatusCode)
+		}
+		return nil
+	})
+	require.NoError(t, err, "Health check should detect failure and route to remaining backends")
 
 	// 继续发送请求
 	successCount := 0
diff --git a/internal/e2e/loadbalance_e2e_test.go b/internal/e2e/loadbalance_e2e_test.go
index b4af701..8fe5c76 100644
--- a/internal/e2e/loadbalance_e2e_test.go
+++ b/internal/e2e/loadbalance_e2e_test.go
@@ -9,6 +9,7 @@ package e2e
 
 import (
 	"context"
+	"fmt"
 	"io"
 	"net/http"
 	"testing"
@@ -310,23 +311,26 @@ func TestE2ELoadBalanceFailover(t *testing.T) {
 	err = pool.TerminateOne(ctx, 0)
 	require.NoError(t, err, "Failed to terminate backend")
 
-	// 等待健康检查检测到故障
-	time.Sleep(2 * time.Second)
-
-	// 继续发送请求,应该仍然成功(故障转移到另一个后端)
-	for i := 0; i < 5; i++ {
+	// Wait for failover (poll with retries).
+	err = testutil.WaitForNoError(ctx, testutil.RetryConfig{
+		Interval: 500 * time.Millisecond,
+		Timeout:  5 * time.Second,
+	}, func() error {
 		resp, err := client.Get(lolly.HTTPBaseURL())
-		if err == nil {
-			resp.Body.Close()
-			if resp.StatusCode == 200 {
-				t.Logf("Request %d succeeded after failover", i)
-				return
-			}
+		if err != nil {
+			return err
 		}
-		time.Sleep(500 * time.Millisecond)
+		defer resp.Body.Close()
+		if resp.StatusCode != 200 {
+			return fmt.Errorf("unexpected status: %d", resp.StatusCode)
+		}
+		return nil
+	})
+	if err == nil {
+		t.Log("Failover succeeded")
+	} else {
+		t.Logf("Failover test completed with error: %v", err)
 	}
-
-	t.Log("Failover test completed")
 }
 
 // TestE2ELoadBalanceHealthCheck 测试健康检查与负载均衡集成。
diff --git a/internal/e2e/testutil/container.go b/internal/e2e/testutil/container.go
index 27e6833..463af9e 100644
--- a/internal/e2e/testutil/container.go
+++ b/internal/e2e/testutil/container.go
@@ -10,6 +10,7 @@ package testutil
 import (
 	"context"
 	"fmt"
+	"io"
 	"net/http"
 	"strings"
 	"time"
@@ -300,6 +301,28 @@ func (c *LollyContainer) WaitForHealthy(ctx context.Context, timeout time.Durati
 	return fmt.Errorf("service not healthy after %v", timeout)
 }
 
+// Logs returns the container's log output as a string.
+//
+// Intended for diagnosing test failures.
+func (c *LollyContainer) Logs(ctx context.Context) (string, error) {
+	if c.Container == nil {
+		return "", fmt.Errorf("container is nil")
+	}
+
+	reader, err := c.Container.Logs(ctx)
+	if err != nil {
+		return "", fmt.Errorf("failed to get container logs: %w", err)
+	}
+	defer reader.Close()
+
+	data, err := io.ReadAll(reader)
+	if err != nil {
+		return "", fmt.Errorf("failed to read container logs: %w", err)
+	}
+
+	return string(data), nil
+}
+
 // MockBackendContainer 启动一个模拟后端服务器容器。
 func MockBackendContainer(ctx context.Context, port int) (testcontainers.Container, string, error) {
 	req := testcontainers.ContainerRequest{
diff --git a/internal/e2e/testutil/retry.go b/internal/e2e/testutil/retry.go
new file mode 100644
index 0000000..fd78eef
--- /dev/null
+++ b/internal/e2e/testutil/retry.go
@@ -0,0 +1,244 @@
+//go:build e2e
+
+// Package testutil provides helper functions for E2E tests.
+//
+// This file holds the retry and wait helpers used to make tests less
+// sensitive to timing.
+//
+// Author: xfy
+package testutil
+
+import (
+	"context"
+	"fmt"
+	"time"
+)
+
+// RetryConfig configures a polling loop.
+type RetryConfig struct {
+	// Interval is the delay between attempts. Values <= 0 select
+	// DefaultRetryConfig.Interval.
+	Interval time.Duration
+	// Timeout bounds the whole wait. Values <= 0 select
+	// DefaultRetryConfig.Timeout.
+	Timeout time.Duration
+	// MaxRetries caps the number of attempts (0 means unlimited).
+	MaxRetries int
+}
+
+// DefaultRetryConfig supplies the values used for unset RetryConfig fields.
+var DefaultRetryConfig = RetryConfig{
+	Interval:   500 * time.Millisecond,
+	Timeout:    30 * time.Second,
+	MaxRetries: 0, // unlimited
+}
+
+// withDefaults returns a copy of c whose non-positive Interval and Timeout
+// are replaced by the DefaultRetryConfig values.
+func (c RetryConfig) withDefaults() RetryConfig {
+	if c.Interval <= 0 {
+		c.Interval = DefaultRetryConfig.Interval
+	}
+	if c.Timeout <= 0 {
+		c.Timeout = DefaultRetryConfig.Timeout
+	}
+	return c
+}
+
+// WaitForCondition polls condition until it returns true.
+//
+// The first check runs one Interval after the call and then once per
+// Interval. It returns nil as soon as condition reports true, and an error
+// when cfg.Timeout elapses, ctx is cancelled, or cfg.MaxRetries checks have
+// failed. Non-positive Interval and Timeout fall back to DefaultRetryConfig.
+//
+// Example:
+//
+//	err := testutil.WaitForCondition(ctx, testutil.RetryConfig{
+//		Interval: 1 * time.Second,
+//		Timeout:  30 * time.Second,
+//	}, func() bool {
+//		resp, err := client.Get(url)
+//		if err != nil {
+//			return false
+//		}
+//		defer resp.Body.Close()
+//		return resp.StatusCode == 200
+//	})
+func WaitForCondition(ctx context.Context, cfg RetryConfig, condition func() bool) error {
+	cfg = cfg.withDefaults()
+	start := time.Now()
+
+	ctx, cancel := context.WithTimeout(ctx, cfg.Timeout)
+	defer cancel()
+
+	ticker := time.NewTicker(cfg.Interval)
+	defer ticker.Stop()
+
+	attempts := 0
+	for {
+		select {
+		case <-ctx.Done():
+			// Report the real elapsed time: ctx may have been cancelled by the
+			// caller well before cfg.Timeout.
+			elapsed := time.Since(start).Round(time.Millisecond)
+			return fmt.Errorf("condition not met after %v: %w", elapsed, ctx.Err())
+		case <-ticker.C:
+			if condition() {
+				return nil
+			}
+			attempts++
+			if cfg.MaxRetries > 0 && attempts >= cfg.MaxRetries {
+				return fmt.Errorf("condition not met after %d retries", attempts)
+			}
+		}
+	}
+}
+
+// WaitForNoError calls fn until it returns nil.
+//
+// The first call happens one Interval after WaitForNoError is invoked and
+// then once per Interval. It returns nil on the first success. On timeout or
+// cancellation the error wraps ctx.Err() and mentions fn's last error; when
+// cfg.MaxRetries is reached the error wraps fn's last error.
+//
+// Example:
+//
+//	err := testutil.WaitForNoError(ctx, testutil.RetryConfig{
+//		Interval: 2 * time.Second,
+//		Timeout:  60 * time.Second,
+//	}, func() error {
+//		resp, err := client.Get(url)
+//		if err != nil {
+//			return err
+//		}
+//		defer resp.Body.Close()
+//		if resp.StatusCode != 200 {
+//			return fmt.Errorf("unexpected status: %d", resp.StatusCode)
+//		}
+//		return nil
+//	})
+func WaitForNoError(ctx context.Context, cfg RetryConfig, fn func() error) error {
+	cfg = cfg.withDefaults()
+	start := time.Now()
+
+	ctx, cancel := context.WithTimeout(ctx, cfg.Timeout)
+	defer cancel()
+
+	ticker := time.NewTicker(cfg.Interval)
+	defer ticker.Stop()
+
+	attempts := 0
+	var lastErr error
+	for {
+		select {
+		case <-ctx.Done():
+			elapsed := time.Since(start).Round(time.Millisecond)
+			if lastErr == nil {
+				// fn was never called: the context ended before the first tick.
+				return fmt.Errorf("operation failed after %v: %w", elapsed, ctx.Err())
+			}
+			return fmt.Errorf("operation failed after %v: %w (last error: %v)", elapsed, ctx.Err(), lastErr)
+		case <-ticker.C:
+			lastErr = fn()
+			if lastErr == nil {
+				return nil
+			}
+			attempts++
+			if cfg.MaxRetries > 0 && attempts >= cfg.MaxRetries {
+				return fmt.Errorf("operation failed after %d retries: %w", attempts, lastErr)
+			}
+		}
+	}
+}
+
+// Retry is an alias for WaitForNoError with identical behavior and errors.
+func Retry(ctx context.Context, cfg RetryConfig, fn func() error) error {
+	return WaitForNoError(ctx, cfg, fn)
+}
+
+// WaitForHealthy waits until a GET on url returns one of expectedCodes.
+//
+// expectedCodes defaults to 200 when empty. The URL is probed every 500ms
+// until timeout elapses.
+//
+// Example:
+//
+//	err := testutil.WaitForHealthy(ctx, lolly.HTTPBaseURL(), 30*time.Second, 200, 404)
+func WaitForHealthy(ctx context.Context, url string, timeout time.Duration, expectedCodes ...int) error {
+	if len(expectedCodes) == 0 {
+		expectedCodes = []int{200}
+	}
+
+	cfg := RetryConfig{
+		Interval: 500 * time.Millisecond,
+		Timeout:  timeout,
+	}
+	// Build the client once instead of once per attempt.
+	client := CreateDefaultHTTPClient()
+
+	return WaitForNoError(ctx, cfg, func() error {
+		resp, err := client.Get(url)
+		if err != nil {
+			return fmt.Errorf("request failed: %w", err)
+		}
+		defer resp.Body.Close()
+
+		for _, code := range expectedCodes {
+			if resp.StatusCode == code {
+				return nil
+			}
+		}
+
+		return fmt.Errorf("unexpected status code: %d (expected one of %v)", resp.StatusCode, expectedCodes)
+	})
+}
+
+// WaitForBackendHealthy waits until every URL in urls answers GET with 200.
+//
+// All URLs are probed in order every 500ms; the first unreachable or non-200
+// backend fails that attempt and the whole list is retried.
+func WaitForBackendHealthy(ctx context.Context, urls []string, timeout time.Duration) error {
+	cfg := RetryConfig{
+		Interval: 500 * time.Millisecond,
+		Timeout:  timeout,
+	}
+	// Build the client once instead of once per attempt.
+	client := CreateDefaultHTTPClient()
+
+	return WaitForNoError(ctx, cfg, func() error {
+		for _, url := range urls {
+			resp, err := client.Get(url)
+			if err != nil {
+				return fmt.Errorf("backend %s not reachable: %w", url, err)
+			}
+			resp.Body.Close()
+			if resp.StatusCode != 200 {
+				return fmt.Errorf("backend %s returned status %d", url, resp.StatusCode)
+			}
+		}
+		return nil
+	})
+}
+
+// Poll calls fn every interval until it reports true.
+//
+// An error from fn counts as "not done yet" and is retried; it surfaces in
+// the returned error only if the wait ends without success.
+func Poll(ctx context.Context, interval, timeout time.Duration, fn func() (bool, error)) error {
+	cfg := RetryConfig{
+		Interval: interval,
+		Timeout:  timeout,
+	}
+
+	return WaitForNoError(ctx, cfg, func() error {
+		done, err := fn()
+		if err != nil {
+			return err
+		}
+		if !done {
+			return fmt.Errorf("poll condition not met")
+		}
+		return nil
+	})
+}
diff --git a/internal/e2e/testutil/retry_test.go b/internal/e2e/testutil/retry_test.go
new file mode 100644
index 0000000..8b3addd
--- /dev/null
+++ b/internal/e2e/testutil/retry_test.go
@@ -0,0 +1,204 @@
+//go:build e2e
+
+package testutil
+
+import (
+	"context"
+	"errors"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestWaitForConditionSuccess covers the case where the condition becomes true.
+func TestWaitForConditionSuccess(t *testing.T) {
+	ctx := context.Background()
+
+	count := 0
+	err := WaitForCondition(ctx, RetryConfig{
+		Interval: 10 * time.Millisecond,
+		Timeout:  100 * time.Millisecond,
+	}, func() bool {
+		count++
+		return count >= 3
+	})
+
+	require.NoError(t, err, "Should succeed when condition is met")
+	assert.GreaterOrEqual(t, count, 3, "Should have retried at least 3 times")
+}
+
+// TestWaitForConditionTimeout covers the timeout case.
+func TestWaitForConditionTimeout(t *testing.T) {
+	ctx := context.Background()
+
+	start := time.Now()
+	err := WaitForCondition(ctx, RetryConfig{
+		Interval: 10 * time.Millisecond,
+		Timeout:  50 * time.Millisecond,
+	}, func() bool {
+		return false // never satisfied
+	})
+
+	elapsed := time.Since(start)
+
+	require.Error(t, err, "Should fail when condition is never met")
+	assert.Contains(t, err.Error(), "condition not met")
+	assert.Less(t, elapsed, time.Second, "Should timeout around the specified duration")
+}
+
+// TestWaitForConditionMaxRetries covers the MaxRetries limit.
+func TestWaitForConditionMaxRetries(t *testing.T) {
+	ctx := context.Background()
+
+	count := 0
+	err := WaitForCondition(ctx, RetryConfig{
+		Interval:   10 * time.Millisecond,
+		Timeout:    1 * time.Second,
+		MaxRetries: 3,
+	}, func() bool {
+		count++
+		return false
+	})
+
+	require.Error(t, err, "Should fail after max retries")
+	assert.Contains(t, err.Error(), "3 retries")
+	assert.Equal(t, 3, count, "Should have retried exactly 3 times")
+}
+
+// TestWaitForConditionContextCancel covers cancellation of the parent context.
+func TestWaitForConditionContextCancel(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+
+	// Cancel after 50ms.
+	go func() {
+		time.Sleep(50 * time.Millisecond)
+		cancel()
+	}()
+
+	err := WaitForCondition(ctx, RetryConfig{
+		Interval: 10 * time.Millisecond,
+		Timeout:  1 * time.Second,
+	}, func() bool {
+		return false
+	})
+
+	require.Error(t, err, "Should fail when context is cancelled")
+	assert.Contains(t, err.Error(), "context canceled")
+}
+
+// TestWaitForNoErrorSuccess covers the case where the operation eventually succeeds.
+func TestWaitForNoErrorSuccess(t *testing.T) {
+	ctx := context.Background()
+
+	count := 0
+	err := WaitForNoError(ctx, RetryConfig{
+		Interval: 10 * time.Millisecond,
+		Timeout:  100 * time.Millisecond,
+	}, func() error {
+		count++
+		if count < 3 {
+			return errors.New("not ready")
+		}
+		return nil
+	})
+
+	require.NoError(t, err, "Should succeed when operation returns nil")
+	assert.GreaterOrEqual(t, count, 3, "Should have retried at least 3 times")
+}
+
+// TestWaitForNoErrorTimeout covers the case where the operation never succeeds.
+func TestWaitForNoErrorTimeout(t *testing.T) {
+	ctx := context.Background()
+
+	err := WaitForNoError(ctx, RetryConfig{
+		Interval: 10 * time.Millisecond,
+		Timeout:  50 * time.Millisecond,
+	}, func() error {
+		return errors.New("always fails")
+	})
+
+	require.Error(t, err, "Should fail when operation always returns error")
+	assert.Contains(t, err.Error(), "operation failed")
+	assert.Contains(t, err.Error(), "always fails")
+}
+
+// TestWaitForNoErrorMaxRetries covers the MaxRetries limit.
+func TestWaitForNoErrorMaxRetries(t *testing.T) {
+	ctx := context.Background()
+
+	count := 0
+	err := WaitForNoError(ctx, RetryConfig{
+		Interval:   10 * time.Millisecond,
+		Timeout:    1 * time.Second,
+		MaxRetries: 2,
+	}, func() error {
+		count++
+		return errors.New("fail")
+	})
+
+	require.Error(t, err, "Should fail after max retries")
+	assert.Contains(t, err.Error(), "2 retries")
+	assert.Equal(t, 2, count, "Should have retried exactly 2 times")
+}
+
+// TestDefaultRetryConfig pins the default configuration values.
+func TestDefaultRetryConfig(t *testing.T) {
+	assert.Equal(t, 500*time.Millisecond, DefaultRetryConfig.Interval)
+	assert.Equal(t, 30*time.Second, DefaultRetryConfig.Timeout)
+	assert.Equal(t, 0, DefaultRetryConfig.MaxRetries)
+}
+
+// TestRetryConfigZeroValues checks that a zero-value config uses the defaults.
+func TestRetryConfigZeroValues(t *testing.T) {
+	ctx := context.Background()
+
+	// A zero-value config should fall back to the default values.
+	count := 0
+	err := WaitForCondition(ctx, RetryConfig{}, func() bool {
+		count++
+		return count >= 1
+	})
+
+	require.NoError(t, err, "Should use default config values")
+}
+
+// TestPollSuccess covers a poll that eventually reports done.
+func TestPollSuccess(t *testing.T) {
+	ctx := context.Background()
+
+	count := 0
+	err := Poll(ctx, 10*time.Millisecond, 100*time.Millisecond, func() (bool, error) {
+		count++
+		return count >= 3, nil
+	})
+
+	require.NoError(t, err, "Poll should succeed")
+	assert.GreaterOrEqual(t, count, 3)
+}
+
+// TestPollError covers a poll whose function keeps returning an error.
+func TestPollError(t *testing.T) {
+	ctx := context.Background()
+
+	err := Poll(ctx, 10*time.Millisecond, 50*time.Millisecond, func() (bool, error) {
+		return false, errors.New("poll error")
+	})
+
+	require.Error(t, err, "Poll should fail with error")
+	assert.Contains(t, err.Error(), "poll error")
+}
+
+// TestWaitForHealthySuccess is a placeholder for WaitForHealthy.
+func TestWaitForHealthySuccess(t *testing.T) {
+	// This test needs an HTTP server and is verified in integration tests.
+	// Skip rather than pass so the run does not report coverage it lacks.
+	t.Skip("WaitForHealthy needs an HTTP server; verified in integration tests")
+}
+
+// TestWaitForBackendHealthySuccess is a placeholder for WaitForBackendHealthy.
+func TestWaitForBackendHealthySuccess(t *testing.T) {
+	// This test needs an HTTP server and is verified in integration tests.
+	t.Skip("WaitForBackendHealthy needs an HTTP server; verified in integration tests")
+}