From d03c180f629e2d8d717a5b75a984cfc3ec6601db Mon Sep 17 00:00:00 2001
From: xfy
Date: Thu, 4 Jun 2026 00:12:29 +0800
Subject: [PATCH] perf(loadbalance): replace failMu mutex with atomic operations in Target
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove sync.Mutex from Target's failure tracking hot path:
- failCount int64 → atomic.Int64
- failedUntil int64 → atomic.Int64
- IsAvailable(): Load() instead of Lock/Unlock, eliminating mutex
  from every per-target check in every LB Select() call
- RecordFailure(): Add(1) + Store() instead of Lock/Unlock
- RecordSuccess(): Store(0) instead of Lock/Unlock

This removes the only mutex acquisition in the load balancing
selection path. Every Select() call iterates targets and calls
IsAvailable() on each — previously acquiring failMu per target
when MaxFails > 0. Now fully lock-free.
---
 internal/loadbalance/balancer.go      | 39 +++++++++++----------------
 internal/loadbalance/balancer_test.go |  4 +--
 2 files changed, 17 insertions(+), 26 deletions(-)

diff --git a/internal/loadbalance/balancer.go b/internal/loadbalance/balancer.go
index 832949d..88e2dfd 100644
--- a/internal/loadbalance/balancer.go
+++ b/internal/loadbalance/balancer.go
@@ -63,10 +63,8 @@ type Target struct {
 	// ProxyURI 代理传递的 URI 路径
 	ProxyURI string
 
-	// failMu 保护 failCount 和 failedUntil 的协调更新
-	failMu      sync.Mutex
-	failCount   int64
-	failedUntil int64
+	failCount   atomic.Int64
+	failedUntil atomic.Int64
 
 	// 慢启动相关字段
 	// EffectiveWeight 当前有效权重(慢启动期间动态变化)
@@ -298,17 +296,17 @@ func (t *Target) IsAvailable() bool {
 		return false
 	}
 	if t.MaxFails > 0 {
-		t.failMu.Lock()
-		if t.failCount >= t.MaxFails && time.Now().UnixNano() < t.failedUntil {
-			t.failMu.Unlock()
-			return false
+		failCount := t.failCount.Load()
+		if failCount >= t.MaxFails {
+			failedUntil := t.failedUntil.Load()
+			if time.Now().UnixNano() < failedUntil {
+				return false
+			}
+			if failedUntil > 0 {
+				t.failCount.Store(0)
+				t.failedUntil.Store(0)
+			}
 		}
-		// 冷却已过期,重置软状态
-		if t.failCount >= t.MaxFails && t.failedUntil > 0 {
-			t.failCount = 0
-			t.failedUntil = 0
-		}
-		t.failMu.Unlock()
 	}
 	return true
 }
@@ -320,17 +318,14 @@ func (t *Target) RecordFailure() int64 {
 	if t.MaxFails <= 0 {
 		return 0
 	}
-	t.failMu.Lock()
-	t.failCount++
-	count := t.failCount
+	count := t.failCount.Add(1)
 	if count >= t.MaxFails {
 		timeout := t.FailTimeout
 		if timeout <= 0 {
 			timeout = 10 * time.Second
 		}
-		t.failedUntil = time.Now().Add(timeout).UnixNano()
+		t.failedUntil.Store(time.Now().Add(timeout).UnixNano())
 	}
-	t.failMu.Unlock()
 	return count
 }
 
@@ -340,10 +335,8 @@ func (t *Target) RecordSuccess() {
 	if t.MaxFails <= 0 {
 		return
 	}
-	t.failMu.Lock()
-	t.failCount = 0
-	t.failedUntil = 0
-	t.failMu.Unlock()
+	t.failCount.Store(0)
+	t.failedUntil.Store(0)
 }
 
 // IsBackup 返回目标是否为备份服务器。
diff --git a/internal/loadbalance/balancer_test.go b/internal/loadbalance/balancer_test.go
index bc8d1d7..3fa122b 100644
--- a/internal/loadbalance/balancer_test.go
+++ b/internal/loadbalance/balancer_test.go
@@ -1819,11 +1819,9 @@ func TestTargetRecordSuccess(t *testing.T) {
 	target.RecordFailure()
 	target.RecordFailure()
 	target.RecordSuccess()
-	target.failMu.Lock()
-	if target.failCount != 0 {
+	if target.failCount.Load() != 0 {
 		t.Error("fail count should be reset after success")
 	}
-	target.failMu.Unlock()
 	if !target.IsAvailable() {
 		t.Error("target should be available after success resets cooldown")
 	}