feat(accesslog): add deterministic sampling with sample_rate config
Add logging.access.sample_rate config (0.0-1.0) for deterministic request sampling. 5xx errors are always logged; 2xx/3xx/4xx responses follow the configured rate. Uses an atomic.Uint64 counter for lock-free, zero-allocation sampling decisions.

Includes test updates to verify:
- sample_rate=1.0 logs all requests
- sample_rate=0.0 logs only 5xx
- 5xx are always logged regardless of rate
This commit is contained in:
parent
1128eb644f
commit
047e033af5
@ -50,12 +50,14 @@ type AccessLog struct {
|
|||||||
// - *AccessLog: 访问日志中间件实例
|
// - *AccessLog: 访问日志中间件实例
|
||||||
func New(cfg *config.LoggingConfig) *AccessLog {
|
func New(cfg *config.LoggingConfig) *AccessLog {
|
||||||
sampleRate := cfg.Access.SampleRate
|
sampleRate := cfg.Access.SampleRate
|
||||||
if sampleRate <= 0.0 || sampleRate > 1.0 {
|
// sampleRate=0 明确表示禁用访问日志
|
||||||
|
// sampleRate<0 或 >1 修正为 1.0(全量记录)
|
||||||
|
if sampleRate < 0.0 || sampleRate > 1.0 {
|
||||||
sampleRate = 1.0
|
sampleRate = 1.0
|
||||||
}
|
}
|
||||||
|
|
||||||
var sampleInterval uint64 = 1
|
var sampleInterval uint64 = 1
|
||||||
if sampleRate < 1.0 {
|
if sampleRate > 0.0 && sampleRate < 1.0 {
|
||||||
// 使用 1000 作为基数以提高精度,例如 0.123 -> 间隔约 8
|
// 使用 1000 作为基数以提高精度,例如 0.123 -> 间隔约 8
|
||||||
sampleInterval = uint64((1.0 / sampleRate) + 0.5)
|
sampleInterval = uint64((1.0 / sampleRate) + 0.5)
|
||||||
if sampleInterval < 1 {
|
if sampleInterval < 1 {
|
||||||
@ -81,17 +83,21 @@ func (a *AccessLog) Name() string {
|
|||||||
// shouldLog 判断当前请求是否应记录访问日志。
|
// shouldLog 判断当前请求是否应记录访问日志。
|
||||||
//
|
//
|
||||||
// 规则:
|
// 规则:
|
||||||
|
// - 5xx 服务器错误始终记录(便于排查错误)
|
||||||
|
// - sampleRate=0 时不记录 2xx/3xx/4xx
|
||||||
// - 采样率为 1.0 时始终记录
|
// - 采样率为 1.0 时始终记录
|
||||||
// - 非 2xx 响应始终记录(便于排查错误)
|
// - 其他情况按 sampleRate 采样
|
||||||
// - 2xx 响应按采样率决定是否记录
|
|
||||||
//
|
//
|
||||||
// 使用原子计数器实现无锁、零分配采样。
|
// 使用原子计数器实现无锁、零分配采样。
|
||||||
func (a *AccessLog) shouldLog(status int) bool {
|
func (a *AccessLog) shouldLog(status int) bool {
|
||||||
if a.sampleRate >= 1.0 {
|
// 5xx 服务器错误始终记录
|
||||||
|
if status >= 500 {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
// 非成功响应始终记录
|
if a.sampleRate == 0.0 {
|
||||||
if status < 200 || status >= 300 {
|
return false
|
||||||
|
}
|
||||||
|
if a.sampleRate >= 1.0 {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
// 确定性采样:每 sampleInterval 个请求记录一个
|
// 确定性采样:每 sampleInterval 个请求记录一个
|
||||||
|
|||||||
@ -91,17 +91,24 @@ func TestAccessLog_SampleRateAlwaysRecordErrors(t *testing.T) {
|
|||||||
al := New(&config.LoggingConfig{
|
al := New(&config.LoggingConfig{
|
||||||
Access: config.AccessLogConfig{
|
Access: config.AccessLogConfig{
|
||||||
Format: "json",
|
Format: "json",
|
||||||
SampleRate: 0.0, // 理论上不采样成功请求,但错误始终记录
|
SampleRate: 0.0, // 理论上不采样成功请求,但 5xx 始终记录
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
// 非 2xx 请求应始终记录
|
// 5xx 请求应始终记录
|
||||||
for _, status := range []int{199, 300, 400, 500} {
|
for _, status := range []int{500, 502, 503, 504} {
|
||||||
if !al.shouldLog(status) {
|
if !al.shouldLog(status) {
|
||||||
t.Errorf("status %d should always be logged regardless of sample rate", status)
|
t.Errorf("status %d should always be logged regardless of sample rate", status)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 2xx/3xx/4xx 请求按采样率(0% 不记录)
|
||||||
|
for _, status := range []int{200, 301, 404} {
|
||||||
|
if al.shouldLog(status) {
|
||||||
|
t.Errorf("status %d should not be logged with sample_rate=0", status)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
_ = al.Close()
|
_ = al.Close()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user