diff --git a/CHANGELOG.md b/CHANGELOG.md index 0756d48d25..01b98d63d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,7 @@ * [BUGFIX] Query Frontend: Fix native histogram responses not being handled correctly in `minTime()` sort ordering for split_by_interval merge. #7555 * [BUGFIX] Distributor: Release the push worker pool goroutines on shutdown by stopping the async executor during the stopping phase when `-distributor.num-push-workers` is set. #7602 * [BUGFIX] Querier: Fix unbounded resource leak in the bucket-scan blocks finder (used when the bucket index is disabled). Per-tenant metadata fetchers, their Prometheus registries, and on-disk meta caches are now evicted once a tenant is no longer active, instead of being retained for the lifetime of the process. #7573 +* [BUGFIX] Querier: Increment `cortex_query_evictions_total` and deregister the victim before cancelling it in the query evictor, fixing flaky `TestPrometheusMetrics_IncrementedCorrectly` and double-counted evictions of still-unwinding queries. #7616 ## 1.21.0 2026-04-24 diff --git a/pkg/util/queryeviction/evictor.go b/pkg/util/queryeviction/evictor.go index 690d3fb875..a6c778411e 100644 --- a/pkg/util/queryeviction/evictor.go +++ b/pkg/util/queryeviction/evictor.go @@ -99,6 +99,15 @@ func (e *QueryEvictor) running(ctx context.Context) error { // Evict each victim. for _, victim := range victims { metricValue := e.registry.metric(victim.Stats) + // Account the eviction before cancelling the victim: Cancel is the + // externally observable commit point, so observers synchronized on + // the cancellation must already see it in evictionsTotal. + e.evictionsTotal.WithLabelValues(string(breachedResource)).Inc() + // Retire the victim before cancelling it so later cycles can never + // re-pick (and double-count) an already-cancelled query that is + // still unwinding. trackedQuery.Exec's own deferred Deregister + // remains a safe no-op. + e.registry.Deregister(victim.QueryID) victim.Cancel() level.Warn(e.logger).Log( @@ -112,8 +121,6 @@ func (e *QueryEvictor) running(ctx context.Context) error { "metric", e.cfg.EvictionMetric, "metric_value", metricValue, ) - - e.evictionsTotal.WithLabelValues(string(breachedResource)).Inc() } // Enter cooldown. diff --git a/pkg/util/queryeviction/evictor_test.go b/pkg/util/queryeviction/evictor_test.go index a6bbe92491..699902a8f3 100644 --- a/pkg/util/queryeviction/evictor_test.go +++ b/pkg/util/queryeviction/evictor_test.go @@ -2,6 +2,7 @@ package queryeviction import ( "context" + "sync" "testing" "time" @@ -210,6 +211,66 @@ func TestPrometheusMetrics_IncrementedCorrectly(t *testing.T) { assert.Equal(t, float64(3), promtest.ToFloat64(evictor.evictionsTotal.WithLabelValues(string(resource.CPU)))) } +func TestEvictionsTotal_IncrementedBeforeCancelObserved(t *testing.T) { + reg := NewQueryRegistry(testMetricFunc) + evictor := startEvictor(t, newMockMonitor(0.95, 0.0), reg, testEvictorConfig(0.9, 0, 0)) + + ctx, cancel := context.WithCancel(context.Background()) + stats := &querier_stats.QueryStats{} + stats.AddFetchedSamples(1000) + + // Read the eviction counter inside the cancel callback, i.e. on the evictor + // goroutine at the instant the cancellation signal fires: the eviction must + // already be accounted in evictionsTotal before it becomes externally + // observable. The sync.Once keeps the callback idempotent so a re-picked + // victim cannot close the channel twice. + var ( + once sync.Once + counterAtCancel float64 + ) + evicted := make(chan struct{}) + reg.Register(func() { + once.Do(func() { + counterAtCancel = promtest.ToFloat64(evictor.evictionsTotal.WithLabelValues(string(resource.CPU))) + cancel() + close(evicted) + }) + }, stats, "up", "user1", "") + _ = ctx + + waitEvicted(t, evicted) + assert.Equal(t, float64(1), counterAtCancel) +} + +func TestEvictedVictim_RemovedFromRegistry(t *testing.T) { + reg := NewQueryRegistry(testMetricFunc) + + ctx, cancel := context.WithCancel(context.Background()) + stats := &querier_stats.QueryStats{} + stats.AddFetchedSamples(1000) + + // The sync.Once keeps the callback idempotent: if the evictor wrongly + // re-picked the still-registered victim on a later cycle, the second + // Cancel would otherwise panic on the double close. + var once sync.Once + evicted := make(chan struct{}) + reg.Register(func() { + once.Do(func() { + cancel() + close(evicted) + }) + }, stats, "up", "user1", "") + _ = ctx + + startEvictor(t, newMockMonitor(0.95, 0.0), reg, testEvictorConfig(0.9, 0, 0)) + waitEvicted(t, evicted) + + // The evictor deregisters the victim before cancelling it, so once the + // eviction is observable the victim must already have left the registry + // and can never be re-picked (and double-counted) while still unwinding. + assert.Equal(t, 0, reg.Len()) +} + func TestNewQueryEvictor_ReturnsNilWhenDisabled(t *testing.T) { cfg := configs.EvictionConfig{CheckInterval: time.Second, EvictionMetric: "fetched_samples", MaxEvictionsPerCycle: 1} evictor := NewQueryEvictor(newMockMonitor(0, 0), NewQueryRegistry(testMetricFunc), cfg, log.NewNopLogger(), prometheus.NewPedanticRegistry(), "test")