diff --git a/CHANGELOG.md b/CHANGELOG.md index 0756d48d25..9e7cf316a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,7 @@ * [BUGFIX] Query Frontend: Fix native histogram responses not being handled correctly in `minTime()` sort ordering for split_by_interval merge. #7555 * [BUGFIX] Distributor: Release the push worker pool goroutines on shutdown by stopping the async executor during the stopping phase when `-distributor.num-push-workers` is set. #7602 * [BUGFIX] Querier: Fix unbounded resource leak in the bucket-scan blocks finder (used when the bucket index is disabled). Per-tenant metadata fetchers, their Prometheus registries, and on-disk meta caches are now evicted once a tenant is no longer active, instead of being retained for the lifetime of the process. #7573 +* [BUGFIX] Querier: Fix flake in integration tests TestQuerierWithStoreGatewayDataBytesLimits and TestQuerierWithBlocksStorageLimits by waiting for the querier to see the store-gateway ACTIVE in the ring before querying. #7614 ## 1.21.0 2026-04-24 diff --git a/integration/querier_test.go b/integration/querier_test.go index 2bba87703f..f5683414b6 100644 --- a/integration/querier_test.go +++ b/integration/querier_test.go @@ -474,6 +474,14 @@ func TestQuerierWithBlocksStorageLimits(t *testing.T) { require.NoError(t, storeGateway.WaitSumMetrics(e2e.Equals(512), "cortex_ring_tokens_total")) require.NoError(t, storeGateway.WaitSumMetrics(e2e.Equals(1), "cortex_bucket_store_blocks_loaded")) + // Wait until the store-gateway is ACTIVE in the querier's view of the store-gateway ring. The + // store-gateway registers JOINING (with tokens) and switches to ACTIVE only after the initial + // blocks sync, so the waits above can pass while the querier would still fail queries with + // "at least 1 healthy replica required, could only find 0" (HTTP 500) instead of the expected 422. + require.NoError(t, querier.WaitSumMetricsWithOptions(e2e.Equals(1), []string{"cortex_ring_members"}, e2e.WithLabelMatchers( + labels.MustNewMatcher(labels.MatchEqual, "name", "store-gateway-client"), + labels.MustNewMatcher(labels.MatchEqual, "state", "ACTIVE")))) + // Query back the series. c, err = e2ecortex.NewClient("", querier.HTTPEndpoint(), "", "", "user-1") require.NoError(t, err) @@ -571,6 +579,14 @@ func TestQuerierWithStoreGatewayDataBytesLimits(t *testing.T) { require.NoError(t, storeGateway.WaitSumMetrics(e2e.Equals(512), "cortex_ring_tokens_total")) require.NoError(t, storeGateway.WaitSumMetrics(e2e.Equals(1), "cortex_bucket_store_blocks_loaded")) + // Wait until the store-gateway is ACTIVE in the querier's view of the store-gateway ring. The + // store-gateway registers JOINING (with tokens) and switches to ACTIVE only after the initial + // blocks sync, so the waits above can pass while the querier would still fail queries with + // "at least 1 healthy replica required, could only find 0" (HTTP 500) instead of the expected 422. + require.NoError(t, querier.WaitSumMetricsWithOptions(e2e.Equals(1), []string{"cortex_ring_members"}, e2e.WithLabelMatchers( + labels.MustNewMatcher(labels.MatchEqual, "name", "store-gateway-client"), + labels.MustNewMatcher(labels.MatchEqual, "state", "ACTIVE")))) + // Query back the series. c, err = e2ecortex.NewClient("", querier.HTTPEndpoint(), "", "", "user-1") require.NoError(t, err)