Skip to content

Commit abd5143

Browse files
committed
add queue-state count regression benchmark
This adds a benchmark on top of #1203 to make the queue-state count query regression easy to reproduce and discuss. The query in that branch now groups by `(queue, state)` while filtering only on `queue`, which no longer lines up with the existing `(state, queue, priority, scheduled_at, id)` index on `river_job`. The new benchmark seeds a migrated `river_job` table and compares the current `JobCountByQueueAndState` implementation against the legacy query shape for a few queue-list sizes. That gives us a durable way to show the planner behavior and quantify the difference before deciding whether to reshape the SQL or add another index.
1 parent 3d3c21a commit abd5143

File tree

1 file changed

+180
-0
lines changed

1 file changed

+180
-0
lines changed
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
package riverpgxv5_test
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"testing"
7+
8+
"github.com/jackc/pgx/v5/pgxpool"
9+
"github.com/stretchr/testify/require"
10+
11+
"github.com/riverqueue/river/riverdbtest"
12+
"github.com/riverqueue/river/riverdriver"
13+
"github.com/riverqueue/river/riverdriver/riverpgxv5"
14+
"github.com/riverqueue/river/rivershared/riversharedtest"
15+
)
16+
17+
func BenchmarkJobCountByQueueAndState(b *testing.B) {
18+
ctx := context.Background()
19+
20+
dbPool := riversharedtest.DBPool(ctx, b)
21+
driver := riverpgxv5.New(dbPool)
22+
schema := riverdbtest.TestSchema(ctx, b, driver, nil)
23+
24+
seedQueueStateCountBenchmarkData(ctx, b, dbPool, schema)
25+
26+
queueNamesTwo := []string{"queue_001", "queue_002"}
27+
queueNamesTen := []string{
28+
"queue_001", "queue_002", "queue_003", "queue_004", "queue_005",
29+
"queue_006", "queue_007", "queue_008", "queue_009", "queue_010",
30+
}
31+
32+
for _, benchmarkCase := range []struct {
33+
name string
34+
queueNames []string
35+
}{
36+
{name: "TwoQueues", queueNames: queueNamesTwo},
37+
{name: "TenQueues", queueNames: queueNamesTen},
38+
} {
39+
b.Run("Current/"+benchmarkCase.name, func(b *testing.B) {
40+
b.ReportAllocs()
41+
42+
params := &riverdriver.JobCountByQueueAndStateParams{
43+
QueueNames: benchmarkCase.queueNames,
44+
Schema: schema,
45+
}
46+
47+
b.ResetTimer()
48+
for range b.N {
49+
results, err := driver.GetExecutor().JobCountByQueueAndState(ctx, params)
50+
require.NoError(b, err)
51+
require.NotEmpty(b, results)
52+
}
53+
})
54+
55+
b.Run("Legacy/"+benchmarkCase.name, func(b *testing.B) {
56+
b.ReportAllocs()
57+
58+
query := legacyJobCountByQueueAndStateQuery(schema)
59+
60+
b.ResetTimer()
61+
for range b.N {
62+
rows, err := dbPool.Query(ctx, query, benchmarkCase.queueNames)
63+
require.NoError(b, err)
64+
65+
var numRows int
66+
for rows.Next() {
67+
var (
68+
countAvailable int64
69+
countRunning int64
70+
queue string
71+
)
72+
73+
require.NoError(b, rows.Scan(&queue, &countAvailable, &countRunning))
74+
numRows++
75+
}
76+
77+
rows.Close()
78+
require.NoError(b, rows.Err())
79+
require.Equal(b, len(benchmarkCase.queueNames), numRows)
80+
}
81+
})
82+
}
83+
}
84+
85+
// legacyJobCountByQueueAndStateQuery renders the legacy count query, which
// filters and groups on queue alone inside per-state CTEs so that it lines
// up with the (state, queue, ...) index on river_job. The schema name is
// interpolated directly (trusted test schema, not user input); $1 is the
// text[] of queue names.
func legacyJobCountByQueueAndStateQuery(schema string) string {
	// The schema-qualified table is referenced twice; use an indexed
	// verb so it's passed to Sprintf only once.
	jobTable := schema + ".river_job"

	return fmt.Sprintf(`
WITH all_queues AS (
    SELECT DISTINCT unnest($1::text[])::text AS queue
),

running_job_counts AS (
    SELECT
        queue,
        COUNT(*) AS count
    FROM %[1]s
    WHERE queue = ANY($1::text[])
        AND state = 'running'
    GROUP BY queue
),

available_job_counts AS (
    SELECT
        queue,
        COUNT(*) AS count
    FROM %[1]s
    WHERE queue = ANY($1::text[])
        AND state = 'available'
    GROUP BY queue
)

SELECT
    all_queues.queue,
    COALESCE(available_job_counts.count, 0) AS count_available,
    COALESCE(running_job_counts.count, 0) AS count_running
FROM
    all_queues
LEFT JOIN
    running_job_counts ON all_queues.queue = running_job_counts.queue
LEFT JOIN
    available_job_counts ON all_queues.queue = available_job_counts.queue
ORDER BY all_queues.queue ASC
`, jobTable)
}
124+
125+
// seedQueueStateCountBenchmarkData inserts 200,000 synthetic rows into the
// migrated river_job table in schema, cycling rows through all eight job
// states (gs % 8) and 100 queues (queue_001..queue_100) with scheduled_at
// values spread over the preceding ~100,000 seconds, then runs ANALYZE so
// the planner has fresh statistics before the benchmark queries run.
func seedQueueStateCountBenchmarkData(ctx context.Context, b *testing.B, dbPool *pgxpool.Pool, schema string) {
	b.Helper()

	// %% escapes a literal % (SQL modulo) inside fmt.Sprintf. finalized_at
	// is populated only for finalized states (cancelled/completed/discarded),
	// mirroring river_job's finalized-state constraint.
	query := fmt.Sprintf(`
	WITH generated_jobs AS (
		SELECT
			CASE gs %% 8
				WHEN 0 THEN 'running'
				WHEN 1 THEN 'available'
				WHEN 2 THEN 'completed'
				WHEN 3 THEN 'cancelled'
				WHEN 4 THEN 'discarded'
				WHEN 5 THEN 'retryable'
				WHEN 6 THEN 'scheduled'
				ELSE 'pending'
			END AS state,
			now() - ((gs %% 100000)::text || ' seconds')::interval AS scheduled_at,
			'queue_' || lpad(((gs %% 100) + 1)::text, 3, '0') AS queue
		FROM generate_series(1, 200000) AS gs
	)
	INSERT INTO %s.river_job (
		args,
		finalized_at,
		kind,
		max_attempts,
		metadata,
		queue,
		scheduled_at,
		state
	)
	SELECT
		'{}'::jsonb,
		CASE
			WHEN state IN ('cancelled', 'completed', 'discarded') THEN scheduled_at + interval '1 second'
			ELSE NULL
		END AS finalized_at,
		'benchmark',
		25,
		'{}'::jsonb,
		queue,
		scheduled_at,
		state::%s.river_job_state
	FROM generated_jobs;

	ANALYZE %s.river_job;
	`, schema, schema, schema)

	// Single round trip for INSERT + ANALYZE; pgx executes a multi-statement
	// string via the simple query protocol when no arguments are passed —
	// NOTE(review): relies on no query args being added here later.
	_, err := dbPool.Exec(ctx, query)
	require.NoError(b, err)

	// Sanity check: the seed produced exactly the expected number of rows.
	row := dbPool.QueryRow(ctx, "SELECT count(*) FROM "+schema+".river_job")

	var numRows int
	require.NoError(b, row.Scan(&numRows))
	require.Equal(b, 200000, numRows)
}

0 commit comments

Comments
 (0)