Skip to content

Commit b21886b

Browse files
tglsfdcreshke
authored andcommitted
Fix handling of bare boolean expressions in mcv_get_match_bitmap.
Since v14, the extended stats machinery will try to estimate for otherwise-unsupported boolean expressions if they match an expression available from an extended stats object. mcv.c did not get the memo about this, and would spit up with "unknown clause type". Fortunately the case is easy to handle, since we can expect the expression yields boolean. While here, replace some not-terribly-on-point assertions with simpler runtime tests for lookup failure. That seems appropriate so that we get an elog not a crash if we somehow get to the new it-should-be-a-bool-expression code with a subexpression that doesn't match any stats column. Per report from Danny Shemesh. Thanks to Justin Pryzby for preliminary investigation. Discussion: https://postgr.es/m/CAFZC=QqD6=27wQPOW1pbRa98KPyuyn+7cL_Ay_Ck-roZV84vHg@mail.gmail.com
1 parent b80b084 commit b21886b

4 files changed

Lines changed: 76 additions & 34 deletions

File tree

src/backend/statistics/mcv.c

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1531,13 +1531,13 @@ pg_mcv_list_send(PG_FUNCTION_ARGS)
15311531
/*
15321532
* match the attribute/expression to a dimension of the statistic
15331533
*
1534-
* Match the attribute/expression to statistics dimension. Optionally
1535-
* determine the collation.
1534+
* Returns the zero-based index of the matching statistics dimension.
1535+
* Optionally determines the collation.
15361536
*/
15371537
static int
15381538
mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
15391539
{
1540-
int idx = -1;
1540+
int idx;
15411541

15421542
if (IsA(expr, Var))
15431543
{
@@ -1549,20 +1549,19 @@ mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
15491549

15501550
idx = bms_member_index(keys, var->varattno);
15511551

1552-
/* make sure the index is valid */
1553-
Assert((idx >= 0) && (idx <= bms_num_members(keys)));
1552+
if (idx < 0)
1553+
elog(ERROR, "variable not found in statistics object");
15541554
}
15551555
else
15561556
{
1557+
/* expression - lookup in stats expressions */
15571558
ListCell *lc;
15581559

1559-
/* expressions are stored after the simple columns */
1560-
idx = bms_num_members(keys);
1561-
15621560
if (collid)
15631561
*collid = exprCollation(expr);
15641562

1565-
/* expression - lookup in stats expressions */
1563+
/* expressions are stored after the simple columns */
1564+
idx = bms_num_members(keys);
15661565
foreach(lc, exprs)
15671566
{
15681567
Node *stat_expr = (Node *) lfirst(lc);
@@ -1573,13 +1572,10 @@ mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
15731572
idx++;
15741573
}
15751574

1576-
/* make sure the index is valid */
1577-
Assert((idx >= bms_num_members(keys)) &&
1578-
(idx <= bms_num_members(keys) + list_length(exprs)));
1575+
if (lc == NULL)
1576+
elog(ERROR, "expression not found in statistics object");
15791577
}
15801578

1581-
Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
1582-
15831579
return idx;
15841580
}
15851581

@@ -1659,8 +1655,6 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
16591655
/* match the attribute/expression to a dimension of the statistic */
16601656
idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
16611657

1662-
Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
1663-
16641658
/*
16651659
* Walk through the MCV items and evaluate the current clause. We
16661660
* can skip items that were already ruled out, and terminate if
@@ -1944,7 +1938,30 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
19441938
}
19451939
}
19461940
else
1947-
elog(ERROR, "unknown clause type: %d", clause->type);
1941+
{
1942+
/* Otherwise, it must be a bare boolean-returning expression */
1943+
int idx;
1944+
1945+
/* match the expression to a dimension of the statistic */
1946+
idx = mcv_match_expression(clause, keys, exprs, NULL);
1947+
1948+
/*
1949+
* Walk through the MCV items and evaluate the current clause. We
1950+
* can skip items that were already ruled out, and terminate if
1951+
* there are no remaining MCV items that might possibly match.
1952+
*/
1953+
for (i = 0; i < mcvlist->nitems; i++)
1954+
{
1955+
bool match;
1956+
MCVItem *item = &mcvlist->items[i];
1957+
1958+
/* "match" just means it's bool TRUE */
1959+
match = !item->isnull[idx] && DatumGetBool(item->values[idx]);
1960+
1961+
/* now, update the match bitmap, depending on OR/AND type */
1962+
matches[i] = RESULT_MERGE(matches[i], is_or, match);
1963+
}
1964+
}
19481965
}
19491966

19501967
return matches;

src/test/regress/expected/stats_ext.out

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -270,14 +270,23 @@ SELECT stxkind FROM pg_statistic_ext WHERE stxname = 'ab1_exprstat_3';
270270
CREATE STATISTICS ab1_exprstat_4 ON date_trunc('day', d) FROM ab1;
271271
-- date_trunc on timestamp is immutable
272272
CREATE STATISTICS ab1_exprstat_5 ON date_trunc('day', c) FROM ab1;
273+
-- check use of a boolean-returning expression
274+
CREATE STATISTICS ab1_exprstat_6 ON
275+
(case a when 1 then true else false end), b FROM ab1;
273276
-- insert some data and run analyze, to test that these cases build properly
274277
INSERT INTO ab1
275-
SELECT
276-
generate_series(1,10),
277-
generate_series(1,10),
278-
generate_series('2020-10-01'::timestamp, '2020-10-10'::timestamp, interval '1 day'),
279-
generate_series('2020-10-01'::timestamptz, '2020-10-10'::timestamptz, interval '1 day');
278+
SELECT x / 10, x / 3,
279+
'2020-10-01'::timestamp + x * interval '1 day',
280+
'2020-10-01'::timestamptz + x * interval '1 day'
281+
FROM generate_series(1, 100) x;
280282
ANALYZE ab1;
283+
-- apply some stats
284+
SELECT * FROM check_estimated_rows('SELECT * FROM ab1 WHERE (case a when 1 then true else false end) AND b=2');
285+
estimated | actual
286+
-----------+--------
287+
1 | 0
288+
(1 row)
289+
281290
DROP TABLE ab1;
282291
-- Verify supported object types for extended statistics
283292
CREATE schema tststats;

src/test/regress/expected/stats_ext_optimizer.out

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -278,14 +278,23 @@ SELECT stxkind FROM pg_statistic_ext WHERE stxname = 'ab1_exprstat_3';
278278
CREATE STATISTICS ab1_exprstat_4 ON date_trunc('day', d) FROM ab1;
279279
-- date_trunc on timestamp is immutable
280280
CREATE STATISTICS ab1_exprstat_5 ON date_trunc('day', c) FROM ab1;
281+
-- check use of a boolean-returning expression
282+
CREATE STATISTICS ab1_exprstat_6 ON
283+
(case a when 1 then true else false end), b FROM ab1;
281284
-- insert some data and run analyze, to test that these cases build properly
282285
INSERT INTO ab1
283-
SELECT
284-
generate_series(1,10),
285-
generate_series(1,10),
286-
generate_series('2020-10-01'::timestamp, '2020-10-10'::timestamp, interval '1 day'),
287-
generate_series('2020-10-01'::timestamptz, '2020-10-10'::timestamptz, interval '1 day');
286+
SELECT x / 10, x / 3,
287+
'2020-10-01'::timestamp + x * interval '1 day',
288+
'2020-10-01'::timestamptz + x * interval '1 day'
289+
FROM generate_series(1, 100) x;
288290
ANALYZE ab1;
291+
-- apply some stats
292+
SELECT * FROM check_estimated_rows('SELECT * FROM ab1 WHERE (case a when 1 then true else false end) AND b=2');
293+
estimated | actual
294+
-----------+--------
295+
3 | 0
296+
(1 row)
297+
289298
DROP TABLE ab1;
290299
-- Verify supported object types for extended statistics
291300
CREATE schema tststats;
@@ -2075,7 +2084,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY
20752084
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)');
20762085
estimated | actual
20772086
-----------+--------
2078-
4 | 50
2087+
72 | 50
20792088
(1 row)
20802089

20812090
-- create statistics
@@ -2228,7 +2237,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY
22282237
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)');
22292238
estimated | actual
22302239
-----------+--------
2231-
4 | 50
2240+
72 | 50
22322241
(1 row)
22332242

22342243
-- check change of unrelated column type does not reset the MCV statistics

src/test/regress/sql/stats_ext.sql

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -176,14 +176,21 @@ CREATE STATISTICS ab1_exprstat_4 ON date_trunc('day', d) FROM ab1;
176176
-- date_trunc on timestamp is immutable
177177
CREATE STATISTICS ab1_exprstat_5 ON date_trunc('day', c) FROM ab1;
178178

179+
-- check use of a boolean-returning expression
180+
CREATE STATISTICS ab1_exprstat_6 ON
181+
(case a when 1 then true else false end), b FROM ab1;
182+
179183
-- insert some data and run analyze, to test that these cases build properly
180184
INSERT INTO ab1
181-
SELECT
182-
generate_series(1,10),
183-
generate_series(1,10),
184-
generate_series('2020-10-01'::timestamp, '2020-10-10'::timestamp, interval '1 day'),
185-
generate_series('2020-10-01'::timestamptz, '2020-10-10'::timestamptz, interval '1 day');
185+
SELECT x / 10, x / 3,
186+
'2020-10-01'::timestamp + x * interval '1 day',
187+
'2020-10-01'::timestamptz + x * interval '1 day'
188+
FROM generate_series(1, 100) x;
186189
ANALYZE ab1;
190+
191+
-- apply some stats
192+
SELECT * FROM check_estimated_rows('SELECT * FROM ab1 WHERE (case a when 1 then true else false end) AND b=2');
193+
187194
DROP TABLE ab1;
188195

189196
-- Verify supported object types for extended statistics

0 commit comments

Comments
 (0)