Skip to content

Commit b80b084

Browse files
tglsfdcreshke
authored andcommitted
Fix non-bulletproof ScalarArrayOpExpr code for extended statistics.
statext_is_compatible_clause_internal() checked that the arguments of a ScalarArrayOpExpr are one Var and one Const, but it would allow cases where the Const was on the left. Subsequent uses of the clause are not expecting that and would suffer assertion failures or core dumps. mcv.c also had not bothered to cope with the case of a NULL array constant, which seems really unacceptably sloppy of somebody. (Although our tools failed us there too, since AFAIK neither Coverity nor any compiler warned of the obvious use-of-uninitialized-variable condition.) It seems best to handle that by having statext_is_compatible_clause_internal() reject it. Noted while fixing bug #17570. Back-patch to v13 where the extended stats code grew some awareness of ScalarArrayOpExpr.
1 parent 5bee956 commit b80b084

5 files changed

Lines changed: 65 additions & 28 deletions

File tree

src/backend/statistics/extended_stats.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1333,8 +1333,8 @@ choose_best_statistics(List *stats, char requiredkind,
13331333
*
13341334
* (c) combinations using AND/OR/NOT
13351335
*
1336-
* (d) ScalarArrayOpExprs of the form (Var/Expr op ANY (array)) or (Var/Expr
1337-
* op ALL (array))
1336+
* (d) ScalarArrayOpExprs of the form (Var/Expr op ANY (Const)) or
1337+
* (Var/Expr op ALL (Const))
13381338
*
13391339
* In the future, the range of supported clauses may be expanded to more
13401340
* complex cases, for example (Var op Var).
@@ -1454,13 +1454,19 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
14541454
RangeTblEntry *rte = root->simple_rte_array[relid];
14551455
ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
14561456
Node *clause_expr;
1457+
Const *cst;
1458+
bool expronleft;
14571459

14581460
/* Only expressions with two arguments are considered compatible. */
14591461
if (list_length(expr->args) != 2)
14601462
return false;
14611463

14621464
/* Check if the expression has the right shape (one Var, one Const) */
1463-
if (!examine_opclause_args(expr->args, &clause_expr, NULL, NULL))
1465+
if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
1466+
return false;
1467+
1468+
/* We only support Var on left and non-null array constants */
1469+
if (!expronleft || cst->constisnull)
14641470
return false;
14651471

14661472
/*

src/backend/statistics/mcv.c

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1746,20 +1746,17 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
17461746
if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
17471747
elog(ERROR, "incompatible clause");
17481748

1749-
/* ScalarArrayOpExpr has the Var always on the left */
1750-
Assert(expronleft);
1749+
/* We expect Var on left and non-null constant on right */
1750+
if (!expronleft || cst->constisnull)
1751+
elog(ERROR, "incompatible clause");
17511752

1752-
/* XXX what if (cst->constisnull == NULL)? */
1753-
if (!cst->constisnull)
1754-
{
1755-
arrayval = DatumGetArrayTypeP(cst->constvalue);
1756-
get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
1757-
&elmlen, &elmbyval, &elmalign);
1758-
deconstruct_array(arrayval,
1759-
ARR_ELEMTYPE(arrayval),
1760-
elmlen, elmbyval, elmalign,
1761-
&elem_values, &elem_nulls, &num_elems);
1762-
}
1753+
arrayval = DatumGetArrayTypeP(cst->constvalue);
1754+
get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
1755+
&elmlen, &elmbyval, &elmalign);
1756+
deconstruct_array(arrayval,
1757+
ARR_ELEMTYPE(arrayval),
1758+
elmlen, elmbyval, elmalign,
1759+
&elem_values, &elem_nulls, &num_elems);
17631760

17641761
/* match the attribute/expression to a dimension of the statistic */
17651762
idx = mcv_match_expression(clause_expr, keys, exprs, &collid);

src/test/regress/expected/stats_ext.out

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1835,7 +1835,8 @@ CREATE TABLE mcv_lists (
18351835
b VARCHAR,
18361836
filler3 DATE,
18371837
c INT,
1838-
d TEXT
1838+
d TEXT,
1839+
ia INT[]
18391840
)
18401841
WITH (autovacuum_enabled = off);
18411842
-- random data (no MCV list)
@@ -1905,8 +1906,9 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 A
19051906
-- 100 distinct combinations, all in the MCV list
19061907
TRUNCATE mcv_lists;
19071908
DROP STATISTICS mcv_lists_stats;
1908-
INSERT INTO mcv_lists (a, b, c, filler1)
1909-
SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
1909+
INSERT INTO mcv_lists (a, b, c, ia, filler1)
1910+
SELECT mod(i,100), mod(i,50), mod(i,25), array[mod(i,25)], i
1911+
FROM generate_series(1,5000) s(i);
19101912
ANALYZE mcv_lists;
19111913
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
19121914
estimated | actual
@@ -2046,8 +2048,14 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY
20462048
1 | 100
20472049
(1 row)
20482050

2051+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)');
2052+
estimated | actual
2053+
-----------+--------
2054+
4 | 50
2055+
(1 row)
2056+
20492057
-- create statistics
2050-
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
2058+
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c, ia FROM mcv_lists;
20512059
ANALYZE mcv_lists;
20522060
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
20532061
estimated | actual
@@ -2193,6 +2201,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY
21932201
100 | 100
21942202
(1 row)
21952203

2204+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)');
2205+
estimated | actual
2206+
-----------+--------
2207+
4 | 50
2208+
(1 row)
2209+
21962210
-- check change of unrelated column type does not reset the MCV statistics
21972211
ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
21982212
SELECT d.stxdmcv IS NOT NULL

src/test/regress/expected/stats_ext_optimizer.out

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1857,7 +1857,8 @@ CREATE TABLE mcv_lists (
18571857
b VARCHAR,
18581858
filler3 DATE,
18591859
c INT,
1860-
d TEXT
1860+
d TEXT,
1861+
ia INT[]
18611862
)
18621863
WITH (autovacuum_enabled = off);
18631864
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'filler1' as the Apache Cloudberry data distribution key for this table.
@@ -1929,8 +1930,9 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 A
19291930
-- 100 distinct combinations, all in the MCV list
19301931
TRUNCATE mcv_lists;
19311932
DROP STATISTICS mcv_lists_stats;
1932-
INSERT INTO mcv_lists (a, b, c, filler1)
1933-
SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
1933+
INSERT INTO mcv_lists (a, b, c, ia, filler1)
1934+
SELECT mod(i,100), mod(i,50), mod(i,25), array[mod(i,25)], i
1935+
FROM generate_series(1,5000) s(i);
19341936
ANALYZE mcv_lists;
19351937
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
19361938
estimated | actual
@@ -2070,8 +2072,14 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY
20702072
22 | 100
20712073
(1 row)
20722074

2075+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)');
2076+
estimated | actual
2077+
-----------+--------
2078+
4 | 50
2079+
(1 row)
2080+
20732081
-- create statistics
2074-
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
2082+
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c, ia FROM mcv_lists;
20752083
ANALYZE mcv_lists;
20762084
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
20772085
estimated | actual
@@ -2217,6 +2225,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY
22172225
22 | 100
22182226
(1 row)
22192227

2228+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)');
2229+
estimated | actual
2230+
-----------+--------
2231+
4 | 50
2232+
(1 row)
2233+
22202234
-- check change of unrelated column type does not reset the MCV statistics
22212235
ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
22222236
SELECT d.stxdmcv IS NOT NULL

src/test/regress/sql/stats_ext.sql

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -923,7 +923,8 @@ CREATE TABLE mcv_lists (
923923
b VARCHAR,
924924
filler3 DATE,
925925
c INT,
926-
d TEXT
926+
d TEXT,
927+
ia INT[]
927928
)
928929
WITH (autovacuum_enabled = off);
929930

@@ -972,8 +973,9 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 A
972973
TRUNCATE mcv_lists;
973974
DROP STATISTICS mcv_lists_stats;
974975

975-
INSERT INTO mcv_lists (a, b, c, filler1)
976-
SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
976+
INSERT INTO mcv_lists (a, b, c, ia, filler1)
977+
SELECT mod(i,100), mod(i,50), mod(i,25), array[mod(i,25)], i
978+
FROM generate_series(1,5000) s(i);
977979

978980
ANALYZE mcv_lists;
979981

@@ -1023,8 +1025,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY
10231025

10241026
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', NULL, ''3'') AND c > ANY (ARRAY[1, 2, NULL, 3])');
10251027

1028+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)');
1029+
10261030
-- create statistics
1027-
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
1031+
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c, ia FROM mcv_lists;
10281032

10291033
ANALYZE mcv_lists;
10301034

@@ -1076,6 +1080,8 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY
10761080

10771081
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', NULL, ''3'') AND c > ANY (ARRAY[1, 2, NULL, 3])');
10781082

1083+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)');
1084+
10791085
-- check change of unrelated column type does not reset the MCV statistics
10801086
ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
10811087

0 commit comments

Comments
 (0)