Skip to content

Commit 69ef4da

Browse files
zxuejingmy-ship-it
authored and committed
get correlation from segments instead of calculating it on QD (#15357)
We cannot use the same method as PostgreSQL to calculate the correlation on the QD. Collecting data from the segments to the QD changes the physical order of the data. For example, suppose segment 1 holds 1,3,5,7,9 and segment 2 holds 2,4,6,8,10. The data is ordered within each segment, so the correlation is 1 on each segment. But after the data is collected on the QD, it may arrive as 1,3,5,2,4,7,9,6,8,10, giving a correlation of 0.3 or some other value that is not stable. This increases the estimated cost of an index scan, which should not happen. So instead, get the correlations from the segments and then calculate the correlation for the QD from them. We use a weighted mean to calculate the correlation on the QD. However, in some situations we may not be able to obtain reltuples for a table, such as a non-leaf partition of a partitioned table or the parent table of an inherited table, so for these tables we can only use the plain (unweighted) mean.
1 parent e03740b commit 69ef4da

14 files changed

Lines changed: 917 additions & 109 deletions

File tree

src/backend/commands/analyze.c

Lines changed: 409 additions & 28 deletions
Large diffs are not rendered by default.

src/backend/commands/analyzefuncs.c

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "foreign/fdwapi.h"
3030
#include "miscadmin.h"
3131
#include "funcapi.h"
32+
#include "utils/syscache.h"
3233

3334
/**
3435
* Statistics related parameters.
@@ -368,3 +369,159 @@ gp_acquire_sample_rows_col_type(Oid typid)
368369
}
369370
return typid;
370371
}
372+
373+
/*
374+
* gp_acquire_correlations - Acquire each column's correlation for a table.
375+
* This is an internal function called in gp_acquire_correlations_dispatcher.
376+
* this function will return a result set, a row for each alive column.
377+
* each row contains 3 columns: attnum, the correlation for it and totalrows.
378+
* if correlation is null, set totalrows to 0 for it.
379+
*
380+
* So overall, this returns a result set like this:
381+
* create table t(tc1 int, tc2 int, tc3 int);
382+
* insert values.
383+
* alter table t drop column tc2;
384+
*
385+
* attnum | correlation| totalrows
386+
* ----------+------------|+------------
387+
* 0 | 0.8 | 200
388+
* 2 | | 0
389+
*/
390+
Datum
391+
gp_acquire_correlations(PG_FUNCTION_ARGS)
392+
{
393+
FuncCallContext *funcctx = NULL;
394+
gp_acquire_correlation_context *ctx;
395+
MemoryContext oldcontext;
396+
Oid relOid = PG_GETARG_OID(0);
397+
bool inherited = PG_GETARG_BOOL(1);
398+
TupleDesc relDesc;
399+
TupleDesc outDesc;
400+
401+
if (SRF_IS_FIRSTCALL())
402+
{
403+
Relation onerel;
404+
funcctx = SRF_FIRSTCALL_INIT();
405+
406+
/*
407+
* switch to memory context appropriate for multiple function
408+
* calls
409+
*/
410+
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
411+
412+
/* Construct the context to keep across calls. */
413+
ctx = (gp_acquire_correlation_context *) palloc0(sizeof(gp_acquire_correlation_context));
414+
415+
if (!pg_class_ownercheck(relOid, GetUserId()))
416+
aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_TABLE,
417+
get_rel_name(relOid));
418+
419+
onerel = table_open(relOid, AccessShareLock);
420+
relDesc = RelationGetDescr(onerel);
421+
422+
outDesc = CreateTemplateTupleDesc(3);
423+
TupleDescInitEntry(outDesc,
424+
1,
425+
"attnum",
426+
INT4OID,
427+
-1,
428+
0);
429+
TupleDescInitEntry(outDesc,
430+
2,
431+
"correlation",
432+
FLOAT4OID,
433+
-1,
434+
0);
435+
TupleDescInitEntry(outDesc,
436+
3,
437+
"totalrows",
438+
INT4OID,
439+
-1,
440+
0);
441+
442+
BlessTupleDesc(outDesc);
443+
funcctx->tuple_desc = outDesc;
444+
445+
ctx->onerel = onerel;
446+
funcctx->user_fctx = ctx;
447+
ctx->outDesc = outDesc;
448+
449+
ctx->index = 0;
450+
ctx->totalAttr = relDesc->natts;
451+
MemoryContextSwitchTo(oldcontext);
452+
}
453+
454+
/* stuff done on every call of the function */
455+
funcctx = SRF_PERCALL_SETUP();
456+
457+
ctx = funcctx->user_fctx;
458+
relDesc = RelationGetDescr(ctx->onerel);
459+
outDesc = ctx->outDesc;
460+
461+
Datum *outvalues = (Datum *) palloc(outDesc->natts * sizeof(Datum));
462+
bool *outnulls = (bool *) palloc(outDesc->natts * sizeof(bool));
463+
HeapTuple res;
464+
int attno = ctx->index;
465+
466+
/* Return all alive attribute correlation */
467+
for (; attno < ctx->totalAttr; attno++)
468+
{
469+
/* get the correlation of the column */
470+
int totalrows = 0;
471+
HeapTuple statsTuple;
472+
Form_pg_attribute relatt = TupleDescAttr(relDesc, attno);
473+
if (relatt->attisdropped)
474+
continue;
475+
statsTuple = SearchSysCache3(STATRELATTINH,
476+
ObjectIdGetDatum(relOid),
477+
Int16GetDatum(attno + 1),
478+
BoolGetDatum(inherited));
479+
outvalues[0] = Int32GetDatum(attno);
480+
outnulls[0] = false;
481+
482+
if (HeapTupleIsValid(statsTuple))
483+
{
484+
AttStatsSlot sslot;
485+
486+
if (get_attstatsslot(&sslot, statsTuple,
487+
STATISTIC_KIND_CORRELATION, InvalidOid,
488+
ATTSTATSSLOT_NUMBERS))
489+
{
490+
float4 varCorrelation;
491+
Assert(sslot.nnumbers == 1);
492+
varCorrelation = sslot.numbers[0];
493+
494+
free_attstatsslot(&sslot);
495+
496+
outvalues[1] = Float4GetDatum(varCorrelation);
497+
outnulls[1] = false;
498+
totalrows = ctx->onerel->rd_rel->reltuples;
499+
}
500+
else
501+
{
502+
outvalues[1] = (Datum) 0;
503+
outnulls[1] = true;
504+
}
505+
ReleaseSysCache(statsTuple);
506+
}
507+
else
508+
{
509+
outvalues[1] = (Datum) 0;
510+
outnulls[1] = true;
511+
}
512+
513+
outvalues[2] = Int32GetDatum(totalrows);
514+
outnulls[2] = false;
515+
516+
res = heap_form_tuple(outDesc, outvalues, outnulls);
517+
ctx->index = attno + 1;
518+
519+
SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(res));
520+
}
521+
522+
table_close(ctx->onerel, AccessShareLock);
523+
pfree(ctx);
524+
funcctx->user_fctx = NULL;
525+
526+
SRF_RETURN_DONE(funcctx);
527+
}

src/include/catalog/pg_proc.dat

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11810,7 +11810,7 @@
1181011810
{ oid => 6464, descr => 'get backends of overflowed subtransaction',
1181111811
proname => 'gp_get_suboverflowed_backends', provolatile => 'v', prorettype => '_int4', proargtypes => '', prosrc => 'gp_get_suboverflowed_backends' },
1181211812

11813-
{ oid => 6040, descr => 'get gp all segments pg_snapshot',
11813+
{ oid => 6041, descr => 'get gp all segments pg_snapshot',
1181411814
proname => 'gp_current_snapshot', proisstrict => 'f',
1181511815
proretset => 't', provolatile => 'v', proparallel => 'r',
1181611816
prorettype => 'record', proargtypes => '',
@@ -11992,6 +11992,8 @@
1199211992
# Analyze related
1199311993
{ oid => 6038, descr => 'Collect a random sample of rows from table',
1199411994
proname => 'gp_acquire_sample_rows', prorows => '1000', proretset => 't', provolatile => 'v', proparallel => 'u', prorettype => 'record', proargtypes => 'oid int4 bool', prosrc => 'gp_acquire_sample_rows', proexeclocation => 's' },
11995+
{ oid => 6040, descr => 'Collect correlations from segments',
11996+
proname => 'gp_acquire_correlations', prorows => '10', proretset => 't', provolatile => 'v', proparallel => 'u', prorettype => 'record', proargtypes => 'oid bool', prosrc => 'gp_acquire_correlations', proexeclocation => 's' },
1199511997

1199611998
# Backoff related
1199711999
{ oid => 7016, descr => 'change weight of all the backends for a given session id',

src/include/commands/vacuum.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,9 @@ typedef struct VacAttrStats
179179
bool *exprnulls;
180180
int rowstride;
181181
bool merge_stats;
182+
bool corrnull; /* whether correlation value is null */
183+
bool partitiontbl_qd; /* analyze is on QD and the policy of table is partitioned */
184+
float4 corrval; /* correlation gathered from segments */
182185
} VacAttrStats;
183186

184187

@@ -327,6 +330,24 @@ typedef struct
327330
bool summary_sent;
328331
} gp_acquire_sample_rows_context;
329332

333+
typedef struct
334+
{
335+
/* Table being analyzed */
336+
Relation onerel;
337+
338+
/* whether acquire inherited table's correlations */
339+
bool inherited;
340+
341+
/*
342+
* Result tuple descriptor.
343+
*/
344+
TupleDesc outDesc;
345+
346+
/* SRF state, to track which rows have already been returned. */
347+
int index;
348+
int totalAttr;
349+
} gp_acquire_correlation_context;
350+
330351
/* GUC parameters */
331352
extern PGDLLIMPORT int default_statistics_target; /* PGDLLIMPORT for PostGIS */
332353
extern int vacuum_freeze_min_age;
@@ -416,6 +437,7 @@ extern int acquire_inherited_sample_rows(Relation onerel, int elevel,
416437

417438
/* in commands/analyzefuncs.c */
418439
extern Datum gp_acquire_sample_rows(PG_FUNCTION_ARGS);
440+
extern Datum gp_acquire_correlations(PG_FUNCTION_ARGS);
419441
extern Oid gp_acquire_sample_rows_col_type(Oid typid);
420442

421443
extern bool gp_vacuum_needs_update_stats(void);

0 commit comments

Comments
 (0)