Skip to content

Commit 60790f0

Browse files
committed
Fix segfilecount of AO/AOCO when bulk insertion: COPY
Fix #529. For COPY FROM on AO/AOCO tables, we need to try to switch physical seg files on the fly during bulk insertion. Otherwise, only one insertDesc will be used and the GUC around segfilecount doesn't take effect. That's important for parallel plans. For AO specifically, bulk insertion is optimized to reuse the var block if possible, which leads to an additional check. Also fix a memory leak: used_segment_files is allocated at enter_dml_state() and should be freed when the DML is finished. Authored-by: Zhang Mingli avamingli@gmail.com
1 parent d1103ed commit 60790f0

6 files changed

Lines changed: 181 additions & 1 deletion

File tree

.github/workflows/build_external_fts.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ jobs:
3939
needs: build
4040
runs-on: [self-hosted, example]
4141
env:
42-
MAKE_TEST_COMMAND: "-k PGOPTIONS='-c optimizer=off -c gp_appendonly_insert_files=0' installcheck-world"
42+
MAKE_TEST_COMMAND: "-k PGOPTIONS='-c optimizer=off' installcheck-world"
4343
TEST_OS: "centos"
4444
DUMP_DB: "true"
4545
steps:

src/backend/access/aocs/aocsam_handler.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,9 @@ aoco_dml_finish(Relation relation, CmdType operation)
316316
Assert(state->insertDesc->aoi_rel == relation);
317317
aocs_insert_finish(state->insertDesc, &state->head);
318318
state->insertDesc = NULL;
319+
state->insertMultiFiles = 0;
320+
pfree(state->used_segment_files);
321+
state->used_segment_files = NIL;
319322
}
320323

321324
if (state->uniqueCheckDesc)
@@ -1071,9 +1074,21 @@ aoco_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
10711074
AOCSInsertDesc insertDesc;
10721075
insertDesc = get_insert_descriptor(relation);
10731076

1077+
AOCODMLState *state;
1078+
state = find_dml_state(RelationGetRelid(relation));
1079+
10741080
for (int i = 0; i < ntuples; i++)
10751081
{
10761082
slot_getallattrs(slots[i]);
1083+
/*
1084+
* For bulk insert, we may switch insertDesc
1085+
* on the fly.
1086+
*/
1087+
if (state->insertMultiFiles && state->insertDesc->range == gp_appendonly_insert_files_tuples_range)
1088+
{
1089+
insertDesc = get_insert_descriptor(relation);
1090+
}
1091+
10771092
aocs_insert_values(insertDesc, slots[i]->tts_values, slots[i]->tts_isnull, (AOTupleId *) &slots[i]->tts_tid);
10781093
}
10791094

src/backend/access/appendonly/appendonlyam_handler.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,9 @@ appendonly_dml_finish(Relation relation, CmdType operation)
286286
Assert(state->insertDesc->aoi_rel == relation);
287287
appendonly_insert_finish(state->insertDesc, &state->head);
288288
state->insertDesc = NULL;
289+
state->insertMultiFiles = 0;
290+
pfree(state->used_segment_files);
291+
state->used_segment_files = NIL;
289292
}
290293

291294
if (state->uniqueCheckDesc)
@@ -930,10 +933,12 @@ appendonly_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
930933
CommandId cid, int options, BulkInsertState bistate)
931934
{
932935
AppendOnlyInsertDesc insertDesc;
936+
AppendOnlyDMLState *state;
933937
MemTuple *mtuple;
934938
int ndone = 0;
935939
int nthisBlock = 0;
936940
insertDesc = get_insert_descriptor(relation);
941+
state = find_dml_state(RelationGetRelid(relation));
937942
Oid tableOid = RelationGetRelid(relation);
938943
mtuple = palloc(ntuples * sizeof(MemTuple));
939944
for (int i = 0; i < ntuples; i++)
@@ -943,11 +948,29 @@ appendonly_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
943948
}
944949
while (ndone < ntuples)
945950
{
951+
/*
952+
* For bulk insert, we may switch insertDesc
953+
* on the fly.
954+
*/
955+
if (state->insertMultiFiles && state->insertDesc->range == gp_appendonly_insert_files_tuples_range)
956+
{
957+
insertDesc = get_insert_descriptor(relation);
958+
}
959+
946960
appendonly_insert(insertDesc, mtuple[ndone], (AOTupleId *) &slots[ndone]->tts_tid);
947961
for (nthisBlock = 1; ndone + nthisBlock < ntuples; nthisBlock++)
948962
{
949963
if (insertDesc->useNoToast)
950964
{
965+
/*
966+
* This is a hack way to insert into AO of CBDB.
967+
* Check switch insertDesc again.
968+
*/
969+
if (state->insertMultiFiles && state->insertDesc->range == gp_appendonly_insert_files_tuples_range)
970+
{
971+
insertDesc = get_insert_descriptor(relation);
972+
}
973+
951974
MemTuple tup = mtuple[ndone + nthisBlock] ;
952975
uint8 *itemPtr = NULL;
953976
VarBlockByteLen itemLen;
@@ -968,6 +991,7 @@ appendonly_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
968991
{
969992
memcpy(itemPtr, tup, itemLen);
970993
insertDesc->insertCount++;
994+
insertDesc->range++;
971995
insertDesc->lastSequence++;
972996
if (insertDesc->numSequences > 0)
973997
(insertDesc->numSequences)--;
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
create schema ao_segfile;
2+
set search_path to ao_segfile;
3+
set gp_appendonly_insert_files = 4;
4+
-- ao table
5+
create table ao_copy (a int) using ao_row;
6+
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table.
7+
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
8+
select segfilecount from pg_appendonly where relid = 'ao_copy'::regclass;
9+
segfilecount
10+
--------------
11+
0
12+
(1 row)
13+
14+
set gp_appendonly_insert_files_tuples_range = 1;
15+
-- ensure 4 files on 3 segments
16+
COPY ao_copy from stdin;
17+
analyze ao_copy;
18+
select count(*) from ao_copy;
19+
count
20+
-------
21+
20
22+
(1 row)
23+
24+
select segfilecount from pg_appendonly where relid = 'ao_copy'::regclass;
25+
segfilecount
26+
--------------
27+
4
28+
(1 row)
29+
30+
reset gp_appendonly_insert_files_tuples_range;
31+
-- aocs table
32+
create table aocs_copy (a int) using ao_column;
33+
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table.
34+
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
35+
select segfilecount from pg_appendonly where relid = 'aocs_copy'::regclass;
36+
segfilecount
37+
--------------
38+
0
39+
(1 row)
40+
41+
set gp_appendonly_insert_files_tuples_range = 1;
42+
-- ensure 4 files on 3 segments
43+
COPY aocs_copy from stdin;
44+
analyze aocs_copy;
45+
select count(*) from aocs_copy;
46+
count
47+
-------
48+
20
49+
(1 row)
50+
51+
select segfilecount from pg_appendonly where relid = 'aocs_copy'::regclass;
52+
segfilecount
53+
--------------
54+
4
55+
(1 row)
56+
57+
reset gp_appendonly_insert_files_tuples_range;
58+
reset gp_appendonly_insert_files;
59+
-- start_ignore
60+
drop schema ao_segfile cascade;
61+
NOTICE: drop cascades to 2 other objects
62+
DETAIL: drop cascades to table ao_copy
63+
drop cascades to table aocs_copy
64+
-- end_ignore

src/test/regress/greenplum_schedule

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,4 +332,7 @@ test: subtrx_overflow
332332

333333
test: bfv_meta_track
334334

335+
# tests of ao/aoco seg file count for parallel plan
336+
test: ao_segfile
337+
335338
# end of tests
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
create schema ao_segfile;
2+
set search_path to ao_segfile;
3+
set gp_appendonly_insert_files = 4;
4+
5+
-- ao table
6+
create table ao_copy (a int) using ao_row;
7+
select segfilecount from pg_appendonly where relid = 'ao_copy'::regclass;
8+
set gp_appendonly_insert_files_tuples_range = 1;
9+
-- ensure 4 files on 3 segments
10+
COPY ao_copy from stdin;
11+
1
12+
2
13+
3
14+
4
15+
5
16+
6
17+
7
18+
8
19+
9
20+
10
21+
11
22+
12
23+
13
24+
14
25+
15
26+
16
27+
17
28+
18
29+
19
30+
20
31+
\.
32+
33+
analyze ao_copy;
34+
select count(*) from ao_copy;
35+
select segfilecount from pg_appendonly where relid = 'ao_copy'::regclass;
36+
reset gp_appendonly_insert_files_tuples_range;
37+
38+
-- aocs table
39+
create table aocs_copy (a int) using ao_column;
40+
select segfilecount from pg_appendonly where relid = 'aocs_copy'::regclass;
41+
set gp_appendonly_insert_files_tuples_range = 1;
42+
-- ensure 4 files on 3 segments
43+
COPY aocs_copy from stdin;
44+
1
45+
2
46+
3
47+
4
48+
5
49+
6
50+
7
51+
8
52+
9
53+
10
54+
11
55+
12
56+
13
57+
14
58+
15
59+
16
60+
17
61+
18
62+
19
63+
20
64+
\.
65+
66+
analyze aocs_copy;
67+
select count(*) from aocs_copy;
68+
select segfilecount from pg_appendonly where relid = 'aocs_copy'::regclass;
69+
reset gp_appendonly_insert_files_tuples_range;
70+
reset gp_appendonly_insert_files;
71+
72+
-- start_ignore
73+
drop schema ao_segfile cascade;
74+
-- end_ignore

0 commit comments

Comments
 (0)