Skip to content

Commit 3656812

Browse files
Replace libcsv with pg COPY for csv loading (#2310)
- Commit also adds permission checks - Resolves a critical memory spike issue on loading large file - Use pg's COPY infrastructure (BeginCopyFrom, NextCopyFromRawFields) for 64KB buffered CSV parsing instead of libcsv - Add byte based flush threshold (64KB) matching COPY behavior for memory safety - Use heap_multi_insert with BulkInsertState for optimized batch inserts - Add per batch memory context to prevent memory growth during large loads - Remove libcsv dependency (libcsv.c, csv.h) - Improves loading performance by 15-20% - No previous regression tests were impacted - Added regression tests for permissions/rls Assisted-by AI Resolved conflict with ExecInitRangeTable
1 parent 49b186e commit 3656812

13 files changed

Lines changed: 1000 additions & 1132 deletions

File tree

Makefile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ OBJS = src/backend/age.o \
6969
src/backend/utils/load/ag_load_labels.o \
7070
src/backend/utils/load/ag_load_edges.o \
7171
src/backend/utils/load/age_load.o \
72-
src/backend/utils/load/libcsv.o \
7372
src/backend/utils/name_validation.o \
7473
src/backend/utils/ag_guc.o
7574

regress/expected/age_load.out

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,195 @@ NOTICE: graph "agload_conversion" has been dropped
454454

455455
(1 row)
456456

457+
--
458+
-- Test security and permissions
459+
--
460+
SELECT create_graph('agload_security');
461+
NOTICE: graph "agload_security" has been created
462+
create_graph
463+
--------------
464+
465+
(1 row)
466+
467+
SELECT create_vlabel('agload_security', 'Person1');
468+
NOTICE: VLabel "Person1" has been created
469+
create_vlabel
470+
---------------
471+
472+
(1 row)
473+
474+
SELECT create_vlabel('agload_security', 'Person2');
475+
NOTICE: VLabel "Person2" has been created
476+
create_vlabel
477+
---------------
478+
479+
(1 row)
480+
481+
SELECT create_elabel('agload_security', 'SecEdge');
482+
NOTICE: ELabel "SecEdge" has been created
483+
create_elabel
484+
---------------
485+
486+
(1 row)
487+
488+
--
489+
-- Test 1: File read permission (pg_read_server_files role)
490+
--
491+
-- Create a user without pg_read_server_files role
492+
CREATE USER load_test_user;
493+
GRANT USAGE ON SCHEMA ag_catalog TO load_test_user;
494+
-- This should fail because load_test_user doesn't have pg_read_server_files
495+
SET ROLE load_test_user;
496+
SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true);
497+
ERROR: permission denied to LOAD from a file
498+
DETAIL: Only roles with privileges of the "pg_read_server_files" role may LOAD from a file.
499+
SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv');
500+
ERROR: permission denied to LOAD from a file
501+
DETAIL: Only roles with privileges of the "pg_read_server_files" role may LOAD from a file.
502+
RESET ROLE;
503+
-- Grant pg_read_server_files and try again - should fail on table permission now
504+
GRANT pg_read_server_files TO load_test_user;
505+
--
506+
-- Test 2: Table INSERT permission (ACL_INSERT)
507+
--
508+
-- User has file read permission but no INSERT on the label table
509+
SET ROLE load_test_user;
510+
SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true);
511+
ERROR: permission denied for table Person1
512+
SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv');
513+
ERROR: permission denied for table SecEdge
514+
RESET ROLE;
515+
-- Grant INSERT permission and try again - should succeed
516+
GRANT USAGE ON SCHEMA agload_security TO load_test_user;
517+
GRANT INSERT ON agload_security."Person1" TO load_test_user;
518+
GRANT INSERT ON agload_security."SecEdge" TO load_test_user;
519+
GRANT UPDATE ON SEQUENCE agload_security."Person1_id_seq" TO load_test_user;
520+
GRANT UPDATE ON SEQUENCE agload_security."SecEdge_id_seq" TO load_test_user;
521+
GRANT SELECT ON ag_catalog.ag_label TO load_test_user;
522+
GRANT SELECT ON ag_catalog.ag_graph TO load_test_user;
523+
SET ROLE load_test_user;
524+
SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true);
525+
load_labels_from_file
526+
-----------------------
527+
528+
(1 row)
529+
530+
SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv');
531+
load_edges_from_file
532+
----------------------
533+
534+
(1 row)
535+
536+
RESET ROLE;
537+
-- Verify data was loaded
538+
SELECT COUNT(*) FROM agload_security."Person1";
539+
count
540+
-------
541+
6
542+
(1 row)
543+
544+
SELECT COUNT(*) FROM agload_security."SecEdge";
545+
count
546+
-------
547+
6
548+
(1 row)
549+
550+
-- cleanup
551+
DELETE FROM agload_security."Person1";
552+
DELETE FROM agload_security."SecEdge";
553+
--
554+
-- Test 3: Row-Level Security (RLS)
555+
--
556+
-- Enable RLS on the label tables
557+
ALTER TABLE agload_security."Person1" ENABLE ROW LEVEL SECURITY;
558+
ALTER TABLE agload_security."SecEdge" ENABLE ROW LEVEL SECURITY;
559+
-- Switch to load_test_user
560+
SET ROLE load_test_user;
561+
-- Loading should fail when RLS is enabled
562+
SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true);
563+
ERROR: LOAD from file is not supported with row-level security
564+
HINT: Use Cypher CREATE clause instead.
565+
SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv');
566+
ERROR: LOAD from file is not supported with row-level security
567+
HINT: Use Cypher CREATE clause instead.
568+
RESET ROLE;
569+
-- Disable RLS and try again - should succeed
570+
ALTER TABLE agload_security."Person1" DISABLE ROW LEVEL SECURITY;
571+
ALTER TABLE agload_security."SecEdge" DISABLE ROW LEVEL SECURITY;
572+
SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true);
573+
load_labels_from_file
574+
-----------------------
575+
576+
(1 row)
577+
578+
SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv');
579+
load_edges_from_file
580+
----------------------
581+
582+
(1 row)
583+
584+
-- Verify data was loaded
585+
SELECT COUNT(*) FROM agload_security."Person1";
586+
count
587+
-------
588+
6
589+
(1 row)
590+
591+
SELECT COUNT(*) FROM agload_security."SecEdge";
592+
count
593+
-------
594+
6
595+
(1 row)
596+
597+
-- cleanup
598+
DELETE FROM agload_security."Person1";
599+
DELETE FROM agload_security."SecEdge";
600+
--
601+
-- Test 4: Constraint checking (CHECK constraint)
602+
--
603+
-- Add constraint on vertex properties - fail if bool property is false
604+
ALTER TABLE agload_security."Person1" ADD CONSTRAINT check_bool_true
605+
CHECK ((properties->>'"bool"')::boolean = true);
606+
-- This should fail - constraint violation
607+
SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true);
608+
ERROR: new row for relation "Person1" violates check constraint "check_bool_true"
609+
DETAIL: Failing row contains (844424930131970, {"id": "2", "bool": "false", "__id__": 2, "string": "John", "num...).
610+
-- Add constraint on edge properties - fail if bool property is false
611+
ALTER TABLE agload_security."SecEdge" ADD CONSTRAINT check_bool_true
612+
CHECK ((properties->>'"bool"')::boolean = true);
613+
-- This should fail - some edges have bool = false
614+
SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv');
615+
ERROR: new row for relation "SecEdge" violates check constraint "check_bool_true"
616+
DETAIL: Failing row contains (1407374883553294, 844424930131969, 1125899906842625, {"bool": "false", "string": "John", "numeric": "-2"}).
617+
-- cleanup
618+
ALTER TABLE agload_security."Person1" DROP CONSTRAINT check_bool_true;
619+
ALTER TABLE agload_security."SecEdge" DROP CONSTRAINT check_bool_true;
620+
--
621+
-- Cleanup
622+
--
623+
REVOKE ALL ON agload_security."Person1" FROM load_test_user;
624+
REVOKE ALL ON agload_security."SecEdge" FROM load_test_user;
625+
REVOKE ALL ON SEQUENCE agload_security."Person1_id_seq" FROM load_test_user;
626+
REVOKE ALL ON SEQUENCE agload_security."SecEdge_id_seq" FROM load_test_user;
627+
REVOKE ALL ON ag_catalog.ag_label FROM load_test_user;
628+
REVOKE ALL ON ag_catalog.ag_graph FROM load_test_user;
629+
REVOKE ALL ON SCHEMA agload_security FROM load_test_user;
630+
REVOKE ALL ON SCHEMA ag_catalog FROM load_test_user;
631+
REVOKE pg_read_server_files FROM load_test_user;
632+
DROP USER load_test_user;
633+
SELECT drop_graph('agload_security', true);
634+
NOTICE: drop cascades to 5 other objects
635+
DETAIL: drop cascades to table agload_security._ag_label_vertex
636+
drop cascades to table agload_security._ag_label_edge
637+
drop cascades to table agload_security."Person1"
638+
drop cascades to table agload_security."Person2"
639+
drop cascades to table agload_security."SecEdge"
640+
NOTICE: graph "agload_security" has been dropped
641+
drop_graph
642+
------------
643+
644+
(1 row)
645+
457646
--
458647
-- End
459648
--

regress/expected/index.out

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -264,19 +264,19 @@ $$) as (n agtype);
264264
(0 rows)
265265

266266
-- Verify that the incices are created on id columns
267-
SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index';
267+
SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index' ORDER BY 1;
268268
indexname | indexdef
269269
-----------------------------+------------------------------------------------------------------------------------------------
270+
City_pkey | CREATE UNIQUE INDEX "City_pkey" ON cypher_index."City" USING btree (id)
271+
Country_pkey | CREATE UNIQUE INDEX "Country_pkey" ON cypher_index."Country" USING btree (id)
272+
_ag_label_edge_end_id_idx | CREATE INDEX _ag_label_edge_end_id_idx ON cypher_index._ag_label_edge USING btree (end_id)
270273
_ag_label_edge_pkey | CREATE UNIQUE INDEX _ag_label_edge_pkey ON cypher_index._ag_label_edge USING btree (id)
271274
_ag_label_edge_start_id_idx | CREATE INDEX _ag_label_edge_start_id_idx ON cypher_index._ag_label_edge USING btree (start_id)
272-
_ag_label_edge_end_id_idx | CREATE INDEX _ag_label_edge_end_id_idx ON cypher_index._ag_label_edge USING btree (end_id)
273275
_ag_label_vertex_pkey | CREATE UNIQUE INDEX _ag_label_vertex_pkey ON cypher_index._ag_label_vertex USING btree (id)
274-
idx_pkey | CREATE UNIQUE INDEX idx_pkey ON cypher_index.idx USING btree (id)
275276
cypher_index_idx_props_uq | CREATE UNIQUE INDEX cypher_index_idx_props_uq ON cypher_index.idx USING btree (properties)
276-
Country_pkey | CREATE UNIQUE INDEX "Country_pkey" ON cypher_index."Country" USING btree (id)
277-
has_city_start_id_idx | CREATE INDEX has_city_start_id_idx ON cypher_index.has_city USING btree (start_id)
278277
has_city_end_id_idx | CREATE INDEX has_city_end_id_idx ON cypher_index.has_city USING btree (end_id)
279-
City_pkey | CREATE UNIQUE INDEX "City_pkey" ON cypher_index."City" USING btree (id)
278+
has_city_start_id_idx | CREATE INDEX has_city_start_id_idx ON cypher_index.has_city USING btree (start_id)
279+
idx_pkey | CREATE UNIQUE INDEX idx_pkey ON cypher_index.idx USING btree (id)
280280
(10 rows)
281281

282282
SET enable_mergejoin = ON;

regress/sql/age_load.sql

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,131 @@ SELECT load_edges_from_file('agload_conversion', 'Edges1', '../../etc/passwd', t
194194
--
195195
SELECT drop_graph('agload_conversion', true);
196196

197+
--
198+
-- Test security and permissions
199+
--
200+
201+
SELECT create_graph('agload_security');
202+
SELECT create_vlabel('agload_security', 'Person1');
203+
SELECT create_vlabel('agload_security', 'Person2');
204+
SELECT create_elabel('agload_security', 'SecEdge');
205+
206+
--
207+
-- Test 1: File read permission (pg_read_server_files role)
208+
--
209+
-- Create a user without pg_read_server_files role
210+
CREATE USER load_test_user;
211+
GRANT USAGE ON SCHEMA ag_catalog TO load_test_user;
212+
213+
-- This should fail because load_test_user doesn't have pg_read_server_files
214+
SET ROLE load_test_user;
215+
SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true);
216+
SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv');
217+
RESET ROLE;
218+
219+
-- Grant pg_read_server_files and try again - should fail on table permission now
220+
GRANT pg_read_server_files TO load_test_user;
221+
222+
--
223+
-- Test 2: Table INSERT permission (ACL_INSERT)
224+
--
225+
-- User has file read permission but no INSERT on the label table
226+
SET ROLE load_test_user;
227+
SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true);
228+
SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv');
229+
RESET ROLE;
230+
231+
-- Grant INSERT permission and try again - should succeed
232+
GRANT USAGE ON SCHEMA agload_security TO load_test_user;
233+
GRANT INSERT ON agload_security."Person1" TO load_test_user;
234+
GRANT INSERT ON agload_security."SecEdge" TO load_test_user;
235+
GRANT UPDATE ON SEQUENCE agload_security."Person1_id_seq" TO load_test_user;
236+
GRANT UPDATE ON SEQUENCE agload_security."SecEdge_id_seq" TO load_test_user;
237+
GRANT SELECT ON ag_catalog.ag_label TO load_test_user;
238+
GRANT SELECT ON ag_catalog.ag_graph TO load_test_user;
239+
240+
SET ROLE load_test_user;
241+
SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true);
242+
SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv');
243+
RESET ROLE;
244+
245+
-- Verify data was loaded
246+
SELECT COUNT(*) FROM agload_security."Person1";
247+
SELECT COUNT(*) FROM agload_security."SecEdge";
248+
249+
-- cleanup
250+
DELETE FROM agload_security."Person1";
251+
DELETE FROM agload_security."SecEdge";
252+
253+
--
254+
-- Test 3: Row-Level Security (RLS)
255+
--
256+
257+
-- Enable RLS on the label tables
258+
ALTER TABLE agload_security."Person1" ENABLE ROW LEVEL SECURITY;
259+
ALTER TABLE agload_security."SecEdge" ENABLE ROW LEVEL SECURITY;
260+
261+
-- Switch to load_test_user
262+
SET ROLE load_test_user;
263+
264+
-- Loading should fail when RLS is enabled
265+
SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true);
266+
SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv');
267+
268+
RESET ROLE;
269+
270+
-- Disable RLS and try again - should succeed
271+
ALTER TABLE agload_security."Person1" DISABLE ROW LEVEL SECURITY;
272+
ALTER TABLE agload_security."SecEdge" DISABLE ROW LEVEL SECURITY;
273+
274+
SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true);
275+
SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv');
276+
277+
-- Verify data was loaded
278+
SELECT COUNT(*) FROM agload_security."Person1";
279+
SELECT COUNT(*) FROM agload_security."SecEdge";
280+
281+
-- cleanup
282+
DELETE FROM agload_security."Person1";
283+
DELETE FROM agload_security."SecEdge";
284+
285+
--
286+
-- Test 4: Constraint checking (CHECK constraint)
287+
--
288+
289+
-- Add constraint on vertex properties - fail if bool property is false
290+
ALTER TABLE agload_security."Person1" ADD CONSTRAINT check_bool_true
291+
CHECK ((properties->>'"bool"')::boolean = true);
292+
293+
-- This should fail - constraint violation
294+
SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true);
295+
296+
-- Add constraint on edge properties - fail if bool property is false
297+
ALTER TABLE agload_security."SecEdge" ADD CONSTRAINT check_bool_true
298+
CHECK ((properties->>'"bool"')::boolean = true);
299+
300+
-- This should fail - some edges have bool = false
301+
SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv');
302+
303+
-- cleanup
304+
ALTER TABLE agload_security."Person1" DROP CONSTRAINT check_bool_true;
305+
ALTER TABLE agload_security."SecEdge" DROP CONSTRAINT check_bool_true;
306+
307+
--
308+
-- Cleanup
309+
--
310+
REVOKE ALL ON agload_security."Person1" FROM load_test_user;
311+
REVOKE ALL ON agload_security."SecEdge" FROM load_test_user;
312+
REVOKE ALL ON SEQUENCE agload_security."Person1_id_seq" FROM load_test_user;
313+
REVOKE ALL ON SEQUENCE agload_security."SecEdge_id_seq" FROM load_test_user;
314+
REVOKE ALL ON ag_catalog.ag_label FROM load_test_user;
315+
REVOKE ALL ON ag_catalog.ag_graph FROM load_test_user;
316+
REVOKE ALL ON SCHEMA agload_security FROM load_test_user;
317+
REVOKE ALL ON SCHEMA ag_catalog FROM load_test_user;
318+
REVOKE pg_read_server_files FROM load_test_user;
319+
DROP USER load_test_user;
320+
SELECT drop_graph('agload_security', true);
321+
197322
--
198323
-- End
199324
--

regress/sql/index.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ SELECT * FROM cypher('cypher_index', $$
165165
$$) as (n agtype);
166166

167167
-- Verify that the incices are created on id columns
168-
SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index';
168+
SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index' ORDER BY 1;
169169

170170
SET enable_mergejoin = ON;
171171
SET enable_hashjoin = OFF;

0 commit comments

Comments
 (0)