@@ -3,13 +3,67 @@ import { BigQueryExport } from './bigquery.js'
33
44export class FirestoreBatch {
55 constructor ( ) {
6- this . firestore = new Firestore ( )
6+ this . firestore = new Firestore ( {
7+ gaxOptions : {
8+ grpc : {
9+ max_receive_message_length : 500 * 1024 * 1024 , // 500MB
10+ max_send_message_length : 500 * 1024 * 1024 , // 500MB
11+ 'grpc.max_connection_idle_ms' : 5 * 60 * 1000 , // 5 minutes
12+ 'grpc.keepalive_time_ms' : 30 * 1000 , // 30 seconds
13+ 'grpc.keepalive_timeout_ms' : 60 * 1000 , // 1 minute
14+ 'grpc.keepalive_permit_without_calls' : true
15+ }
16+ }
17+ } )
718 this . bigquery = new BigQueryExport ( )
8- this . batchSize = 500
9- this . maxConcurrentBatches = 200
19+
20+ // Configuration constants
21+ this . config = {
22+ batchSize : {
23+ delete : 500 ,
24+ write : 400
25+ } ,
26+ maxConcurrentBatches : 200 ,
27+ retryCount : 5 ,
28+ timeout : 10 * 60 * 1000 // 10 minutes
29+ }
30+
31+ this . reset ( )
1032 }
1133
12- queueBatch ( operation ) {
34+ reset ( ) {
35+ this . currentBatch = [ ]
36+ this . batchPromises = [ ]
37+ }
38+
39+ getCurrentBatchSize ( operation ) {
40+ return this . config . batchSize [ operation === 'delete' ? 'delete' : 'write' ]
41+ }
42+
43+ async commitWithRetry ( batch , index ) {
44+ let lastError
45+
46+ for ( let attempt = 1 ; attempt <= this . config . retryCount ; attempt ++ ) {
47+ try {
48+ await batch . commit ( )
49+ return
50+ } catch ( error ) {
51+ lastError = error
52+ console . warn ( `Batch ${ index } attempt ${ attempt } failed:` , error . message )
53+
54+ if ( attempt < this . config . retryCount ) {
55+ const delayMs = Math . pow ( 2 , attempt ) * 500
56+ console . log ( `Retrying batch ${ index } in ${ delayMs } ms...` )
57+ await new Promise ( resolve => setTimeout ( resolve , delayMs ) )
58+ }
59+ }
60+ }
61+
62+ console . error ( `Batch ${ index } failed after ${ this . config . retryCount } attempts:` , lastError )
63+ throw lastError
64+ }
65+
66+ createBatch ( operation ) {
1367 const batch = this . firestore . batch ( )
1468
1569 this . currentBatch . forEach ( ( doc ) => {
@@ -19,119 +73,126 @@ export class FirestoreBatch {
1973 const docRef = this . firestore . collection ( this . collectionName ) . doc ( )
2074 batch . set ( docRef , doc )
2175 } else {
22- throw new Error ( ' Invalid operation' )
76+ throw new Error ( ` Invalid operation: ${ operation } ` )
2377 }
2478 } )
79+
80+ return batch
81+ }
82+
83+ queueBatch ( operation ) {
84+ const batch = this . createBatch ( operation )
2585 this . batchPromises . push ( batch )
2686 this . currentBatch = [ ]
2787 }
2888
2989 async commitBatches ( ) {
90+ if ( this . batchPromises . length === 0 ) return
91+
3092 console . log ( `Committing ${ this . batchPromises . length } batches to ${ this . collectionName } ` )
93+
3194 await Promise . all (
32- this . batchPromises . map ( async ( batchPromise ) => await batchPromise . commit ( )
33- . catch ( ( error ) => {
34- console . error ( 'Error committing batch:' , error )
35- throw error
36- } )
95+ this . batchPromises . map ( ( batch , index ) =>
96+ this . commitWithRetry ( batch , index )
3797 )
3898 )
99+
39100 this . batchPromises = [ ]
40101 }
41102
42- async finalFlush ( operation ) {
43- if ( this . currentBatch . length > 0 ) {
103+ async processInBatches ( operation , shouldFlush = false ) {
104+ const batchSize = this . getCurrentBatchSize ( operation )
105+
106+ if ( this . currentBatch . length >= batchSize || shouldFlush ) {
44107 this . queueBatch ( operation )
45108 }
46109
47- if ( this . batchPromises . length > 0 ) {
110+ if ( this . batchPromises . length >= this . config . maxConcurrentBatches || shouldFlush ) {
48111 await this . commitBatches ( )
49112 }
50113 }
51114
115+ buildQuery ( collectionRef ) {
116+ const queryMap = {
117+ report : ( ) => {
118+ console . info ( `Deleting documents from ${ this . collectionName } for date ${ this . date } ` )
119+ return collectionRef . where ( 'date' , '==' , this . date )
120+ } ,
121+ dict : ( ) => {
122+ console . info ( `Deleting documents from ${ this . collectionName } ` )
123+ return collectionRef
124+ }
125+ }
126+
127+ const queryBuilder = queryMap [ this . collectionType ]
128+ if ( ! queryBuilder ) {
129+ throw new Error ( `Invalid collection type: ${ this . collectionType } ` )
130+ }
131+
132+ return queryBuilder ( )
133+ }
134+
52135 async batchDelete ( ) {
53136 console . info ( 'Starting batch deletion...' )
54137 const startTime = Date . now ( )
55- this . currentBatch = [ ]
56- this . batchPromises = [ ]
138+ this . reset ( )
57139
58140 let totalDocsDeleted = 0
59141 const collectionRef = this . firestore . collection ( this . collectionName )
60-
61- let collectionQuery
62- if ( this . collectionType === 'report' ) {
63- console . info ( 'Deleting documents from ' + this . collectionName + ' for date ' + this . date )
64- // Query to fetch monthly documents
65- collectionQuery = collectionRef . where ( 'date' , '==' , this . date )
66- } else if ( this . collectionType === 'dict' ) {
67- console . info ( 'Deleting documents from ' + this . collectionName )
68- collectionQuery = collectionRef
69- } else {
70- throw new Error ( 'Invalid collection type' )
71- }
142+ const collectionQuery = this . buildQuery ( collectionRef )
143+ const batchSize = this . getCurrentBatchSize ( 'delete' )
72144
73145 while ( true ) {
74- const snapshot = await collectionQuery . limit ( this . batchSize * this . maxConcurrentBatches ) . get ( )
75- if ( snapshot . empty ) {
76- break
77- }
146+ const snapshot = await collectionQuery . limit ( batchSize * this . config . maxConcurrentBatches ) . get ( )
147+ if ( snapshot . empty ) break
78148
79- for await ( const doc of snapshot . docs ) {
149+ for ( const doc of snapshot . docs ) {
80150 this . currentBatch . push ( doc )
81-
82- if ( this . currentBatch . length >= this . batchSize ) {
83- this . queueBatch ( 'delete' )
84- }
85- if ( this . batchPromises . length >= this . maxConcurrentBatches ) {
86- await this . commitBatches ( )
87- }
151+ await this . processInBatches ( 'delete' )
88152 totalDocsDeleted ++
89153 }
90154 }
91- await this . finalFlush ( 'delete' )
155+
156+ // Final flush
157+ await this . processInBatches ( 'delete' , true )
92158
93159 const duration = ( Date . now ( ) - startTime ) / 1000
94160 console . info ( `Deletion complete. Total docs deleted: ${ totalDocsDeleted } . Time: ${ duration } seconds` )
95161 }
96162
97- /**
98- * Streams BigQuery query results into a Firestore collection using batch commits.
99- * @param {string } query - The BigQuery SQL query.
100- */
101163 async streamFromBigQuery ( rowStream ) {
102164 console . info ( 'Starting BigQuery to Firestore transfer...' )
103165 const startTime = Date . now ( )
104166 let totalRowsProcessed = 0
105167
106- this . currentBatch = [ ]
107- this . batchPromises = [ ]
168+ this . reset ( )
108169
109170 for await ( const row of rowStream ) {
110171 this . currentBatch . push ( row )
111-
112- // Write batch when it reaches specified size
113- if ( this . currentBatch . length >= this . batchSize ) {
114- this . queueBatch ( 'set' )
115- }
116-
117- if ( this . batchPromises . length >= this . maxConcurrentBatches ) {
118- await this . commitBatches ( )
119- }
172+ await this . processInBatches ( 'set' )
120173 totalRowsProcessed ++
121174 }
122- await this . finalFlush ( 'set' )
175+
176+ // Final flush
177+ await this . processInBatches ( 'set' , true )
123178
124179 const duration = ( Date . now ( ) - startTime ) / 1000
125180 console . info ( `Transfer to ${ this . collectionName } complete. Total rows processed: ${ totalRowsProcessed } . Time: ${ duration } seconds` )
126181 }
127182
128183 async export ( query , exportConfig ) {
184+ // Configure Firestore settings
129185 this . firestore . settings ( {
130- databaseId : exportConfig . database
186+ databaseId : exportConfig . database ,
187+ timeout : this . config . timeout
188+ } )
189+
190+ // Set instance properties
191+ Object . assign ( this , {
192+ collectionName : exportConfig . collection ,
193+ collectionType : exportConfig . type ,
194+ date : exportConfig . date
131195 } )
132- this . collectionName = exportConfig . collection
133- this . collectionType = exportConfig . type
134- this . date = exportConfig . date
135196
136197 await this . batchDelete ( )
137198
0 commit comments