Skip to content

Commit feacc0b

Browse files
committed
Merge branch 'feature/CG-700' into 'main'
CG-700: fix(bigQuery): use API in dataset/table fetch. Closes CG-700. See merge request auto-cloud/cloudgraph/provider/cloudgraph-provider-gcp!43.
2 parents 67490f2 + bd8426d commit feacc0b

11 files changed

Lines changed: 266 additions & 228 deletions

File tree

package.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,17 @@
3434
"dependencies": {
3535
"@cloudgraph/sdk": "^0.11.0",
3636
"@google-cloud/asset": "^3.22.0",
37+
"@google-cloud/bigquery": "^5.10.0",
38+
"@google-cloud/bigquery-connection": "^1.5.1",
39+
"@google-cloud/bigquery-data-transfer": "^2.3.0",
40+
"@google-cloud/bigquery-reservation": "^1.4.0",
3741
"@google-cloud/compute": "^3.0.0",
3842
"@google-cloud/dataproc": "^3.2.0",
3943
"@google-cloud/dns": "^2.2.3",
4044
"@google-cloud/functions": "^1.2.0",
4145
"@google-cloud/kms": "^2.10.0",
4246
"@google-cloud/logging": "^9.6.4",
4347
"@google-cloud/monitoring": "^2.3.5",
44-
"@google-cloud/bigquery": "^5.9.3",
45-
"@google-cloud/bigquery-connection": "^1.5.1",
46-
"@google-cloud/bigquery-data-transfer": "^2.3.0",
47-
"@google-cloud/bigquery-reservation": "^1.4.0",
4848
"@google-cloud/resource-manager": "^3.0.0",
4949
"@google-cloud/secret-manager": "^3.10.1",
5050
"@google-cloud/storage": "^5.16.1",

src/services/bigQuery/data.ts

Lines changed: 70 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,86 @@
1-
import { BigQuery } from '@google-cloud/bigquery'
1+
import { DNS } from '@google-cloud/dns'
2+
import bigquery from '@google-cloud/bigquery/build/src/types'
23
import CloudGraph from '@cloudgraph/sdk'
34
import groupBy from 'lodash/groupBy'
5+
import isEmpty from 'lodash/isEmpty'
46
import gcpLoggerText from '../../properties/logger'
5-
import { GcpServiceInput } from '../../types'
7+
import { GcpCredentials, GcpServiceInput } from '../../types'
68
import { generateGcpErrorLog, initTestEndpoint } from '../../utils'
7-
import { RawGcpBigQueryDataset } from './types'
8-
import { MULTI_REGIONS } from '../../config/constants'
9+
import { listData } from '../../utils/fetchUtils'
910

1011
const lt = { ...gcpLoggerText }
1112
const { logger } = CloudGraph
1213
const serviceName = 'BigQuery Dataset'
1314
const apiEndpoint = initTestEndpoint(serviceName)
1415

15-
export default async ({
16-
regions,
17-
config,
18-
}: GcpServiceInput): Promise<{
19-
[region: string]: RawGcpBigQueryDataset[]
20-
}> => {
21-
const bigQueryClient = new BigQuery({ ...config, apiEndpoint })
22-
const datasetsResult: RawGcpBigQueryDataset[] = []
23-
const { projectId } = config
24-
const allRegions = regions.split(',').concat(MULTI_REGIONS)
25-
try {
26-
const dataSetIter = bigQueryClient.getDatasetsStream()
27-
for await (const dataSetResponse of dataSetIter) {
28-
if (allRegions.includes(dataSetResponse.location)) {
29-
const dsMetaData = dataSetResponse.metadata
30-
const result = {
31-
...dsMetaData,
32-
region: dataSetResponse.location,
33-
Labels: dataSetResponse.labels,
34-
tables: [],
35-
projectId,
36-
}
37-
try {
38-
const tableIter = dataSetResponse.getTablesStream()
39-
for await (const tableResponse of tableIter) {
40-
result.tables.push(tableResponse.metadata)
16+
export interface RawGcpBigQueryDataset extends bigquery.IDataset {
17+
projectId: string
18+
region: string
19+
tables: RawGcpBigQueryTable[]
20+
}
21+
22+
export interface RawGcpBigQueryTable extends bigquery.ITable {
23+
projectId: string
24+
region: string
25+
}
26+
27+
export const listBigQueryDatasets = async (
28+
config: GcpCredentials,
29+
datasetsResult: RawGcpBigQueryDataset[]
30+
): Promise<void> =>
31+
new Promise(async resolve => {
32+
const { projectId } = config
33+
34+
try {
35+
const service = new DNS({ ...config, apiEndpoint })
36+
const dataSetlist = await listData({
37+
service,
38+
apiUri: `https://bigquery.googleapis.com/bigquery/v2/projects/${projectId}/datasets`,
39+
dataFieldName: 'datasets',
40+
})
41+
42+
for (const { datasetReference } of dataSetlist) {
43+
const dataSetResponse = await listData({
44+
service,
45+
apiUri: `https://bigquery.googleapis.com/bigquery/v2/projects/${projectId}/datasets/${datasetReference?.datasetId}`,
46+
})
47+
48+
if (!isEmpty(dataSetResponse)) {
49+
const result = {
50+
...dataSetResponse[0],
51+
region: dataSetResponse[0].location,
52+
tables: [],
53+
projectId,
54+
}
55+
56+
const tableResponse = await listData({
57+
service,
58+
apiUri: `https://bigquery.googleapis.com/bigquery/v2/projects/${projectId}/datasets/${datasetReference?.datasetId}/tables`,
59+
dataFieldName: 'tables',
60+
})
61+
62+
for (const table of tableResponse) {
63+
result.tables.push(table)
4164
}
4265
datasetsResult.push(result)
43-
} catch (error) {
44-
generateGcpErrorLog(serviceName, 'bigQuery:getTablesStream', error)
4566
}
4667
}
68+
} catch (error) {
69+
generateGcpErrorLog(serviceName, 'bigquery:datasets', error)
4770
}
48-
} catch (error) {
49-
generateGcpErrorLog(serviceName, 'bigQuery:getDatasetsStream', error)
50-
}
71+
resolve()
72+
})
5173

52-
logger.debug(lt.foundResources(serviceName, datasetsResult.length))
53-
return groupBy(datasetsResult, 'region')
54-
}
74+
export default async ({
75+
config,
76+
}: GcpServiceInput): Promise<{
77+
[region: string]: RawGcpBigQueryDataset[]
78+
}> =>
79+
new Promise(async resolve => {
80+
const datasetsResult: RawGcpBigQueryDataset[] = []
81+
82+
await listBigQueryDatasets(config, datasetsResult)
83+
84+
logger.debug(lt.foundResources(serviceName, datasetsResult.length))
85+
resolve(groupBy(datasetsResult, 'region'))
86+
})

src/services/bigQuery/format.ts

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ import {
77
GcpBigQueryTableViewUserDefinedFunctionResource,
88
} from '../../types/generated'
99
import { formatLabelsFromMap } from '../../utils/format'
10-
import { RawGcpBigQueryDataset, RawGcpBigQueryTable } from './types'
10+
import { toISOString, millisToSeconds } from '../../utils/dateutils'
11+
import { RawGcpBigQueryDataset, RawGcpBigQueryTable } from './data'
1112

1213
const formatTableField = (field): GcpBigQueryTableSchemaField => {
1314
const {
@@ -134,9 +135,9 @@ const formatTable = (table: RawGcpBigQueryTable): GcpBigQueryTable => {
134135
numBytes,
135136
numLongTermBytes,
136137
numRows,
137-
creationTime,
138-
expirationTime,
139-
lastModifiedTime,
138+
creationTime: toISOString(millisToSeconds(creationTime)),
139+
expirationTime: toISOString(millisToSeconds(expirationTime)),
140+
lastModifiedTime: toISOString(millisToSeconds(lastModifiedTime)),
140141
viewQuery: view?.query || '',
141142
viewUserDefinedFunctionResources: view?.userDefinedFunctionResources?.map(
142143
resource => formatTableViewUserDefinedFunctionResource(resource)
@@ -173,7 +174,6 @@ const formatTable = (table: RawGcpBigQueryTable): GcpBigQueryTable => {
173174
externalDataConfiguration?.hivePartitioningOptions?.sourceUriPrefix || '',
174175
externalDataConfigurationHivePartitioningOptionsRequirePartitionFilter:
175176
externalDataConfiguration?.hivePartitioningOptions?.requirePartitionFilter || false,
176-
externalDataConfigurationHivePartitioningOptionsFields: externalDataConfiguration?.hivePartitioningOptions?.fields || [],
177177
externalDataConfigurationConnectionId: externalDataConfiguration?.connectionId || '',
178178
externalDataConfigurationDecimalTargetTypes: externalDataConfiguration?.decimalTargetTypes || [],
179179
externalDataConfigurationAvroOptionsUseAvroLogicalTypes:externalDataConfiguration?.avroOptions?.useAvroLogicalTypes || false,
@@ -201,18 +201,47 @@ export default ({
201201
}): GcpBigQueryDataset => {
202202
const {
203203
id,
204+
projectId,
204205
kind,
206+
labels = {},
207+
etag,
208+
selfLink,
209+
datasetReference,
210+
friendlyName,
211+
description,
212+
defaultTableExpirationMs,
213+
defaultPartitionExpirationMs,
214+
access,
215+
creationTime,
216+
lastModifiedTime,
217+
location,
218+
defaultEncryptionConfiguration,
219+
satisfiesPZS,
205220
tables = [],
206-
labels = {}
207221
} = service
208222

209223
return {
210224
id: id || cuid(),
211-
projectId: account,
225+
projectId,
212226
region,
213227
kind,
214228
labels: formatLabelsFromMap(labels),
215-
totalTables: tables.length || 0,
229+
etag,
230+
selfLink,
231+
datasetReference,
232+
friendlyName,
233+
description,
234+
defaultTableExpirationMs,
235+
defaultPartitionExpirationMs,
236+
access: access?.map(acc => ({
237+
id: cuid(),
238+
...acc,
239+
})),
240+
creationTime: toISOString(millisToSeconds(creationTime)),
241+
lastModifiedTime: toISOString(millisToSeconds(lastModifiedTime)),
242+
location,
243+
defaultEncryptionConfiguration,
244+
satisfiesPzs: satisfiesPZS,
216245
tables: tables.map(table => formatTable(table)) || []
217246
}
218247
}

src/services/bigQuery/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import getData from './data'
55
import mutation from './mutation'
66

77
export default class GcpBigQuery extends BaseService implements Service {
8-
format = format.bind(this)
8+
format = format.bind(this)
99

1010
getData = getData.bind(this)
1111

src/services/bigQuery/schema.graphql

Lines changed: 76 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,10 +126,83 @@ type gcpBigQueryDataset implements gcpBaseResource
126126
@generate(
127127
query: { get: true, query: true, aggregate: true }
128128
mutation: { add: true, delete: false }
129-
)
130-
@key(fields: "id") {
129+
) @key(fields: "id") {
130+
etag: String @search(by: [hash, regexp])
131+
selfLink: String @search(by: [hash, regexp])
132+
datasetReference: gcpBigQueryDatasetReference
131133
friendlyName: String @search(by: [hash, regexp])
132-
totalTables: Int @search
134+
description: String @search(by: [hash, regexp])
135+
defaultTableExpirationMs: String @search(by: [hash, regexp])
136+
defaultPartitionExpirationMs: String @search(by: [hash, regexp])
137+
access: [gcpBigQueryDatasetAccess]
138+
creationTime: String @search(by: [hash, regexp])
139+
lastModifiedTime: String @search(by: [hash, regexp])
140+
location: String @search(by: [hash, regexp])
141+
defaultEncryptionConfiguration: gcpBigQueryEncryptionConfiguration
142+
satisfiesPzs: Boolean @search
133143
tables: [gcpBigQueryTable]
134144
project: [gcpProject] @hasInverse(field: bigQueryDataset)
135145
}
146+
147+
type gcpBigQueryDatasetAccess
148+
@generate(
149+
query: { get: true, query: true, aggregate: true }
150+
mutation: { add: true, delete: false }
151+
) {
152+
id: String! @id
153+
role: String @search(by: [hash, regexp])
154+
userByEmail: String @search(by: [hash, regexp])
155+
groupByEmail: String @search(by: [hash, regexp])
156+
domain: String @search(by: [hash, regexp])
157+
specialGroup: String @search(by: [hash, regexp])
158+
iamMember: String @search(by: [hash, regexp])
159+
view: gcpBigQueryTableReference
160+
routine: gcpBigQueryRoutineReference
161+
dataset: gcpBigQueryFeedbackDatasetAccessEntry
162+
}
163+
164+
type gcpBigQueryTableReference
165+
@generate(
166+
query: { get: true, query: true, aggregate: true }
167+
mutation: { add: true, delete: false }
168+
) {
169+
projectId: String @search(by: [hash, regexp])
170+
datasetId: String @search(by: [hash, regexp])
171+
tableId: String @search(by: [hash, regexp])
172+
}
173+
174+
type gcpBigQueryRoutineReference
175+
@generate(
176+
query: { get: true, query: true, aggregate: true }
177+
mutation: { add: true, delete: false }
178+
) {
179+
projectId: String @search(by: [hash, regexp])
180+
datasetId: String @search(by: [hash, regexp])
181+
routineId: String @search(by: [hash, regexp])
182+
}
183+
184+
type gcpBigQueryFeedbackDatasetAccessEntry
185+
@generate(
186+
query: { get: true, query: true, aggregate: true }
187+
mutation: { add: true, delete: false }
188+
) {
189+
dataset: gcpBigQueryDatasetReference
190+
targetTypes: [String] @search(by: [hash])
191+
}
192+
193+
type gcpBigQueryDatasetReference
194+
@generate(
195+
query: { get: true, query: true, aggregate: true }
196+
mutation: { add: true, delete: false }
197+
) {
198+
datasetId: String @search(by: [hash, regexp])
199+
projectId: String @search(by: [hash, regexp])
200+
}
201+
202+
type gcpBigQueryEncryptionConfiguration
203+
@generate(
204+
query: { get: true, query: true, aggregate: true }
205+
mutation: { add: true, delete: false }
206+
) {
207+
kmsKeyName: String @search(by: [hash, regexp])
208+
}

0 commit comments

Comments
 (0)