Skip to content

Commit 727c3c6

Browse files
max-ostapenko authored and GCP Dataform committed
common lenses
1 parent c26f79e commit 727c3c6

2 files changed

Lines changed: 38 additions & 37 deletions

File tree

definitions/output/reports/reports_dynamic.js

Lines changed: 17 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
const configs = new reports.HTTPArchiveReports()
22
const metrics = configs.listMetrics()
3+
const lenses = configs.lenses;
34

45
const bucket = 'httparchive'
56
const storagePath = '/reports/dev/'
@@ -8,31 +9,30 @@ const storagePath = '/reports/dev/'
89
const startDate = '2024-12-01' // constants.currentMonth;
910
const endDate = '2024-12-01' // constants.currentMonth;
1011

11-
function generateExportPath (metric, sql, params) {
12-
if (sql.type === 'histogram') {
13-
return `${storagePath}${params.date.replaceAll('-', '_')}/${metric.id}.json`
14-
} else if (sql.type === 'timeseries') {
15-
return `${storagePath}${metric.id}.json`
12+
function generateExportPath (ctx, params) {
13+
if (params.sql.type === 'histogram') {
14+
return `${storagePath}${params.date.replaceAll('-', '_')}/${params.metric.id}.json`
15+
} else if (params.sql.type === 'timeseries') {
16+
return `${storagePath}${params.metric.id}.json`
1617
} else {
1718
throw new Error('Unknown SQL type')
1819
}
1920
}
2021

21-
function generateExportQuery (metric, sql, params, ctx) {
22+
function generateExportQuery (ctx, params) {
2223
let query = ''
23-
if (sql.type === 'histogram') {
24+
if (params.sql.type === 'histogram') {
2425
query = `
25-
SELECT
26-
* EXCEPT(date)
27-
FROM ${ctx.self()}
26+
SELECT * EXCEPT(date)
27+
FROM \`reports.${params.sql.type}\`
2828
WHERE date = '${params.date}'
2929
`
30-
} else if (sql.type === 'timeseries') {
30+
} else if (params.sql.type === 'timeseries') {
3131
query = `
3232
SELECT
3333
FORMAT_DATE('%Y_%m_%d', date) AS date,
3434
* EXCEPT(date)
35-
FROM ${ctx.self()}
35+
FROM \`reports.${params.sql.type}\`
3636
`
3737
} else {
3838
throw new Error('Unknown SQL type')
@@ -42,17 +42,6 @@ FROM ${ctx.self()}
4242
return queryOutput
4343
}
4444

45-
const lenses = {
46-
all: '',
47-
top1k: 'AND rank <= 1000',
48-
top10k: 'AND rank <= 10000',
49-
top100k: 'AND rank <= 100000',
50-
top1m: 'AND rank <= 1000000',
51-
drupal: 'AND \'Drupal\' IN UNNEST(technologies.technology)',
52-
magento: 'AND \'Magento\' IN UNNEST(technologies.technology)',
53-
wordpress: 'AND \'WordPress\' IN UNNEST(technologies.technology)'
54-
}
55-
5645
const iterations = []
5746
// dates
5847
for (
@@ -91,25 +80,23 @@ CREATE TABLE IF NOT EXISTS reports.${params.sql.type} (
9180
data JSON
9281
)
9382
PARTITION BY date
94-
CLUSTER BY metric, lens;
83+
CLUSTER BY metric, lens, client;
9584
9685
DELETE FROM reports.${params.sql.type}
9786
WHERE date = '${params.date}'
98-
AND metric = '${params.metric.id}'
99-
AND lens = '${params.lens.sql}';
87+
AND metric = '${params.metric.id}';
10088
101-
INSERT INTO reports.${params.sql.type}
102-
${params.sql.query(ctx, params)};
89+
INSERT INTO reports.${params.sql.type} ${params.sql.query(ctx, params)};
10390
10491
SELECT
10592
reports.run_export_job(
10693
JSON '''{
10794
"destination": "cloud_storage",
10895
"config": {
10996
"bucket": "${bucket}",
110-
"name": "${generateExportPath(params.metric, params.sql, params)}"
97+
"name": "${generateExportPath(ctx, params)}"
11198
},
112-
"query": "${generateExportQuery(params.metric, params.sql, params, ctx)}"
99+
"query": "${generateExportQuery(ctx, params)}"
113100
}'''
114101
);
115102
`)

includes/reports.js

Lines changed: 21 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -14,9 +14,10 @@ WITH pages AS (
1414
CAST(FLOOR(INT64(summary.bytesTotal) / 1024 / 100) * 100 AS INT64) AS bin
1515
FROM crawl.pages
1616
WHERE
17-
date = '${params.date}' ${params.lens.sql} AND
18-
is_root_page AND
19-
INT64(summary.bytesTotal) > 0
17+
date = '${params.date}'
18+
${params.lens.sql}
19+
AND is_root_page
20+
AND INT64(summary.bytesTotal) > 0
2021
)
2122
2223
SELECT
@@ -54,9 +55,10 @@ WITH pages AS (
5455
INT64(summary.bytesTotal) AS bytesTotal
5556
FROM crawl.pages
5657
WHERE
57-
date = '${params.date}' $ ${params.lens.sql} AND
58-
is_root_page AND
59-
INT64(summary.bytesTotal) > 0
58+
date = '${params.date}'
59+
${params.lens.sql}
60+
AND is_root_page
61+
AND INT64(summary.bytesTotal) > 0
6062
)
6163
6264
SELECT
@@ -80,9 +82,21 @@ GROUP BY
8082
}
8183
}
8284

85+
const lenses = {
86+
all: '',
87+
top1k: 'AND rank <= 1000',
88+
top10k: 'AND rank <= 10000',
89+
top100k: 'AND rank <= 100000',
90+
top1m: 'AND rank <= 1000000',
91+
drupal: 'AND \'Drupal\' IN UNNEST(technologies.technology)',
92+
magento: 'AND \'Magento\' IN UNNEST(technologies.technology)',
93+
wordpress: 'AND \'WordPress\' IN UNNEST(technologies.technology)'
94+
}
95+
8396
class HTTPArchiveReports {
8497
constructor () {
85-
this.config = config
98+
this.config = config,
99+
this.lenses = lenses;
86100
}
87101

88102
listReports () {

0 commit comments

Comments (0)