Skip to content

Commit 5457752

Browse files
max-ostapenko, GCP Dataform
authored and committed
bytesTotal tested
1 parent 390269b commit 5457752

3 files changed

Lines changed: 63 additions & 40 deletions

File tree

definitions/output/reports/reports_dynamic.js

Lines changed: 57 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -2,37 +2,47 @@ const configs = new reports.HTTPArchiveReports()
22
const metrics = configs.listMetrics()
33
const lenses = configs.lenses
44

5-
const bucket = 'httparchive'
6-
const storagePath = '/reports/dev/'
5+
const bucket = constants.bucket
6+
const storagePath = constants.storagePath
7+
const dataset = 'reports'
78

89
// Adjust start and end dates to update reports retrospectively
9-
const startDate = '2024-12-01' // constants.currentMonth;
10-
const endDate = '2024-12-01' // constants.currentMonth;
10+
const startDate = constants.currentMonth; // '2025-07-01'
11+
const endDate = constants.currentMonth; // '2025-07-01'
1112

12-
function generateExportPath (ctx, params) {
13+
function generateExportPath (params) {
14+
objectName = storagePath
1315
if (params.sql.type === 'histogram') {
14-
return `${storagePath}${params.date.replaceAll('-', '_')}/${params.metric.id}.json`
16+
objectName = objectName + params.date.replaceAll('-', '_') + '/' + params.metric.id
1517
} else if (params.sql.type === 'timeseries') {
16-
return `${storagePath}${params.metric.id}.json`
18+
objectName = objectName + params.metric.id
1719
} else {
1820
throw new Error('Unknown SQL type')
1921
}
22+
return objectName + '_test.json' // TODO: remove test suffix from the path
2023
}
2124

22-
function generateExportQuery (ctx, params) {
25+
function generateExportQuery (params) {
2326
let query = ''
2427
if (params.sql.type === 'histogram') {
2528
query = `
26-
SELECT * EXCEPT(date)
27-
FROM \`reports.${params.sql.type}\`
29+
SELECT
30+
* EXCEPT(date, metric, lens)
31+
FROM \`${dataset}.${params.tableName}\`
2832
WHERE date = '${params.date}'
33+
AND metric = '${params.metric.id}'
34+
AND lens = '${params.lens.name}'
35+
ORDER BY bin ASC
2936
`
3037
} else if (params.sql.type === 'timeseries') {
3138
query = `
3239
SELECT
3340
FORMAT_DATE('%Y_%m_%d', date) AS date,
34-
* EXCEPT(date)
35-
FROM \`reports.${params.sql.type}\`
41+
* EXCEPT(date, metric, lens)
42+
FROM \`${dataset}.${params.tableName}\`
43+
WHERE metric = '${params.metric.id}'
44+
AND lens = '${params.lens.name}'
45+
ORDER BY date DESC
3646
`
3747
} else {
3848
throw new Error('Unknown SQL type')
@@ -60,44 +70,54 @@ for (
6070
metric,
6171
sql,
6272
lens: { name: key, sql: value },
63-
devRankFilter: constants.devRankFilter
73+
devRankFilter: constants.devRankFilter,
74+
tableName: metric.id + '_' + sql.type
6475
})
6576
}
6677
})
6778
})
6879
}
6980

7081
iterations.forEach((params, i) => {
71-
operate(
72-
params.metric.id + '_' + params.sql.type + '_' + params.lens.name + '_' + params.date)
82+
operate(params.tableName + '_' + params.date + '_' + params.lens.name)
7383
.tags(['crawl_complete', 'reports'])
7484
.queries(ctx => `
75-
CREATE TABLE IF NOT EXISTS reports.${params.sql.type} (
76-
date DATE,
77-
lens STRING,
78-
metric STRING,
79-
client STRING,
80-
data JSON
81-
)
85+
DECLARE job_config JSON;
86+
87+
/* First report run
88+
CREATE TABLE IF NOT EXISTS ${dataset}.${params.tableName}
8289
PARTITION BY date
83-
CLUSTER BY metric, lens, client;
90+
CLUSTER BY metric, lens, client
91+
AS
92+
*/
8493
85-
DELETE FROM reports.${params.sql.type}
94+
--/* Subsequent report run
95+
DELETE FROM ${dataset}.${params.tableName}
8696
WHERE date = '${params.date}'
87-
AND metric = '${params.metric.id}';
88-
89-
INSERT INTO reports.${params.sql.type} ${params.sql.query(ctx, params)};
97+
AND metric = '${params.metric.id}'
98+
AND lens = '${params.lens.name}';
99+
INSERT INTO ${dataset}.${params.tableName}
100+
--*/
90101
91102
SELECT
92-
reports.run_export_job(
93-
JSON '''{
94-
"destination": "cloud_storage",
95-
"config": {
96-
"bucket": "${bucket}",
97-
"name": "${generateExportPath(ctx, params)}"
98-
},
99-
"query": "${generateExportQuery(ctx, params)}"
100-
}'''
103+
'${params.metric.id}' AS metric,
104+
'${params.lens.name}' AS lens,
105+
*
106+
FROM (
107+
${params.sql.query(ctx, params)}
101108
);
102-
`)
109+
110+
SET job_config = TO_JSON(
111+
STRUCT(
112+
"cloud_storage" AS destination,
113+
STRUCT(
114+
"httparchive" AS bucket,
115+
"${generateExportPath(params)}" AS name
116+
) AS config,
117+
r"${generateExportQuery(params)}" AS query
118+
)
119+
);
120+
121+
SELECT reports.run_export_job(job_config);
122+
`)
103123
})

includes/constants.js

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@ class DataformTemplateBuilder {
4949
if (typeof value === 'string') return `'${value}'`
5050
if (typeof value === 'number') return value.toString()
5151
if (typeof value === 'boolean') return value.toString()
52+
if (typeof value === 'function') return value.toString()
5253

5354
// For objects or arrays, use JSON.stringify
5455
return JSON.stringify(value)

includes/reports.js

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,9 +12,10 @@ WITH pages AS (
1212
date,
1313
client,
1414
CAST(FLOOR(FLOAT64(summary.bytesTotal) / 1024 / 100) * 100 AS INT64) AS bin
15-
FROM crawl.pages
15+
FROM ${ctx.ref('crawl', 'pages')}
1616
WHERE
1717
date = '${params.date}'
18+
${params.devRankFilter}
1819
${params.lens.sql}
1920
AND is_root_page
2021
AND FLOAT64(summary.bytesTotal) > 0
@@ -52,9 +53,10 @@ WITH pages AS (
5253
date,
5354
client,
5455
FLOAT64(summary.bytesTotal) AS bytesTotal
55-
FROM crawl.pages
56+
FROM ${ctx.ref('crawl', 'pages')}
5657
WHERE
5758
date = '${params.date}'
59+
${params.devRankFilter}
5860
${params.lens.sql}
5961
AND is_root_page
6062
AND INT64(summary.bytesTotal) > 0
@@ -63,7 +65,7 @@ WITH pages AS (
6365
SELECT
6466
date,
6567
client,
66-
UNIX_SECONDS(TIMESTAMP(date)) AS timestamp,
68+
UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
6769
ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(101)] / 1024, 2) AS p10,
6870
ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(251)] / 1024, 2) AS p25,
6971
ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(501)] / 1024, 2) AS p50,

0 commit comments

Comments
 (0)