Skip to content

Commit 9a360df

Browse files
committed
beautified
1 parent 5457752 commit 9a360df

1 file changed

Lines changed: 198 additions & 106 deletions

File tree

Lines changed: 198 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -1,123 +1,215 @@
1-
const configs = new reports.HTTPArchiveReports()
2-
const metrics = configs.listMetrics()
3-
const lenses = configs.lenses
4-
5-
const bucket = constants.bucket
6-
const storagePath = constants.storagePath
7-
const dataset = 'reports'
8-
9-
// Adjust start and end dates to update reports retrospectively
10-
const startDate = constants.currentMonth; // '2025-07-01'
11-
const endDate = constants.currentMonth; // '2025-07-01'
12-
13-
function generateExportPath (params) {
14-
objectName = storagePath
15-
if (params.sql.type === 'histogram') {
16-
objectName = objectName + params.date.replaceAll('-', '_') + '/' + params.metric.id
17-
} else if (params.sql.type === 'timeseries') {
18-
objectName = objectName + params.metric.id
1+
/**
2+
* Dynamic Reports Generator
3+
*
4+
* This file automatically generates Dataform operations for HTTP Archive reports.
5+
* It creates operations for each combination of:
6+
* - Date range (from startDate to endDate)
7+
* - Metrics (defined in includes/reports.js)
8+
* - SQL types (histogram, timeseries)
9+
* - Lenses (data filters like all, top1k, wordpress, etc.)
10+
*
11+
* Each operation:
12+
* 1. Calculates metrics from crawl data
13+
* 2. Stores results in BigQuery tables
14+
* 3. Exports data to Cloud Storage as JSON
15+
*/
16+
17+
// Initialize configurations
18+
const httpArchiveReports = new reports.HTTPArchiveReports()
19+
const availableMetrics = httpArchiveReports.listMetrics()
20+
const availableLenses = httpArchiveReports.lenses
21+
22+
// Configuration constants
23+
const EXPORT_CONFIG = {
24+
bucket: constants.bucket,
25+
storagePath: constants.storagePath,
26+
dataset: 'reports',
27+
testSuffix: '_test.json' // TODO: remove test suffix from the path
28+
}
29+
30+
// Date range for report generation
31+
// Adjust these dates to update reports retrospectively
32+
const DATE_RANGE = {
33+
startDate: constants.currentMonth, // '2025-07-01'
34+
endDate: constants.currentMonth // '2025-07-01'
35+
}
36+
37+
/**
38+
* Generates the Cloud Storage export path for a report
39+
* @param {Object} reportConfig - Report configuration object
40+
* @returns {string} - Cloud Storage object path
41+
*/
42+
function buildExportPath(reportConfig) {
43+
const { sql, date, metric } = reportConfig
44+
let objectPath = EXPORT_CONFIG.storagePath
45+
46+
if (sql.type === 'histogram') {
47+
// Histogram exports are organized by date folders
48+
const dateFolder = date.replaceAll('-', '_')
49+
objectPath += `${dateFolder}/${metric.id}`
50+
} else if (sql.type === 'timeseries') {
51+
// Timeseries exports are organized by metric
52+
objectPath += metric.id
1953
} else {
20-
throw new Error('Unknown SQL type')
54+
throw new Error(`Unknown SQL type: ${sql.type}`)
2155
}
22-
return objectName + '_test.json' // TODO: remove test suffix from the path
56+
57+
return objectPath + EXPORT_CONFIG.testSuffix
2358
}
2459

25-
function generateExportQuery (params) {
26-
let query = ''
27-
if (params.sql.type === 'histogram') {
60+
/**
61+
* Generates the BigQuery export query for a report
62+
* @param {Object} reportConfig - Report configuration object
63+
* @returns {string} - SQL query for exporting data
64+
*/
65+
function buildExportQuery(reportConfig) {
66+
const { sql, date, metric, lens, tableName } = reportConfig
67+
68+
let query
69+
if (sql.type === 'histogram') {
2870
query = `
29-
SELECT
30-
* EXCEPT(date, metric, lens)
31-
FROM \`${dataset}.${params.tableName}\`
32-
WHERE date = '${params.date}'
33-
AND metric = '${params.metric.id}'
34-
AND lens = '${params.lens.name}'
35-
ORDER BY bin ASC
36-
`
37-
} else if (params.sql.type === 'timeseries') {
71+
SELECT
72+
* EXCEPT(date, metric, lens)
73+
FROM \`${EXPORT_CONFIG.dataset}.${tableName}\`
74+
WHERE date = '${date}'
75+
AND metric = '${metric.id}'
76+
AND lens = '${lens.name}'
77+
ORDER BY bin ASC
78+
`
79+
} else if (sql.type === 'timeseries') {
3880
query = `
39-
SELECT
40-
FORMAT_DATE('%Y_%m_%d', date) AS date,
41-
* EXCEPT(date, metric, lens)
42-
FROM \`${dataset}.${params.tableName}\`
43-
WHERE metric = '${params.metric.id}'
44-
AND lens = '${params.lens.name}'
45-
ORDER BY date DESC
46-
`
81+
SELECT
82+
FORMAT_DATE('%Y_%m_%d', date) AS date,
83+
* EXCEPT(date, metric, lens)
84+
FROM \`${EXPORT_CONFIG.dataset}.${tableName}\`
85+
WHERE metric = '${metric.id}'
86+
AND lens = '${lens.name}'
87+
ORDER BY date DESC
88+
`
4789
} else {
48-
throw new Error('Unknown SQL type')
90+
throw new Error(`Unknown SQL type: ${sql.type}`)
4991
}
5092

51-
const queryOutput = query.replace(/[\r\n]+/g, ' ')
52-
return queryOutput
93+
// Convert to single line for JSON embedding
94+
return query.replace(/[\r\n]+/g, ' ').trim()
5395
}
5496

55-
const iterations = []
56-
// dates
57-
for (
58-
let date = endDate;
59-
date >= startDate;
60-
date = constants.fnPastMonth(date)
61-
) {
62-
// metrics
63-
metrics.forEach(metric => {
64-
// timeseries and histograms
65-
metric.SQL.forEach(sql => {
66-
// lenses
67-
for (const [key, value] of Object.entries(lenses)) {
68-
iterations.push({
69-
date,
70-
metric,
71-
sql,
72-
lens: { name: key, sql: value },
73-
devRankFilter: constants.devRankFilter,
74-
tableName: metric.id + '_' + sql.type
97+
/**
98+
* Creates a report configuration object
99+
* @param {string} date - Report date (YYYY-MM-DD)
100+
* @param {Object} metric - Metric configuration
101+
* @param {Object} sql - SQL configuration (type and query)
102+
* @param {string} lensName - Lens name
103+
* @param {string} lensSQL - Lens SQL filter
104+
* @returns {Object} - Complete report configuration
105+
*/
106+
function createReportConfig(date, metric, sql, lensName, lensSQL) {
107+
return {
108+
date,
109+
metric,
110+
sql,
111+
lens: { name: lensName, sql: lensSQL },
112+
devRankFilter: constants.devRankFilter,
113+
tableName: `${metric.id}_${sql.type}`
114+
}
115+
}
116+
117+
/**
118+
* Generates all report configurations for the specified date range
119+
* @returns {Array} - Array of report configuration objects
120+
*/
121+
function generateReportConfigurations() {
122+
const reportConfigs = []
123+
124+
// Generate configurations for each date in range
125+
for (let date = DATE_RANGE.endDate;
126+
date >= DATE_RANGE.startDate;
127+
date = constants.fnPastMonth(date)) {
128+
129+
// For each available metric
130+
availableMetrics.forEach(metric => {
131+
// For each SQL type (histogram, timeseries)
132+
metric.SQL.forEach(sql => {
133+
// For each available lens (all, top1k, wordpress, etc.)
134+
Object.entries(availableLenses).forEach(([lensName, lensSQL]) => {
135+
const config = createReportConfig(date, metric, sql, lensName, lensSQL)
136+
reportConfigs.push(config)
75137
})
76-
}
138+
})
77139
})
78-
})
140+
}
141+
142+
return reportConfigs
143+
}
144+
145+
/**
146+
* Creates a Dataform operation name for a report configuration
147+
* @param {Object} reportConfig - Report configuration object
148+
* @returns {string} - Operation name
149+
*/
150+
function createOperationName(reportConfig) {
151+
const { tableName, date, lens } = reportConfig
152+
return `${tableName}_${date}_${lens.name}`
153+
}
154+
155+
/**
156+
* Generates the SQL for a Dataform operation
157+
* @param {Object} ctx - Dataform context
158+
* @param {Object} reportConfig - Report configuration object
159+
* @returns {string} - Complete SQL for the operation
160+
*/
161+
function generateOperationSQL(ctx, reportConfig) {
162+
const { date, metric, lens, sql, tableName } = reportConfig
163+
164+
return `
165+
DECLARE job_config JSON;
166+
167+
/* First report run - uncomment to create table
168+
CREATE TABLE IF NOT EXISTS ${EXPORT_CONFIG.dataset}.${tableName}
169+
PARTITION BY date
170+
CLUSTER BY metric, lens, client
171+
AS
172+
*/
173+
174+
--/* Subsequent report run
175+
DELETE FROM ${EXPORT_CONFIG.dataset}.${tableName}
176+
WHERE date = '${date}'
177+
AND metric = '${metric.id}'
178+
AND lens = '${lens.name}';
179+
INSERT INTO ${EXPORT_CONFIG.dataset}.${tableName}
180+
--*/
181+
182+
SELECT
183+
'${metric.id}' AS metric,
184+
'${lens.name}' AS lens,
185+
*
186+
FROM (
187+
${sql.query(ctx, reportConfig)}
188+
);
189+
190+
SET job_config = TO_JSON(
191+
STRUCT(
192+
"cloud_storage" AS destination,
193+
STRUCT(
194+
"httparchive" AS bucket,
195+
"${buildExportPath(reportConfig)}" AS name
196+
) AS config,
197+
r"${buildExportQuery(reportConfig)}" AS query
198+
)
199+
);
200+
201+
SELECT reports.run_export_job(job_config);
202+
`
79203
}
80204

81-
iterations.forEach((params, i) => {
82-
operate(params.tableName + '_' + params.date + '_' + params.lens.name)
205+
// Generate all report configurations
206+
const reportConfigurations = generateReportConfigurations()
207+
208+
// Create Dataform operations for each report configuration
209+
reportConfigurations.forEach(reportConfig => {
210+
const operationName = createOperationName(reportConfig)
211+
212+
operate(operationName)
83213
.tags(['crawl_complete', 'reports'])
84-
.queries(ctx => `
85-
DECLARE job_config JSON;
86-
87-
/* First report run
88-
CREATE TABLE IF NOT EXISTS ${dataset}.${params.tableName}
89-
PARTITION BY date
90-
CLUSTER BY metric, lens, client
91-
AS
92-
*/
93-
94-
--/* Subsequent report run
95-
DELETE FROM ${dataset}.${params.tableName}
96-
WHERE date = '${params.date}'
97-
AND metric = '${params.metric.id}'
98-
AND lens = '${params.lens.name}';
99-
INSERT INTO ${dataset}.${params.tableName}
100-
--*/
101-
102-
SELECT
103-
'${params.metric.id}' AS metric,
104-
'${params.lens.name}' AS lens,
105-
*
106-
FROM (
107-
${params.sql.query(ctx, params)}
108-
);
109-
110-
SET job_config = TO_JSON(
111-
STRUCT(
112-
"cloud_storage" AS destination,
113-
STRUCT(
114-
"httparchive" AS bucket,
115-
"${generateExportPath(params)}" AS name
116-
) AS config,
117-
r"${generateExportQuery(params)}" AS query
118-
)
119-
);
120-
121-
SELECT reports.run_export_job(job_config);
122-
`)
214+
.queries(ctx => generateOperationSQL(ctx, reportConfig))
123215
})

0 commit comments

Comments
 (0)