Skip to content

Commit e05ce1e

Browse files
Snapshots for f1 (#99)
* snapshots for f1 * lint * fix clusters
1 parent cafdecd commit e05ce1e

4 files changed

Lines changed: 113 additions & 6 deletions

File tree

Lines changed: 73 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,73 @@
1+
// Declares the `pages_latest` table in the `f1` schema: the rows of crawl.pages
// whose date equals constants.currentMonth.
// payload, summary, lighthouse, metadata and every custom_metrics field are
// serialized with TO_JSON_STRING; custom_metrics is rebuilt as a STRUCT whose
// fields are all declared STRING. The remaining columns are selected unchanged.
// NOTE(review): `publish`, `constants` and `ctx.ref` are not defined in this file;
// presumably they are provided by the Dataform runtime — confirm.
publish('pages_latest', {
2+
type: 'table',
3+
schema: 'f1',
4+
description: 'The latest date from the crawl.pages table',
5+
// BigQuery storage layout: partitioned on `date` and clustered on the same
// column list that sample_data.pages_10k is switched to in this commit.
bigquery: {
6+
partitionBy: 'date',
7+
clusterBy: ['client', 'is_root_page', 'rank', 'page']
8+
},
9+
// NOTE(review): presumably this tag groups the action with the other
// crawl-completion outputs (pages_10k / requests_10k carry it too) — confirm.
tags: ['crawl_complete']
10+
// preOps issues a SET @@RESERVATION statement pointing at the `enterprise`
// reservation (presumably executed before the main query — confirm);
// query supplies the SELECT that populates the table.
}).preOps(ctx => `
11+
SET @@RESERVATION='projects/httparchive/locations/US/reservations/enterprise';
12+
`).query(ctx => `
13+
SELECT
14+
date,
15+
client,
16+
page,
17+
is_root_page,
18+
root_page,
19+
rank,
20+
wptid,
21+
TO_JSON_STRING(payload) AS payload,
22+
TO_JSON_STRING(summary) AS summary,
23+
STRUCT<
24+
a11y STRING,
25+
cms STRING,
26+
cookies STRING,
27+
css_variables STRING,
28+
ecommerce STRING,
29+
element_count STRING,
30+
javascript STRING,
31+
markup STRING,
32+
media STRING,
33+
origin_trials STRING,
34+
performance STRING,
35+
privacy STRING,
36+
responsive_images STRING,
37+
robots_txt STRING,
38+
security STRING,
39+
structured_data STRING,
40+
third_parties STRING,
41+
well_known STRING,
42+
wpt_bodies STRING,
43+
other STRING
44+
> (
45+
TO_JSON_STRING(custom_metrics.a11y),
46+
TO_JSON_STRING(custom_metrics.cms),
47+
TO_JSON_STRING(custom_metrics.cookies),
48+
TO_JSON_STRING(custom_metrics.css_variables),
49+
TO_JSON_STRING(custom_metrics.ecommerce),
50+
TO_JSON_STRING(custom_metrics.element_count),
51+
TO_JSON_STRING(custom_metrics.javascript),
52+
TO_JSON_STRING(custom_metrics.markup),
53+
TO_JSON_STRING(custom_metrics.media),
54+
TO_JSON_STRING(custom_metrics.origin_trials),
55+
TO_JSON_STRING(custom_metrics.performance),
56+
TO_JSON_STRING(custom_metrics.privacy),
57+
TO_JSON_STRING(custom_metrics.responsive_images),
58+
TO_JSON_STRING(custom_metrics.robots_txt),
59+
TO_JSON_STRING(custom_metrics.security),
60+
TO_JSON_STRING(custom_metrics.structured_data),
61+
TO_JSON_STRING(custom_metrics.third_parties),
62+
TO_JSON_STRING(custom_metrics.well_known),
63+
TO_JSON_STRING(custom_metrics.wpt_bodies),
64+
TO_JSON_STRING(custom_metrics.other)
65+
) AS custom_metrics,
66+
TO_JSON_STRING(lighthouse) AS lighthouse,
67+
features,
68+
technologies,
69+
TO_JSON_STRING(metadata) AS metadata
70+
FROM ${ctx.ref('crawl', 'pages')}
71+
WHERE
72+
date = '${constants.currentMonth}'
73+
`)
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// Declares the `requests_latest` table in the `f1` schema: the rows of
// crawl.requests whose date equals constants.currentMonth.
// payload and summary are serialized with TO_JSON_STRING; every other listed
// column (including request_headers, response_headers and response_body) is
// selected unchanged.
// NOTE(review): `publish`, `constants` and `ctx.ref` are not defined in this file;
// presumably they are provided by the Dataform runtime — confirm.
publish('requests_latest', {
2+
type: 'table',
3+
schema: 'f1',
4+
description: 'The latest date from the crawl.requests table',
5+
// BigQuery storage layout: partitioned on `date` and clustered on the same
// column list that sample_data.requests_10k is switched to in this commit.
bigquery: {
6+
partitionBy: 'date',
7+
clusterBy: ['client', 'is_root_page', 'rank', 'type']
8+
},
9+
// NOTE(review): presumably this tag groups the action with the other
// crawl-completion outputs (pages_10k / requests_10k carry it too) — confirm.
tags: ['crawl_complete']
10+
// preOps issues a SET @@RESERVATION statement pointing at the `enterprise`
// reservation (presumably executed before the main query — confirm);
// query supplies the SELECT that populates the table.
}).preOps(ctx => `
11+
SET @@RESERVATION='projects/httparchive/locations/US/reservations/enterprise';
12+
`).query(ctx => `
13+
SELECT
14+
date,
15+
client,
16+
page,
17+
is_root_page,
18+
root_page,
19+
rank,
20+
url,
21+
is_main_document,
22+
type,
23+
index,
24+
TO_JSON_STRING(payload) AS payload,
25+
TO_JSON_STRING(summary) AS summary,
26+
request_headers,
27+
response_headers,
28+
response_body
29+
FROM ${ctx.ref('crawl', 'requests')}
30+
WHERE
31+
date = '${constants.currentMonth}'
32+
`)

definitions/output/sample_data/pages_10k.js

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,12 +3,13 @@ publish('pages_10k', {
33
schema: 'sample_data',
44
bigquery: {
55
partitionBy: 'date',
6-
clusterBy: ['client', 'is_root_page', 'rank']
6+
clusterBy: ['client', 'is_root_page', 'rank', 'page']
77
},
88
tags: ['crawl_complete']
99
}).query(ctx => `
1010
SELECT *
1111
FROM ${ctx.ref('crawl', 'pages')}
12-
WHERE date = '${constants.currentMonth}' AND
13-
rank <= 10000
12+
WHERE
13+
date = '${constants.currentMonth}' AND
14+
rank <= 10000
1415
`)

definitions/output/sample_data/requests_10k.js

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,12 +3,13 @@ publish('requests_10k', {
33
schema: 'sample_data',
44
bigquery: {
55
partitionBy: 'date',
6-
clusterBy: ['client', 'is_root_page', 'is_main_document', 'type']
6+
clusterBy: ['client', 'is_root_page', 'rank', 'type']
77
},
88
tags: ['crawl_complete']
99
}).query(ctx => `
1010
SELECT *
1111
FROM ${ctx.ref('crawl', 'requests')}
12-
WHERE date = '${constants.currentMonth}' AND
13-
rank <= 10000
12+
WHERE
13+
date = '${constants.currentMonth}' AND
14+
rank <= 10000
1415
`)

0 commit comments

Comments
 (0)