Skip to content

Commit a74d79f

Browse files
committed
fix after migration
1 parent 85eb576 commit a74d79f

4 files changed

Lines changed: 118 additions & 119 deletions

File tree

definitions/output/all/parsed_css.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ publish('parsed_css', {
88
requirePartitionFilter: true
99
},
1010
tags: ['crawl_results_legacy']
11-
}).preOps(ctx => `
11+
}).query(ctx => `
1212
DROP SNAPSHOT TABLE IF EXISTS ${ctx.self()};
1313
1414
CREATE SNAPSHOT TABLE ${ctx.self()}
15-
CLONE ${ctx.ref('crawl', 'parsed_css')};
15+
CLONE ${ctx.ref('crawl', 'parsed_css')}
1616
`)

definitions/output/crawl/pages.js

Lines changed: 40 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -7,62 +7,48 @@ publish('pages', {
77
clusterBy: ['client', 'is_root_page', 'rank', 'page'],
88
requirePartitionFilter: true
99
},
10+
columns: {
11+
date: 'YYYY-MM-DD format of the HTTP Archive monthly crawl',
12+
client: 'Test environment: desktop or mobile',
13+
page: 'The URL of the page being tested',
14+
is_root_page: 'Whether the page is the root of the origin',
15+
root_page: 'The URL of the root page being tested, the origin followed by /',
16+
rank: 'Site popularity rank, from CrUX',
17+
wptid: 'ID of the WebPageTest results',
18+
payload: 'JSON-encoded WebPageTest results for the page',
19+
summary: 'JSON-encoded summarization of the page-level data',
20+
custom_metrics: {
21+
description: 'Custom metrics from WebPageTest',
22+
columns: {
23+
a11y: 'JSON-encoded A11Y metrics',
24+
cms: 'JSON-encoded CMS detection',
25+
cookies: 'JSON-encoded cookie metrics',
26+
css_variables: 'JSON-encoded CSS variable metrics',
27+
ecommerce: 'JSON-encoded ecommerce metrics',
28+
element_count: 'JSON-encoded element count metrics',
29+
javascript: 'JSON-encoded JavaScript metrics',
30+
markup: 'JSON-encoded markup metrics',
31+
media: 'JSON-encoded media metrics',
32+
origin_trials: 'JSON-encoded origin trial metrics',
33+
performance: 'JSON-encoded performance metrics',
34+
privacy: 'JSON-encoded privacy metrics',
35+
responsive_images: 'JSON-encoded responsive image metrics',
36+
robots_txt: 'JSON-encoded robots.txt metrics',
37+
security: 'JSON-encoded security metrics',
38+
structured_data: 'JSON-encoded structured data metrics',
39+
third_parties: 'JSON-encoded third-party metrics',
40+
well_known: 'JSON-encoded well-known metrics',
41+
wpt_bodies: 'JSON-encoded WebPageTest bodies',
42+
other: 'JSON-encoded other custom metrics'
43+
}
44+
},
45+
lighthouse: 'JSON-encoded Lighthouse report',
46+
features: 'Blink features detected at runtime (see https://chromestatus.com/features)',
47+
technologies: 'Technologies detected at runtime (see https://www.wappalyzer.com/)',
48+
metadata: 'Additional metadata about the test'
49+
},
1050
tags: ['crawl_complete']
1151
}).preOps(ctx => `
12-
CREATE SCHEMA IF NOT EXISTS crawl;
13-
14-
CREATE TABLE IF NOT EXISTS ${ctx.self()}
15-
(
16-
date DATE NOT NULL OPTIONS(description='YYYY-MM-DD format of the HTTP Archive monthly crawl'),
17-
client STRING NOT NULL OPTIONS(description='Test environment: desktop or mobile'),
18-
page STRING NOT NULL OPTIONS(description='The URL of the page being tested'),
19-
is_root_page BOOL NOT NULL OPTIONS(description='Whether the page is the root of the origin'),
20-
root_page STRING NOT NULL OPTIONS(description='The URL of the root page being tested, the origin followed by /'),
21-
rank INT64 OPTIONS(description='Site popularity rank, from CrUX'),
22-
wptid STRING OPTIONS(description='ID of the WebPageTest results'),
23-
payload JSON OPTIONS(description='JSON-encoded WebPageTest results for the page'),
24-
summary JSON OPTIONS(description='JSON-encoded summarization of the page-level data'),
25-
custom_metrics STRUCT<
26-
a11y JSON,
27-
cms JSON,
28-
cookies JSON,
29-
css_variables JSON,
30-
ecommerce JSON,
31-
element_count JSON,
32-
javascript JSON,
33-
markup JSON,
34-
media JSON,
35-
origin_trials JSON,
36-
performance JSON,
37-
privacy JSON,
38-
responsive_images JSON,
39-
robots_txt JSON,
40-
security JSON,
41-
structured_data JSON,
42-
third_parties JSON,
43-
well_known JSON,
44-
wpt_bodies JSON,
45-
other JSON
46-
> OPTIONS(description='Custom metrics from WebPageTest'),
47-
lighthouse JSON OPTIONS(description='JSON-encoded Lighthouse report'),
48-
features ARRAY<STRUCT<
49-
feature STRING OPTIONS(description='Blink feature name'),
50-
id STRING OPTIONS(description='Blink feature ID'),
51-
type STRING OPTIONS(description='Blink feature type (css, default)')
52-
>> OPTIONS(description='Blink features detected at runtime (see https://chromestatus.com/features)'),
53-
technologies ARRAY<STRUCT<
54-
technology STRING OPTIONS(description='Name of the detected technology'),
55-
categories ARRAY<STRING> OPTIONS(description='List of categories to which this technology belongs'),
56-
info ARRAY<STRING> OPTIONS(description='Additional metadata about the detected technology, ie version number')
57-
>> OPTIONS(description='Technologies detected at runtime (see https://www.wappalyzer.com/)'),
58-
metadata JSON OPTIONS(description='Additional metadata about the test')
59-
)
60-
PARTITION BY date
61-
CLUSTER BY client, is_root_page, rank, page
62-
OPTIONS(
63-
require_partition_filter=true
64-
);
65-
6652
DELETE FROM ${ctx.self()}
6753
WHERE date = '${constants.currentMonth}' AND
6854
client = 'desktop';

definitions/output/crawl/requests.js

Lines changed: 49 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -7,40 +7,37 @@ publish('requests', {
77
clusterBy: ['client', 'is_root_page', 'type', 'rank'],
88
requirePartitionFilter: true
99
},
10+
columns: {
11+
date: 'YYYY-MM-DD format of the HTTP Archive monthly crawl',
12+
client: 'Test environment: desktop or mobile',
13+
page: 'The URL of the page being tested',
14+
is_root_page: 'Whether the page is the root of the origin.',
15+
root_page: 'The URL of the root page being tested',
16+
rank: 'Site popularity rank, from CrUX',
17+
url: 'The URL of the request',
18+
is_main_document: 'Whether this request corresponds with the main HTML document of the page, which is the first HTML request after redirects',
19+
type: 'Simplified description of the type of resource (script, html, css, text, other, etc)',
20+
index: 'The sequential 0-based index of the request',
21+
payload: 'JSON-encoded WebPageTest result data for this request',
22+
summary: 'JSON-encoded summarization of request data',
23+
request_headers: {
24+
description: 'Request headers',
25+
columns: {
26+
name: 'Request header name',
27+
value: 'Request header value'
28+
}
29+
},
30+
response_headers: {
31+
description: 'Response headers',
32+
columns: {
33+
name: 'Response header name',
34+
value: 'Response header value'
35+
}
36+
},
37+
response_body: 'Text-based response body'
38+
},
1039
tags: ['crawl_complete']
1140
}).preOps(ctx => `
12-
CREATE SCHEMA IF NOT EXISTS crawl;
13-
14-
CREATE TABLE IF NOT EXISTS ${ctx.self()}
15-
(
16-
date DATE NOT NULL OPTIONS(description='YYYY-MM-DD format of the HTTP Archive monthly crawl'),
17-
client STRING NOT NULL OPTIONS(description='Test environment: desktop or mobile'),
18-
page STRING NOT NULL OPTIONS(description='The URL of the page being tested'),
19-
is_root_page BOOL OPTIONS(description='Whether the page is the root of the origin.'),
20-
root_page STRING NOT NULL OPTIONS(description='The URL of the root page being tested'),
21-
rank INT64 OPTIONS(description='Site popularity rank, from CrUX'),
22-
url STRING NOT NULL OPTIONS(description='The URL of the request'),
23-
is_main_document BOOL NOT NULL OPTIONS(description='Whether this request corresponds with the main HTML document of the page, which is the first HTML request after redirects'),
24-
type STRING OPTIONS(description='Simplified description of the type of resource (script, html, css, text, other, etc)'),
25-
index INT64 OPTIONS(description='The sequential 0-based index of the request'),
26-
payload JSON OPTIONS(description='JSON-encoded WebPageTest result data for this request'),
27-
summary JSON OPTIONS(description='JSON-encoded summarization of request data'),
28-
request_headers ARRAY<STRUCT<
29-
name STRING OPTIONS(description='Request header name'),
30-
value STRING OPTIONS(description='Request header value')
31-
>> OPTIONS(description='Request headers'),
32-
response_headers ARRAY<STRUCT<
33-
name STRING OPTIONS(description='Response header name'),
34-
value STRING OPTIONS(description='Response header value')
35-
>> OPTIONS(description='Response headers'),
36-
response_body STRING OPTIONS(description='Text-based response body')
37-
)
38-
PARTITION BY date
39-
CLUSTER BY client, is_root_page, type, rank
40-
OPTIONS(
41-
require_partition_filter=true
42-
);
43-
4441
CREATE TEMP FUNCTION pruneHeaders(
4542
jsonObject JSON
4643
) RETURNS JSON
@@ -105,8 +102,16 @@ FROM (
105102
FROM ${ctx.ref('crawl_staging', 'requests')}
106103
WHERE date = '${constants.currentMonth}'
107104
AND client = 'desktop'
108-
${constants.devTABLESAMPLE}
109-
)
105+
${constants.devRankFilter}
106+
) AS requests
107+
LEFT JOIN (
108+
SELECT DISTINCT
109+
CONCAT(origin, '/') AS page,
110+
experimental.popularity.rank AS rank
111+
FROM ${ctx.resolve('chrome-ux-report', 'experimental', 'global')}
112+
WHERE yyyymm = ${constants.fnPastMonth(constants.currentMonth).substring(0, 7).replace('-', '')}
113+
) AS crux
114+
ON requests.root_page = crux.page
110115
`).postOps(ctx => `
111116
DELETE FROM ${ctx.self()}
112117
WHERE date = '${constants.currentMonth}' AND
@@ -157,6 +162,14 @@ FROM (
157162
FROM ${ctx.ref('crawl_staging', 'requests')}
158163
WHERE date = '${constants.currentMonth}'
159164
AND client = 'mobile'
160-
${constants.devTABLESAMPLE}
161-
)
165+
${constants.devRankFilter}
166+
) AS requests
167+
LEFT JOIN (
168+
SELECT DISTINCT
169+
CONCAT(origin, '/') AS page,
170+
experimental.popularity.rank AS rank
171+
FROM ${ctx.resolve('chrome-ux-report', 'experimental', 'global')}
172+
WHERE yyyymm = ${constants.fnPastMonth(constants.currentMonth).substring(0, 7).replace('-', '')}
173+
) AS crux
174+
ON requests.root_page = crux.page;
162175
`)

infra/tf/.terraform.lock.hcl

Lines changed: 27 additions & 27 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)