Skip to content

Commit 4732f85

Browse files
Strict merge of CrUX and crawl (#85)
* versions
* technologies
* categories
* Update definitions/output/reports/tech_report_categories.js
  Co-authored-by: Barry Pollard <barrypollard@google.com>
* all pages in categories = 'ALL'

---------

Co-authored-by: Barry Pollard <barrypollard@google.com>
1 parent 291b453 commit 4732f85

3 files changed

Lines changed: 44 additions & 98 deletions

File tree

definitions/output/reports/tech_report_categories.js

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,26 @@ category_descriptions AS (
2323
FROM ${ctx.ref('wappalyzer', 'categories')}
2424
),
2525
26+
crux AS (
27+
SELECT
28+
IF(device = 'desktop', 'desktop', 'mobile') AS client,
29+
CONCAT(origin, '/') AS root_page
30+
FROM ${ctx.ref('chrome-ux-report', 'materialized', 'device_summary')}
31+
WHERE
32+
date = '${pastMonth}'
33+
AND device IN ('desktop', 'phone')
34+
),
35+
36+
merged_pages AS (
37+
SELECT DISTINCT
38+
client,
39+
technologies,
40+
root_page
41+
FROM pages
42+
INNER JOIN crux
43+
USING (client, root_page)
44+
),
45+
2646
category_stats AS (
2747
SELECT
2848
category,
@@ -35,8 +55,8 @@ category_stats AS (
3555
client,
3656
category,
3757
COUNT(DISTINCT root_page) AS origins
38-
FROM pages
39-
INNER JOIN pages.technologies AS tech
58+
FROM merged_pages
59+
INNER JOIN merged_pages.technologies AS tech
4060
INNER JOIN tech.categories AS category
4161
WHERE
4262
category IS NOT NULL
@@ -87,7 +107,7 @@ FROM (
87107
SELECT
88108
client,
89109
COUNT(DISTINCT root_page) AS origins
90-
FROM pages
110+
FROM merged_pages
91111
GROUP BY client
92112
)
93113
`).postOps(ctx => `

definitions/output/reports/tech_report_technologies.js

Lines changed: 14 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -5,36 +5,17 @@ publish('tech_report_technologies', {
55
type: 'table',
66
tags: ['tech_report']
77
}).query(ctx => `
8-
WITH pages AS (
9-
SELECT DISTINCT
10-
client,
11-
root_page,
12-
tech.technology
13-
FROM ${ctx.ref('crawl', 'pages')} AS pages
14-
INNER JOIN pages.technologies AS tech
15-
WHERE
16-
date = '${pastMonth}'
17-
${constants.devRankFilter}
18-
),
19-
20-
tech_origins AS (
8+
WITH tech_origins AS (
219
SELECT
22-
technology,
23-
STRUCT(
24-
MAX(IF(client = 'desktop', origins, 0)) AS desktop,
25-
MAX(IF(client = 'mobile', origins, 0)) AS mobile
26-
) AS origins
27-
FROM (
28-
SELECT
29-
client,
30-
technology,
31-
COUNT(DISTINCT root_page) AS origins
32-
FROM pages
33-
GROUP BY
34-
client,
35-
technology
36-
)
37-
GROUP BY technology
10+
technology,
11+
adoption AS origins
12+
FROM ${ctx.ref('reports', 'tech_report_adoption')}
13+
WHERE
14+
date = '${pastMonth}'
15+
AND rank = 'ALL'
16+
AND geo = 'ALL'
17+
AND version = 'ALL'
18+
${constants.devRankFilter}
3819
),
3920
4021
technologies AS (
@@ -51,14 +32,6 @@ technologies AS (
5132
description,
5233
categories,
5334
icon
54-
),
55-
56-
total_pages AS (
57-
SELECT
58-
client,
59-
COUNT(DISTINCT root_page) AS origins
60-
FROM pages
61-
GROUP BY client
6235
)
6336
6437
SELECT
@@ -75,16 +48,14 @@ USING(technology)
7548
UNION ALL
7649
7750
SELECT
78-
'ALL' AS technology,
51+
technology,
7952
NULL AS description,
8053
NULL AS icon,
8154
NULL AS category,
8255
NULL AS category_obj,
83-
STRUCT(
84-
MAX(IF(client = 'desktop', origins, 0)) AS desktop,
85-
MAX(IF(client = 'mobile', origins, 0)) AS mobile
86-
) AS origins
87-
FROM total_pages
56+
origins
57+
FROM tech_origins
58+
WHERE technology = 'ALL'
8859
`).postOps(ctx => `
8960
SELECT
9061
reports.run_export_job(

definitions/output/reports/tech_report_versions.js

Lines changed: 7 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -5,61 +5,16 @@ publish('tech_report_versions', {
55
type: 'table',
66
tags: ['tech_report']
77
}).query(ctx => `
8-
WITH pages AS (
9-
SELECT DISTINCT
10-
client,
11-
root_page,
12-
tech.technology,
13-
REGEXP_EXTRACT(version, r'\\d+(?:\\.\\d+)?') AS version
14-
FROM ${ctx.ref('crawl', 'pages')} AS pages
15-
INNER JOIN pages.technologies AS tech
16-
LEFT JOIN tech.info AS version
17-
WHERE
18-
date = '${pastMonth}'
19-
${constants.devRankFilter} AND
20-
tech.technology IS NOT NULL
21-
),
22-
23-
version_origins AS (
24-
SELECT
25-
client,
26-
technology,
27-
version,
28-
COUNT(DISTINCT root_page) AS origins
29-
FROM pages
30-
WHERE version IS NOT NULL
31-
GROUP BY
32-
client,
33-
technology,
34-
version
35-
),
36-
37-
total_origins AS (
38-
SELECT
39-
client,
40-
technology,
41-
COUNT(DISTINCT root_page) AS origins
42-
FROM pages
43-
GROUP BY
44-
client,
45-
technology
46-
)
47-
488
SELECT
49-
client,
509
technology,
5110
version,
52-
origins
53-
FROM version_origins
54-
55-
UNION ALL
56-
57-
SELECT
58-
client,
59-
technology,
60-
'ALL' AS version,
61-
origins
62-
FROM total_origins
11+
adoption AS origins
12+
FROM ${ctx.ref('reports', 'tech_report_adoption')}
13+
WHERE
14+
date = '${pastMonth}'
15+
AND rank = 'ALL'
16+
AND geo = 'ALL'
17+
${constants.devRankFilter}
6318
`).postOps(ctx => `
6419
SELECT
6520
reports.run_export_job(

0 commit comments

Comments
 (0)