We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 8e01af9, commit f34e8d6 (Copy full SHA for f34e8d6)
1 file changed
definitions/output/crawl/pages.js
@@ -3,11 +3,13 @@ assert('corrupted_technology_values')
3
.tags(['crawl_complete'])
4
.query(ctx => `
5
SELECT
6
- date,
7
- client,
8
- tech,
9
- COUNT(DISTINCT page) AS cnt_pages,
10
- ARRAY_AGG(DISTINCT page LIMIT 3) AS sample_pages
+ /*
+ date,
+ client,
+ tech,
+ ARRAY_AGG(DISTINCT page LIMIT 3) AS sample_pages,
11
+ */
12
+ COUNT(DISTINCT page) AS cnt_pages
13
FROM ${ctx.ref('crawl_staging', 'pages')} AS pages
14
LEFT JOIN pages.technologies AS tech
15
LEFT JOIN tech.categories AS category
@@ -18,11 +20,14 @@ WHERE
18
20
OR category NOT IN (SELECT DISTINCT name FROM wappalyzer.categories)
19
21
OR ARRAY_LENGTH(tech.categories) = 0
22
)
23
+/*
24
GROUP BY
25
date,
26
client,
27
tech
28
ORDER BY cnt_pages DESC
29
+*/
30
+HAVING cnt_pages > 200
31
`)
32
33
publish('pages', {
0 commit comments