Skip to content

Commit 2a7c8b4

Browse files
max-ostapenko authored and GCP Dataform committed
Merge branch 'main' into main
2 parents b59c4ed + e62f22c commit 2a7c8b4

10 files changed

Lines changed: 56 additions & 12 deletions

File tree

.github/linters/.trivyignore

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
# Ignore the dataplexAdmin role issue
2+
AVD-GCP-0007

.github/linters/trivy.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
ignorefile: ".github/linters/.trivyignore"

.github/linters/zizmor.yaml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
rules:
2+
unpinned-uses:
3+
ignore:
4+
- ci.yaml

.github/workflows/ci.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,13 +24,13 @@ jobs:
2424
uses: actions/checkout@v5
2525
with:
2626
fetch-depth: 0
27+
persist-credentials: false
2728

2829
- name: Lint Code Base
29-
uses: super-linter/super-linter/slim@v8.0.0
30+
uses: super-linter/super-linter/slim@v8.1.0
3031
env:
3132
DEFAULT_BRANCH: main
3233
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
33-
LINTER_RULES_PATH: .
3434
VALIDATE_JSCPD: false
3535
VALIDATE_JAVASCRIPT_PRETTIER: false
3636
VALIDATE_MARKDOWN_PRETTIER: false

definitions/output/crawl/parsed_css.js

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,16 @@ publish('parsed_css', {
77
clusterBy: ['client', 'is_root_page', 'rank', 'page'],
88
requirePartitionFilter: true
99
},
10+
columns: {
11+
date: 'YYYY-MM-DD format of the HTTP Archive monthly crawl',
12+
client: 'Test environment: desktop or mobile',
13+
page: 'The URL of the page being tested',
14+
is_root_page: 'Whether the page is the root of the origin.',
15+
root_page: 'The URL of the root page being tested',
16+
rank: 'Site popularity rank, from CrUX',
17+
url: 'The URL of the request',
18+
css: 'The parsed CSS, in JSON format'
19+
},
1020
tags: ['crawl_complete']
1121
}).preOps(ctx => `
1222
SET @@RESERVATION='${constants.reservation_id}';
@@ -16,9 +26,7 @@ WHERE date = '${constants.currentMonth}'
1626
AND client = 'desktop';
1727
`).query(ctx => `
1828
SELECT
19-
* EXCEPT(css),
20-
NULL AS css_backup,
21-
SAFE.PARSE_JSON(css, wide_number_mode=>'round') AS css
29+
*
2230
FROM ${ctx.ref('crawl_staging', 'parsed_css')}
2331
WHERE date = '${constants.currentMonth}'
2432
AND client = 'desktop'
@@ -30,9 +38,7 @@ WHERE date = '${constants.currentMonth}'
3038
3139
INSERT INTO ${ctx.self()}
3240
SELECT
33-
* EXCEPT(css),
34-
NULL AS css_backup,
35-
SAFE.PARSE_JSON(css, wide_number_mode=>'round') AS css
41+
*
3642
FROM ${ctx.ref('crawl_staging', 'parsed_css')}
3743
WHERE date = '${constants.currentMonth}'
3844
AND client = 'mobile'

infra/bigquery-export/Dockerfile

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,9 @@ FROM node:22-slim
44
# Set the working directory
55
WORKDIR /app
66

7+
# Create a non-root user
8+
RUN groupadd -r appuser && useradd -r -g appuser appuser
9+
710
# Copy package files first for better layer caching
811
COPY package*.json ./
912

@@ -15,4 +18,13 @@ ENV EXPORT_CONFIG=""
1518
# Copy source code
1619
COPY . .
1720

21+
# Change ownership of the app directory to the non-root user
22+
RUN chown -R appuser:appuser /app
23+
24+
# Switch to non-root user
25+
USER appuser
26+
27+
# No healthcheck needed for one-time job containers
28+
HEALTHCHECK NONE
29+
1830
CMD ["node", "index.js"]

infra/dataform-service/Dockerfile

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,9 @@ FROM node:22-slim
33
# Set the working directory
44
WORKDIR /app
55

6+
# Create a non-root user
7+
RUN groupadd -r appuser && useradd -r -g appuser appuser
8+
69
# Copy package files first for better layer caching
710
COPY package*.json ./
811

@@ -12,11 +15,21 @@ RUN npm ci --only=production --quiet --no-fund --no-audit && npm cache clean --f
1215
# Copy source code
1316
COPY . .
1417

18+
# Change ownership of the app directory to the non-root user
19+
RUN chown -R appuser:appuser /app
20+
21+
# Switch to non-root user
22+
USER appuser
23+
1524
# Set default port (Cloud Run will override this)
1625
ENV PORT=8080
1726

1827
# Expose port for Cloud Run
1928
EXPOSE 8080
2029

30+
# Add healthcheck
31+
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
32+
CMD node -e "require('http').get('http://localhost:$PORT/health', (res) => { process.exit(res.statusCode === 200 ? 0 : 1) }).on('error', () => { process.exit(1) })" || exit 1
33+
2134
# Start the function
2235
CMD ["npm", "start"]

infra/dataform-service/index.js

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -223,14 +223,20 @@ async function mainHandler (req, res) {
223223

224224
console.info(`Received request for path: ${path}`)
225225

226-
if (path === '/trigger' || path.startsWith('/trigger/')) {
226+
if (path === '/health') {
227+
// Health check endpoint
228+
res.status(200).json({
229+
status: 'healthy',
230+
timestamp: new Date().toISOString()
231+
})
232+
} else if (path === '/trigger' || path.startsWith('/trigger/')) {
227233
await handleTrigger(req, res)
228234
} else if (path === '/') {
229235
await handleExport(req, res)
230236
} else {
231237
res.status(404).json({
232238
error: 'Not Found',
233-
message: 'Available endpoints: /, /export'
239+
message: 'Available endpoints: /, /trigger, /health'
234240
})
235241
}
236242
}

package.json

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@
22
"name": "crawl-data",
33
"author": "@max-ostapenko",
44
"scripts": {
5-
"format": "npx eslint --fix .; npx markdownlint --ignore-path .gitignore --config package.json --configPointer /markdownlint . --fix; terraform -chdir=infra/tf fmt -recursive",
6-
"lint": "npx eslint .; npx markdownlint --ignore-path .gitignore --config package.json --configPointer /markdownlint .; dataform compile",
5+
"format": "npx eslint -c .github/linters/eslint.config.mjs --fix .; npx markdownlint --ignore-path .gitignore --config package.json --configPointer /markdownlint . --fix; terraform -chdir=infra/tf fmt -recursive",
6+
"lint": "npx eslint -c .github/linters/eslint.config.mjs .; npx markdownlint --ignore-path .gitignore --config package.json --configPointer /markdownlint .; dataform compile",
77
"superlint": "docker run --platform linux/amd64 -e DEFAULT_BRANCH=main -e VALIDATE_GIT_COMMITLINT=false -e VALIDATE_TERRAFORM_TERRASCAN=false -e VALIDATE_TERRAFORM_TFLINT=false -e FIX_JSON_PRETTIER=true -e IGNORE_GITIGNORED_FILES=true -e VALIDATE_ALL_CODEBASE=true -e VALIDATE_JSCPD=false -e RUN_LOCAL=true -v ./:/tmp/lint ghcr.io/super-linter/super-linter:slim-latest"
88
},
99
"dependencies": {

0 commit comments

Comments
 (0)