-
-
Notifications
You must be signed in to change notification settings - Fork 19
401 lines (368 loc) · 19.9 KB
/
sharing-server-deploy.yml
File metadata and controls
401 lines (368 loc) · 19.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
name: Deploy Sharing Server

# Runs on every push that touches the sharing server or this workflow file,
# and on manual dispatch.
#   main   → production environment (sharing-server-prod)
#   branch → testing environment    (sharing-test-<slug><hash>)
on:
  push:
    branches:
      - "**"
    paths:
      - "sharing-server/**"
      - ".github/workflows/sharing-server-deploy.yml"
  workflow_dispatch:
    inputs:
      deploy_to_test:
        description: 'Deploy main branch to the test environment instead of production'
        type: boolean
        default: false

# At most one deploy per ref runs at a time, so concurrent Terraform runs cannot
# collide on the same remote state file ("state blob already locked" errors).
# On branches a newer push cancels the run that is still in progress; on main
# the in-progress run is never cancelled, so a started production deploy is
# allowed to finish.
concurrency:
  group: sharing-server-deploy-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

# Minimal baseline; each job declares only the extra scopes it needs.
permissions:
  contents: read
jobs:
  # ── Compute environment metadata ─────────────────────────────────────────────
  # Decides which environment this run deploys to and publishes the derived
  # names as job outputs for the build and deploy jobs:
  #   main                          → production      (sharing-server-prod)
  #   main + deploy_to_test=true    → stable testing  (sharing-server-test)
  #   any other ref                 → per-branch testing (sharing-test-<slug><hash>)
  setup:
    name: Compute deployment environment
    runs-on: ubuntu-latest
    outputs:
      app_name: ${{ steps.env.outputs.app_name }}
      state_key: ${{ steps.env.outputs.state_key }}
      environment: ${{ steps.env.outputs.environment }}
      is_prod: ${{ steps.env.outputs.is_prod }}
      min_replicas: ${{ steps.env.outputs.min_replicas }}
      allow_custom_domain: ${{ steps.env.outputs.allow_custom_domain }}
    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@8d3c67de8e2fe68ef647c8db1e6a09f647780f40  # v2.19.0
        with:
          egress-policy: audit

      - name: Compute deployment parameters
        id: env
        # Ref and branch name are user-controlled (a branch name may contain
        # quotes, `$`, backticks, `;`). They are handed to the script as
        # environment variables instead of being expanded into the script text,
        # so they can never be interpreted as shell code.
        env:
          FORCE_TEST: ${{ inputs.deploy_to_test }}
          REF: ${{ github.ref }}
          BRANCH: ${{ github.ref_name }}
        run: |
          if [[ "$REF" == "refs/heads/main" && "$FORCE_TEST" != "true" ]]; then
            {
              echo "app_name=sharing-server-prod"
              echo "state_key=sharing-server/prod.tfstate"
              echo "environment=production"
              echo "is_prod=true"
              echo "min_replicas=1"
              echo "allow_custom_domain=true"
            } >> "$GITHUB_OUTPUT"
          elif [[ "$REF" == "refs/heads/main" && "$FORCE_TEST" == "true" ]]; then
            # Stable test environment — fixed app name so ai-fluency-server-test.devopsjournal.io
            # always points to the same ACA app. Keeps min_replicas=1 so it stays warm.
            {
              echo "app_name=sharing-server-test"
              echo "state_key=sharing-server/test.tfstate"
              echo "environment=testing"
              echo "is_prod=false"
              echo "min_replicas=1"
              echo "allow_custom_domain=true"
            } >> "$GITHUB_OUTPUT"
          else
            # Slugify: lowercase, replace non-alphanumeric with hyphens, dedupe hyphens, strip leading/trailing hyphens
            SLUG=$(echo "$BRANCH" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9]/-/g' | sed 's/-\+/-/g' | sed 's/^-//;s/-$//')
            # Truncate slug to 13 chars and strip any trailing hyphen that truncation may leave
            SLUG_TRUNC=$(echo "$SLUG" | cut -c1-13 | sed 's/-$//')
            # 6-char hash of full branch name to prevent collisions on identical prefixes.
            # printf '%s' emits exactly the branch-name bytes (no trailing newline), so the
            # hash — and therefore the app name and state key — is stable across runs.
            HASH=$(printf '%s' "$BRANCH" | sha256sum | cut -c1-6)
            APP_NAME="sharing-test-${SLUG_TRUNC}${HASH}"
            {
              echo "app_name=${APP_NAME}"
              echo "state_key=sharing-server/test-${SLUG_TRUNC}${HASH}.tfstate"
              echo "environment=testing"
              echo "is_prod=false"
              echo "min_replicas=0"
              echo "allow_custom_domain=false"
            } >> "$GITHUB_OUTPUT"
          fi
# ── Build & push container image to GHCR ─────────────────────────────────────
  # Builds the image from the sharing-server directory, pushes it to GHCR and
  # exposes a digest-pinned image reference as the `image` job output.
  build:
    name: Build & push container image
    runs-on: ubuntu-latest
    needs: setup
    permissions:
      contents: read
      packages: write
    outputs:
      image: ${{ steps.image-ref.outputs.image }}
    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@8d3c67de8e2fe68ef647c8db1e6a09f647780f40  # v2.19.0
        with:
          egress-policy: audit

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9  # v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Docker metadata
        id: meta
        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051  # v5
        with:
          images: ghcr.io/${{ github.repository_owner }}/copilot-sharing-server
          # Every build gets a full-length sha-<commit> tag; `latest` only on main.
          tags: |
            type=sha,prefix=sha-,format=long
            type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}

      - name: Set build metadata
        id: build-meta
        run: |
          echo "date=$(date -u +'%Y-%m-%d %H:%M UTC')" >> "$GITHUB_OUTPUT"

      - name: Build and push
        id: build
        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8  # v6
        with:
          context: sharing-server
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          build-args: |
            DEPLOY_SHA=${{ github.sha }}
            DEPLOY_BRANCH=${{ github.ref_name }}
            DEPLOY_DATE=${{ steps.build-meta.outputs.date }}

      - name: Set image output (digest-pinned reference)
        id: image-ref
        # Container image repository names must be lower-case, but a GitHub
        # owner name may contain upper-case letters. Lower-case the owner here
        # so the reference handed to the deploy job is always a valid image
        # reference. Values are passed through env rather than expanded into
        # the script text.
        env:
          OWNER: ${{ github.repository_owner }}
          DIGEST: ${{ steps.build.outputs.digest }}
        run: |
          echo "image=ghcr.io/${OWNER,,}/copilot-sharing-server@${DIGEST}" >> "$GITHUB_OUTPUT"
# ── Terraform deploy ──────────────────────────────────────────────────────────
  # Applies the Terraform configuration in sharing-server/infra for the
  # environment chosen by the setup job, using the image built by the build job,
  # then polls the health endpoint and writes a summary.
  # Every step after the credential check is skipped (not failed) when no Azure
  # credentials are configured for the selected GitHub Environment.
  deploy:
    name: Deploy to Azure (${{ needs.setup.outputs.environment }})
    runs-on: ubuntu-latest
    needs: [setup, build]
    # GitHub Environment gates this job and provides environment-scoped secrets.
    # Required environments:
    #   production — service principal with Contributor on the resource group
    #   testing    — service principal with Contributor on the resource group
    environment: ${{ needs.setup.outputs.environment }}
    permissions:
      contents: read
    # ARM_* env vars are read by both the AzureRM Terraform backend and provider.
    env:
      ARM_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
      ARM_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }}
      ARM_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
      ARM_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@8d3c67de8e2fe68ef647c8db1e6a09f647780f40  # v2.19.0
        with:
          egress-policy: audit

      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      - name: Check Azure credentials
        id: prereqs
        env:
          DEPLOY_ENVIRONMENT: ${{ needs.setup.outputs.environment }}
        run: |
          # An empty ARM_CLIENT_ID means the environment has no Azure secrets;
          # record that so the remaining steps are skipped.
          if [[ -z "$ARM_CLIENT_ID" ]]; then
            echo "⚠️ Azure credentials not configured for '${DEPLOY_ENVIRONMENT}' environment — skipping deployment." >> "$GITHUB_STEP_SUMMARY"
            echo "configured=false" >> "$GITHUB_OUTPUT"
          else
            echo "configured=true" >> "$GITHUB_OUTPUT"
          fi

      - name: Setup Terraform
        if: steps.prereqs.outputs.configured == 'true'
        uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd  # v3

      - name: Terraform init
        if: steps.prereqs.outputs.configured == 'true'
        working-directory: sharing-server/infra
        # Backend settings are passed as environment variables and expanded by
        # the shell, not spliced into the script text.
        env:
          TF_STATE_RESOURCE_GROUP: ${{ vars.TF_STATE_RESOURCE_GROUP }}
          TF_STATE_STORAGE_ACCOUNT: ${{ vars.TF_STATE_STORAGE_ACCOUNT }}
          TF_STATE_CONTAINER: ${{ vars.TF_STATE_CONTAINER }}
          TF_STATE_KEY: ${{ needs.setup.outputs.state_key }}
        run: |
          terraform init -reconfigure \
            -backend-config="resource_group_name=${TF_STATE_RESOURCE_GROUP}" \
            -backend-config="storage_account_name=${TF_STATE_STORAGE_ACCOUNT}" \
            -backend-config="container_name=${TF_STATE_CONTAINER}" \
            -backend-config="key=${TF_STATE_KEY}"

      - name: Reconcile custom domain Terraform state
        # Custom domains (and their managed certs) are only meaningful on stable, long-lived
        # environments (production). Per-branch testing environments each get a unique ACA FQDN
        # which already has Azure TLS — setting SHARING_CUSTOM_DOMAIN on the testing GitHub
        # environment will be ignored here to prevent 60-minute cert provisioning on every PR.
        if: steps.prereqs.outputs.configured == 'true' && vars.SHARING_CUSTOM_DOMAIN != '' && needs.setup.outputs.allow_custom_domain == 'true'
        working-directory: sharing-server/infra
        env:
          TF_VAR_resource_group_name: ${{ vars.AZURE_RESOURCE_GROUP }}
          TF_VAR_location: ${{ vars.AZURE_LOCATION || 'westeurope' }}
          TF_VAR_app_name: ${{ needs.setup.outputs.app_name }}
          TF_VAR_container_image: ${{ needs.build.outputs.image }}
          TF_VAR_github_client_id: ${{ secrets.SHARING_GITHUB_CLIENT_ID }}
          TF_VAR_github_client_secret: ${{ secrets.SHARING_GITHUB_CLIENT_SECRET }}
          TF_VAR_session_secret: ${{ secrets.SHARING_SESSION_SECRET }}
          TF_VAR_allowed_github_org: ${{ vars.SHARING_ALLOWED_GITHUB_ORG }}
          TF_VAR_github_org_check_token: ${{ secrets.ORG_CHECK_TOKEN }}
          TF_VAR_admin_github_logins: ${{ vars.SHARING_ADMIN_GITHUB_LOGINS }}
          TF_VAR_min_replicas: ${{ needs.setup.outputs.min_replicas }}
          TF_VAR_custom_domain: ${{ vars.SHARING_CUSTOM_DOMAIN }}
        run: |
          az login --service-principal -u "$ARM_CLIENT_ID" -p "$ARM_CLIENT_SECRET" --tenant "$ARM_TENANT_ID" --output none
          az account set --subscription "$ARM_SUBSCRIPTION_ID"

          ENV_NAME="${TF_VAR_app_name}-env"
          EXPECTED_CERT_NAME="sharing-cert"

          # Detect if TF state has a cert with a name that doesn't match what Terraform
          # would create (e.g. a portal-created cert was previously imported). If so, we
          # must clean up Azure + state before apply, otherwise Terraform will try to
          # destroy the old cert while the custom domain is still bound to it (400 error).
          # `|| true` keeps the step alive when the resource is not in state at all;
          # CURRENT_CERT_NAME is then empty.
          CURRENT_CERT_NAME=$(terraform state show 'azurerm_container_app_environment_managed_certificate.this[0]' 2>/dev/null \
            | grep -E '^\s+name\s+=' | head -1 \
            | sed 's/.*= "\(.*\)".*/\1/' || true)

          if [[ "$CURRENT_CERT_NAME" == "$EXPECTED_CERT_NAME" ]]; then
            echo "Cert already TF-managed as '$EXPECTED_CERT_NAME'. No cleanup needed."
          else
            # Cert is absent from TF state or has a mismatched name.
            # Before deleting anything, check whether the correctly-named cert already
            # exists in Azure (e.g. a previous apply timed out while polling for the cert
            # to become Succeeded, leaving it stranded in Azure but dropped from TF state).
            AZURE_CERT_ID=$(az containerapp env certificate list \
              --name "$ENV_NAME" \
              --resource-group "$TF_VAR_resource_group_name" \
              --query "[?name=='$EXPECTED_CERT_NAME'].id | [0]" \
              -o tsv 2>/dev/null || true)

            if [[ -n "$AZURE_CERT_ID" && "$AZURE_CERT_ID" != "None" ]]; then
              # The correctly-named cert exists in Azure but TF lost track of it.
              # Import it so apply doesn't delete-and-recreate (which resets provisioning
              # and triggers another 60-minute wait).
              echo "Cert '$EXPECTED_CERT_NAME' found in Azure but not in TF state. Importing..."
              if terraform import 'azurerm_container_app_environment_managed_certificate.this[0]' "$AZURE_CERT_ID"; then
                # Drop stale custom-domain state so cert_binding re-runs to re-bind.
                terraform state rm 'azurerm_container_app_custom_domain.this[0]' 2>/dev/null || true
                echo "Import done. Terraform will rebind the cert without recreating it."
              else
                # Import failed; delete the Azure cert so apply doesn't hit "already exists".
                echo "Import failed. Deleting Azure cert so Terraform can create a fresh one."
                az containerapp hostname delete \
                  --name "$TF_VAR_app_name" \
                  --resource-group "$TF_VAR_resource_group_name" \
                  --hostname "$TF_VAR_custom_domain" --yes 2>/dev/null || true
                az containerapp env certificate delete \
                  --name "$ENV_NAME" \
                  --resource-group "$TF_VAR_resource_group_name" \
                  --certificate "$EXPECTED_CERT_NAME" --yes 2>/dev/null || true
                terraform state rm 'azurerm_container_app_custom_domain.this[0]' 2>/dev/null || true
                terraform state rm 'azurerm_container_app_environment_managed_certificate.this[0]' 2>/dev/null || true
                echo "Cleanup done. Terraform will create cert and domain binding from scratch."
              fi
            else
              echo "Cert not TF-managed (current: '${CURRENT_CERT_NAME:-none}'). Cleaning up Azure resources so Terraform can recreate them."
              # Remove hostname binding first (cert cannot be deleted while a domain uses it)
              az containerapp hostname delete \
                --name "$TF_VAR_app_name" \
                --resource-group "$TF_VAR_resource_group_name" \
                --hostname "$TF_VAR_custom_domain" --yes 2>/dev/null || true
              # Find the cert by subject name and delete it
              AZURE_CERT_NAME=$(az containerapp env certificate list \
                --name "$ENV_NAME" \
                --resource-group "$TF_VAR_resource_group_name" \
                --query "[?properties.subjectName=='$TF_VAR_custom_domain'].name | [0]" \
                -o tsv 2>/dev/null || true)
              if [[ -n "$AZURE_CERT_NAME" && "$AZURE_CERT_NAME" != "None" ]]; then
                echo "Deleting Azure cert: $AZURE_CERT_NAME"
                az containerapp env certificate delete \
                  --name "$ENV_NAME" \
                  --resource-group "$TF_VAR_resource_group_name" \
                  --certificate "$AZURE_CERT_NAME" --yes 2>/dev/null || true
              fi
              # Remove stale TF state entries so Terraform creates fresh resources
              terraform state rm 'azurerm_container_app_custom_domain.this[0]' 2>/dev/null || true
              terraform state rm 'azurerm_container_app_environment_managed_certificate.this[0]' 2>/dev/null || true
              echo "Cleanup done. Terraform will create cert and domain binding from scratch."
            fi
          fi

      - name: Terraform plan
        if: steps.prereqs.outputs.configured == 'true'
        working-directory: sharing-server/infra
        env:
          TF_VAR_resource_group_name: ${{ vars.AZURE_RESOURCE_GROUP }}
          TF_VAR_location: ${{ vars.AZURE_LOCATION || 'westeurope' }}
          TF_VAR_app_name: ${{ needs.setup.outputs.app_name }}
          TF_VAR_container_image: ${{ needs.build.outputs.image }}
          TF_VAR_github_client_id: ${{ secrets.SHARING_GITHUB_CLIENT_ID }}
          TF_VAR_github_client_secret: ${{ secrets.SHARING_GITHUB_CLIENT_SECRET }}
          TF_VAR_session_secret: ${{ secrets.SHARING_SESSION_SECRET }}
          TF_VAR_allowed_github_org: ${{ vars.SHARING_ALLOWED_GITHUB_ORG }}
          TF_VAR_github_org_check_token: ${{ secrets.ORG_CHECK_TOKEN }}
          TF_VAR_admin_github_logins: ${{ vars.SHARING_ADMIN_GITHUB_LOGINS }}
          TF_VAR_min_replicas: ${{ needs.setup.outputs.min_replicas }}
          TF_VAR_custom_domain: ${{ vars.SHARING_CUSTOM_DOMAIN }}
        run: terraform plan -out=tfplan

      - name: Terraform apply
        if: steps.prereqs.outputs.configured == 'true'
        working-directory: sharing-server/infra
        env:
          TF_VAR_resource_group_name: ${{ vars.AZURE_RESOURCE_GROUP }}
          TF_VAR_location: ${{ vars.AZURE_LOCATION || 'westeurope' }}
          TF_VAR_app_name: ${{ needs.setup.outputs.app_name }}
          TF_VAR_container_image: ${{ needs.build.outputs.image }}
          TF_VAR_github_client_id: ${{ secrets.SHARING_GITHUB_CLIENT_ID }}
          TF_VAR_github_client_secret: ${{ secrets.SHARING_GITHUB_CLIENT_SECRET }}
          TF_VAR_session_secret: ${{ secrets.SHARING_SESSION_SECRET }}
          TF_VAR_allowed_github_org: ${{ vars.SHARING_ALLOWED_GITHUB_ORG }}
          TF_VAR_github_org_check_token: ${{ secrets.ORG_CHECK_TOKEN }}
          TF_VAR_admin_github_logins: ${{ vars.SHARING_ADMIN_GITHUB_LOGINS }}
          TF_VAR_min_replicas: ${{ needs.setup.outputs.min_replicas }}
          TF_VAR_custom_domain: ${{ vars.SHARING_CUSTOM_DOMAIN }}
        run: terraform apply -auto-approve tfplan

      - name: Health check
        if: steps.prereqs.outputs.configured == 'true'
        working-directory: sharing-server/infra
        run: |
          HEALTH_URL=$(terraform output -raw health_url)
          echo "Waiting for ${HEALTH_URL} to respond..."
          for i in $(seq 1 12); do
            # curl's %{http_code} already prints 000 when no HTTP response was
            # received, so a failed curl must not append a second "000" to the
            # captured value. `|| true` only neutralises the non-zero exit
            # status; the default below covers the case where curl printed
            # nothing at all.
            STATUS=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 "${HEALTH_URL}" || true)
            STATUS="${STATUS:-000}"
            if [[ "$STATUS" == "200" ]]; then
              echo "✅ Health check passed (attempt ${i})"
              exit 0
            fi
            echo "  attempt ${i}/12 — HTTP ${STATUS}, retrying in 10s..."
            sleep 10
          done
          echo "❌ Health check failed after 2 minutes"
          exit 1

      - name: Output deployment summary
        if: steps.prereqs.outputs.configured == 'true'
        working-directory: sharing-server/infra
        env:
          IS_PROD: ${{ needs.setup.outputs.is_prod }}
        run: |
          APP_URL=$(terraform output -raw app_url)
          ACA_FQDN=$(terraform output -raw aca_fqdn)
          OAUTH_URL=$(terraform output -raw oauth_callback_url)
          DASH_URL=$(terraform output -raw dashboard_url)
          HEALTH_URL=$(terraform output -raw health_url)
          {
            echo "## 🚀 Sharing Server Deployed"
            echo ""
            echo "| | URL |"
            echo "|---|---|"
            echo "| App | ${APP_URL} |"
            echo "| Dashboard | ${DASH_URL} |"
            echo "| Health | ${HEALTH_URL} |"
            echo "| OAuth callback | ${OAUTH_URL} |"
            echo "| ACA FQDN (CNAME target) | ${ACA_FQDN} |"
          } >> "$GITHUB_STEP_SUMMARY"
          # Production runs get the DNS instructions for attaching a custom domain.
          if [[ "$IS_PROD" == "true" ]]; then
            {
              echo ""
              echo "> 📋 **Custom domain DNS setup** — if \`SHARING_CUSTOM_DOMAIN\` is not yet set,"
              echo "> add these records at your DNS provider, then set the variable and redeploy:"
              echo "> - \`CNAME <subdomain>\` → \`${ACA_FQDN}\`"
              echo "> - \`TXT asuid.<subdomain>\` → *(ACA environment verification ID from Azure portal)*"
            } >> "$GITHUB_STEP_SUMMARY"
          fi
          # Test runs get a reminder that the OAuth callback URL must be registered.
          if [[ "$IS_PROD" == "false" ]]; then
            {
              echo ""
              echo "> ⚠️ **Test deployment** — the dashboard OAuth login requires the callback URL above"
              echo "> to be registered in your GitHub OAuth App before the login flow will work."
            } >> "$GITHUB_STEP_SUMMARY"
          fi