diff --git a/.github/workflows/pr-security-review.yml b/.github/workflows/pr-security-review.yml
index 131361a93..697cdc7b1 100644
--- a/.github/workflows/pr-security-review.yml
+++ b/.github/workflows/pr-security-review.yml
@@ -167,19 +167,39 @@ jobs:
# the base branch locally too. fetch-depth: 0 grabs the full history.
fetch-depth: 0
- - name: Set origin/HEAD for /security-review skill
+ - name: Prepare base ref for /security-review skill
env:
BASE_REF: ${{ steps.pr.outputs.base_ref }}
run: |
set -euo pipefail
- # actions/checkout doesn't set up the remote's symbolic HEAD ref, so
- # `git diff origin/HEAD...` (the first command the bundled
- # /security-review skill runs) fails with "ambiguous argument
- # 'origin/HEAD...': unknown revision". Point origin/HEAD at the PR's
- # base branch so the skill resolves the diff against the right ref.
+ # The bundled /security-review skill's SessionStart hook runs
+ # `git diff --name-only origin/HEAD...` as its first command. Two
+ # things have to be true for that to succeed:
+ # 1. origin/HEAD has to be a valid ref. actions/checkout doesn't
+ # set up the remote's symbolic HEAD, so we point it at the PR's
+ # base branch.
+ # 2. The PR head and origin/ have to share a merge base in
+ # the local clone. actions/checkout@v6 with `ref: `
+ # fetches the head's history but on fork PRs may NOT fetch
+ # origin/ into the clone — leaving `git diff origin/HEAD...`
+ # to fail with "no merge base", which silently bombs the skill
+ # (num_turns=0, model never invoked) and would otherwise look
+ # like a clean review with zero findings.
+ # Fetching origin/ explicitly closes that gap.
+ git fetch --no-tags origin "+refs/heads/$BASE_REF:refs/remotes/origin/$BASE_REF"
git remote set-head origin "$BASE_REF"
git symbolic-ref refs/remotes/origin/HEAD
+ # Sanity: ensure the merge base actually resolves before we hand off
+ # to the skill. If it doesn't, fail loudly here rather than letting
+ # the skill silently bail.
+ if ! git merge-base "origin/$BASE_REF" HEAD >/dev/null; then
+ echo "::error::No merge base between HEAD and origin/$BASE_REF — /security-review cannot compute a diff."
+ exit 1
+ fi
+ echo "Merge base: $(git merge-base "origin/$BASE_REF" HEAD)"
+ echo "Files changed: $(git diff --name-only "origin/$BASE_REF...HEAD" | wc -l)"
+
- name: Configure AWS credentials (OIDC)
uses: aws-actions/configure-aws-credentials@v6
with:
@@ -213,6 +233,50 @@ jobs:
--model us.anthropic.claude-opus-4-7 --max-turns 30 --allowedTools
mcp__github_inline_comment__create_inline_comment
+ - name: Verify model actually ran
+ id: model-ran
+ # The action exits 0 even when the model was never invoked (e.g. a
+ # SessionStart hook errored before the first turn). Treating that as
+ # success would let the workflow falsely report "no high-confidence
+ # findings" — that's exactly what happened on PR #1474, where the
+ # `/security-review` skill's first `git diff origin/HEAD...` hit a
+ # "no merge base" error, the SDK still returned `subtype: success`
+ # with `num_turns: 0` and zero tokens, and the buffer was empty.
+ #
+ # The action writes its full SDK transcript to
+ # ${RUNNER_TEMP}/claude-execution-output.json. We pull the final
+ # result envelope and require that the model actually took turns and
+ # spent tokens. If it didn't, mark the run as not-actually-completed
+ # so the summary comment uses the failure branch instead of pretending
+ # there were no findings.
+ if: steps.review.conclusion == 'success' || steps.review.conclusion == 'failure'
+ env:
+ # The action exposes its full SDK transcript path as the
+ # `execution_file` step output (a JSON array of stream events; the
+ # final element is the result envelope). We fall back to the known
+ # path under RUNNER_TEMP if for some reason the output is missing.
+ OUTPUT_JSON:
+ ${{ steps.review.outputs.execution_file || format('{0}/claude-execution-output.json', runner.temp) }}
+ run: |
+ set -euo pipefail
+ if [ ! -s "$OUTPUT_JSON" ]; then
+ echo "::warning::No claude-execution-output.json found at $OUTPUT_JSON; cannot verify the model ran."
+ echo "ran=unknown" >> "$GITHUB_OUTPUT"
+ echo "num_turns=0" >> "$GITHUB_OUTPUT"
+ exit 0
+ fi
+ NUM_TURNS=$(jq -r '.[-1].num_turns // 0' "$OUTPUT_JSON")
+ IS_ERROR=$(jq -r '.[-1].is_error // false' "$OUTPUT_JSON")
+ OUTPUT_TOKENS=$(jq -r '.[-1].usage.output_tokens // 0' "$OUTPUT_JSON")
+ echo "num_turns=$NUM_TURNS, is_error=$IS_ERROR, output_tokens=$OUTPUT_TOKENS"
+ echo "num_turns=$NUM_TURNS" >> "$GITHUB_OUTPUT"
+ if [ "$IS_ERROR" = "true" ] || [ "$NUM_TURNS" = "0" ] || [ "$OUTPUT_TOKENS" = "0" ]; then
+ echo "::error::Claude Code SDK reported success but the model never ran productively (num_turns=$NUM_TURNS, output_tokens=$OUTPUT_TOKENS, is_error=$IS_ERROR). The /security-review skill likely bailed before analysis (e.g. SessionStart hook error). Refusing to report 'no findings'."
+ echo "ran=false" >> "$GITHUB_OUTPUT"
+ exit 1
+ fi
+ echo "ran=true" >> "$GITHUB_OUTPUT"
+
- name: Count buffered findings
id: findings
# Only count if the review step actually ran (success or failure - both produce
@@ -240,6 +304,8 @@ jobs:
PR_NUMBER: ${{ steps.pr.outputs.number }}
FINDING_COUNT: ${{ steps.findings.outputs.count }}
REVIEW_CONCLUSION: ${{ steps.review.conclusion }}
+ MODEL_RAN: ${{ steps.model-ran.outputs.ran }}
+ NUM_TURNS: ${{ steps.model-ran.outputs.num_turns }}
RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
with:
github-token: ${{ steps.app-token.outputs.token }}
@@ -247,10 +313,19 @@ jobs:
const prNumber = parseInt(process.env.PR_NUMBER, 10);
const count = parseInt(process.env.FINDING_COUNT || '0', 10);
const conclusion = process.env.REVIEW_CONCLUSION || 'skipped';
+ const modelRan = process.env.MODEL_RAN || 'unknown';
+ const numTurns = process.env.NUM_TURNS || '0';
const runUrl = process.env.RUN_URL;
let body;
- if (conclusion === 'success') {
+ if (modelRan !== 'true') {
+ // Two cases land here, both unsafe to report as "no findings":
+ // - 'false': SDK exited 0 but transcript shows the model never ran productively
+ // (e.g. /security-review's SessionStart hook errored before the first turn).
+ // - 'unknown': claude-execution-output.json was missing, so we couldn't verify
+ // the model ran at all. Treat as not-verified rather than silently green.
+ body = `**Claude Security Review:** the review did not actually analyze this PR (model took ${numTurns} turn${numTurns === '1' ? '' : 's'} — the skill likely failed during setup). See the [run](${runUrl}) for details; a later push or re-run is needed for a real review.`;
+ } else if (conclusion === 'success') {
body =
count > 0
? `**Claude Security Review:** posted ${count} inline finding${count === 1 ? '' : 's'} on this PR. ([run](${runUrl}))`