diff --git a/.github/workflows/sync-tests.yml b/.github/workflows/sync-tests.yml
new file mode 100644
index 0000000000..2197eb5751
--- /dev/null
+++ b/.github/workflows/sync-tests.yml
@@ -0,0 +1,161 @@
+name: A/V Sync Tests
+
+# Verifies audio/video sync correctness on every platform:
+#   1. Unit + property tests for the timestamp pipeline (encoders, drift
+#      trackers, muxers).
+#   2. The synthetic device matrix: fake cameras/screens/microphones across
+#      frame rates, sample rates, channel counts and delivery pathologies
+#      (jitter, drops, static-screen gaps), driven through the real recording
+#      pipeline and verified at the container level. No capture hardware or
+#      GPU required, so results are deterministic on hosted runners.
+#
+# Findings are published as a job-summary table and a JSON artifact per OS.
+
+on:
+  pull_request:
+    paths:  # PRs run this workflow only when they touch one of these paths
+      - 'crates/recording/**'
+      - 'crates/enc-ffmpeg/**'
+      - 'crates/enc-avfoundation/**'
+      - 'crates/enc-mediafoundation/**'
+      - 'crates/timestamp/**'
+      - 'crates/rendering/**'
+      - 'crates/media-info/**'
+      - '.github/workflows/sync-tests.yml'
+  schedule:
+    - cron: '0 5 * * *'  # daily at 05:00 UTC
+  workflow_dispatch:  # manual trigger, no inputs
+
+concurrency:  # keyed by event + PR number (or full ref) so same-named branches never share a group
+  group: sync-tests-${{ github.event_name }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true  # a newer run in the same group replaces the one still running
+
+permissions:  # workflow-wide GITHUB_TOKEN scope
+  contents: read  # read-only access to repository contents
+
+jobs:
+  sync-tests:  # one leg per runner image listed in the matrix
+    runs-on: ${{ matrix.runner }}
+    permissions:
+      contents: read  # same read-only scope as the workflow-level default
+    strategy:
+      fail-fast: false  # a failure on one OS does not cancel the other legs
+      matrix:
+        runner:
+          - macos-latest
+          - windows-2022
+          - ubuntu-24.04
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Rust setup
+        uses: dtolnay/rust-toolchain@1.88.0  # the action ref selects the Rust toolchain version
+
+      - name: Rust cache
+        uses: ./.github/actions/setup-rust-cache  # repo-local action
+        with:  # target = host triple for the runner OS: Windows -> MSVC, macOS -> aarch64, otherwise Linux x86_64
+          target: ${{ runner.os == 'Windows' && 'x86_64-pc-windows-msvc' || runner.os == 'macOS' && 'aarch64-apple-darwin' || 'x86_64-unknown-linux-gnu' }}
+
+      - name: Install desktop dependencies
+        uses: ./.github/actions/install-desktop-deps  # repo-local action; NOTE(review): presumably per-OS system packages — confirm in the action
+
+      - name: Setup Node
+        uses: actions/setup-node@v4
+        with:
+          node-version: "24"  # quoted so the version is always read as a string
+
+      - name: Native dependencies
+        run: node scripts/setup.js  # NOTE(review): presumably fetches prebuilt native libraries (e.g. FFmpeg) — confirm in scripts/setup.js
+        env:  # same triple selection, in the same order, as the Rust cache step
+          RUST_TARGET_TRIPLE: ${{ runner.os == 'Windows' && 'x86_64-pc-windows-msvc' || runner.os == 'macOS' && 'aarch64-apple-darwin' || 'x86_64-unknown-linux-gnu' }}
+
+      - name: Add FFmpeg DLLs to PATH  # entries appended to GITHUB_PATH apply to the steps that follow
+        if: runner.os == 'Windows'
+        shell: pwsh  # PowerShell does not treat "\" as an escape, so the path is built with single separators
+        run: Add-Content -Path $env:GITHUB_PATH -Value (Join-Path $env:GITHUB_WORKSPACE 'target\ffmpeg\bin')
+
+      - name: Install software Vulkan driver (Linux)  # Mesa Vulkan drivers + loader for rendering without a GPU
+        if: runner.os == 'Linux'  # apt is only used on the Ubuntu leg
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y mesa-vulkan-drivers libvulkan1
+
+      - name: Timestamp pipeline unit + property tests  # no continue-on-error: a failure here skips the matrix and fails the job
+        shell: bash  # explicit so the Windows leg also runs this script under bash
+        run: |
+          cargo test --locked -p cap-timestamp -p cap-enc-ffmpeg
+          cargo test --locked -p cap-recording --lib
+          cargo test --locked -p cap-rendering
+
+      - id: matrix  # read by the final gate through steps.matrix.outcome
+        name: Synthetic device matrix
+        shell: bash
+        continue-on-error: true  # lets the later steps (playback harness, report) run; the final gate re-fails the job
+        env:
+          CAP_SYNC_MATRIX_RANDOM_CASES: ${{ github.event_name == 'schedule' && '40' || '6' }}  # 40 on scheduled runs, 6 otherwise
+          CAP_SYNC_MATRIX_REPORT: ${{ github.workspace }}/sync-matrix-${{ matrix.runner }}.json
+        run: |
+          cargo test --locked -p cap-recording --test sync_matrix -- --nocapture
+
+      # Verifies the editor's playback machinery (decoders, frame scheduling,
+      # audio pipeline) preserves sync. The fixture recording is generated
+      # through the real recording pipeline, so no capture hardware is needed;
+      # rendering uses the platform's software adapter where no GPU exists.
+      # 30s of pattern stabilizes the drift slope against frame-quantization
+      # noise. Playback runs at the default 30 fps: lower rates trip the audio
+      # sync policy's drift-correction threshold every few frames and accrue
+      # real (policy-induced) drift that fails the gate.
+      #
+      # Linux-only: the Windows WARP adapter composites blank frames and the
+      # macOS runners' paravirtualized Metal collapses to ~2 fps presentation
+      # regardless of decoder, so neither can sustain a wall-clock playback
+      # measurement. The decoder logic under test (FFmpeg gap holds) is fully
+      # exercised here; the macOS AVAssetReader path is covered by running
+      # `cap selftest playback` locally on real hardware.
+      - name: Editor playback sync harness
+        if: runner.os == 'Linux'  # Linux-only; the rationale is in the comment block above this step
+        shell: bash
+        run: |
+          cargo run --locked -p cap -- --log-level info selftest playback --duration 30 --json
+
+      - name: Report findings  # renders the matrix JSON report as a Markdown table in the job summary
+        if: always()  # publish the summary even when an earlier step failed
+        shell: bash
+        run: |
+          REPORT="${{ github.workspace }}/sync-matrix-${{ matrix.runner }}.json"
+          {
+            echo "## A/V sync matrix — ${{ matrix.runner }}"
+            echo ""
+            if [ -f "$REPORT" ]; then
+              PYTHONIOENCODING=utf-8 python3 - "$REPORT" << 'PYEOF'
+          import json, sys
+          report = json.load(open(sys.argv[1], encoding="utf-8"))  # PYTHONIOENCODING covers stdio only; open() would use the locale code page on Windows
+          print(f"Randomized seed: `{report.get('seed')}` (rerun with CAP_SYNC_MATRIX_SEED)")
+          print()
+          print("| Case | Result | Detail |")
+          print("| --- | --- | --- |")
+          for case in report.get("cases", []):
+              verdict = "PASS" if case["pass"] else "FAIL"
+              detail = "<br>".join(case["detail"].replace("|", "\\|").splitlines())  # a raw newline would end the table row
+              print(f"| {case['name']} | {verdict} | {detail} |")
+          PYEOF
+            else
+              echo "No report produced — the matrix crashed before writing results."
+            fi
+          } >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Upload findings
+        uses: actions/upload-artifact@v4
+        if: always()  # keep the JSON report even when the run failed
+        with:
+          if-no-files-found: ignore  # the matrix may have crashed before writing a report
+          name: sync-matrix-${{ matrix.runner }}  # one artifact per OS leg
+          path: ${{ github.workspace }}/sync-matrix-${{ matrix.runner }}.json
+
+      - name: Fail on matrix failures  # turns the tolerated matrix failure back into a failed job
+        if: steps.matrix.outcome == 'failure'  # outcome is the step result before continue-on-error is applied
+        shell: bash
+        run: |
+          echo "Synthetic sync matrix reported failures; see the job summary." >&2
+          exit 1
diff --git a/Cargo.lock b/Cargo.lock
index 9d7cbc65d1..33d20ab582 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -8,6 +8,22 @@ version = "0.11.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3"
 
+[[package]]
+name = "ab_glyph"
+version = "0.2.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "01c0457472c38ea5bd1c3b5ada5e368271cb550be7a4ca4a0b4634e9913f6cc2"
+dependencies = [
+ "ab_glyph_rasterizer",
+ "owned_ttf_parser",
+]
+
+[[package]]
+name = "ab_glyph_rasterizer"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "366ffbaa4442f4684d91e2cd7c5ea7c4ed8add41959a31447066e279e432b618"
+
 [[package]]
 name = "actix-codec"
 version = "0.5.2"
@@ -250,6 +266,31 @@ dependencies = [
  "pkg-config",
 ]
 
+[[package]]
+name = "android-activity"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0f2a1bb052857d5dd49572219344a7332b31b76405648eabac5bc68978251bcd"
+dependencies = [
+ "android-properties",
+ "bitflags 2.9.4",
+ "cc",
+ "jni 0.22.4",
+ "libc",
+ "log",
+ "ndk 0.9.0",
+ "ndk-context",
+ "ndk-sys 0.6.0+11769913",
+ "num_enum",
+ "thiserror 2.0.16",
+]
+
+[[package]]
+name = "android-properties"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc7eb209b1518d6bb87b283c20095f5228ecda460da70b44f0802523dea6da04"
+
 [[package]]
 name = "android_system_properties"
 version = "0.1.5"
@@ -354,7 +395,7 @@ dependencies = [
  "image 0.25.8",
  "log",
  "objc2 0.6.2",
- "objc2-app-kit",
+ "objc2-app-kit 0.3.1",
  "objc2-core-foundation",
  "objc2-core-graphics",
  "objc2-foundation 0.3.1",
@@ -388,6 +429,12 @@ version = "0.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
 
+[[package]]
+name = "as-raw-xcb-connection"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "175571dd1d178ced59193a6fc02dde1b972eb0bc56c892cde9beeceac5bf0f6b"
+
 [[package]]
 name = "ash"
 version = "0.38.0+1.3.281"
@@ -1076,6 +1123,32 @@ dependencies = [
  "system-deps 6.2.2",
 ]
 
+[[package]]
+name = "calloop"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b99da2f8558ca23c71f4fd15dc57c906239752dd27ff3c00a1d56b685b7cbfec"
+dependencies = [
+ "bitflags 2.9.4",
+ "log",
+ "polling",
+ "rustix 0.38.44",
+ "slab",
+ "thiserror 1.0.69",
+]
+
+[[package]]
+name = "calloop-wayland-source"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95a66a987056935f7efce4ab5668920b5d0dac4a7c99991a67395f13702ddd20"
+dependencies = [
+ "calloop",
+ "rustix 0.38.44",
+ "wayland-backend",
+ "wayland-client",
+]
+
 [[package]]
 name = "camino"
 version = "1.2.0"
@@ -1092,13 +1165,17 @@ dependencies = [
  "cap-automation",
  "cap-camera",
  "cap-cli-install",
+ "cap-editor",
  "cap-export",
+ "cap-media-info",
  "cap-project",
  "cap-recording",
+ "cap-timestamp",
  "chrono",
  "cidre",
  "clap",
  "clap_complete",
+ "cpal 0.15.3 (git+https://github.com/CapSoftware/cpal?rev=3cc779a7b4ca)",
  "dirs 6.0.0",
  "ffmpeg-next",
  "flume",
@@ -1106,11 +1183,13 @@ dependencies = [
  "image 0.25.8",
  "kameo",
  "libc",
+ "relative-path",
  "reqwest 0.12.24",
  "scap-screencapturekit",
  "scap-targets",
  "serde",
  "serde_json",
+ "softbuffer",
  "tempfile",
  "tokio",
  "tokio-util",
@@ -1118,6 +1197,7 @@ dependencies = [
  "tracing-subscriber",
  "uuid",
  "windows 0.60.0",
+ "winit",
  "workspace-hack",
 ]
 
@@ -1294,7 +1374,7 @@ version = "0.0.0"
 dependencies = [
  "hex",
  "objc2 0.6.2",
- "objc2-app-kit",
+ "objc2-app-kit 0.3.1",
  "serde",
  "sha2",
  "specta",
@@ -1366,7 +1446,7 @@ dependencies = [
  "nix 0.29.0",
  "objc",
  "objc2 0.6.2",
- "objc2-app-kit",
+ "objc2-app-kit 0.3.1",
  "objc2-foundation 0.3.1",
  "opentelemetry",
  "opentelemetry-otlp",
@@ -1739,7 +1819,7 @@ dependencies = [
  "libproc",
  "objc",
  "objc2 0.6.2",
- "objc2-app-kit",
+ "objc2-app-kit 0.3.1",
  "parking_lot",
  "pipewire",
  "relative-path",
@@ -2145,7 +2225,7 @@ dependencies = [
  "clipboard-win",
  "image 0.25.8",
  "objc2 0.6.2",
- "objc2-app-kit",
+ "objc2-app-kit 0.3.1",
  "objc2-foundation 0.3.1",
  "windows 0.59.0",
  "x11rb",
@@ -2374,6 +2454,19 @@ version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
 
+[[package]]
+name = "core-graphics"
+version = "0.23.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c07782be35f9e1140080c6b96f0d44b739e2278479f64e02fdab4e32dfd8b081"
+dependencies = [
+ "bitflags 1.3.2",
+ "core-foundation 0.9.4",
+ "core-graphics-types 0.1.3",
+ "foreign-types 0.5.0",
+ "libc",
+]
+
 [[package]]
 name = "core-graphics"
 version = "0.24.0"
@@ -2520,7 +2613,7 @@ dependencies = [
  "core-foundation-sys",
  "coreaudio-rs",
  "dasp_sample",
- "jni",
+ "jni 0.21.1",
  "js-sys",
  "libc",
  "mach2",
@@ -2542,7 +2635,7 @@ dependencies = [
  "core-foundation-sys",
  "coreaudio-rs",
  "dasp_sample",
- "jni",
+ "jni 0.21.1",
  "js-sys",
  "libc",
  "mach2",
@@ -2709,6 +2802,21 @@ dependencies = [
  "syn 2.0.106",
 ]
 
+[[package]]
+name = "ctor"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "83cf0d42651b16c6dfe68685716d18480d18a9c39c62d76e8cf3eb6ed5d8bcbf"
+dependencies = [
+ "dtor",
+]
+
+[[package]]
+name = "cursor-icon"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f27ae1dd37df86211c42e150270f82743308803d90a6f6e6651cd730d5e1732f"
+
 [[package]]
 name = "darling"
 version = "0.20.11"
@@ -3101,6 +3209,45 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "drm"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "98888c4bbd601524c11a7ed63f814b8825f420514f78e96f752c437ae9cbb5d1"
+dependencies = [
+ "bitflags 2.9.4",
+ "bytemuck",
+ "drm-ffi",
+ "drm-fourcc",
+ "rustix 0.38.44",
+]
+
+[[package]]
+name = "drm-ffi"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97c98727e48b7ccb4f4aea8cfe881e5b07f702d17b7875991881b41af7278d53"
+dependencies = [
+ "drm-sys",
+ "rustix 0.38.44",
+]
+
+[[package]]
+name = "drm-fourcc"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0aafbcdb8afc29c1a7ee5fbe53b5d62f4565b35a042a662ca9fecd0b54dae6f4"
+
+[[package]]
+name = "drm-sys"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fd39dde40b6e196c2e8763f23d119ddb1a8714534bf7d77fa97a65b0feda3986"
+dependencies = [
+ "libc",
+ "linux-raw-sys 0.6.5",
+]
+
 [[package]]
 name = "dtoa"
 version = "1.0.10"
@@ -3116,6 +3263,12 @@ dependencies = [
  "dtoa",
 ]
 
+[[package]]
+name = "dtor"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edf234dd1594d6dd434a8fb8cada51ddbbc593e40e4a01556a0b31c62da2775b"
+
 [[package]]
 name = "dunce"
 version = "1.0.5"
@@ -4042,7 +4195,7 @@ dependencies = [
  "crossbeam-channel",
  "keyboard-types",
  "objc2 0.6.2",
- "objc2-app-kit",
+ "objc2-app-kit 0.3.1",
  "once_cell",
  "serde",
  "thiserror 2.0.16",
@@ -5003,19 +5156,68 @@ dependencies = [
  "cesu8",
  "cfg-if",
  "combine",
- "jni-sys",
+ "jni-sys 0.3.0",
  "log",
  "thiserror 1.0.69",
  "walkdir",
  "windows-sys 0.45.0",
 ]
 
+[[package]]
+name = "jni"
+version = "0.22.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5efd9a482cf3a427f00d6b35f14332adc7902ce91efb778580e180ff90fa3498"
+dependencies = [
+ "cfg-if",
+ "combine",
+ "jni-macros",
+ "jni-sys 0.4.1",
+ "log",
+ "simd_cesu8",
+ "thiserror 2.0.16",
+ "walkdir",
+ "windows-link 0.2.0",
+]
+
+[[package]]
+name = "jni-macros"
+version = "0.22.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a00109accc170f0bdb141fed3e393c565b6f5e072365c3bd58f5b062591560a3"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "rustc_version",
+ "simd_cesu8",
+ "syn 2.0.106",
+]
+
 [[package]]
 name = "jni-sys"
 version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
 
+[[package]]
+name = "jni-sys"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c6377a88cb3910bee9b0fa88d4f42e1d2da8e79915598f65fb0c7ee14c878af2"
+dependencies = [
+ "jni-sys-macros",
+]
+
+[[package]]
+name = "jni-sys-macros"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264"
+dependencies = [
+ "quote",
+ "syn 2.0.106",
+]
+
 [[package]]
 name = "jobserver"
 version = "0.1.34"
@@ -5301,7 +5503,7 @@ checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb"
 dependencies = [
  "bitflags 2.9.4",
  "libc",
- "redox_syscall",
+ "redox_syscall 0.5.17",
 ]
 
 [[package]]
@@ -5346,6 +5548,12 @@ version = "0.4.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
 
+[[package]]
+name = "linux-raw-sys"
+version = "0.6.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2a385b1be4e5c3e362ad2ffa73c392e53f031eaa5b7d648e64cd87f27f6063d7"
+
 [[package]]
 name = "linux-raw-sys"
 version = "0.11.0"
@@ -5830,7 +6038,7 @@ dependencies = [
  "gtk",
  "keyboard-types",
  "objc2 0.6.2",
- "objc2-app-kit",
+ "objc2-app-kit 0.3.1",
  "objc2-core-foundation",
  "objc2-foundation 0.3.1",
  "once_cell",
@@ -5934,7 +6142,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2076a31b7010b17a38c01907c45b945e8f11495ee4dd588309718901b1f7a5b7"
 dependencies = [
  "bitflags 2.9.4",
- "jni-sys",
+ "jni-sys 0.3.0",
  "log",
  "ndk-sys 0.5.0+25.2.9519653",
  "num_enum",
@@ -5948,7 +6156,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c3f42e7bbe13d351b6bead8286a43aac9534b82bd3cc43e47037f012ebfd62d4"
 dependencies = [
  "bitflags 2.9.4",
- "jni-sys",
+ "jni-sys 0.3.0",
  "log",
  "ndk-sys 0.6.0+11769913",
  "num_enum",
@@ -5968,7 +6176,7 @@ version = "0.5.0+25.2.9519653"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8c196769dd60fd4f363e11d948139556a344e79d451aeb2fa2fd040738ef7691"
 dependencies = [
- "jni-sys",
+ "jni-sys 0.3.0",
 ]
 
 [[package]]
@@ -5977,7 +6185,7 @@ version = "0.6.0+11769913"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ee6cda3051665f1fb8d9e08fc35c96d5a244fb1be711a03b71118828afc9a873"
 dependencies = [
- "jni-sys",
+ "jni-sys 0.3.0",
 ]
 
 [[package]]
@@ -6247,6 +6455,22 @@ dependencies = [
  "objc2-exception-helper",
 ]
 
+[[package]]
+name = "objc2-app-kit"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e4e89ad9e3d7d297152b17d39ed92cd50ca8063a89a9fa569046d41568891eff"
+dependencies = [
+ "bitflags 2.9.4",
+ "block2 0.5.1",
+ "libc",
+ "objc2 0.5.2",
+ "objc2-core-data 0.2.2",
+ "objc2-core-image 0.2.2",
+ "objc2-foundation 0.2.2",
+ "objc2-quartz-core 0.2.2",
+]
+
 [[package]]
 name = "objc2-app-kit"
 version = "0.3.1"
@@ -6257,11 +6481,11 @@ dependencies = [
  "block2 0.6.1",
  "libc",
  "objc2 0.6.2",
- "objc2-cloud-kit",
- "objc2-core-data",
+ "objc2-cloud-kit 0.3.1",
+ "objc2-core-data 0.3.1",
  "objc2-core-foundation",
  "objc2-core-graphics",
- "objc2-core-image",
+ "objc2-core-image 0.3.1",
  "objc2-foundation 0.3.1",
  "objc2-quartz-core 0.3.1",
 ]
@@ -6279,7 +6503,7 @@ dependencies = [
  "objc2-avf-audio",
  "objc2-core-foundation",
  "objc2-core-graphics",
- "objc2-core-image",
+ "objc2-core-image 0.3.1",
  "objc2-core-media",
  "objc2-core-video",
  "objc2-foundation 0.3.1",
@@ -6296,6 +6520,19 @@ dependencies = [
  "objc2-foundation 0.3.1",
 ]
 
+[[package]]
+name = "objc2-cloud-kit"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "74dd3b56391c7a0596a295029734d3c1c5e7e510a4cb30245f8221ccea96b009"
+dependencies = [
+ "bitflags 2.9.4",
+ "block2 0.5.1",
+ "objc2 0.5.2",
+ "objc2-core-location",
+ "objc2-foundation 0.2.2",
+]
+
 [[package]]
 name = "objc2-cloud-kit"
 version = "0.3.1"
@@ -6307,6 +6544,17 @@ dependencies = [
  "objc2-foundation 0.3.1",
 ]
 
+[[package]]
+name = "objc2-contacts"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a5ff520e9c33812fd374d8deecef01d4a840e7b41862d849513de77e44aa4889"
+dependencies = [
+ "block2 0.5.1",
+ "objc2 0.5.2",
+ "objc2-foundation 0.2.2",
+]
+
 [[package]]
 name = "objc2-core-audio"
 version = "0.3.1"
@@ -6329,6 +6577,18 @@ dependencies = [
  "objc2 0.6.2",
 ]
 
+[[package]]
+name = "objc2-core-data"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "617fbf49e071c178c0b24c080767db52958f716d9eabdf0890523aeae54773ef"
+dependencies = [
+ "bitflags 2.9.4",
+ "block2 0.5.1",
+ "objc2 0.5.2",
+ "objc2-foundation 0.2.2",
+]
+
 [[package]]
 name = "objc2-core-data"
 version = "0.3.1"
@@ -6364,6 +6624,18 @@ dependencies = [
  "objc2-io-surface",
 ]
 
+[[package]]
+name = "objc2-core-image"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55260963a527c99f1819c4f8e3b47fe04f9650694ef348ffd2227e8196d34c80"
+dependencies = [
+ "block2 0.5.1",
+ "objc2 0.5.2",
+ "objc2-foundation 0.2.2",
+ "objc2-metal",
+]
+
 [[package]]
 name = "objc2-core-image"
 version = "0.3.1"
@@ -6374,6 +6646,18 @@ dependencies = [
  "objc2-foundation 0.3.1",
 ]
 
+[[package]]
+name = "objc2-core-location"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "000cfee34e683244f284252ee206a27953279d370e309649dc3ee317b37e5781"
+dependencies = [
+ "block2 0.5.1",
+ "objc2 0.5.2",
+ "objc2-contacts",
+ "objc2-foundation 0.2.2",
+]
+
 [[package]]
 name = "objc2-core-media"
 version = "0.3.1"
@@ -6425,6 +6709,7 @@ checksum = "0ee638a5da3799329310ad4cfa62fbf045d5f56e3ef5ba4149e7452dcf89d5a8"
 dependencies = [
  "bitflags 2.9.4",
  "block2 0.5.1",
+ "dispatch",
  "libc",
  "objc2 0.5.2",
 ]
@@ -6473,6 +6758,18 @@ dependencies = [
  "objc2-core-foundation",
 ]
 
+[[package]]
+name = "objc2-link-presentation"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1a1ae721c5e35be65f01a03b6d2ac13a54cb4fa70d8a5da293d7b0020261398"
+dependencies = [
+ "block2 0.5.1",
+ "objc2 0.5.2",
+ "objc2-app-kit 0.2.2",
+ "objc2-foundation 0.2.2",
+]
+
 [[package]]
 name = "objc2-metal"
 version = "0.2.2"
@@ -6493,7 +6790,7 @@ checksum = "26bb88504b5a050dbba515d2414607bf5e57dd56b107bc5f0351197a3e7bdc5d"
 dependencies = [
  "bitflags 2.9.4",
  "objc2 0.6.2",
- "objc2-app-kit",
+ "objc2-app-kit 0.3.1",
  "objc2-foundation 0.3.1",
 ]
 
@@ -6532,6 +6829,37 @@ dependencies = [
  "objc2-core-foundation",
 ]
 
+[[package]]
+name = "objc2-symbols"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0a684efe3dec1b305badae1a28f6555f6ddd3bb2c2267896782858d5a78404dc"
+dependencies = [
+ "objc2 0.5.2",
+ "objc2-foundation 0.2.2",
+]
+
+[[package]]
+name = "objc2-ui-kit"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8bb46798b20cd6b91cbd113524c490f1686f4c4e8f49502431415f3512e2b6f"
+dependencies = [
+ "bitflags 2.9.4",
+ "block2 0.5.1",
+ "objc2 0.5.2",
+ "objc2-cloud-kit 0.2.2",
+ "objc2-core-data 0.2.2",
+ "objc2-core-image 0.2.2",
+ "objc2-core-location",
+ "objc2-foundation 0.2.2",
+ "objc2-link-presentation",
+ "objc2-quartz-core 0.2.2",
+ "objc2-symbols",
+ "objc2-uniform-type-identifiers",
+ "objc2-user-notifications",
+]
+
 [[package]]
 name = "objc2-ui-kit"
 version = "0.3.1"
@@ -6544,6 +6872,30 @@ dependencies = [
  "objc2-foundation 0.3.1",
 ]
 
+[[package]]
+name = "objc2-uniform-type-identifiers"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44fa5f9748dbfe1ca6c0b79ad20725a11eca7c2218bceb4b005cb1be26273bfe"
+dependencies = [
+ "block2 0.5.1",
+ "objc2 0.5.2",
+ "objc2-foundation 0.2.2",
+]
+
+[[package]]
+name = "objc2-user-notifications"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76cfcbf642358e8689af64cee815d139339f3ed8ad05103ed5eaf73db8d84cb3"
+dependencies = [
+ "bitflags 2.9.4",
+ "block2 0.5.1",
+ "objc2 0.5.2",
+ "objc2-core-location",
+ "objc2-foundation 0.2.2",
+]
+
 [[package]]
 name = "objc2-web-kit"
 version = "0.3.1"
@@ -6553,7 +6905,7 @@ dependencies = [
  "bitflags 2.9.4",
  "block2 0.6.1",
  "objc2 0.6.2",
- "objc2-app-kit",
+ "objc2-app-kit 0.3.1",
  "objc2-core-foundation",
  "objc2-foundation 0.3.1",
  "objc2-javascript-core",
@@ -6593,7 +6945,7 @@ version = "0.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e8b61bebd49e5d43f5f8cc7ee2891c16e0f41ec7954d36bcb6c14c5e0de867fb"
 dependencies = [
- "jni",
+ "jni 0.21.1",
  "ndk 0.8.0",
  "ndk-context",
  "num-derive",
@@ -6795,6 +7147,16 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8c04f5d74368e4d0dfe06c45c8627c81bd7c317d52762d118fb9b3076f6420fd"
 
+[[package]]
+name = "orbclient"
+version = "0.3.55"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5df339f526ea9a60e371768d50efc2f2508c7203290731565d1f7a6f71d21747"
+dependencies = [
+ "libc",
+ "libredox",
+]
+
 [[package]]
 name = "ordered-channel"
 version = "1.2.0"
@@ -6894,6 +7256,15 @@ dependencies = [
  "thiserror 2.0.16",
 ]
 
+[[package]]
+name = "owned_ttf_parser"
+version = "0.25.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "36820e9051aca1014ddc75770aab4d68bc1e9e632f0f5627c4086bc216fb583b"
+dependencies = [
+ "ttf-parser 0.25.1",
+]
+
 [[package]]
 name = "pango"
 version = "0.18.3"
@@ -6959,7 +7330,7 @@ checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
 dependencies = [
  "cfg-if",
  "libc",
- "redox_syscall",
+ "redox_syscall 0.5.17",
  "smallvec",
  "windows-targets 0.52.6",
 ]
@@ -7913,6 +8284,15 @@ dependencies = [
  "rustfft",
 ]
 
+[[package]]
+name = "redox_syscall"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
+dependencies = [
+ "bitflags 1.3.2",
+]
+
 [[package]]
 name = "redox_syscall"
 version = "0.5.17"
@@ -8162,7 +8542,7 @@ dependencies = [
  "js-sys",
  "log",
  "objc2 0.6.2",
- "objc2-app-kit",
+ "objc2-app-kit 0.3.1",
  "objc2-core-foundation",
  "objc2-foundation 0.3.1",
  "raw-window-handle",
@@ -8492,7 +8872,7 @@ dependencies = [
  "futures",
  "inquire",
  "objc2 0.6.2",
- "objc2-app-kit",
+ "objc2-app-kit 0.3.1",
  "objc2-foundation 0.3.1",
  "scap-targets",
  "tokio",
@@ -8637,6 +9017,19 @@ dependencies = [
  "untrusted",
 ]
 
+[[package]]
+name = "sctk-adwaita"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6277f0217056f77f1d8f49f2950ac6c278c0d607c45f5ee99328d792ede24ec"
+dependencies = [
+ "ab_glyph",
+ "log",
+ "memmap2",
+ "smithay-client-toolkit",
+ "tiny-skia",
+]
+
 [[package]]
 name = "security-framework"
 version = "2.11.1"
@@ -9124,6 +9517,16 @@ version = "0.3.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
 
+[[package]]
+name = "simd_cesu8"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94f90157bb87cddf702797c5dadfa0be7d266cdf49e22da2fcaa32eff75b2c33"
+dependencies = [
+ "rustc_version",
+ "simdutf8",
+]
+
 [[package]]
 name = "simd_helpers"
 version = "0.1.0"
@@ -9133,6 +9536,12 @@ dependencies = [
  "quote",
 ]
 
+[[package]]
+name = "simdutf8"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
+
 [[package]]
 name = "simplecss"
 version = "0.2.2"
@@ -9224,11 +9633,39 @@ dependencies = [
  "syn 2.0.106",
 ]
 
+[[package]]
+name = "smithay-client-toolkit"
+version = "0.19.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3457dea1f0eb631b4034d61d4d8c32074caa6cd1ab2d59f2327bd8461e2c0016"
+dependencies = [
+ "bitflags 2.9.4",
+ "calloop",
+ "calloop-wayland-source",
+ "cursor-icon",
+ "libc",
+ "log",
+ "memmap2",
+ "rustix 0.38.44",
+ "thiserror 1.0.69",
+ "wayland-backend",
+ "wayland-client",
+ "wayland-csd-frame",
+ "wayland-cursor",
+ "wayland-protocols",
+ "wayland-protocols-wlr",
+ "wayland-scanner",
+ "xkeysym",
+]
+
 [[package]]
 name = "smol_str"
 version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "dd538fb6910ac1099850255cf94a94df6551fbdd602454387d0adb2d1ca6dead"
+dependencies = [
+ "serde",
+]
 
 [[package]]
 name = "socket2"
@@ -9267,20 +9704,30 @@ version = "0.4.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "18051cdd562e792cad055119e0cdb2cfc137e44e3987532e0f9659a77931bb08"
 dependencies = [
+ "as-raw-xcb-connection",
  "bytemuck",
  "cfg_aliases 0.2.1",
  "core-graphics 0.24.0",
+ "drm",
+ "fastrand",
  "foreign-types 0.5.0",
  "js-sys",
  "log",
+ "memmap2",
  "objc2 0.5.2",
  "objc2-foundation 0.2.2",
  "objc2-quartz-core 0.2.2",
  "raw-window-handle",
- "redox_syscall",
+ "redox_syscall 0.5.17",
+ "rustix 0.38.44",
+ "tiny-xlib",
  "wasm-bindgen",
+ "wayland-backend",
+ "wayland-client",
+ "wayland-sys",
  "web-sys",
  "windows-sys 0.59.0",
+ "x11rb",
 ]
 
 [[package]]
@@ -9748,7 +10195,7 @@ dependencies = [
  "gdkwayland-sys",
  "gdkx11-sys",
  "gtk",
- "jni",
+ "jni 0.21.1",
  "lazy_static",
  "libc",
  "log",
@@ -9756,7 +10203,7 @@ dependencies = [
  "ndk-context",
  "ndk-sys 0.6.0+11769913",
  "objc2 0.6.2",
- "objc2-app-kit",
+ "objc2-app-kit 0.3.1",
  "objc2-foundation 0.3.1",
  "once_cell",
  "parking_lot",
@@ -9824,15 +10271,15 @@ dependencies = [
  "http 1.3.1",
  "http-range",
  "image 0.25.8",
- "jni",
+ "jni 0.21.1",
  "libc",
  "log",
  "mime",
  "muda",
  "objc2 0.6.2",
- "objc2-app-kit",
+ "objc2-app-kit 0.3.1",
  "objc2-foundation 0.3.1",
- "objc2-ui-kit",
+ "objc2-ui-kit 0.3.1",
  "objc2-web-kit",
  "percent-encoding",
  "plist",
@@ -10112,7 +10559,7 @@ checksum = "786156aa8e89e03d271fbd3fe642207da8e65f3c961baa9e2930f332bf80a1f5"
 dependencies = [
  "dunce",
  "glob",
- "objc2-app-kit",
+ "objc2-app-kit 0.3.1",
  "objc2-foundation 0.3.1",
  "open",
  "schemars 0.8.22",
@@ -10295,9 +10742,9 @@ dependencies = [
  "dpi",
  "gtk",
  "http 1.3.1",
- "jni",
+ "jni 0.21.1",
  "objc2 0.6.2",
- "objc2-ui-kit",
+ "objc2-ui-kit 0.3.1",
  "objc2-web-kit",
  "raw-window-handle",
  "serde",
@@ -10318,10 +10765,10 @@ checksum = "c1fe9d48bd122ff002064e88cfcd7027090d789c4302714e68fcccba0f4b7807"
 dependencies = [
  "gtk",
  "http 1.3.1",
- "jni",
+ "jni 0.21.1",
  "log",
  "objc2 0.6.2",
- "objc2-app-kit",
+ "objc2-app-kit 0.3.1",
  "objc2-foundation 0.3.1",
  "once_cell",
  "percent-encoding",
@@ -10374,7 +10821,7 @@ dependencies = [
  "anyhow",
  "brotli",
  "cargo_metadata",
- "ctor",
+ "ctor 0.2.9",
  "dunce",
  "glob",
  "html5ever",
@@ -10603,6 +11050,19 @@ dependencies = [
  "strict-num",
 ]
 
+[[package]]
+name = "tiny-xlib"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a90a0ca3ee6a69f2ad28fd11621a4c3f03b371f366be500b64df260c4ffbafb4"
+dependencies = [
+ "as-raw-xcb-connection",
+ "ctor 0.10.1",
+ "libloading 0.8.8",
+ "pkg-config",
+ "tracing",
+]
+
 [[package]]
 name = "tinystr"
 version = "0.8.1"
@@ -11084,7 +11544,7 @@ dependencies = [
  "libappindicator",
  "muda",
  "objc2 0.6.2",
- "objc2-app-kit",
+ "objc2-app-kit 0.3.1",
  "objc2-core-foundation",
  "objc2-core-graphics",
  "objc2-foundation 0.3.1",
@@ -11710,6 +12170,28 @@ dependencies = [
  "wayland-scanner",
 ]
 
+[[package]]
+name = "wayland-csd-frame"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "625c5029dbd43d25e6aa9615e88b829a5cad13b2819c4ae129fdbb7c31ab4c7e"
+dependencies = [
+ "bitflags 2.9.4",
+ "cursor-icon",
+ "wayland-backend",
+]
+
+[[package]]
+name = "wayland-cursor"
+version = "0.31.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "447ccc440a881271b19e9989f75726d60faa09b95b0200a9b7eb5cc47c3eeb29"
+dependencies = [
+ "rustix 1.1.2",
+ "wayland-client",
+ "xcursor",
+]
+
 [[package]]
 name = "wayland-protocols"
 version = "0.32.9"
@@ -11722,6 +12204,19 @@ dependencies = [
  "wayland-scanner",
 ]
 
+[[package]]
+name = "wayland-protocols-plasma"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a07a14257c077ab3279987c4f8bb987851bf57081b93710381daea94f2c2c032"
+dependencies = [
+ "bitflags 2.9.4",
+ "wayland-backend",
+ "wayland-client",
+ "wayland-protocols",
+ "wayland-scanner",
+]
+
 [[package]]
 name = "wayland-protocols-wlr"
 version = "0.3.9"
@@ -11754,6 +12249,7 @@ checksum = "34949b42822155826b41db8e5d0c1be3a2bd296c747577a43a3e6daefc296142"
 dependencies = [
  "dlib",
  "log",
+ "once_cell",
  "pkg-config",
 ]
 
@@ -12112,7 +12608,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d9bec5a31f3f9362f2258fd0e9c9dd61a9ca432e7306cc78c444258f0dce9a9c"
 dependencies = [
  "objc2 0.6.2",
- "objc2-app-kit",
+ "objc2-app-kit 0.3.1",
  "objc2-core-foundation",
  "objc2-foundation 0.3.1",
  "raw-window-handle",
@@ -12763,6 +13259,58 @@ version = "0.53.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
 
+[[package]]
+name = "winit"
+version = "0.30.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a6755fa58a9f8350bd1e472d4c3fcc25f824ec358933bba33306d0b63df5978d"
+dependencies = [
+ "ahash",
+ "android-activity",
+ "atomic-waker",
+ "bitflags 2.9.4",
+ "block2 0.5.1",
+ "bytemuck",
+ "calloop",
+ "cfg_aliases 0.2.1",
+ "concurrent-queue",
+ "core-foundation 0.9.4",
+ "core-graphics 0.23.2",
+ "cursor-icon",
+ "dpi",
+ "js-sys",
+ "libc",
+ "memmap2",
+ "ndk 0.9.0",
+ "objc2 0.5.2",
+ "objc2-app-kit 0.2.2",
+ "objc2-foundation 0.2.2",
+ "objc2-ui-kit 0.2.2",
+ "orbclient",
+ "percent-encoding",
+ "pin-project",
+ "raw-window-handle",
+ "redox_syscall 0.4.1",
+ "rustix 0.38.44",
+ "sctk-adwaita",
+ "smithay-client-toolkit",
+ "smol_str",
+ "tracing",
+ "unicode-segmentation",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "wayland-backend",
+ "wayland-client",
+ "wayland-protocols",
+ "wayland-protocols-plasma",
+ "web-sys",
+ "web-time",
+ "windows-sys 0.52.0",
+ "x11-dl",
+ "x11rb",
+ "xkbcommon-dl",
+]
+
 [[package]]
 name = "winnow"
 version = "0.5.40"
@@ -12925,15 +13473,15 @@ dependencies = [
  "html5ever",
  "http 1.3.1",
  "javascriptcore-rs",
- "jni",
+ "jni 0.21.1",
  "kuchikiki",
  "libc",
  "ndk 0.9.0",
  "objc2 0.6.2",
- "objc2-app-kit",
+ "objc2-app-kit 0.3.1",
  "objc2-core-foundation",
  "objc2-foundation 0.3.1",
- "objc2-ui-kit",
+ "objc2-ui-kit 0.3.1",
  "objc2-web-kit",
  "once_cell",
  "percent-encoding",
@@ -12979,7 +13527,11 @@ version = "0.13.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9993aa5be5a26815fe2c3eacfc1fde061fc1a1f094bf1ad2a18bf9c495dd7414"
 dependencies = [
+ "as-raw-xcb-connection",
  "gethostname",
+ "libc",
+ "libloading 0.8.8",
+ "once_cell",
  "rustix 1.1.2",
  "x11rb-protocol",
 ]
@@ -13000,6 +13552,25 @@ dependencies = [
  "rustix 1.1.2",
 ]
 
+[[package]]
+name = "xcursor"
+version = "0.3.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bec9e4a500ca8864c5b47b8b482a73d62e4237670e5b5f1d6b9e3cae50f28f2b"
+
+[[package]]
+name = "xkbcommon-dl"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d039de8032a9a8856a6be89cea3e5d12fdd82306ab7c94d74e6deab2460651c5"
+dependencies = [
+ "bitflags 2.9.4",
+ "dlib",
+ "log",
+ "once_cell",
+ "xkeysym",
+]
+
 [[package]]
 name = "xkeysym"
 version = "0.2.1"
diff --git a/apps/cli/Cargo.toml b/apps/cli/Cargo.toml
index 9d7f7d024b..082df3cb90 100644
--- a/apps/cli/Cargo.toml
+++ b/apps/cli/Cargo.toml
@@ -8,7 +8,11 @@ clap = { version = "4.5.23", features = ["derive"] }
 clap_complete = "4.5.38"
 cap-project = { path = "../../crates/project" }
 cap-recording = { path = "../../crates/recording" }
+cap-editor = { path = "../../crates/editor" }
 cap-export = { path = "../../crates/export" }
+cap-media-info = { path = "../../crates/media-info" }
+cap-timestamp = { path = "../../crates/timestamp" }
+relative-path = "1.9.3"
 cap-automation = { path = "../../crates/automation" }
 cap-camera = { path = "../../crates/camera" }
 cap-cli-install = { path = "../../crates/cli-install" }
@@ -28,6 +32,9 @@ image = "0.25.2"
 chrono = "0.4.31"
 tracing.workspace = true
 tracing-subscriber = "0.3.19"
+cpal = { workspace = true }
+winit = "0.30"
+softbuffer = "0.4"
 workspace-hack = { version = "0.1", path = "../../crates/workspace-hack" }
 
 [target.'cfg(unix)'.dependencies]
diff --git a/apps/cli/src/main.rs b/apps/cli/src/main.rs
index 0782da5b36..af1f9e4f24 100644
--- a/apps/cli/src/main.rs
+++ b/apps/cli/src/main.rs
@@ -7,6 +7,7 @@ mod project;
 mod record;
 mod recordings;
 mod screenshot;
+mod selftest;
 mod session;
 mod targets;
 mod update;
@@ -196,6 +197,8 @@ enum Commands {
     Targets(TargetsArgs),
     /// Report CLI environment and capture-readiness diagnostics
     Doctor(FormatArgs),
+    /// Run end-to-end diagnostics that verify Cap works on this machine
+    Selftest(selftest::SelftestArgs),
     /// Print CLI version and execution context
     Version(FormatArgs),
     /// Inspect or manage the desktop-installed `cap` shim
@@ -210,7 +213,12 @@ enum Commands {
 
 impl Commands {
     fn exit_after_success(&self) -> bool {
-        matches!(self, Self::Export(_) | Self::ExportPreview(_))
+        // Selftest runs an export, so it shares export's teardown-crash
+        // avoidance on Windows.
+        matches!(
+            self,
+            Self::Export(_) | Self::ExportPreview(_) | Self::Selftest(_)
+        )
     }
 }
 
@@ -425,6 +433,12 @@ fn main() {
 
     let exit_after_success = cli.exit_after_success();
 
+    // The self-test opens a window, which AppKit requires to live on the real
+    // process main thread — so the main thread stays here to serve pattern
+    // requests while the command itself runs on the runtime thread.
+    let pattern_rx = matches!(cli.command, Some(Commands::Selftest(_)))
+        .then(selftest::pattern::install_main_thread_runner);
+
     // Windows export exercises deep WGPU/MediaFoundation/FFmpeg stacks. Running the CLI runtime
     // on an explicitly large stack is what stopped the export worker from overflowing before
     // the first frame; keep the sidecar and desktop runtimes in sync.
@@ -432,6 +446,17 @@ fn main() {
         .name("cap-cli-runtime".to_string())
         .stack_size(TOKIO_WORKER_THREAD_STACK_SIZE)
         .spawn(move || -> Result<(), String> {
+            // serve_main_thread blocks the main thread until this shutdown
+            // runs; a drop guard keeps that true on every exit path, including
+            // the runtime failing to build and panics unwinding out of run().
+            struct PatternShutdown;
+            impl Drop for PatternShutdown {
+                fn drop(&mut self) {
+                    selftest::pattern::shutdown_main_thread_runner();
+                }
+            }
+            let _pattern_shutdown = PatternShutdown;
+
             let runtime = tokio::runtime::Builder::new_multi_thread()
                 .enable_all()
                 .thread_stack_size(TOKIO_WORKER_THREAD_STACK_SIZE)
@@ -451,17 +476,25 @@ fn main() {
             result
         });
 
-    // Surface failures as a clean, unquoted `error: ...` line on stderr (the default
-    // `Result`-returning main prints `Error: "debug-quoted"`, which is noisy for humans and brittle
-    // for agents scraping stderr). clap already exits 2 for usage/parse errors before we get here.
-    let outcome = match runtime_thread {
-        Ok(handle) => handle.join(),
+    // A failed spawn means nothing will ever call shutdown_main_thread_runner, so the
+    // pattern server below would block forever — bail out before serving.
+    let runtime_thread = match runtime_thread {
+        Ok(handle) => handle,
         Err(e) => {
             eprintln!("error: Failed to spawn CLI runtime thread: {e}");
             std::process::exit(1);
         }
     };
 
+    if let Some(rx) = pattern_rx {
+        selftest::pattern::serve_main_thread(rx);
+    }
+
+    // Surface failures as a clean, unquoted `error: ...` line on stderr (the default
+    // `Result`-returning main prints `Error: "debug-quoted"`, which is noisy for humans and brittle
+    // for agents scraping stderr). clap already exits 2 for usage/parse errors before we get here.
+    let outcome = runtime_thread.join();
+
     match outcome {
         Ok(Ok(())) => {}
         Ok(Err(message)) => {
@@ -484,6 +517,7 @@ async fn run(cli: Cli) -> Result<(), String> {
     match command {
         Commands::Export(e) => e.run(json).await,
         Commands::ExportPreview(e) => e.run().await,
+        Commands::Selftest(args) => args.run(json).await,
         Commands::Project(args) => args.run(json),
         Commands::Record(RecordArgs { command, args }) => match command {
             Some(RecordCommands::Start(args)) => args.run(json).await,
diff --git a/apps/cli/src/selftest/measure.rs b/apps/cli/src/selftest/measure.rs
new file mode 100644
index 0000000000..defc65cc25
--- /dev/null
+++ b/apps/cli/src/selftest/measure.rs
@@ -0,0 +1,528 @@
+//! Content-level A/V sync measurement for the self-test.
+//!
+//! Extracts flash onsets from a video track (mean luma over the frame
+//! center with adaptive hysteresis) and beep onsets from an audio track
+//! (RMS envelope with an adaptive threshold), pairs them, and computes
+//! robust offset and drift statistics. Positive offset means audio is late.
+
+use std::path::Path;
+
+use ffmpeg::{codec, format, frame, media};
+use serde::Serialize;
+
+/// Fraction of the frame (centered) used for luma measurement, avoiding
+/// menu bars, notches and window chrome at the edges.
+const CENTER_CROP: f64 = 0.5;
+/// Minimum spacing between onsets, guarding against double-triggers within
+/// a single tone burst.
+const MIN_ONSET_GAP_SECS: f64 = 0.5;
+
+/// Decodes the best video stream in `path` and returns its flash onset times
+/// in seconds. Every decoded frame with a PTS and a supported pixel format
+/// contributes one `(PTS × time base, mean center luma)` sample.
+pub fn video_flash_onsets(path: &Path) -> Result<Vec<f64>, String> {
+    let mut ictx =
+        format::input(&path).map_err(|e| format!("open video {}: {e}", path.display()))?;
+    let stream = ictx
+        .streams()
+        .best(media::Type::Video)
+        .ok_or("no video stream")?;
+    let video_index = stream.index();
+    let time_base = stream.time_base();
+    let secs_per_tick = f64::from(time_base.numerator()) / f64::from(time_base.denominator());
+
+    let codec_ctx = codec::context::Context::from_parameters(stream.parameters())
+        .map_err(|e| format!("video codec params: {e}"))?;
+    let mut decoder = codec_ctx
+        .decoder()
+        .video()
+        .map_err(|e| format!("video decoder: {e}"))?;
+
+    // One `(seconds, luma)` entry per usable frame, in decoder output order.
+    let mut luma_series: Vec<(f64, f64)> = Vec::new();
+    let mut record = |decoded: &frame::Video| {
+        if let (Some(pts), Some(luma)) = (decoded.pts(), mean_center_luma(decoded)) {
+            luma_series.push((pts as f64 * secs_per_tick, luma));
+        }
+    };
+
+    let mut frame_buf = frame::Video::empty();
+    for (owner, packet) in ictx.packets() {
+        if owner.index() == video_index && decoder.send_packet(&packet).is_ok() {
+            while decoder.receive_frame(&mut frame_buf).is_ok() {
+                record(&frame_buf);
+            }
+        }
+    }
+    // End of input: drain whatever the decoder is still holding.
+    let _ = decoder.send_eof();
+    while decoder.receive_frame(&mut frame_buf).is_ok() {
+        record(&frame_buf);
+    }
+
+    flash_onsets_from_luma(&luma_series)
+}
+
+/// Flash onsets from a time-ordered `(seconds, mean luma)` series, shared by
+/// the file analyzers and the playback harness (which samples luma at the
+/// renderer's presentation boundary).
+pub fn flash_onsets_from_luma(samples: &[(f64, f64)]) -> Result<Vec<f64>, String> {
+    if samples.len() < 10 {
+        return Err(format!(
+            "only {} video frames decoded; recording too short to analyze",
+            samples.len()
+        ));
+    }
+
+    // Adaptive hysteresis from the observed luma range so exact black/white
+    // levels (color range, HDR tone mapping) don't matter. The high anchor is
+    // near-peak (99.8th percentile) since flashes are a small duty cycle.
+    let mut lumas: Vec<f64> = samples.iter().map(|(_, l)| *l).collect();
+    lumas.sort_by(f64::total_cmp); // total order, so a NaN luma cannot break the sort
+    let lo = percentile_sorted(&lumas, 0.10);
+    let hi = percentile_sorted(&lumas, 0.998);
+    if hi - lo < 40.0 {
+        return Err(format!(
+            "test pattern not visible in the recording (luma range {lo:.0}..{hi:.0}); \
+             the test window may have been covered or moved"
+        ));
+    }
+    let on = lo + 0.7 * (hi - lo);
+    let off = lo + 0.3 * (hi - lo);
+
+    let mut onsets = Vec::new();
+    let mut armed = true;
+    for (t, luma) in samples {
+        if *luma >= on && armed {
+            onsets.push(*t);
+            armed = false;
+        } else if *luma <= off {
+            armed = true;
+        }
+    }
+    Ok(onsets)
+}
+
+/// Mean of plane 0 (luma) over the centered `CENTER_CROP` window of `frame`,
+/// visiting every fourth row and column. `None` for pixel formats outside
+/// the list below or when the window holds no sampled pixel.
+fn mean_center_luma(frame: &frame::Video) -> Option<f64> {
+    // Plane 0 is luma for all YUV formats the decoders produce. Reject
+    // non-planar/packed formats rather than misreading them.
+    use ffmpeg::format::Pixel;
+    if !matches!(
+        frame.format(),
+        Pixel::YUV420P | Pixel::NV12 | Pixel::YUV422P | Pixel::YUV444P | Pixel::YUVJ420P
+    ) {
+        return None;
+    }
+    let width = frame.width() as usize;
+    let height = frame.height() as usize;
+    let stride = frame.stride(0);
+    let luma_plane = frame.data(0);
+
+    let (lo, hi) = (0.5 - CENTER_CROP / 2.0, 0.5 + CENTER_CROP / 2.0);
+    let left = (width as f64 * lo) as usize;
+    let right = (width as f64 * hi) as usize;
+    let top = (height as f64 * lo) as usize;
+    let bottom = (height as f64 * hi) as usize;
+
+    let mut total = 0u64;
+    let mut visited = 0u64;
+    // A 4-pixel step in both directions subsamples the crop.
+    for y in (top..bottom.min(height)).step_by(4) {
+        let scanline = &luma_plane[y * stride..y * stride + width];
+        for x in (left..right.min(width)).step_by(4) {
+            total += u64::from(scanline[x]);
+            visited += 1;
+        }
+    }
+    (visited > 0).then(|| total as f64 / visited as f64)
+}
+
+pub struct AudioOnsets {
+    pub onsets: Vec<f64>, // beep start times in seconds, counted from the first sample
+    /// Ratio of tone peak to noise floor (99.9th vs. 20th percentile of the
+    /// envelope); low values mean the beep was not reliably captured (e.g. muted output).
+    pub snr: f64,
+}
+
+/// Decodes the best audio stream in `path`, downmixes each frame to mono and
+/// returns the beeps found by [`beep_onsets_from_mono`]. The sample rate is
+/// whatever the most recently decoded frame reported.
+pub fn audio_beep_onsets(path: &Path) -> Result<AudioOnsets, String> {
+    let mut ictx =
+        format::input(&path).map_err(|e| format!("open audio {}: {e}", path.display()))?;
+    let stream = ictx
+        .streams()
+        .best(media::Type::Audio)
+        .ok_or("no audio stream")?;
+    let audio_index = stream.index();
+
+    let codec_ctx = codec::context::Context::from_parameters(stream.parameters())
+        .map_err(|e| format!("audio codec params: {e}"))?;
+    let mut decoder = codec_ctx
+        .decoder()
+        .audio()
+        .map_err(|e| format!("audio decoder: {e}"))?;
+
+    let mut pcm: Vec<f32> = Vec::new();
+    let mut rate = 0u32;
+    let mut ingest = |decoded: &frame::Audio| {
+        rate = decoded.rate();
+        append_mono(decoded, &mut pcm);
+    };
+
+    let mut frame_buf = frame::Audio::empty();
+    for (owner, packet) in ictx.packets() {
+        if owner.index() == audio_index && decoder.send_packet(&packet).is_ok() {
+            while decoder.receive_frame(&mut frame_buf).is_ok() {
+                ingest(&frame_buf);
+            }
+        }
+    }
+    let _ = decoder.send_eof();
+    while decoder.receive_frame(&mut frame_buf).is_ok() {
+        ingest(&frame_buf);
+    }
+
+    beep_onsets_from_mono(pcm, rate)
+}
+
+/// Beep onsets from a mono sample stream, shared by the file analyzers and
+/// the playback harness (which taps samples at the device handoff).
+pub fn beep_onsets_from_mono(mut mono: Vec<f32>, sample_rate: u32) -> Result<AudioOnsets, String> {
+    if sample_rate == 0 || mono.len() < sample_rate as usize {
+        return Err("audio track too short to analyze".to_string());
+    }
+
+    bandpass_1khz_in_place(&mut mono, sample_rate);
+
+    // 1 ms RMS envelope.
+    let chunk = (sample_rate / 1000).max(1) as usize;
+    let mut env: Vec<f32> = mono
+        .chunks(chunk)
+        .map(|c| (c.iter().map(|s| s * s).sum::<f32>() / c.len() as f32).sqrt())
+        .collect();
+    let chunk_secs = chunk as f64 / f64::from(sample_rate);
+
+    // `total_cmp` is a total order, so NaN samples cannot break the sort.
+    let mut sorted = env.clone();
+    sorted.sort_by(f32::total_cmp);
+    let noise_floor = f64::from(percentile_sorted_f32(&sorted, 0.20)).max(1e-6);
+    let peak = f64::from(percentile_sorted_f32(&sorted, 0.999));
+    let snr = peak / noise_floor;
+    // Edge-triggered hysteresis: after bandpassing, the tone bursts dominate
+    // the envelope peak while background audio (music, speech) sits well
+    // below it. A rising edge through `hi` marks an onset; re-arming waits
+    // for a fall below `lo`, so sustained background cannot mask or spam it.
+    let hi = ((noise_floor * 8.0).max(peak * 0.35)) as f32;
+    let lo = hi * 0.5;
+
+    let mut onsets = Vec::new();
+    let mut armed = true;
+    let mut last = f64::NEG_INFINITY;
+    for (i, value) in env.drain(..).enumerate() {
+        let t = i as f64 * chunk_secs;
+        if value >= hi && armed && t - last >= MIN_ONSET_GAP_SECS {
+            onsets.push(t);
+            last = t;
+            armed = false;
+        } else if value < lo {
+            armed = true;
+        }
+    }
+
+    Ok(AudioOnsets { onsets, snr })
+}
+
+/// Second-order (RBJ) bandpass centered on the 1 kHz test tone. Real
+/// machines play music/speech during a self-test; narrowband filtering lets
+/// the constant-frequency beep dominate the envelope regardless.
+fn bandpass_1khz_in_place(samples: &mut [f32], sample_rate: u32) {
+    let (center_hz, quality) = (1000.0f64, 8.0f64);
+    // Rates below 2001 Hz are treated as 2001 Hz, keeping 1 kHz under Nyquist.
+    let w0 = 2.0 * std::f64::consts::PI * center_hz / f64::from(sample_rate.max(2001));
+    let alpha = w0.sin() / (2.0 * quality);
+    let cos_w0 = w0.cos();
+    let norm = 1.0 + alpha;
+    let b0 = (alpha / norm) as f32;
+    let b2 = (-alpha / norm) as f32;
+    let a1 = (-2.0 * cos_w0 / norm) as f32;
+    let a2 = ((1.0 - alpha) / norm) as f32;
+
+    // Histories hold [n-1, n-2]; the x[n-1] term has no coefficient here.
+    let mut x_hist = [0.0f32; 2];
+    let mut y_hist = [0.0f32; 2];
+    for slot in samples.iter_mut() {
+        let input = *slot;
+        let output = b0 * input + b2 * x_hist[1] - a1 * y_hist[0] - a2 * y_hist[1];
+        x_hist = [input, x_hist[0]];
+        y_hist = [output, y_hist[0]];
+        *slot = output;
+    }
+}
+
+/// Appends `decoded` to `out` as mono: the mean of its channels, i16 scaled by `i16::MAX`.
+/// NOTE(review): verify `plane::<T>(0)` really spans `samples * channels` for packed frames.
+fn append_mono(decoded: &frame::Audio, out: &mut Vec<f32>) {
+    use ffmpeg::format::Sample;
+    use ffmpeg::format::sample::Type;
+    let (samples, channels) = (decoded.samples(), decoded.channels() as usize);
+    if samples == 0 || channels == 0 {
+        return;
+    }
+    match decoded.format() {
+        Sample::F32(Type::Planar) => {
+            let chans: Vec<&[f32]> = (0..channels.min(decoded.planes()))
+                .map(|ch| &decoded.plane::<f32>(ch)[..samples])
+                .collect();
+            for n in 0..samples {
+                let mixed: f32 = chans.iter().map(|ch| ch[n]).sum();
+                out.push(mixed / chans.len() as f32);
+            }
+        }
+        Sample::F32(Type::Packed) => {
+            let interleaved = &decoded.plane::<f32>(0)[..samples * channels];
+            for group in interleaved.chunks_exact(channels) {
+                out.push(group.iter().sum::<f32>() / channels as f32);
+            }
+        }
+        Sample::I16(Type::Planar) => {
+            let chans: Vec<&[i16]> = (0..channels.min(decoded.planes()))
+                .map(|ch| &decoded.plane::<i16>(ch)[..samples])
+                .collect();
+            for n in 0..samples {
+                let mixed: f32 = chans.iter().map(|ch| f32::from(ch[n])).sum();
+                out.push(mixed / (chans.len() as f32 * f32::from(i16::MAX)));
+            }
+        }
+        Sample::I16(Type::Packed) => {
+            let interleaved = &decoded.plane::<i16>(0)[..samples * channels];
+            for group in interleaved.chunks_exact(channels) {
+                let mixed: f32 = group.iter().map(|s| f32::from(*s)).sum();
+                out.push(mixed / (channels as f32 * f32::from(i16::MAX)));
+            }
+        }
+        // Unknown format; skip frame rather than misread it.
+        _ => {}
+    }
+}
+
+#[derive(Debug, Clone, Serialize)]
+pub struct SyncMeasurement {
+    pub paired_events: usize, // flash/beep pairs kept after the optional first-pair drop
+    pub inlier_events: usize, // pairs the statistics below were computed over
+    pub median_offset_ms: f64, // beep - flash; positive means audio is late
+    pub mad_ms: f64, // median absolute deviation of the offsets around the median
+    pub drift_ms_per_min: f64, // least-squares slope of offset over flash time, per minute
+    /// Drift accumulated across the observed window (slope × span). More
+    /// robust to slope noise on short runs than the per-minute rate.
+    pub total_drift_ms: f64,
+    pub span_secs: f64, // last minus first flash time among the events used
+    pub min_offset_ms: f64,
+    pub max_offset_ms: f64,
+    /// (flash time, offset ms) per inlier event.
+    pub events: Vec<(f64, f64)>,
+}
+
+/// Pairs each flash onset with the nearest beep onset (same clock; pairs over 0.9 s apart are
+/// discarded) and computes robust statistics on `beep - flash`. If more than `min_events` pairs
+/// exist the first is dropped: window creation/compositor transitions make it unrepresentative.
+pub fn measure_sync(
+    flash_onsets: &[f64],
+    beep_onsets: &[f64],
+    min_events: usize,
+) -> Result<SyncMeasurement, String> {
+    if flash_onsets.len() < 2 {
+        return Err(format!(
+            "only {} flash events detected in the recording",
+            flash_onsets.len()
+        ));
+    }
+    if beep_onsets.is_empty() {
+        return Err("no beeps detected in the recording".to_string());
+    }
+
+    let mut pairs: Vec<(f64, f64)> = Vec::new();
+    for flash in flash_onsets {
+        let Some(beep) = beep_onsets
+            .iter()
+            .min_by(|a, b| {
+                (*a - flash)
+                    .abs()
+                    .partial_cmp(&(*b - flash).abs())
+                    .unwrap_or(std::cmp::Ordering::Equal)
+            })
+            .copied()
+        else {
+            continue;
+        };
+        let offset = beep - flash;
+        if offset.abs() <= 0.9 {
+            pairs.push((*flash, offset * 1000.0));
+        }
+    }
+    // Drop the first event (noisy window-creation transition) while enough pairs remain.
+    if pairs.len() > min_events {
+        pairs.remove(0);
+    }
+    if pairs.len() < min_events {
+        return Err(format!(
+            "only {} usable flash/beep pairs (need {min_events}; detected {} flashes, {} beeps); \
+             the test window may have been covered or the beeps too quiet",
+            pairs.len(),
+            flash_onsets.len(),
+            beep_onsets.len()
+        ));
+    }
+
+    // Anchor on the densest offset cluster: mispaired events (a flash
+    // matching the wrong beep because the true one was masked) land seconds
+    // away, and with enough of them the median itself becomes junk. The true
+    // pairs all share one physical offset, so they form the tightest cluster.
+    let cluster_center = {
+        let mut best_center = 0.0;
+        let mut best_count = 0usize;
+        for (_, candidate) in &pairs {
+            let count = pairs
+                .iter()
+                .filter(|(_, o)| (o - candidate).abs() <= 60.0)
+                .count();
+            if count > best_count {
+                best_count = count;
+                best_center = *candidate;
+            }
+        }
+        best_center
+    };
+
+    let mut inliers: Vec<(f64, f64)> = pairs
+        .iter()
+        .filter(|(_, o)| (o - cluster_center).abs() <= 90.0)
+        .copied()
+        .collect();
+    if inliers.len() < min_events {
+        // No dominant cluster: report statistics over every pair and let the
+        // caller's thresholds judge them, rather than discarding the run.
+        inliers = pairs.clone();
+    }
+
+    let mut offsets: Vec<f64> = inliers.iter().map(|(_, o)| *o).collect();
+    offsets.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
+    let median = percentile_sorted(&offsets, 0.5);
+    let mut deviations: Vec<f64> = offsets.iter().map(|o| (o - median).abs()).collect();
+    deviations.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
+    let mad = percentile_sorted(&deviations, 0.5);
+
+    // Least-squares slope of offset over time = drift.
+    let n = inliers.len() as f64;
+    let mean_t = inliers.iter().map(|(t, _)| t).sum::<f64>() / n;
+    let mean_o = inliers.iter().map(|(_, o)| o).sum::<f64>() / n;
+    let mut num = 0.0;
+    let mut den = 0.0;
+    for (t, o) in &inliers {
+        num += (t - mean_t) * (o - mean_o);
+        den += (t - mean_t) * (t - mean_t);
+    }
+    let slope_ms_per_sec = if den > 0.0 { num / den } else { 0.0 };
+
+    let min = inliers
+        .iter()
+        .map(|(_, o)| *o)
+        .fold(f64::INFINITY, f64::min);
+    let max = inliers
+        .iter()
+        .map(|(_, o)| *o)
+        .fold(f64::NEG_INFINITY, f64::max);
+    let span_secs = inliers.last().map(|(t, _)| *t).unwrap_or(0.0)
+        - inliers.first().map(|(t, _)| *t).unwrap_or(0.0);
+
+    Ok(SyncMeasurement {
+        paired_events: pairs.len(),
+        inlier_events: inliers.len(),
+        median_offset_ms: median,
+        mad_ms: mad,
+        drift_ms_per_min: slope_ms_per_sec * 60.0,
+        total_drift_ms: slope_ms_per_sec * span_secs,
+        span_secs,
+        min_offset_ms: min,
+        max_offset_ms: max,
+        events: inliers,
+    })
+}
+
+/// Nearest-rank `q`-quantile of an ascending slice; `0.0` when the slice is empty.
+fn percentile_sorted(sorted: &[f64], q: f64) -> f64 {
+    match sorted.len().checked_sub(1) {
+        Some(last) => sorted[((last as f64 * q).round() as usize).min(last)],
+        None => 0.0,
+    }
+}
+
+/// Nearest-rank `q`-quantile of an ascending `f32` slice; `0.0` when the slice is empty.
+fn percentile_sorted_f32(sorted: &[f32], q: f64) -> f32 {
+    match sorted.len().checked_sub(1) {
+        Some(last) => sorted[((last as f64 * q).round() as usize).min(last)],
+        None => 0.0,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn measure_sync_detects_constant_offset() {
+        let flashes: Vec<f64> = (1..12).map(|k| k as f64 * 2.0).collect();
+        let beeps: Vec<f64> = flashes.iter().map(|f| f + 0.030).collect();
+        let m = measure_sync(&flashes, &beeps, 6).unwrap();
+        assert!((m.median_offset_ms - 30.0).abs() < 1.0);
+        assert!(m.total_drift_ms.abs() < 1.0);
+    }
+
+    #[test]
+    fn measure_sync_detects_drift() {
+        // 5 ms/s of drift = 300 ms/min.
+        let flashes: Vec<f64> = (1..12).map(|k| k as f64 * 2.0).collect();
+        let beeps: Vec<f64> = flashes.iter().map(|f| f + 0.005 * f).collect();
+        let m = measure_sync(&flashes, &beeps, 6).unwrap();
+        assert!(
+            (m.drift_ms_per_min - 300.0).abs() < 30.0,
+            "drift {}",
+            m.drift_ms_per_min
+        );
+        assert!(m.total_drift_ms > 50.0, "total {}", m.total_drift_ms);
+    }
+
+    #[test]
+    fn measure_sync_rejects_outliers() {
+        let mut flashes: Vec<f64> = (1..12).map(|k| k as f64 * 2.0).collect();
+        let beeps: Vec<f64> = flashes.iter().map(|f| f + 0.020).collect();
+        // A wild first event, like a window-transition artifact.
+        flashes[0] -= 0.6;
+        let m = measure_sync(&flashes, &beeps, 6).unwrap();
+        assert!((m.median_offset_ms - 20.0).abs() < 2.0);
+        assert!(m.inlier_events >= 9);
+    }
+
+    #[test]
+    fn measure_sync_scattered_offsets_still_report_stats() {
+        // Offsets alternate between 10 ms and 70 ms: every pair stays inside the inlier
+        // window, so the caller gets numbers (large MAD) to fail on, not an error.
+        let flashes: Vec<f64> = (1..12).map(|k| k as f64 * 2.0).collect();
+        let beeps: Vec<f64> = flashes
+            .iter()
+            .enumerate()
+            .map(|(i, f)| f + if i % 2 == 0 { 0.010 } else { 0.070 })
+            .collect();
+        let m = measure_sync(&flashes, &beeps, 6).unwrap();
+        assert!(m.mad_ms >= 20.0, "mad {}", m.mad_ms);
+    }
+
+    #[test]
+    fn measure_sync_fails_with_too_few_events() {
+        let flashes = vec![2.0, 4.0];
+        let beeps = vec![2.02, 4.02];
+        assert!(measure_sync(&flashes, &beeps, 6).is_err());
+    }
+}
diff --git a/apps/cli/src/selftest/mod.rs b/apps/cli/src/selftest/mod.rs
new file mode 100644
index 0000000000..4edb0b19ce
--- /dev/null
+++ b/apps/cli/src/selftest/mod.rs
@@ -0,0 +1,713 @@
+//! `cap selftest` — diagnostics that verify Cap works correctly on this
+//! machine, starting with an end-to-end A/V sync test: record a known
+//! flash+beep pattern through the real capture pipeline, then measure the
+//! flash-to-beep offset in both the raw recording and an export of it.
+
+pub mod measure;
+pub mod pattern;
+pub mod playback;
+
+use std::{
+    path::{Path, PathBuf},
+    time::Duration,
+};
+
+use cap_project::{RecordingMeta, RecordingMetaInner, StudioRecordingMeta};
+use clap::{Args, Subcommand};
+use serde::Serialize;
+
+use measure::SyncMeasurement;
+use pattern::PatternSpec;
+
+/// Default for `--duration`: seconds of test pattern to record.
+const DEFAULT_PATTERN_SECS: u64 = 20;
+/// Nominal time between flash/beep event onsets.
+const EVENT_PERIOD: Duration = Duration::from_secs(2);
+/// Duration of each flash / beep.
+const FLASH_LEN: Duration = Duration::from_millis(120);
+/// Time the pattern sits black before the first event.
+const SETTLE: Duration = Duration::from_secs(2);
+/// Minimum event count handed to `measure::measure_sync`.
+const MIN_EVENTS: usize = 6;
+
+// Classification limits, compared against absolute values. A measurement
+// passes only if offset, total drift and MAD are all within the PASS limits;
+// otherwise it warns if all three are within the WARN limits; otherwise it
+// fails.
+const PASS_OFFSET_MS: f64 = 80.0;
+const PASS_TOTAL_DRIFT_MS: f64 = 20.0;
+const PASS_MAD_MS: f64 = 20.0;
+const WARN_OFFSET_MS: f64 = 120.0;
+const WARN_TOTAL_DRIFT_MS: f64 = 40.0;
+const WARN_MAD_MS: f64 = 40.0;
+/// Largest allowed difference between the export's and the recording's
+/// median offsets before the run is failed.
+const MAX_RAW_EXPORT_DELTA_MS: f64 = 25.0;
+/// Minimum SNR of the recorded system-audio beeps; below this the raw
+/// analysis reports an error instead of a measurement.
+const MIN_BEEP_SNR: f64 = 8.0;
+/// Extra offset budget for the acoustic microphone path: sound flight time
+/// plus input device latency.
+const MIC_EXTRA_OFFSET_MS: f64 = 60.0;
+/// Acoustic pickup competes with room noise; a lower SNR still yields sharp
+/// onsets for a 1 kHz tone.
+const MIN_MIC_SNR: f64 = 4.0;
+
+// Arguments of the self-test command; `command` selects which diagnostic to
+// run. Plain `//` comments are used on the clap types in this file because
+// clap turns `///` doc comments into user-visible help text.
+#[derive(Args)]
+pub struct SelftestArgs {
+    #[command(subcommand)]
+    pub command: SelftestCommands,
+}
+
+// The self-test subcommands. The `///` lines on the variants are their CLI
+// help text; `analyze` is hidden from help (`hide = true`).
+#[derive(Subcommand)]
+pub enum SelftestCommands {
+    /// Record a test pattern and verify audio/video sync end-to-end
+    #[command(name = "av-sync")]
+    AvSync(AvSyncArgs),
+    /// Verify the editor playback path preserves audio/video sync
+    #[command(name = "playback")]
+    Playback(playback::PlaybackArgs),
+    /// Internal: measure flash/beep onsets in an existing recording or export
+    #[command(name = "analyze", hide = true)]
+    Analyze(AnalyzeArgs),
+}
+
+// Arguments of the hidden `analyze` subcommand. The `///` lines on the fields
+// are their CLI help text. Both offsets default to 0.0 and are added to the
+// detected onset times before flashes and beeps are compared.
+#[derive(Args)]
+pub struct AnalyzeArgs {
+    /// Video file (or file containing both tracks)
+    video: PathBuf,
+    /// Separate audio file (defaults to the video file's audio track)
+    #[arg(long)]
+    audio: Option<PathBuf>,
+    /// Added to flash times (track start offset)
+    #[arg(long, default_value_t = 0.0)]
+    voffset: f64,
+    /// Added to beep times (track start offset)
+    #[arg(long, default_value_t = 0.0)]
+    aoffset: f64,
+}
+
+// Arguments of the `av-sync` subcommand. The `///` lines on the fields are
+// their CLI help text. `duration` is clamped to 14..=120 seconds by
+// `run_av_sync`, and passing `--mic-name` enables the microphone leg even
+// without `--mic`.
+#[derive(Args)]
+pub struct AvSyncArgs {
+    /// Seconds of test pattern to record (longer = more sensitive to drift)
+    #[arg(long, default_value_t = DEFAULT_PATTERN_SECS)]
+    duration: u64,
+    /// Maximum fps to record at (defaults to the standard recording fps)
+    #[arg(long)]
+    fps: Option<u32>,
+    /// Also record a microphone and verify its sync acoustically (the mic
+    /// must be able to hear the test beeps through your speakers)
+    #[arg(long)]
+    mic: bool,
+    /// Microphone device name to use with --mic (defaults to the default mic)
+    #[arg(long)]
+    mic_name: Option<String>,
+    /// Skip exporting the recording (tests only the recording stage)
+    #[arg(long)]
+    skip_export: bool,
+    /// Keep the recorded project on disk for inspection
+    #[arg(long)]
+    keep: bool,
+}
+
+/// Overall outcome of a self-test run, serialized as a lowercase string.
+///
+/// The `Ord` implementation in this file ranks the variants by severity:
+/// `Pass < Warn < Fail < Inconclusive`.
+#[derive(Serialize, Clone, Copy, PartialEq, Eq, Debug)]
+#[serde(rename_all = "lowercase")]
+enum Verdict {
+    /// Offset, drift and spread are all within the pass limits.
+    Pass,
+    /// Outside the pass limits but within the warn limits.
+    Warn,
+    /// Outside the warn limits, or the export failed or shifted sync.
+    Fail,
+    /// A required measurement could not be taken.
+    Inconclusive,
+}
+
+/// Threshold constants echoed into the report so a reader can interpret the
+/// measured numbers. The MAD (spread) limits are not included here.
+#[derive(Serialize)]
+struct Thresholds {
+    pass_offset_ms: f64,
+    pass_total_drift_ms: f64,
+    warn_offset_ms: f64,
+    warn_total_drift_ms: f64,
+    max_raw_export_delta_ms: f64,
+}
+
+/// Supplementary values attached to the report for troubleshooting.
+#[derive(Serialize)]
+struct Diagnostics {
+    /// SNR of the beeps in the recorded system-audio track; `None` when the
+    /// raw recording could not be measured.
+    beep_snr: Option<f64>,
+    /// Audio output latency reported by the test pattern's audio stream.
+    audio_output_latency_ms: Option<f64>,
+    /// Median (beep DAC time − flash present time) at emission; the part of
+    /// the measured offset contributed by the test rig itself.
+    emission_skew_ms: Option<f64>,
+    /// Location of the recorded project, set only when it was kept on disk.
+    project_path: Option<String>,
+}
+
+/// Full result of an `av-sync` run: printed as pretty JSON in JSON mode,
+/// otherwise rendered by `print_human`.
+#[derive(Serialize)]
+struct AvSyncReport {
+    verdict: Verdict,
+    /// One-line explanation of the verdict.
+    summary: String,
+    /// Display flashes vs. system-audio beeps in the raw recording.
+    recording: Option<SyncMeasurement>,
+    /// Display flashes vs. beeps picked up by the microphone track.
+    microphone: Option<SyncMeasurement>,
+    /// Flashes vs. beeps in the exported file.
+    export: Option<SyncMeasurement>,
+    thresholds: Thresholds,
+    diagnostics: Diagnostics,
+}
+
+impl SelftestArgs {
+    /// Runs the selected self-test subcommand. `json` is forwarded to the
+    /// subcommands that take it (`av-sync` and `playback`).
+    pub async fn run(self, json: bool) -> Result<(), String> {
+        let Self { command } = self;
+        match command {
+            SelftestCommands::Analyze(args) => run_analyze(args),
+            SelftestCommands::Playback(args) => playback::run_playback(args, json).await,
+            SelftestCommands::AvSync(args) => run_av_sync(args, json).await,
+        }
+    }
+}
+
+/// Detects flash and beep onsets in existing files, applies the per-track
+/// start offsets, and prints the resulting sync measurement as pretty JSON on
+/// stdout. A one-line event count and SNR summary goes to stderr.
+fn run_analyze(args: AnalyzeArgs) -> Result<(), String> {
+    let AnalyzeArgs {
+        video,
+        audio,
+        voffset,
+        aoffset,
+    } = args;
+
+    let mut flashes: Vec<f64> = Vec::new();
+    for onset in measure::video_flash_onsets(&video)? {
+        flashes.push(onset + voffset);
+    }
+
+    // Without a separate audio file, read the audio track of the video file.
+    let audio_path: &PathBuf = match &audio {
+        Some(path) => path,
+        None => &video,
+    };
+    let detected = measure::audio_beep_onsets(audio_path)?;
+    let beeps: Vec<f64> = detected.onsets.iter().map(|t| t + aoffset).collect();
+
+    eprintln!(
+        "flashes: {} beeps: {} (snr {:.1})",
+        flashes.len(),
+        beeps.len(),
+        detected.snr
+    );
+
+    let measurement = measure::measure_sync(&flashes, &beeps, MIN_EVENTS)?;
+    let rendered = serde_json::to_string_pretty(&measurement)
+        .map_err(|e| format!("failed to serialize: {e}"))?;
+    println!("{}", rendered);
+    Ok(())
+}
+
+/// Writes a progress line to stderr, unless JSON output was requested.
+fn progress(json: bool, msg: &str) {
+    if json {
+        return;
+    }
+    eprintln!("{msg}");
+}
+
+/// Runs the end-to-end A/V sync self-test.
+///
+/// Steps: start a recording into a fresh temp project, run the flash+beep
+/// pattern, stop and remux the recording, measure the raw tracks (and the
+/// microphone track if requested), optionally export and measure the export,
+/// then evaluate everything into a verdict and print the report (pretty JSON
+/// when `json` is set, otherwise human-readable text).
+///
+/// Returns `Ok(())` for `Pass` and `Warn`, and `Err` with the summary for
+/// `Fail` and `Inconclusive`, as well as for any setup or recording failure.
+/// The recorded project is deleted only when the verdict is `Pass` and
+/// `--keep` was not given.
+async fn run_av_sync(args: AvSyncArgs, json: bool) -> Result<(), String> {
+    // ffmpeg's own stderr chatter (muxer segment writes, codec notes) drowns
+    // the progress output; measurement errors are surfaced through Results.
+    ffmpeg::util::log::set_level(ffmpeg::util::log::Level::Quiet);
+
+    // The floor guarantees enough events for measure_sync's minimum after the
+    // first event is dropped: 14s -> 7 events -> 6 usable pairs.
+    let pattern_secs = args.duration.clamp(14, 120);
+    let events = (pattern_secs / EVENT_PERIOD.as_secs()).max(3) as u32;
+    let spec = PatternSpec {
+        settle: SETTLE,
+        events,
+        period: EVENT_PERIOD,
+        flash_len: FLASH_LEN,
+    };
+
+    // Rough wall-clock estimate: settle + pattern + finalize + analysis (+ export).
+    let estimate_secs = spec.total_runtime().as_secs() + 4 + if args.skip_export { 0 } else { 6 };
+
+    progress(json, "Cap A/V sync self-test");
+    progress(
+        json,
+        &format!(
+            "This will take about {} seconds.",
+            // Rounded to the nearest ten seconds.
+            (estimate_secs as f64 / 10.0).round() as u64 * 10
+        ),
+    );
+    progress(
+        json,
+        "A black window will appear with brief white flashes and short beeps.\n\
+         Leave the window visible and make sure output volume is not muted.\n",
+    );
+
+    // Each run records into its own uniquely named project in the temp dir.
+    let project_path =
+        std::env::temp_dir().join(format!("cap-selftest-{}.cap", uuid::Uuid::new_v4()));
+
+    progress(
+        json,
+        &format!("[1/4] Recording test pattern ({pattern_secs}s)..."),
+    );
+    // The mic leg is enabled by either --mic or --mic-name; without an
+    // explicit name the default input device's label is used.
+    let mic_name =
+        if args.mic || args.mic_name.is_some() {
+            match args.mic_name.clone().or_else(|| {
+                cap_recording::MicrophoneFeed::default_device().map(|(label, _, _)| label)
+            }) {
+                Some(label) => {
+                    progress(json, &format!("Including microphone: {label}"));
+                    Some(label)
+                }
+                None => return Err("no microphone available for --mic".to_string()),
+            }
+        } else {
+            None
+        };
+
+    let handle = start_recording(&project_path, args.fps, mic_name.clone()).await?;
+
+    // Give capture a moment to deliver first frames before the pattern starts.
+    tokio::time::sleep(Duration::from_millis(500)).await;
+
+    let pattern_result = pattern::request_pattern(spec).await;
+
+    let report = match pattern_result {
+        Ok(report) => report,
+        Err(e) => {
+            // Best-effort cleanup: the recording is useless without a pattern.
+            let _ = handle.stop().await;
+            let _ = std::fs::remove_dir_all(&project_path);
+            if e == "cancelled" {
+                return Err("self-test cancelled".to_string());
+            }
+            return Err(format!("test pattern failed: {e}"));
+        }
+    };
+
+    // Let the tail of the last beep land in the recording.
+    tokio::time::sleep(Duration::from_secs(1)).await;
+
+    progress(json, "[2/4] Finalizing recording...");
+    let completed = handle
+        .stop()
+        .await
+        .map_err(|e| format!("failed to stop recording: {e}"))?;
+    // From here on use the path reported by the completed recording.
+    let project_path = completed.project_path.clone();
+
+    // Fragmented recordings need the shared remux step before their segment
+    // files are directly readable (the same step the desktop app runs).
+    {
+        let project_path = project_path.clone();
+        tokio::task::spawn_blocking(move || {
+            cap_recording::recovery::RecoveryManager::remux_if_needed(&project_path)
+        })
+        .await
+        .map_err(|e| format!("remux task join error: {e}"))?
+        .map_err(|e| format!("failed to finalize recording segments: {e}"))?;
+    }
+
+    let emission_skew_ms = median_emission_skew_ms(&report);
+
+    progress(json, "[3/4] Analyzing recording...");
+    let raw = analyze_raw(&project_path);
+    // The mic track is only analyzed when a microphone was recorded.
+    let mic = mic_name.is_some().then(|| analyze_mic(&project_path));
+
+    // Ok(None) means the export stage was skipped on request; Err means the
+    // export itself or its analysis failed.
+    let export = if args.skip_export {
+        Ok(None)
+    } else {
+        progress(json, "[4/4] Exporting and verifying the export...");
+        match crate::export::export_project_default(project_path.clone()).await {
+            Ok(output) => analyze_export(&output).map(Some),
+            Err(e) => Err(format!("export failed: {e}")),
+        }
+    };
+
+    let (verdict, summary, raw_m, mic_m, export_m, snr) = evaluate(raw, mic, export);
+
+    // Anything other than a clean pass keeps the project for inspection.
+    let keep = args.keep || verdict != Verdict::Pass;
+    if keep {
+        progress(
+            json,
+            &format!("Recorded project kept at {}", project_path.display()),
+        );
+    } else {
+        let _ = std::fs::remove_dir_all(&project_path);
+    }
+
+    // Shadows the pattern report; its audio latency is read just below.
+    let report = AvSyncReport {
+        verdict,
+        summary: summary.clone(),
+        recording: raw_m,
+        microphone: mic_m,
+        export: export_m,
+        thresholds: Thresholds {
+            pass_offset_ms: PASS_OFFSET_MS,
+            pass_total_drift_ms: PASS_TOTAL_DRIFT_MS,
+            warn_offset_ms: WARN_OFFSET_MS,
+            warn_total_drift_ms: WARN_TOTAL_DRIFT_MS,
+            max_raw_export_delta_ms: MAX_RAW_EXPORT_DELTA_MS,
+        },
+        diagnostics: Diagnostics {
+            beep_snr: snr,
+            audio_output_latency_ms: report.audio_latency_ms,
+            emission_skew_ms,
+            project_path: keep.then(|| project_path.display().to_string()),
+        },
+    };
+
+    if json {
+        println!(
+            "{}",
+            serde_json::to_string_pretty(&report)
+                .map_err(|e| format!("failed to serialize report: {e}"))?
+        );
+    } else {
+        print_human(&report);
+    }
+
+    // Warn still counts as success for the caller; Fail and Inconclusive are
+    // returned as errors carrying the summary.
+    match verdict {
+        Verdict::Pass | Verdict::Warn => Ok(()),
+        Verdict::Fail => Err(format!("A/V sync check failed: {summary}")),
+        Verdict::Inconclusive => Err(format!("A/V sync check inconclusive: {summary}")),
+    }
+}
+
+/// Prints the human-readable report to stdout: one line per available
+/// measurement (recording, microphone, export) followed by the verdict and
+/// its summary.
+fn print_human(report: &AvSyncReport) {
+    println!();
+
+    // The label padding keeps the output identical to the per-leg format
+    // strings this replaces.
+    let legs = [
+        ("Recording:", report.recording.as_ref()),
+        ("Microphone:", report.microphone.as_ref()),
+        ("Export:   ", report.export.as_ref()),
+    ];
+    for (label, measurement) in legs {
+        let Some(m) = measurement else {
+            continue;
+        };
+        println!(
+            "{label} offset {:+.0} ms (median), drift {:+.0} ms over {:.0}s, {} events (spread ±{:.0} ms)",
+            m.median_offset_ms, m.total_drift_ms, m.span_secs, m.inlier_events, m.mad_ms
+        );
+    }
+
+    let verdict_label = match report.verdict {
+        Verdict::Inconclusive => "INCONCLUSIVE",
+        Verdict::Fail => "FAIL",
+        Verdict::Warn => "WARN",
+        Verdict::Pass => "PASS",
+    };
+    println!("\nResult: {verdict_label} — {}", report.summary);
+}
+
+/// Starts a studio recording of the primary display with system audio into
+/// the project at `path`.
+///
+/// * `fps` — forwarded to `RecordingDefaults::apply_to_studio_builder`.
+/// * `mic_name` — when set, a microphone feed is spawned, pointed at the
+///   input with this label, locked, and attached to the recording.
+///
+/// Returns the recording actor handle, or a human-readable error if the
+/// microphone cannot be set up, screen-capture content is unavailable
+/// (macOS), or the recording fails to start.
+async fn start_recording(
+    path: &Path,
+    fps: Option<u32>,
+    mic_name: Option<String>,
+) -> Result<cap_recording::studio_recording::ActorHandle, String> {
+    use cap_recording::{
+        MicrophoneFeed, feeds::microphone, screen_capture::ScreenCaptureTarget, studio_recording,
+    };
+    use kameo::Actor as _;
+
+    let display = scap_targets::Display::primary();
+    let target = ScreenCaptureTarget::Display { id: display.id() };
+
+    let mut builder =
+        studio_recording::Actor::builder(path.to_path_buf(), target).with_system_audio(true);
+
+    if let Some(label) = mic_name {
+        // The error receiver is never read; it is only kept alive until the
+        // end of this block.
+        let (error_tx, _error_rx) = flume::bounded(16);
+        let mic_feed = MicrophoneFeed::spawn(MicrophoneFeed::new(error_tx));
+        // Two awaits: the first delivers the request to the feed actor, the
+        // second waits for the device connection it kicked off.
+        mic_feed
+            .ask(microphone::SetInput {
+                label: label.clone(),
+                settings: None,
+            })
+            .await
+            .map_err(|e| format!("failed to set microphone input '{label}': {e}"))?
+            .await
+            .map_err(|e| format!("microphone '{label}' failed to connect: {e}"))?;
+        // The stream needs a moment to warm up before locking on slower devices.
+        tokio::time::sleep(Duration::from_millis(100)).await;
+        let lock = mic_feed
+            .ask(microphone::Lock)
+            .await
+            .map_err(|e| format!("failed to lock microphone feed: {e}"))?;
+        builder = builder.with_mic_feed(std::sync::Arc::new(lock));
+    }
+
+    let builder =
+        cap_recording::RecordingDefaults::default().apply_to_studio_builder(builder, false, fps);
+
+    // macOS only: fetch the current shareable content up front so a failure
+    // can be reported with a hint about the screen recording permission.
+    #[cfg(target_os = "macos")]
+    let shareable_content = cidre::sc::ShareableContent::current()
+        .await
+        .map_err(|e| {
+            format!(
+                "screen recording permission unavailable: {e}. \
+                 Grant Cap screen recording access in System Settings and retry."
+            )
+        })
+        .map(cap_recording::SendableShareableContent::from)?;
+
+    builder
+        .build(
+            #[cfg(target_os = "macos")]
+            Some(shareable_content),
+        )
+        .await
+        .map_err(|e| format!("failed to start recording: {e}"))
+}
+
+/// Track files of the first recorded segment plus their start times, as
+/// resolved by `locate_raw_tracks`.
+struct RawTracks {
+    /// Display video file.
+    display: PathBuf,
+    /// System audio file.
+    system_audio: PathBuf,
+    /// Start time recorded for the display track in the meta (0.0 if absent).
+    display_start: f64,
+    /// Start time recorded for the system audio track (0.0 if absent).
+    audio_start: f64,
+    /// Microphone file and its start time, when a mic track was recorded.
+    mic: Option<(PathBuf, f64)>,
+}
+
+/// Loads the project's recording meta and resolves the display, system
+/// audio and (optional) microphone files of its first segment, together with
+/// their start times (0.0 where the meta has none).
+///
+/// Fails if the meta cannot be loaded, the recording is not a multi-segment
+/// studio recording, it has no segments or no system audio track, or the
+/// display file does not exist.
+fn locate_raw_tracks(project_path: &Path) -> Result<RawTracks, String> {
+    let meta = match RecordingMeta::load_for_project(project_path) {
+        Ok(meta) => meta,
+        Err(e) => return Err(format!("failed to load recording meta: {e}")),
+    };
+
+    let studio = match &meta.inner {
+        RecordingMetaInner::Studio(studio) => studio,
+        _ => return Err("self-test recording is not a studio recording".to_string()),
+    };
+    let inner = match &**studio {
+        StudioRecordingMeta::MultipleSegments { inner, .. } => inner,
+        _ => return Err("unexpected single-segment recording".to_string()),
+    };
+
+    let Some(segment) = inner.segments.first() else {
+        return Err("recording has no segments".to_string());
+    };
+    let Some(audio) = segment.system_audio.as_ref() else {
+        return Err("recording has no system audio track".to_string());
+    };
+
+    // Fragmented recordings write meta before remux, so the display path may
+    // still reference the fragments directory; the remuxed file sits next to it.
+    let recorded_display = meta.path(&segment.display.path);
+    let display = if recorded_display.is_dir() {
+        recorded_display.with_extension("mp4")
+    } else {
+        recorded_display
+    };
+    if !display.is_file() {
+        return Err(format!("display track not found at {}", display.display()));
+    }
+
+    let mic = match segment.mic.as_ref() {
+        Some(mic) => Some((meta.path(&mic.path), mic.start_time.unwrap_or(0.0))),
+        None => None,
+    };
+
+    let system_audio = meta.path(&audio.path);
+    let display_start = segment.display.start_time.unwrap_or(0.0);
+    let audio_start = audio.start_time.unwrap_or(0.0);
+
+    Ok(RawTracks {
+        display,
+        system_audio,
+        display_start,
+        audio_start,
+        mic,
+    })
+}
+
+/// Result of analyzing one leg: the sync measurement together with the SNR of
+/// the audio track it was measured on, or a human-readable reason why no
+/// measurement could be taken.
+type MeasureOutcome = Result<(SyncMeasurement, f64), String>;
+
+/// Measures the recorded display flashes against the system-audio beeps of
+/// the raw recording. Each track's start time is added to its onsets before
+/// pairing. Returns an error instead of a measurement when the beeps' SNR is
+/// below `MIN_BEEP_SNR`.
+fn analyze_raw(project_path: &Path) -> MeasureOutcome {
+    let RawTracks {
+        display,
+        system_audio,
+        display_start,
+        audio_start,
+        ..
+    } = locate_raw_tracks(project_path)?;
+
+    let mut flashes: Vec<f64> = Vec::new();
+    for onset in measure::video_flash_onsets(&display)? {
+        flashes.push(onset + display_start);
+    }
+
+    let detected = measure::audio_beep_onsets(&system_audio)?;
+    let beeps: Vec<f64> = detected.onsets.iter().map(|t| t + audio_start).collect();
+
+    let snr = detected.snr;
+    if snr < MIN_BEEP_SNR {
+        return Err(format!(
+            "test tone barely audible in the recording (SNR {:.1}); \
+             check that output volume is not muted",
+            snr
+        ));
+    }
+
+    let measurement = measure::measure_sync(&flashes, &beeps, MIN_EVENTS)?;
+    Ok((measurement, snr))
+}
+
+/// Measures the microphone track against the display flashes. The beeps
+/// reach the mic acoustically, so this validates the real input-device path
+/// end to end (device rate, resampling, timestamping). Returns an error
+/// instead of a measurement when the recording has no microphone track or
+/// the beeps' SNR is below `MIN_MIC_SNR`.
+fn analyze_mic(project_path: &Path) -> MeasureOutcome {
+    let RawTracks {
+        display,
+        display_start,
+        mic,
+        ..
+    } = locate_raw_tracks(project_path)?;
+    let Some((mic_path, mic_start)) = mic else {
+        return Err("recording has no microphone track despite --mic".to_string());
+    };
+
+    let mut flashes: Vec<f64> = Vec::new();
+    for onset in measure::video_flash_onsets(&display)? {
+        flashes.push(onset + display_start);
+    }
+
+    let detected = measure::audio_beep_onsets(&mic_path)?;
+    let beeps: Vec<f64> = detected.onsets.iter().map(|t| t + mic_start).collect();
+
+    let snr = detected.snr;
+    if snr < MIN_MIC_SNR {
+        return Err(format!(
+            "test tone barely audible through the microphone (SNR {:.1}); \
+             raise the output volume or move the mic closer to the speakers",
+            snr
+        ));
+    }
+
+    let measurement = measure::measure_sync(&flashes, &beeps, MIN_EVENTS)?;
+    Ok((measurement, snr))
+}
+
+/// Measures flash/beep sync in an exported file, reading both the video and
+/// the audio track from `output`. No start offsets are applied and no SNR
+/// floor is enforced here.
+fn analyze_export(output: &Path) -> MeasureOutcome {
+    let flashes = measure::video_flash_onsets(output)?;
+    let detected = measure::audio_beep_onsets(output)?;
+    let measurement = measure::measure_sync(&flashes, &detected.onsets, MIN_EVENTS)?;
+    Ok((measurement, detected.snr))
+}
+
+/// Median of (beep output time − flash present time) in milliseconds over
+/// the events that have both a flash and a beep entry. Positive means the
+/// beep came after the flash. Returns `None` when no event has both; for an
+/// even number of samples the upper of the two middle values is returned.
+fn median_emission_skew_ms(report: &pattern::PatternReport) -> Option<f64> {
+    let mut skews_ms: Vec<f64> = Vec::new();
+    for (event, flash) in &report.flash_presents {
+        let Some((_, beep)) = report.beep_outputs.iter().find(|(e, _)| e == event) else {
+            continue;
+        };
+        // Instant subtraction is only taken in the non-negative direction;
+        // the sign is applied afterwards.
+        let skew_ms = if beep >= flash {
+            (*beep - *flash).as_secs_f64() * 1000.0
+        } else {
+            -((*flash - *beep).as_secs_f64() * 1000.0)
+        };
+        skews_ms.push(skew_ms);
+    }
+
+    skews_ms.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
+    // An empty list has no element at index 0, which yields `None`.
+    skews_ms.get(skews_ms.len() / 2).copied()
+}
+
+/// Shared pass/warn/fail ladder for a sync measurement.
+///
+/// A measurement passes when the absolute median offset, the absolute total
+/// drift and the MAD are all within the `PASS_*` limits, warns when all three
+/// are within the `WARN_*` limits, and fails otherwise. `extra_offset_ms`
+/// widens only the two offset limits; the drift and spread limits are
+/// unchanged.
+fn classify_with_offset_budget(m: &SyncMeasurement, extra_offset_ms: f64) -> Verdict {
+    let offset = m.median_offset_ms.abs();
+    let drift = m.total_drift_ms.abs();
+    let within = |offset_limit: f64, drift_limit: f64, mad_limit: f64| {
+        offset <= offset_limit + extra_offset_ms && drift <= drift_limit && m.mad_ms <= mad_limit
+    };
+
+    if within(PASS_OFFSET_MS, PASS_TOTAL_DRIFT_MS, PASS_MAD_MS) {
+        Verdict::Pass
+    } else if within(WARN_OFFSET_MS, WARN_TOTAL_DRIFT_MS, WARN_MAD_MS) {
+        Verdict::Warn
+    } else {
+        Verdict::Fail
+    }
+}
+
+/// Classifies a digital-path measurement (raw recording or export) against
+/// the unmodified pass/warn limits.
+fn classify(m: &SyncMeasurement) -> Verdict {
+    classify_with_offset_budget(m, 0.0)
+}
+
+/// Classifies the acoustic microphone measurement: same drift/spread rules
+/// as the digital path, with extra offset budget for sound flight time and
+/// input device latency.
+fn classify_mic(m: &SyncMeasurement) -> Verdict {
+    classify_with_offset_budget(m, MIC_EXTRA_OFFSET_MS)
+}
+
+/// Merges verdicts: Fail dominates everything (including Inconclusive),
+/// otherwise the higher-ranked of the two under `Verdict`'s ordering wins.
+fn merge_verdicts(a: Verdict, b: Verdict) -> Verdict {
+    match (a, b) {
+        (Verdict::Fail, _) | (_, Verdict::Fail) => Verdict::Fail,
+        _ => std::cmp::max(a, b),
+    }
+}
+
+/// Combines the per-leg analysis results into the final verdict.
+///
+/// Returns `(verdict, summary, recording, microphone, export, snr)`, where
+/// the three measurements are `None` when that leg was not measured and
+/// `snr` is the SNR reported with the raw recording's measurement.
+///
+/// Rules, in order:
+/// * raw analysis failed → `Inconclusive` with its reason, nothing else set;
+/// * export failed → `Fail` with its reason (the microphone result is not
+///   included on this path);
+/// * otherwise each available leg is classified and merged with
+///   `merge_verdicts`; a mic leg that could not be measured contributes
+///   `Inconclusive`; an export whose median offset differs from the
+///   recording's by more than `MAX_RAW_EXPORT_DELTA_MS` forces `Fail`.
+#[allow(clippy::type_complexity)]
+fn evaluate(
+    raw: MeasureOutcome,
+    mic: Option<MeasureOutcome>,
+    export: Result<Option<(SyncMeasurement, f64)>, String>,
+) -> (
+    Verdict,
+    String,
+    Option<SyncMeasurement>,
+    Option<SyncMeasurement>,
+    Option<SyncMeasurement>,
+    Option<f64>,
+) {
+    let (raw_m, snr) = match raw {
+        Ok((m, snr)) => (m, snr),
+        Err(reason) => {
+            return (Verdict::Inconclusive, reason, None, None, None, None);
+        }
+    };
+
+    let export_m = match export {
+        Ok(Some((m, _))) => Some(m),
+        Ok(None) => None,
+        Err(reason) => {
+            // A recording that measures fine but cannot be exported is a hard
+            // failure: the export path is part of the product.
+            return (Verdict::Fail, reason, Some(raw_m), None, None, Some(snr));
+        }
+    };
+
+    let mut verdict = classify(&raw_m);
+    // One entry per leg that did not pass, in recording, mic, export order.
+    let mut reasons: Vec<String> = Vec::new();
+
+    if verdict != Verdict::Pass {
+        reasons.push(format!(
+            "recording offset {:+.0} ms / drift {:+.0} ms over {:.0}s",
+            raw_m.median_offset_ms, raw_m.total_drift_ms, raw_m.span_secs
+        ));
+    }
+
+    let mic_m = match mic {
+        None => None,
+        Some(Ok((m, _))) => {
+            let mic_verdict = classify_mic(&m);
+            if mic_verdict != Verdict::Pass {
+                reasons.push(format!(
+                    "microphone offset {:+.0} ms / drift {:+.0} ms over {:.0}s",
+                    m.median_offset_ms, m.total_drift_ms, m.span_secs
+                ));
+            }
+            verdict = merge_verdicts(verdict, mic_verdict);
+            Some(m)
+        }
+        Some(Err(reason)) => {
+            // The mic leg was explicitly requested; not being able to measure
+            // it makes the run inconclusive (unless something already failed).
+            verdict = merge_verdicts(verdict, Verdict::Inconclusive);
+            reasons.push(reason);
+            None
+        }
+    };
+
+    if let Some(export_m) = &export_m {
+        let export_verdict = classify(export_m);
+        if export_verdict != Verdict::Pass {
+            reasons.push(format!(
+                "export offset {:+.0} ms / drift {:+.0} ms over {:.0}s",
+                export_m.median_offset_ms, export_m.total_drift_ms, export_m.span_secs
+            ));
+        }
+        verdict = merge_verdicts(verdict, export_verdict);
+        // The export must not shift sync relative to the recording, even if
+        // both legs individually classify as fine.
+        let delta = (export_m.median_offset_ms - raw_m.median_offset_ms).abs();
+        if delta > MAX_RAW_EXPORT_DELTA_MS {
+            verdict = Verdict::Fail;
+            reasons.push(format!(
+                "export changes sync by {delta:.0} ms vs the recording"
+            ));
+        }
+    }
+
+    let summary = match verdict {
+        Verdict::Pass => format!(
+            "audio/video sync is healthy (offset {:+.0} ms, drift {:+.0} ms over {:.0}s)",
+            raw_m.median_offset_ms, raw_m.total_drift_ms, raw_m.span_secs
+        ),
+        Verdict::Warn => format!(
+            "sync is within tolerance but not ideal: {}",
+            reasons.join("; ")
+        ),
+        Verdict::Fail => format!("sync problem detected: {}", reasons.join("; ")),
+        Verdict::Inconclusive => reasons.join("; "),
+    };
+
+    (verdict, summary, Some(raw_m), mic_m, export_m, Some(snr))
+}
+
+impl PartialOrd for Verdict {
+    /// Verdicts are totally ordered, so this always returns `Some` of the
+    /// `Ord` comparison.
+    fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
+        Some(Ord::cmp(self, rhs))
+    }
+}
+
+impl Ord for Verdict {
+    /// Orders verdicts by severity: `Pass < Warn < Fail < Inconclusive`.
+    fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
+        let severity = |verdict: &Verdict| -> u8 {
+            match verdict {
+                Verdict::Pass => 0,
+                Verdict::Warn => 1,
+                Verdict::Fail => 2,
+                Verdict::Inconclusive => 3,
+            }
+        };
+        severity(self).cmp(&severity(rhs))
+    }
+}
diff --git a/apps/cli/src/selftest/pattern.rs b/apps/cli/src/selftest/pattern.rs
new file mode 100644
index 0000000000..d96b3037bc
--- /dev/null
+++ b/apps/cli/src/selftest/pattern.rs
@@ -0,0 +1,530 @@
+//! Main-thread test pattern for the A/V sync self-test.
+//!
+//! Renders a fullscreen black window that flashes white at a fixed period
+//! while playing a 1 kHz beep through the default audio output at the same
+//! scheduled instants. The window must run on the process main thread
+//! (required by AppKit); the async side of the self-test requests a pattern
+//! run through [`request_pattern`] and the real main thread services it via
+//! [`serve_main_thread`].
+
+use std::{
+    num::NonZeroU32,
+    sync::{
+        Arc, Mutex, OnceLock,
+        atomic::{AtomicBool, Ordering},
+        mpsc,
+    },
+    time::{Duration, Instant},
+};
+
+use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
+use winit::{
+    application::ApplicationHandler,
+    event::{ElementState, WindowEvent},
+    event_loop::{ActiveEventLoop, ControlFlow, EventLoop},
+    keyboard::{Key, NamedKey},
+    platform::run_on_demand::EventLoopExtRunOnDemand,
+    window::{Fullscreen, Window, WindowId, WindowLevel},
+};
+
+/// Timing parameters of one flash+beep pattern run.
+#[derive(Clone, Copy, Debug)]
+pub struct PatternSpec {
+    /// Time to sit black before the first event, letting capture settle.
+    pub settle: Duration,
+    /// Number of flash+beep events.
+    pub events: u32,
+    /// Time between event onsets.
+    pub period: Duration,
+    /// Duration of each flash / beep.
+    pub flash_len: Duration,
+}
+
+impl PatternSpec {
+    /// Settle time plus one period per event plus a fixed 500 ms.
+    pub fn total_runtime(&self) -> Duration {
+        let pattern = self.period * self.events;
+        let tail = Duration::from_millis(500);
+        self.settle + pattern + tail
+    }
+
+    /// Event onsets relative to the pattern epoch. Nominally periodic, with a
+    /// deterministic per-event jitter of up to ±300 ms: a perfectly periodic
+    /// schedule would let an A/V shift of exactly one period pair every flash
+    /// with the wrong beep and alias to a zero measured offset. Onsets are
+    /// clamped so none falls before the epoch.
+    pub fn event_offsets_secs(&self) -> Vec<f64> {
+        let period_secs = self.period.as_secs_f64();
+        let mut offsets = Vec::with_capacity(self.events as usize);
+        for k in 0..self.events {
+            // Multiplicative hash of the event index, reduced to 0..=600 ms
+            // and then centered on zero.
+            let hashed = u64::from(k).wrapping_mul(2_654_435_761) % 601;
+            let jitter = hashed as f64 / 1000.0 - 0.3;
+            let onset = (f64::from(k) * period_secs + jitter).max(0.0);
+            offsets.push(onset);
+        }
+        offsets
+    }
+}
+
+/// What the pattern run observed about its own emission timing. The `u32`
+/// in each pair is the event index.
+#[derive(Debug)]
+pub struct PatternReport {
+    /// Instants at which each flash was actually presented (post-present).
+    pub flash_presents: Vec<(u32, Instant)>,
+    /// Estimated instants at which each beep hit the output (DAC time).
+    pub beep_outputs: Vec<(u32, Instant)>,
+    /// Mean reported output latency of the audio stream, if available.
+    pub audio_latency_ms: Option<f64>,
+}
+
+/// A request for the main thread to run one pattern.
+pub struct PatternRequest {
+    /// The pattern to run.
+    pub spec: PatternSpec,
+    /// Channel on which the main thread sends back the run's result.
+    pub reply: mpsc::Sender<Result<PatternReport, String>>,
+}
+
+/// Sender half of the channel to the main-thread pattern runner. The inner
+/// `Option` is set on install and taken on shutdown; dropping the sender is
+/// what ends the main thread's serve loop.
+static PATTERN_TX: OnceLock<Mutex<Option<mpsc::Sender<PatternRequest>>>> = OnceLock::new();
+
+/// Called from `main()` before the runtime thread spawns, when the parsed
+/// command is a self-test. Returns the receiver the main thread must serve.
+///
+/// Installing again (including after [`shutdown_main_thread_runner`])
+/// replaces the stored sender, so the returned receiver is always the live
+/// one. Previously a repeat call kept the old sender and handed back a
+/// receiver whose only sender had already been dropped.
+pub fn install_main_thread_runner() -> mpsc::Receiver<PatternRequest> {
+    let (tx, rx) = mpsc::channel();
+    // `get_or_init` creates the slot on first use; later calls reuse it and
+    // only swap the sender stored inside.
+    let slot = PATTERN_TX.get_or_init(|| Mutex::new(None));
+    *slot.lock().unwrap() = Some(tx);
+    rx
+}
+
+/// Called by the runtime thread once the command finishes, releasing the main
+/// thread from its serve loop. Does nothing if no runner was installed.
+pub fn shutdown_main_thread_runner() {
+    let Some(slot) = PATTERN_TX.get() else {
+        return;
+    };
+    // Clearing the slot drops the stored sender.
+    let mut sender = slot.lock().unwrap();
+    *sender = None;
+}
+
+/// Runs pattern requests on the main thread until the sender is dropped via
+/// [`shutdown_main_thread_runner`]. A reply that can no longer be delivered
+/// is silently discarded.
+pub fn serve_main_thread(rx: mpsc::Receiver<PatternRequest>) {
+    loop {
+        // `recv` fails once every sender is gone, which ends the loop.
+        let Ok(PatternRequest { spec, reply }) = rx.recv() else {
+            break;
+        };
+        let outcome = run_pattern(spec);
+        let _ = reply.send(outcome);
+    }
+}
+
+/// Called from the async side; suspends the calling task until the pattern
+/// window has run to completion on the main thread. The blocking channel
+/// round-trip happens on a `spawn_blocking` worker.
+///
+/// Fails if no runner is installed, the runner has shut down, the request is
+/// dropped without a reply, or the worker task cannot be joined; otherwise
+/// returns whatever the pattern run itself reported.
+pub async fn request_pattern(spec: PatternSpec) -> Result<PatternReport, String> {
+    let installed = PATTERN_TX
+        .get()
+        .and_then(|slot| slot.lock().unwrap().clone());
+    let Some(sender) = installed else {
+        return Err("self-test pattern runner is not installed on the main thread".to_string());
+    };
+
+    let worker = tokio::task::spawn_blocking(move || -> Result<PatternReport, String> {
+        let (reply_tx, reply_rx) = mpsc::channel();
+        let request = PatternRequest {
+            spec,
+            reply: reply_tx,
+        };
+        if sender.send(request).is_err() {
+            return Err("main thread pattern runner is gone".to_string());
+        }
+        match reply_rx.recv() {
+            Ok(outcome) => outcome,
+            Err(_) => Err("main thread pattern runner dropped the request".to_string()),
+        }
+    });
+
+    match worker.await {
+        Ok(outcome) => outcome,
+        Err(e) => Err(format!("pattern task join error: {e}")),
+    }
+}
+
+/// State behind the beep output callback (`fill`): the beep schedule plus
+/// the counters and observations `fill` updates on every call.
+struct BeepState {
+    /// Pattern epoch that the event windows are relative to.
+    epoch: Instant,
+    /// Sorted (start, end) sample windows of each beep, relative to epoch.
+    event_windows: Vec<(u64, u64)>,
+    /// Output sample rate in Hz.
+    sample_rate: u32,
+    /// Number of interleaved output channels.
+    channels: usize,
+    /// Absolute sample index of pattern epoch, fixed on the first callback.
+    epoch_sample: Mutex<Option<i64>>,
+    /// Frames written so far across all callbacks.
+    samples_written: Mutex<u64>,
+    /// (event index, estimated output instant) recorded at each beep onset.
+    beep_outputs: Mutex<Vec<(u32, Instant)>>,
+    /// (sum of reported latencies in ms, number of reports).
+    latency_sum_ms: Mutex<(f64, u64)>,
+}
+
+impl BeepState {
+    /// Fills one interleaved output buffer: silence everywhere except inside
+    /// the scheduled beep windows, where a faded 1 kHz tone is written to all
+    /// channels.
+    ///
+    /// Side effects: on the first call, fixes the sample index that
+    /// corresponds to the pattern epoch; accumulates the reported output
+    /// latency when the timestamps provide one; records an estimated output
+    /// instant for the first sample of every beep; advances the written-frame
+    /// counter.
+    fn fill(&self, data: &mut [f32], info: &cpal::OutputCallbackInfo) {
+        let now = Instant::now();
+        let mut written = self.samples_written.lock().unwrap();
+        let buffer_start_sample = *written as i64;
+
+        // The guard stays alive for the rest of the call; the name is then
+        // shadowed by the resolved sample index.
+        let mut epoch_sample = self.epoch_sample.lock().unwrap();
+        let epoch_sample = *epoch_sample.get_or_insert_with(|| {
+            // Signed seconds from this callback to the epoch (negative when
+            // the epoch has already passed).
+            let until_epoch = if self.epoch > now {
+                (self.epoch - now).as_secs_f64()
+            } else {
+                -(now - self.epoch).as_secs_f64()
+            };
+            buffer_start_sample + (until_epoch * self.sample_rate as f64) as i64
+        });
+
+        // Reported gap between this callback and when its samples play.
+        let latency = info
+            .timestamp()
+            .playback
+            .duration_since(&info.timestamp().callback);
+        if let Some(latency) = latency {
+            let mut acc = self.latency_sum_ms.lock().unwrap();
+            acc.0 += latency.as_secs_f64() * 1000.0;
+            acc.1 += 1;
+        }
+
+        let frames = data.len() / self.channels.max(1);
+        for frame_idx in 0..frames {
+            let abs_sample = buffer_start_sample + frame_idx as i64;
+            // Sample position relative to the pattern epoch.
+            let rel = abs_sample - epoch_sample;
+            let mut value = 0.0f32;
+            if rel >= 0 {
+                let rel = rel as u64;
+                // Index of the first window starting after `rel`; the window
+                // before it (if any) is the only one that can contain `rel`.
+                let idx = self
+                    .event_windows
+                    .partition_point(|&(start, _)| start <= rel);
+                if idx > 0 {
+                    let (start, end) = self.event_windows[idx - 1];
+                    if rel < end {
+                        // 1 kHz tone with a 2 ms fade-in/out to avoid clicks while
+                        // keeping the onset sharp for detection.
+                        let within = rel - start;
+                        let t = within as f32 / self.sample_rate as f32;
+                        let fade_len = 0.002 * self.sample_rate as f32;
+                        let fade_in = (within as f32 / fade_len).min(1.0);
+                        let remaining = (end - rel) as f32;
+                        let fade_out = (remaining / fade_len).min(1.0);
+                        value = 0.4
+                            * fade_in
+                            * fade_out
+                            * (t * 1000.0 * 2.0 * std::f32::consts::PI).sin();
+
+                        // First sample of a beep: estimate when it reaches
+                        // the output as callback time + reported latency +
+                        // this frame's position within the buffer.
+                        if within == 0 {
+                            let dac = now
+                                + latency.unwrap_or_default()
+                                + Duration::from_secs_f64(
+                                    frame_idx as f64 / self.sample_rate as f64,
+                                );
+                            self.beep_outputs
+                                .lock()
+                                .unwrap()
+                                .push(((idx - 1) as u32, dac));
+                        }
+                    }
+                }
+            }
+            // Same sample value on every channel of the frame.
+            for ch in 0..self.channels {
+                data[frame_idx * self.channels + ch] = value;
+            }
+        }
+
+        *written += frames as u64;
+    }
+}
+
+/// Builds (but does not start) an output stream on the default audio device
+/// that plays the beep schedule described by `spec`, anchored at `epoch`.
+///
+/// Event offsets are converted to `(start, end)` sample windows at the
+/// device's default sample rate; each window lasts `spec.flash_len`. Returns
+/// the stream together with the shared `BeepState` the callback writes its
+/// measurements into.
+///
+/// Fails with a human-readable message when there is no default output
+/// device, its configuration cannot be queried, the stream cannot be built,
+/// or the device's default sample format is neither `F32` nor `I16`.
+fn build_beep_stream(
+    epoch: Instant,
+    spec: &PatternSpec,
+) -> Result<(cpal::Stream, Arc<BeepState>), String> {
+    let host = cpal::default_host();
+    let device = host
+        .default_output_device()
+        .ok_or("no default audio output device; cannot run the sync test")?;
+    let supported = device
+        .default_output_config()
+        .map_err(|e| format!("failed to query audio output config: {e}"))?;
+
+    let sample_rate = supported.sample_rate().0;
+    let channels = supported.channels() as usize;
+    let beep_len = (spec.flash_len.as_secs_f64() * sample_rate as f64) as u64;
+    // Event offsets (seconds from epoch) -> half-open sample windows.
+    let event_windows = spec
+        .event_offsets_secs()
+        .into_iter()
+        .map(|offset_secs| {
+            let first = (offset_secs * sample_rate as f64) as u64;
+            (first, first + beep_len)
+        })
+        .collect();
+
+    let state = Arc::new(BeepState {
+        epoch,
+        event_windows,
+        sample_rate,
+        channels,
+        epoch_sample: Mutex::new(None),
+        samples_written: Mutex::new(0),
+        beep_outputs: Mutex::new(Vec::new()),
+        latency_sum_ms: Mutex::new((0.0, 0)),
+    });
+
+    let err_fn = |e| tracing::warn!("selftest audio stream error: {e}");
+    let stream_config = supported.config();
+
+    let built = match supported.sample_format() {
+        cpal::SampleFormat::F32 => {
+            let shared = state.clone();
+            device.build_output_stream(
+                &stream_config,
+                move |out: &mut [f32], info: &cpal::OutputCallbackInfo| shared.fill(out, info),
+                err_fn,
+                None,
+            )
+        }
+        cpal::SampleFormat::I16 => {
+            let shared = state.clone();
+            // Reused float staging buffer: the tone is rendered as f32 and
+            // then scaled into the device's i16 buffer.
+            let mut staging = Vec::new();
+            device.build_output_stream(
+                &stream_config,
+                move |out: &mut [i16], info: &cpal::OutputCallbackInfo| {
+                    staging.clear();
+                    staging.resize(out.len(), 0.0f32);
+                    shared.fill(&mut staging, info);
+                    out.iter_mut()
+                        .zip(staging.iter())
+                        .for_each(|(dst, src)| *dst = (src * f32::from(i16::MAX)) as i16);
+                },
+                err_fn,
+                None,
+            )
+        }
+        other => {
+            return Err(format!(
+                "unsupported audio output sample format for the sync test: {other:?}"
+            ));
+        }
+    };
+    let stream = built.map_err(|e| format!("failed to build audio output stream: {e}"))?;
+
+    Ok((stream, state))
+}
+
+/// Event-loop state for the fullscreen flash pattern window.
+struct PatternApp {
+    /// Pattern description; supplies the flash length and total runtime.
+    spec: PatternSpec,
+    /// Event onsets in seconds from epoch, from `PatternSpec::event_offsets_secs`.
+    event_offsets: Vec<f64>,
+    /// Instant the run began; the run ends at `run_start + spec.total_runtime()`.
+    run_start: Instant,
+    /// Instant that event offsets are measured from.
+    epoch: Instant,
+    /// The pattern window, once it has been created in `resumed`.
+    window: Option<Arc<Window>>,
+    /// Software drawing surface for `window`, created alongside it.
+    surface: Option<softbuffer::Surface<Arc<Window>, Arc<Window>>>,
+    /// Last known inner size of the window in pixels (width, height).
+    size: (u32, u32),
+    /// Whether the previous draw filled the window white; used to detect the
+    /// black-to-white edge of a flash.
+    last_drawn_white: bool,
+    /// `(event index, instant taken right after the present call returned)`
+    /// for the first white frame presented for each event.
+    flash_presents: Vec<(u32, Instant)>,
+    /// Set when the user closes the window or presses Escape.
+    aborted: Arc<AtomicBool>,
+    /// Window/surface setup failure, if any; the event loop exits when set.
+    error: Option<String>,
+}
+
+impl PatternApp {
+    /// Index of the event whose flash window contains `now`, i.e. the screen
+    /// should be white; `None` before the epoch and between flashes.
+    fn desired_state(&self, now: Instant) -> Option<u32> {
+        let elapsed = now.checked_duration_since(self.epoch)?.as_secs_f64();
+        let flash_secs = self.spec.flash_len.as_secs_f64();
+        self.event_offsets
+            .iter()
+            .position(|&onset| (onset..onset + flash_secs).contains(&elapsed))
+            .map(|idx| idx as u32)
+    }
+
+    /// Instant of the next black/white boundary strictly after `now`: the
+    /// epoch itself while it is still in the future, otherwise the earliest
+    /// upcoming flash start or end, falling back to the end of the run when
+    /// no boundary remains.
+    fn next_transition(&self, now: Instant) -> Instant {
+        let Some(since_epoch) = now.checked_duration_since(self.epoch) else {
+            return self.epoch;
+        };
+        let elapsed = since_epoch.as_secs_f64();
+        let flash_secs = self.spec.flash_len.as_secs_f64();
+        let upcoming = self
+            .event_offsets
+            .iter()
+            .flat_map(|&onset| [onset, onset + flash_secs])
+            .filter(|&edge| edge > elapsed)
+            .reduce(f64::min)
+            .filter(|edge| edge.is_finite());
+        match upcoming {
+            Some(edge) => self.epoch + Duration::from_secs_f64(edge),
+            None => self.done_at(),
+        }
+    }
+
+    /// Instant at which the whole pattern run is over.
+    fn done_at(&self) -> Instant {
+        self.run_start + self.spec.total_runtime()
+    }
+
+    /// Fills the window white or black according to the schedule, records the
+    /// first successfully presented white frame of each event, and exits the
+    /// event loop once the run is over. Silently skips the frame when the
+    /// surface is missing, the window has zero size, or the surface cannot be
+    /// resized or mapped.
+    fn draw(&mut self, event_loop: &ActiveEventLoop) {
+        let active_event = self.desired_state(Instant::now());
+        let white = active_event.is_some();
+
+        let Some(surface) = self.surface.as_mut() else {
+            return;
+        };
+        let (Some(width), Some(height)) =
+            (NonZeroU32::new(self.size.0), NonZeroU32::new(self.size.1))
+        else {
+            return;
+        };
+        if surface.resize(width, height).is_err() {
+            return;
+        }
+        let Ok(mut pixels) = surface.buffer_mut() else {
+            return;
+        };
+        // Keep the high byte opaque: some softbuffer backends (macOS layers)
+        // treat it as alpha rather than ignoring it.
+        let fill_color: u32 = if white { 0xFFFF_FFFF } else { 0xFF00_0000 };
+        pixels.fill(fill_color);
+        let presented = pixels.present().is_ok();
+
+        // Log only the black -> white edge, and only once per event index.
+        let rising_edge = presented && white && !self.last_drawn_white;
+        if rising_edge {
+            let event = active_event.unwrap_or(0);
+            let already_logged =
+                matches!(self.flash_presents.last(), Some((last, _)) if *last == event);
+            if !already_logged {
+                self.flash_presents.push((event, Instant::now()));
+            }
+        }
+        self.last_drawn_white = white;
+
+        if Instant::now() >= self.done_at() {
+            event_loop.exit();
+        }
+    }
+}
+
+impl ApplicationHandler for PatternApp {
+    /// Creates the borderless fullscreen, always-on-top window and its
+    /// softbuffer surface the first time the application is resumed. Any
+    /// setup failure is stored in `self.error` and ends the event loop.
+    fn resumed(&mut self, event_loop: &ActiveEventLoop) {
+        if self.window.is_some() {
+            return;
+        }
+
+        // Shared failure path: remember the message and stop the loop.
+        let mut fail = |message: String| {
+            self.error = Some(message);
+            event_loop.exit();
+        };
+
+        let attrs = Window::default_attributes()
+            .with_title("Cap Sync Test")
+            .with_fullscreen(Some(Fullscreen::Borderless(None)))
+            .with_window_level(WindowLevel::AlwaysOnTop);
+        let window = match event_loop.create_window(attrs) {
+            Ok(created) => Arc::new(created),
+            Err(e) => return fail(format!("failed to create test window: {e}")),
+        };
+        let inner = window.inner_size();
+        self.size = (inner.width, inner.height);
+
+        let context = match softbuffer::Context::new(window.clone()) {
+            Ok(created) => created,
+            Err(e) => return fail(format!("failed to create draw context: {e}")),
+        };
+        let surface = match softbuffer::Surface::new(&context, window.clone()) {
+            Ok(created) => created,
+            Err(e) => return fail(format!("failed to create draw surface: {e}")),
+        };
+
+        self.surface = Some(surface);
+        window.request_redraw();
+        self.window = Some(window);
+    }
+
+    /// Handles window events: closing the window or pressing Escape aborts
+    /// the run, resizes update the cached size, and redraw requests paint the
+    /// pattern.
+    fn window_event(&mut self, event_loop: &ActiveEventLoop, _id: WindowId, event: WindowEvent) {
+        let abort_requested = match &event {
+            WindowEvent::CloseRequested => true,
+            WindowEvent::KeyboardInput { event: key, .. } => {
+                key.state == ElementState::Pressed
+                    && key.logical_key == Key::Named(NamedKey::Escape)
+            }
+            _ => false,
+        };
+        if abort_requested {
+            self.aborted.store(true, Ordering::Release);
+            event_loop.exit();
+            return;
+        }
+
+        match event {
+            WindowEvent::Resized(new_size) => {
+                self.size = (new_size.width, new_size.height);
+            }
+            WindowEvent::RedrawRequested => self.draw(event_loop),
+            _ => {}
+        }
+    }
+
+    /// Ends the loop once the run is over; otherwise requests a redraw and
+    /// schedules the next wake-up at the upcoming transition (capped at the
+    /// end of the run).
+    fn about_to_wait(&mut self, event_loop: &ActiveEventLoop) {
+        let now = Instant::now();
+        let finish = self.done_at();
+        if now >= finish {
+            event_loop.exit();
+            return;
+        }
+        let wake_at = self.next_transition(now).min(finish);
+        // Redraw slightly eagerly so the flip lands at (not after) the
+        // scheduled transition.
+        if let Some(window) = self.window.as_ref() {
+            window.request_redraw();
+        }
+        event_loop.set_control_flow(ControlFlow::WaitUntil(wake_at));
+    }
+}
+
+/// Runs the flash+beep pattern to completion: starts the beep stream, drives
+/// the pattern window on an on-demand event loop, then collects what was
+/// presented and played.
+///
+/// The epoch that both the flashes and the beeps are scheduled against is
+/// `spec.settle` after the start of the run. Returns an error when the event
+/// loop or audio output cannot be set up, when window/surface creation
+/// failed, or `"cancelled"` when the user aborted the run.
+fn run_pattern(spec: PatternSpec) -> Result<PatternReport, String> {
+    #[allow(unused_mut)]
+    let mut builder = EventLoop::builder();
+    #[cfg(target_os = "macos")]
+    {
+        use winit::platform::macos::{ActivationPolicy, EventLoopBuilderExtMacOS};
+        builder
+            .with_activation_policy(ActivationPolicy::Regular)
+            .with_activate_ignoring_other_apps(true);
+    }
+    let mut event_loop = builder
+        .build()
+        .map_err(|e| format!("failed to create event loop: {e}"))?;
+
+    let run_start = Instant::now();
+    let epoch = run_start + spec.settle;
+
+    let (stream, beep_state) = build_beep_stream(epoch, &spec)?;
+    if let Err(e) = stream.play() {
+        return Err(format!("failed to start audio output: {e}"));
+    }
+
+    // `event_offsets` is computed from `spec` before `spec` is moved in.
+    let event_offsets = spec.event_offsets_secs();
+    let mut app = PatternApp {
+        event_offsets,
+        spec,
+        run_start,
+        epoch,
+        window: None,
+        surface: None,
+        size: (0, 0),
+        last_drawn_white: false,
+        flash_presents: Vec::new(),
+        aborted: Arc::new(AtomicBool::new(false)),
+        error: None,
+    };
+
+    event_loop
+        .run_app_on_demand(&mut app)
+        .map_err(|e| format!("event loop error: {e}"))?;
+
+    // Stop audio output before reading the callback's measurements.
+    drop(stream);
+
+    if let Some(setup_error) = app.error {
+        return Err(setup_error);
+    }
+    if app.aborted.load(Ordering::Acquire) {
+        return Err("cancelled".to_string());
+    }
+
+    // Mean reported output latency, if any callback reported one.
+    let audio_latency_ms = {
+        let totals = beep_state.latency_sum_ms.lock().unwrap();
+        if totals.1 > 0 {
+            Some(totals.0 / totals.1 as f64)
+        } else {
+            None
+        }
+    };
+    let beep_outputs = beep_state.beep_outputs.lock().unwrap().clone();
+
+    Ok(PatternReport {
+        flash_presents: app.flash_presents,
+        beep_outputs,
+        audio_latency_ms,
+    })
+}
diff --git a/apps/cli/src/selftest/playback.rs b/apps/cli/src/selftest/playback.rs
new file mode 100644
index 0000000000..44ec082931
--- /dev/null
+++ b/apps/cli/src/selftest/playback.rs
@@ -0,0 +1,846 @@
+//! `cap selftest playback` — verifies A/V sync of the editor's playback
+//! path: what the renderer presents vs what the audio output plays.
+//!
+//! The harness opens a flash+beep recording with the real editor machinery
+//! (`EditorInstance`: real decoders, real frame scheduling, real audio
+//! pipeline) and taps both presentation boundaries — the renderer's frame
+//! callback and a headless audio sink that pulls blocks on a device-like
+//! real-time schedule. Flash/beep onsets measured in those taps are compared
+//! against the same onsets measured in the recording's raw tracks; playback
+//! must reproduce the recording's sync within one frame and without drift.
+//!
+//! Without `--project` the fixture is generated through the real recording
+//! pipeline (the same channel-source path the sync matrix uses), so the test
+//! runs headless on CI where no capture hardware exists.
+
+use std::{
+    path::{Path, PathBuf},
+    sync::{Arc, Mutex},
+    time::{Duration, Instant},
+};
+
+use cap_editor::{EditorFrameOutput, EditorInstance, HEADLESS_CHANNELS, HEADLESS_SAMPLE_RATE};
+use cap_project::XY;
+use clap::Args;
+use serde::Serialize;
+
+use super::measure::{self, SyncMeasurement};
+
+/// Flash/beep schedule of the generated fixture. Mirrors the av-sync pattern
+/// (events every two seconds after a settle period), except that the flash
+/// window is `FIXTURE_FLASH_SECS` rather than the live pattern's 120 ms.
+const FIXTURE_SETTLE_SECS: f64 = 2.0;
+const FIXTURE_PERIOD_SECS: f64 = 2.0;
+/// Longer than the live pattern's 120 ms so CI runners with slow virtualized
+/// GPUs still present at least one frame inside every flash window; onset
+/// detection is edge-triggered, so the extra length does not blur the onset.
+const FIXTURE_FLASH_SECS: f64 = 0.36;
+const FIXTURE_TAIL_SECS: f64 = 1.0;
+const FIXTURE_FPS: u32 = 30;
+const FIXTURE_WIDTH: u32 = 320;
+const FIXTURE_HEIGHT: u32 = 240;
+/// A video emission gap after the second event: the screen is static, no
+/// frames are captured, and playback/export must hold the last frame (the
+/// VFR hold path) without disturbing audio sync. Longer than the decoders'
+/// FRAME_CACHE_SIZE (90 frames = 3s at 30fps) so the pre-gap hold frame is
+/// guaranteed to face cache eviction while requests march through the hole —
+/// the regression class where post-gap content got served mid-hold. Events
+/// whose flashes fall inside the gap are still beeped; their unpaired beeps
+/// are rejected by the measurement's pairing window.
+const FIXTURE_GAP_START_SECS: f64 = FIXTURE_SETTLE_SECS + FIXTURE_PERIOD_SECS + 0.4;
+const FIXTURE_GAP_LEN_SECS: f64 = 4.2;
+/// A second, narrow gap (~30 frames) between the fifth and sixth events.
+/// Narrower than the decoders' cache read-ahead window, so it exercises the
+/// in-loop narrow-hole answer paths that the long gap's cache-bounds exit
+/// never reaches.
+const FIXTURE_GAP2_START_SECS: f64 = FIXTURE_SETTLE_SECS + 4.0 * FIXTURE_PERIOD_SECS + 0.4;
+const FIXTURE_GAP2_LEN_SECS: f64 = 1.0;
+
+/// The playback-vs-raw delta window is asymmetric because every presentation
+/// boundary in the harness shifts it the same way: video content appears at
+/// the first playback frame tick at-or-after its pts (0..1 frame late), the
+/// renderer adds its render latency, and the zero-latency headless sink
+/// consumes audio up to one block before the video clock starts. Audio can
+/// therefore legitimately read EARLY by up to a frame plus a block plus a
+/// render margin, but reading LATE (or early beyond that window) means the
+/// editor's playback mapping itself is off.
+const RENDER_MARGIN_MS: f64 = 35.0;
+const DELTA_LATE_TOLERANCE_MS: f64 = 15.0;
+/// Gated on the DIFFERENCE from the raw recording's drift: the fixture's own
+/// emission jitter shows up identically in both legs and must not count
+/// against playback.
+const PASS_TOTAL_DRIFT_MS: f64 = 40.0;
+const PASS_MAD_MS: f64 = 25.0;
+/// The export decodes the same tracks offline, so its sync must match the
+/// raw recording almost exactly (same budget as the av-sync selftest).
+const EXPORT_DELTA_TOLERANCE_MS: f64 = 25.0;
+/// Ceiling for waiting on playback to finish beyond the timeline duration.
+const PLAYBACK_EXTRA_TIMEOUT: Duration = Duration::from_secs(30);
+
+// Command-line arguments for `cap selftest playback`.
+// The `///` comments on the fields below are consumed by clap's derive as
+// user-facing help text, so maintainer notes here use `//` instead.
+#[derive(Args)]
+pub struct PlaybackArgs {
+    /// Existing flash+beep .cap project to measure (defaults to generating a
+    /// synthetic fixture through the real recording pipeline)
+    #[arg(long)]
+    project: Option<PathBuf>,
+    /// Seconds of synthetic fixture pattern to generate
+    // Only used when no `--project` is given; `run_playback` clamps the value
+    // to 18..=120 seconds before generating the fixture.
+    #[arg(long, default_value_t = 20)]
+    duration: u64,
+    /// Frame rate to drive editor playback at
+    // `run_playback` rejects values outside 1..=240.
+    #[arg(long, default_value_t = 30)]
+    fps: u32,
+    /// Skip exporting the project (tests only the playback stage)
+    #[arg(long)]
+    skip_export: bool,
+    /// Keep the generated fixture project on disk for inspection
+    // A generated fixture is also kept whenever the verdict is not a pass.
+    #[arg(long)]
+    keep: bool,
+}
+
+/// Outcome of the playback sync check; serialized in lowercase in the JSON
+/// report.
+#[derive(Serialize, Clone, Copy, PartialEq, Eq, Debug)]
+#[serde(rename_all = "lowercase")]
+enum Verdict {
+    /// Every playback (and, unless skipped, export) check stayed within its
+    /// tolerance.
+    Pass,
+    /// Playback or export could not be measured, or a measured value exceeded
+    /// its tolerance.
+    Fail,
+    /// The raw recording itself could not be measured, so there is no ground
+    /// truth to compare against.
+    Inconclusive,
+}
+
+/// Machine-readable result of `cap selftest playback`, printed as JSON when
+/// requested.
+#[derive(Serialize)]
+struct PlaybackReport {
+    /// Overall outcome of the check.
+    verdict: Verdict,
+    /// Human-readable explanation: the success line, or the failure reasons
+    /// joined with "; ".
+    summary: String,
+    /// Sync measured in the recording's raw tracks (ground truth).
+    raw: Option<SyncMeasurement>,
+    /// Sync measured at the editor playback presentation boundaries.
+    playback: Option<SyncMeasurement>,
+    /// Sync measured in an export of the same project.
+    export: Option<SyncMeasurement>,
+    /// playback median offset − raw median offset. Negative = audio early.
+    delta_ms: Option<f64>,
+    /// How far `delta_ms` may go negative, from `delta_early_tolerance_ms`
+    /// for the requested fps.
+    delta_early_tolerance_ms: f64,
+    /// How far `delta_ms` may go positive (`DELTA_LATE_TOLERANCE_MS`).
+    delta_late_tolerance_ms: f64,
+    /// Allowed difference in total drift versus the raw recording
+    /// (`PASS_TOTAL_DRIFT_MS`).
+    pass_total_drift_ms: f64,
+    /// Number of presented frames the video tap sampled; 0 when playback
+    /// could not be measured.
+    frames_presented: usize,
+    /// Project location; omitted when a generated fixture was deleted.
+    project_path: Option<String>,
+}
+
+/// Upper bound, in milliseconds, on how early audio may legitimately read at
+/// the presentation taps for playback at `fps`. It is the sum of three
+/// terms: one video frame (content quantization), one headless audio block
+/// (sink start quantization) and the fixed render margin.
+fn delta_early_tolerance_ms(fps: u32) -> f64 {
+    let frame_ms = 1000.0 / f64::from(fps);
+    let block_ms =
+        1000.0 * cap_editor::HEADLESS_BLOCK_FRAMES as f64 / f64::from(HEADLESS_SAMPLE_RATE);
+    frame_ms + block_ms + RENDER_MARGIN_MS
+}
+
+/// Entry point of `cap selftest playback`.
+///
+/// Measures A/V sync in the project's raw tracks, in what the editor presents
+/// during playback and (unless `--skip-export`) in an export, then prints a
+/// report as pretty JSON on stdout when `json` is set, or as text otherwise.
+/// Without `--project` a synthetic fixture is generated in the temp directory
+/// first.
+///
+/// Returns `Ok(())` only on a `Pass` verdict; `Fail` and `Inconclusive` are
+/// returned as errors carrying the summary, as are an out-of-range fps and a
+/// fixture generation failure.
+pub async fn run_playback(args: PlaybackArgs, json: bool) -> Result<(), String> {
+    ffmpeg::util::log::set_level(ffmpeg::util::log::Level::Quiet);
+
+    if !(1..=240).contains(&args.fps) {
+        return Err(format!("invalid playback fps: {}", args.fps));
+    }
+
+    // Progress goes to stderr and is suppressed in JSON mode, so stdout
+    // carries only the report.
+    let progress = |msg: &str| {
+        if !json {
+            eprintln!("{msg}");
+        }
+    };
+
+    // `generated` tracks whether the project is our own temp fixture, which
+    // decides whether it may be deleted afterwards.
+    let (project_path, generated) = match &args.project {
+        Some(path) => (path.clone(), false),
+        None => {
+            let path = std::env::temp_dir().join(format!(
+                "cap-selftest-playback-{}.cap",
+                uuid::Uuid::new_v4()
+            ));
+            // The floor guarantees enough events for measure_sync's minimum
+            // after the first event is dropped AND the two events whose
+            // flashes fall inside the video gap: 18s -> 9 events -> 7 visible
+            // -> 6 pairs.
+            let pattern_secs = args.duration.clamp(18, 120) as f64;
+            progress(&format!(
+                "[1/3] Generating synthetic flash+beep recording ({pattern_secs:.0}s, real-time)..."
+            ));
+            fixture::generate(&path, pattern_secs).await?;
+            (path, true)
+        }
+    };
+
+    // Measurement failures are carried as `Err` values into `evaluate`, which
+    // turns them into a verdict instead of aborting here.
+    progress("[2/3] Measuring the raw recording...");
+    let raw = super::analyze_raw(&project_path);
+
+    progress("[3/3] Playing back through the editor and measuring what it presents...");
+    let playback = measure_playback(&project_path, args.fps).await;
+
+    // The export drives the same decoders and timeline mapping as playback
+    // through the offline path; on CI this is the only place the export-side
+    // VFR gap handling is exercised at all.
+    let export = if args.skip_export {
+        Ok(None)
+    } else {
+        progress("Exporting and verifying the export...");
+        match crate::export::export_project_default(project_path.clone()).await {
+            Ok(output) => super::analyze_export(&output).map(Some),
+            Err(e) => Err(format!("export failed: {e}")),
+        }
+    };
+
+    let (verdict, summary, raw_m, playback_m, export_m, delta_ms, frames_presented) =
+        evaluate(&args, raw, playback, export);
+
+    // A generated fixture is kept on request or whenever the check did not
+    // pass, so it can be inspected; a user-supplied project is never removed.
+    let keep = args.keep || (generated && verdict != Verdict::Pass);
+    if generated {
+        if keep {
+            progress(&format!(
+                "Fixture project kept at {}",
+                project_path.display()
+            ));
+        } else {
+            // Best-effort cleanup; a failure to delete is ignored.
+            let _ = std::fs::remove_dir_all(&project_path);
+        }
+    }
+
+    let report = PlaybackReport {
+        verdict,
+        summary: summary.clone(),
+        raw: raw_m,
+        playback: playback_m,
+        export: export_m,
+        delta_ms,
+        delta_early_tolerance_ms: delta_early_tolerance_ms(args.fps),
+        delta_late_tolerance_ms: DELTA_LATE_TOLERANCE_MS,
+        pass_total_drift_ms: PASS_TOTAL_DRIFT_MS,
+        frames_presented,
+        // Only report a path that still exists on disk.
+        project_path: (keep || !generated).then(|| project_path.display().to_string()),
+    };
+
+    if json {
+        println!(
+            "{}",
+            serde_json::to_string_pretty(&report)
+                .map_err(|e| format!("failed to serialize report: {e}"))?
+        );
+    } else {
+        if let Some(m) = &report.raw {
+            println!(
+                "\nRecording: offset {:+.0} ms (median), drift {:+.0} ms over {:.0}s, {} events",
+                m.median_offset_ms, m.total_drift_ms, m.span_secs, m.inlier_events
+            );
+        }
+        if let Some(m) = &report.playback {
+            println!(
+                "Playback:  offset {:+.0} ms (median), drift {:+.0} ms over {:.0}s, {} events",
+                m.median_offset_ms, m.total_drift_ms, m.span_secs, m.inlier_events
+            );
+        }
+        if let Some(m) = &report.export {
+            println!(
+                "Export:    offset {:+.0} ms (median), drift {:+.0} ms over {:.0}s, {} events",
+                m.median_offset_ms, m.total_drift_ms, m.span_secs, m.inlier_events
+            );
+        }
+        let label = match verdict {
+            Verdict::Pass => "PASS",
+            Verdict::Fail => "FAIL",
+            Verdict::Inconclusive => "INCONCLUSIVE",
+        };
+        println!("\nResult: {label} — {summary}");
+    }
+
+    // Anything other than a pass is surfaced to the caller as an error.
+    match verdict {
+        Verdict::Pass => Ok(()),
+        Verdict::Fail => Err(format!("editor playback sync check failed: {summary}")),
+        Verdict::Inconclusive => Err(format!(
+            "editor playback sync check inconclusive: {summary}"
+        )),
+    }
+}
+
+/// Turns the three measurement results into a verdict.
+///
+/// Returns `(verdict, summary, raw, playback, export, delta_ms,
+/// frames_presented)`:
+/// - an unmeasurable raw recording is `Inconclusive` (no ground truth);
+/// - an unmeasurable playback or a failed export is `Fail`;
+/// - otherwise the playback and export offsets and drifts are compared with
+///   the raw recording against the module's tolerances, and every violated
+///   limit contributes one reason to the summary.
+#[allow(clippy::type_complexity)]
+fn evaluate(
+    args: &PlaybackArgs,
+    raw: Result<(SyncMeasurement, f64), String>,
+    playback: Result<(SyncMeasurement, usize), String>,
+    export: Result<Option<(SyncMeasurement, f64)>, String>,
+) -> (
+    Verdict,
+    String,
+    Option<SyncMeasurement>,
+    Option<SyncMeasurement>,
+    Option<SyncMeasurement>,
+    Option<f64>,
+    usize,
+) {
+    let raw_m = match raw {
+        Ok((measurement, _snr)) => measurement,
+        Err(reason) => {
+            let summary = format!("could not measure the raw recording: {reason}");
+            return (Verdict::Inconclusive, summary, None, None, None, None, 0);
+        }
+    };
+
+    let (playback_m, frames_presented) = match playback {
+        Ok(measured) => measured,
+        Err(reason) => {
+            let summary = format!("editor playback could not be measured: {reason}");
+            return (Verdict::Fail, summary, Some(raw_m), None, None, None, 0);
+        }
+    };
+
+    let export_m = match export {
+        Ok(measured) => measured.map(|(measurement, _)| measurement),
+        // A project that plays back but cannot be exported is a hard
+        // failure: the export path is part of the product.
+        Err(reason) => {
+            return (
+                Verdict::Fail,
+                reason,
+                Some(raw_m),
+                Some(playback_m),
+                None,
+                None,
+                frames_presented,
+            );
+        }
+    };
+
+    let delta = playback_m.median_offset_ms - raw_m.median_offset_ms;
+    let early_tolerance = delta_early_tolerance_ms(args.fps);
+    let drift_delta = playback_m.total_drift_ms - raw_m.total_drift_ms;
+
+    // Each check yields `Some(reason)` when its limit is violated.
+    let shift_reason = (delta < -early_tolerance || delta > DELTA_LATE_TOLERANCE_MS).then(|| {
+        format!(
+            "playback shifts sync by {delta:+.0} ms vs the recording \
+             (allowed -{early_tolerance:.0}..+{DELTA_LATE_TOLERANCE_MS:.0} ms)"
+        )
+    });
+    let drift_reason = (drift_delta.abs() > PASS_TOTAL_DRIFT_MS).then(|| {
+        format!(
+            "playback adds {drift_delta:+.0} ms of drift over {:.0}s vs the recording",
+            playback_m.span_secs
+        )
+    });
+    let spread_reason = (playback_m.mad_ms > PASS_MAD_MS).then(|| {
+        format!(
+            "playback offsets are unstable (spread ±{:.0} ms)",
+            playback_m.mad_ms
+        )
+    });
+    let export_reasons = export_m.as_ref().map(|export_m| {
+        let export_delta = (export_m.median_offset_ms - raw_m.median_offset_ms).abs();
+        let export_drift_delta = export_m.total_drift_ms - raw_m.total_drift_ms;
+        [
+            (export_delta > EXPORT_DELTA_TOLERANCE_MS).then(|| {
+                format!("export changes sync by {export_delta:.0} ms vs the recording")
+            }),
+            (export_drift_delta.abs() > PASS_TOTAL_DRIFT_MS).then(|| {
+                format!(
+                    "export adds {export_drift_delta:+.0} ms of drift over {:.0}s vs the recording",
+                    export_m.span_secs
+                )
+            }),
+        ]
+    });
+
+    // Reasons are reported in check order: playback first, then export.
+    let reasons: Vec<String> = [shift_reason, drift_reason, spread_reason]
+        .into_iter()
+        .chain(export_reasons.into_iter().flatten())
+        .flatten()
+        .collect();
+
+    let (verdict, summary) = if reasons.is_empty() {
+        (
+            Verdict::Pass,
+            format!(
+                "editor playback preserves sync (playback {:+.0} ms vs recording {:+.0} ms, drift {:+.0} ms)",
+                playback_m.median_offset_ms, raw_m.median_offset_ms, playback_m.total_drift_ms
+            ),
+        )
+    } else {
+        (Verdict::Fail, reasons.join("; "))
+    };
+
+    (
+        verdict,
+        summary,
+        Some(raw_m),
+        Some(playback_m),
+        export_m,
+        Some(delta),
+        frames_presented,
+    )
+}
+
+/// Drives the editor's real playback over the project and measures
+/// flash-vs-beep alignment in what it presents. Returns the measurement and
+/// the number of frames the renderer actually presented.
+///
+/// Two taps share one wall-clock epoch: the frame callback records
+/// `(seconds since epoch, mean center luma)` per presented RGBA frame, and
+/// the headless audio tap records a mono mix of every block plus the
+/// deadline of the first block. Flash and beep onsets detected in those taps
+/// are then paired by `measure::measure_sync`.
+///
+/// Errors (as messages) when the project cannot be opened, has no recording
+/// segments, playback does not start or does not finish within the timeline
+/// duration plus `PLAYBACK_EXTRA_TIMEOUT`, or the onsets cannot be detected
+/// or paired. The editor instance is disposed on every path after it has
+/// been created.
+async fn measure_playback(
+    project_path: &Path,
+    fps: u32,
+) -> Result<(SyncMeasurement, usize), String> {
+    // Wall-clock epoch shared by both presentation taps.
+    let epoch = Instant::now();
+
+    let video_events: Arc<Mutex<Vec<(f64, f64)>>> = Arc::new(Mutex::new(Vec::new()));
+    let frame_cb: Box<dyn FnMut(EditorFrameOutput) + Send> = Box::new({
+        let video_events = video_events.clone();
+        move |output| {
+            // Timestamp first so luma computation does not delay the reading.
+            let now = Instant::now();
+            if let EditorFrameOutput::Rgba(frame) = output {
+                if let Some(luma) = mean_center_luma_rgba(
+                    &frame.data,
+                    frame.width,
+                    frame.height,
+                    frame.padded_bytes_per_row,
+                ) && let Ok(mut events) = video_events.lock()
+                {
+                    events.push((now.duration_since(epoch).as_secs_f64(), luma));
+                }
+            }
+        }
+    });
+
+    struct AudioTapState {
+        /// Deadline of the first audio block, in seconds since `epoch`.
+        base_secs: Option<f64>,
+        /// Mono mix of every sample frame delivered so far.
+        mono: Vec<f32>,
+    }
+    let audio_tap_state = Arc::new(Mutex::new(AudioTapState {
+        base_secs: None,
+        mono: Vec::new(),
+    }));
+    let audio_tap: cap_editor::HeadlessAudioTap = Box::new({
+        let state = audio_tap_state.clone();
+        move |block: &[f32], deadline: Instant| {
+            let Ok(mut state) = state.lock() else {
+                return;
+            };
+            if state.base_secs.is_none() {
+                // The pump's schedule is absolute, so the first block deadline
+                // anchors an exact sample-index -> wall-time mapping.
+                state.base_secs = Some(
+                    deadline
+                        .checked_duration_since(epoch)
+                        .map(|d| d.as_secs_f64())
+                        .unwrap_or_else(|| -epoch.duration_since(deadline).as_secs_f64()),
+                );
+            }
+            // Average the interleaved channels of each frame into one sample.
+            for frame in block.chunks_exact(usize::from(HEADLESS_CHANNELS)) {
+                state
+                    .mono
+                    .push(frame.iter().sum::<f32>() / frame.len() as f32);
+            }
+        }
+    });
+
+    let audio_output = Arc::new(cap_editor::AudioOutput::new_headless(audio_tap));
+
+    let instance = EditorInstance::new_with_audio_output(
+        project_path.to_path_buf(),
+        |_| {},
+        frame_cb,
+        None,
+        audio_output,
+    )
+    .await
+    .map_err(|e| format!("failed to open the project in the editor: {e}"))?;
+
+    // Use the first segment's display size as the render resolution. A
+    // project without segments is reported as an error instead of panicking
+    // on an out-of-bounds index.
+    let resolution_base = instance
+        .recordings
+        .segments
+        .first()
+        .map(|segment| XY::new(segment.display.width, segment.display.height));
+    let Some(resolution_base) = resolution_base else {
+        instance.dispose().await;
+        return Err("the project has no recording segments".to_string());
+    };
+
+    let total_frames = instance.get_total_frames(fps);
+    let expected_duration = Duration::from_secs_f64(f64::from(total_frames) / f64::from(fps));
+    let playback_budget = expected_duration + PLAYBACK_EXTRA_TIMEOUT;
+
+    instance.start_playback(fps, resolution_base).await;
+
+    // The state lock guard is released at the end of this statement, before
+    // any `dispose` call below.
+    let playback_task = instance.state.lock().await.playback_task.clone();
+    let Some(mut handle) = playback_task else {
+        // Dispose here too, matching the normal and timeout paths.
+        instance.dispose().await;
+        return Err("editor playback did not start".to_string());
+    };
+
+    let wait = tokio::time::timeout(playback_budget, async {
+        loop {
+            let event = *handle.receive_event().await;
+            if matches!(event, cap_editor::PlaybackEvent::Stop) {
+                break;
+            }
+        }
+    })
+    .await;
+
+    instance.dispose().await;
+
+    if wait.is_err() {
+        return Err(format!(
+            "playback did not finish within {:?}",
+            playback_budget
+        ));
+    }
+
+    let video_samples = video_events
+        .lock()
+        .map_err(|_| "video tap poisoned".to_string())?
+        .clone();
+    let frames_presented = video_samples.len();
+    let (audio_base_secs, mono) = {
+        let mut state = audio_tap_state
+            .lock()
+            .map_err(|_| "audio tap poisoned".to_string())?;
+        (
+            state.base_secs.unwrap_or(0.0),
+            std::mem::take(&mut state.mono),
+        )
+    };
+
+    let flashes = measure::flash_onsets_from_luma(&video_samples)
+        .map_err(|e| format!("playback video ({frames_presented} frames presented): {e}"))?;
+    let audio = measure::beep_onsets_from_mono(mono, HEADLESS_SAMPLE_RATE)
+        .map_err(|e| format!("playback audio: {e}"))?;
+    // Shift beep onsets from "seconds into the audio stream" onto the shared
+    // epoch using the first block's deadline.
+    let beeps: Vec<f64> = audio.onsets.iter().map(|t| t + audio_base_secs).collect();
+
+    measure::measure_sync(&flashes, &beeps, super::MIN_EVENTS)
+        .map(|m| (m, frames_presented))
+        .map_err(|e| format!("playback pairing ({frames_presented} frames presented): {e}"))
+}
+
+/// Mean brightness of the center crop of an RGBA/BGRA presentation frame.
+///
+/// The crop spans the middle half of the frame in both dimensions and is
+/// sampled on a grid of every fourth pixel and every fourth row. The result
+/// is the plain average of the first three bytes of each sampled pixel (the
+/// fourth byte is ignored), so channel order doesn't matter for the
+/// black/white test pattern.
+///
+/// Returns `None` when a dimension is zero, the row stride is shorter than
+/// `width * 4`, `data` is shorter than `stride * height`, or the crop is too
+/// small to contain a sample.
+fn mean_center_luma_rgba(
+    data: &[u8],
+    width: u32,
+    height: u32,
+    padded_bytes_per_row: u32,
+) -> Option<f64> {
+    let (cols, rows) = (width as usize, height as usize);
+    let stride = padded_bytes_per_row as usize;
+    let layout_ok =
+        cols != 0 && rows != 0 && stride >= cols * 4 && data.len() >= stride * rows;
+    if !layout_ok {
+        return None;
+    }
+
+    let (mut sum, mut count) = (0u64, 0u64);
+    for y in (rows / 4..rows * 3 / 4).step_by(4) {
+        let row = &data[y * stride..][..cols * 4];
+        for x in (cols / 4..cols * 3 / 4).step_by(4) {
+            sum += row[x * 4..x * 4 + 3]
+                .iter()
+                .map(|&channel| u64::from(channel))
+                .sum::<u64>();
+            count += 3;
+        }
+    }
+
+    if count > 0 {
+        Some(sum as f64 / count as f64)
+    } else {
+        None
+    }
+}
+
+/// Generates a real `.cap` studio project containing a flash+beep pattern by
+/// driving the production recording pipeline with synthetic sources — the
+/// same real-time channel-source path the sync matrix uses. Only the media
+/// origin is synthetic; encoding, muxing and metadata are the real product
+/// code paths.
+mod fixture {
+    use std::{path::Path, time::Duration};
+
+    use cap_media_info::{AudioInfo, RawVideoFormat, Sample, Type, VideoInfo};
+    use cap_project::{
+        AudioMeta, ClipConfiguration, MultipleSegment, MultipleSegments, Platform,
+        ProjectConfiguration, RecordingMeta, RecordingMetaInner, StudioRecordingMeta,
+        StudioRecordingStatus, TimelineConfiguration, TimelineSegment, VideoMeta,
+    };
+    use cap_recording::{
+        AudioFrame, ChannelAudioSource, ChannelAudioSourceConfig, ChannelVideoSource,
+        ChannelVideoSourceConfig, OutputPipeline,
+        ffmpeg::{FFmpegVideoFrame, Mp4Muxer, OggMuxer},
+    };
+    use cap_timestamp::{Timestamp, Timestamps};
+    use relative_path::RelativePathBuf;
+
+    use super::{
+        FIXTURE_FLASH_SECS, FIXTURE_FPS, FIXTURE_GAP_LEN_SECS, FIXTURE_GAP_START_SECS,
+        FIXTURE_GAP2_LEN_SECS, FIXTURE_GAP2_START_SECS, FIXTURE_HEIGHT, FIXTURE_PERIOD_SECS,
+        FIXTURE_SETTLE_SECS, FIXTURE_TAIL_SECS, FIXTURE_WIDTH,
+    };
+
+    const AUDIO_RATE: u32 = 48_000; // Sample rate of the generated audio, in Hz.
+    const AUDIO_CHUNK_SECS: f64 = 0.02; // Duration of each emitted audio frame.
+    const BEEP_FREQ: f32 = 1_000.0; // Beep tone frequency, in Hz.
+    const BEEP_AMPLITUDE: f32 = 0.5; // Peak amplitude of the beep sine.
+
+    struct Pattern {
+        events: Vec<f64>, // Flash/beep onset times, in seconds.
+        total_secs: f64, // Last onset plus the flash and tail durations.
+    }
+
+    fn pattern(pattern_secs: f64) -> Pattern {
+        let count = ((pattern_secs / FIXTURE_PERIOD_SECS) as u32).max(4); // at least 4 events
+        // Every onset carries a deterministic hash-derived jitter in [-0.3 s, +0.3 s], as in
+        // the live pattern window (PatternSpec::event_offsets_secs): a strictly periodic
+        // schedule would let a one-period A/V shift alias to a zero measured offset.
+        let mut events = Vec::with_capacity(count as usize);
+        for k in 0..count {
+            let jitter = (u64::from(k).wrapping_mul(2_654_435_761) % 601) as f64 / 1000.0 - 0.3;
+            let offset = (f64::from(k) * FIXTURE_PERIOD_SECS + jitter).max(0.0);
+            events.push(FIXTURE_SETTLE_SECS + offset);
+        }
+        let last_onset = events.last().copied().unwrap_or(0.0);
+        let total_secs = last_onset + FIXTURE_FLASH_SECS + FIXTURE_TAIL_SECS;
+        Pattern { events, total_secs }
+    }
+
+    fn in_flash(events: &[f64], t: f64) -> bool {
+        events.iter().any(|&on| (on..on + FIXTURE_FLASH_SECS).contains(&t)) // half-open window
+    }
+
+    fn in_video_gap(t: f64) -> bool {
+        let inside = |start: f64, len: f64| t >= start && t < start + len; // [start, start + len)
+        inside(FIXTURE_GAP_START_SECS, FIXTURE_GAP_LEN_SECS)
+            || inside(FIXTURE_GAP2_START_SECS, FIXTURE_GAP2_LEN_SECS)
+    }
+
+    pub async fn generate(project_dir: &Path, pattern_secs: f64) -> Result<(), String> {
+        let pattern = pattern(pattern_secs); // flash/beep schedule shared by both legs
+
+        let segment_dir = project_dir.join("content/segments/segment-0");
+        std::fs::create_dir_all(&segment_dir)
+            .map_err(|e| format!("failed to create fixture directories: {e}"))?;
+        let display_path = segment_dir.join("display.mp4");
+        let audio_path = segment_dir.join("system_audio.ogg");
+
+        let timestamps = Timestamps::now(); // one clock base for both emitters and pipelines
+
+        // Video leg: black frames with white flashes; nothing is emitted
+        // inside the gap windows, like a static screen under VFR capture.
+        let video_info = VideoInfo::from_raw(
+            RawVideoFormat::Bgra,
+            FIXTURE_WIDTH,
+            FIXTURE_HEIGHT,
+            FIXTURE_FPS,
+        );
+        let (video_tx, video_rx) = flume::bounded::<FFmpegVideoFrame>(32);
+        let video_emit = {
+            let events = pattern.events.clone();
+            let total_secs = pattern.total_secs;
+            let base = timestamps.instant();
+            tokio::spawn(async move {
+                let period = 1.0 / f64::from(FIXTURE_FPS);
+                let frame_count = (total_secs * f64::from(FIXTURE_FPS)) as u64;
+                for k in 0..frame_count {
+                    let t = k as f64 * period;
+                    if in_video_gap(t) {
+                        continue; // gap frames are skipped outright, not delayed
+                    }
+                    tokio::time::sleep_until((base + Duration::from_secs_f64(t)).into()).await;
+                    let mut frame = ffmpeg::frame::Video::new(
+                        ffmpeg::format::Pixel::BGRA,
+                        FIXTURE_WIDTH,
+                        FIXTURE_HEIGHT,
+                    );
+                    let shade = if in_flash(&events, t) { 0xFF } else { 0x00 };
+                    frame.data_mut(0).fill(shade); // every byte of plane 0, alpha included
+                    let frame = FFmpegVideoFrame {
+                        inner: frame,
+                        timestamp: Timestamp::Instant(base + Duration::from_secs_f64(t)),
+                    };
+                    if video_tx.send_async(frame).await.is_err() {
+                        break; // send failed: stop emitting
+                    }
+                }
+            })
+        };
+
+        // Audio leg: silence with 1 kHz beep bursts aligned to the flashes.
+        let audio_info = AudioInfo::new(Sample::F32(Type::Packed), AUDIO_RATE, 2)
+            .map_err(|e| format!("audio info: {e:?}"))?;
+        let (audio_tx, audio_rx) = futures::channel::mpsc::channel::<AudioFrame>(32);
+        let audio_emit = {
+            let events = pattern.events.clone();
+            let total_secs = pattern.total_secs;
+            let base = timestamps.instant();
+            let mut tx = audio_tx;
+            let info = audio_info;
+            tokio::spawn(async move {
+                use futures::SinkExt;
+                let chunk_frames = (f64::from(AUDIO_RATE) * AUDIO_CHUNK_SECS) as usize;
+                let total_chunks = (total_secs / AUDIO_CHUNK_SECS).ceil() as usize;
+                for k in 0..total_chunks {
+                    let chunk_t = k as f64 * AUDIO_CHUNK_SECS;
+                    tokio::time::sleep_until((base + Duration::from_secs_f64(chunk_t)).into())
+                        .await;
+                    let mut frame = ffmpeg::frame::Audio::new(
+                        ffmpeg::format::Sample::F32(ffmpeg::format::sample::Type::Packed),
+                        chunk_frames,
+                        info.channel_layout(),
+                    );
+                    frame.set_rate(AUDIO_RATE);
+                    let data = frame.data_mut(0);
+                    let samples = unsafe {
+                        std::slice::from_raw_parts_mut(
+                            data.as_mut_ptr().cast::<f32>(),
+                            data.len() / 4, // 4 bytes per f32 sample
+                        )
+                    };
+                    for (i, sample) in samples.iter_mut().enumerate() {
+                        let n = (k * chunk_frames + i / 2) as f64; // i / 2: two interleaved channels
+                        let t = n / f64::from(AUDIO_RATE);
+                        *sample = if in_flash(&events, t) {
+                            (t as f32 * BEEP_FREQ * 2.0 * std::f32::consts::PI).sin()
+                                * BEEP_AMPLITUDE
+                        } else {
+                            0.0
+                        };
+                    }
+                    let frame = AudioFrame::new(
+                        frame,
+                        Timestamp::Instant(base + Duration::from_secs_f64(chunk_t)),
+                    );
+                    if tx.send(frame).await.is_err() {
+                        break; // send failed: stop emitting
+                    }
+                }
+            })
+        };
+
+        let video_pipeline = OutputPipeline::builder(display_path.clone())
+            .with_video::<ChannelVideoSource<FFmpegVideoFrame>>(ChannelVideoSourceConfig::new(
+                video_info, video_rx,
+            ))
+            .with_timestamps(timestamps)
+            .build::<Mp4Muxer>(())
+            .await
+            .map_err(|e| format!("video pipeline: {e}"))?;
+        let audio_pipeline = OutputPipeline::builder(audio_path.clone())
+            .with_audio_source::<ChannelAudioSource>(ChannelAudioSourceConfig::new(
+                audio_info, audio_rx,
+            ))
+            .with_timestamps(timestamps)
+            .build::<OggMuxer>(())
+            .await
+            .map_err(|e| format!("audio pipeline: {e}"))?;
+
+        video_emit
+            .await
+            .map_err(|e| format!("video emit join: {e}"))?;
+        audio_emit
+            .await
+            .map_err(|e| format!("audio emit join: {e}"))?;
+        // Both emitters are done; let the stream tails flush through the encoders.
+        tokio::time::sleep(Duration::from_millis(500)).await;
+
+        let finished_video = video_pipeline
+            .stop()
+            .await
+            .map_err(|e| format!("video pipeline stop: {e}"))?;
+        let finished_audio = audio_pipeline
+            .stop()
+            .await
+            .map_err(|e| format!("audio pipeline stop: {e}"))?;
+
+        // Persist metadata the way the studio recorder does: start times are
+        // each track's first timestamp on the shared clock, and the timeline
+        // covers the real muxed video span plus one frame period.
+        let display_start = finished_video
+            .first_timestamp
+            .signed_duration_since_secs(timestamps);
+        let audio_start = finished_audio
+            .first_timestamp
+            .signed_duration_since_secs(timestamps);
+        let display_duration = finished_video
+            .video_timestamp_span
+            .map(|(first, last)| (last - first).as_secs_f64() + 1.0 / f64::from(FIXTURE_FPS))
+            .ok_or("fixture video reported no timestamp span")?;
+
+        let meta = StudioRecordingMeta::MultipleSegments {
+            inner: MultipleSegments {
+                segments: vec![MultipleSegment {
+                    display: VideoMeta {
+                        path: RelativePathBuf::from("content/segments/segment-0/display.mp4"),
+                        fps: FIXTURE_FPS,
+                        start_time: Some(display_start),
+                        device_id: None,
+                    },
+                    camera: None,
+                    mic: None,
+                    system_audio: Some(AudioMeta {
+                        path: RelativePathBuf::from("content/segments/segment-0/system_audio.ogg"),
+                        start_time: Some(audio_start),
+                        device_id: None,
+                        gap_summary: finished_audio.audio_gap_summary.map(|s| {
+                            cap_project::AudioGapSummary {
+                                total_overlap_trimmed_ms: s.total_overlap_trimmed_ms,
+                                startup_overlap_trimmed_ms: s.startup_overlap_trimmed_ms,
+                                overlap_dropped_frames: s.overlap_dropped_frames,
+                                startup_overlap_drops: s.startup_overlap_drops,
+                            }
+                        }),
+                    }),
+                    cursor: None,
+                    keyboard: None,
+                }],
+                cursors: Default::default(),
+                status: Some(StudioRecordingStatus::Complete),
+            },
+        };
+
+        let recording_meta = RecordingMeta {
+            platform: Some(Platform::default()),
+            project_path: project_dir.to_path_buf(),
+            pretty_name: "Cap Playback Selftest Fixture".to_string(),
+            sharing: None,
+            inner: RecordingMetaInner::Studio(Box::new(meta)),
+            upload: None,
+        };
+        recording_meta
+            .save_for_project()
+            .map_err(|e| format!("failed to write recording meta: {e:?}"))?;
+
+        let project_config = ProjectConfiguration {
+            timeline: Some(TimelineConfiguration {
+                segments: vec![TimelineSegment {
+                    recording_clip: 0,
+                    start: 0.0,
+                    end: display_duration, // single segment spanning the whole fixture
+                    timescale: 1.0,
+                    name: None,
+                }],
+                zoom_segments: Vec::new(),
+                scene_segments: Vec::new(),
+                mask_segments: Vec::new(),
+                text_segments: Vec::new(),
+                caption_segments: Vec::new(),
+                keyboard_segments: Vec::new(),
+                audio_segments: Vec::new(),
+            }),
+            clips: vec![ClipConfiguration {
+                index: 0,
+                offsets: Default::default(),
+            }],
+            ..Default::default()
+        };
+        project_config
+            .write(project_dir)
+            .map_err(|e| format!("failed to write project config: {e}"))?;
+
+        Ok(())
+    }
+}
diff --git a/apps/desktop/src-tauri/src/lib.rs b/apps/desktop/src-tauri/src/lib.rs
index 61b3006196..2e30e93847 100644
--- a/apps/desktop/src-tauri/src/lib.rs
+++ b/apps/desktop/src-tauri/src/lib.rs
@@ -115,9 +115,11 @@ use tracing::*;
 use upload::{create_or_get_video, upload_screenshot_bytes, upload_screenshot_file, upload_video};
 use web_api::AuthedApiError;
 use web_api::ManagerExt as WebManagerExt;
+#[cfg(target_os = "macos")]
+use windows::hide_overlay;
 use windows::{
     CapWindowId, EditorRecordingTarget, EditorWindowIds, ScreenshotEditorWindowIds, ShowCapWindow,
-    hide_overlay, set_window_transparent, show_overlay,
+    set_window_transparent, show_overlay,
 };
 
 use crate::{recording::start_recording, upload::build_video_meta};
diff --git a/apps/desktop/src-tauri/src/logging.rs b/apps/desktop/src-tauri/src/logging.rs
index 64c690c3ef..f70d042c83 100644
--- a/apps/desktop/src-tauri/src/logging.rs
+++ b/apps/desktop/src-tauri/src/logging.rs
@@ -211,7 +211,7 @@ pub async fn upload_log_file(app: &AppHandle) -> Result<(), String> {
         .path()
         .app_data_dir()
         .map_err(|e| format!("Failed to get app data dir: {e}"))?;
-    let recordings_dir = GeneralSettingsStore::recordings_dir(&app);
+    let recordings_dir = GeneralSettingsStore::recordings_dir(app);
 
     let is_recording = {
         let app_lock = app.state::<ArcLock<crate::App>>();
diff --git a/crates/editor/src/audio_output.rs b/crates/editor/src/audio_output.rs
index 080ffdc030..655003ecb6 100644
--- a/crates/editor/src/audio_output.rs
+++ b/crates/editor/src/audio_output.rs
@@ -13,10 +13,10 @@ use std::{
         atomic::{AtomicBool, AtomicU64, Ordering},
         mpsc as std_mpsc,
     },
-    time::Duration,
+    time::{Duration, Instant},
 };
 
-use cap_audio::FromSampleBytes;
+use cap_audio::{AudioData, FromSampleBytes};
 #[cfg(not(target_os = "windows"))]
 use cap_audio::{LatencyCorrectionConfig, LatencyCorrector, default_output_latency_hint};
 use cap_media_info::AudioInfo;
@@ -77,6 +77,18 @@ impl Default for AudioOutput {
     }
 }
 
+/// Sample rate of the headless sink; matches the pipeline's master clock.
+pub const HEADLESS_SAMPLE_RATE: u32 = 48_000;
+/// Channel count of the headless sink.
+pub const HEADLESS_CHANNELS: u16 = 2;
+/// Frames per pulled block in the headless sink (a typical device period).
+pub const HEADLESS_BLOCK_FRAMES: usize = 512;
+
+/// Receives every interleaved f32 block the headless sink pulls, together
+/// with the deadline at which a real output device would start playing the
+/// block's first sample.
+pub type HeadlessAudioTap = Box<dyn FnMut(&[f32], Instant) + Send>;
+
 impl AudioOutput {
     pub fn new() -> Self {
         let (control_tx, control_rx) = std_mpsc::channel();
@@ -96,6 +108,27 @@ impl AudioOutput {
         }
     }
 
+    /// An output that renders into `tap` instead of a device, pulling blocks
+    /// on a real-time schedule the way a sound card would. Runs the exact
+    /// production source pipeline (pre-render buffer, playhead sync policy),
+    /// so sync harnesses can observe what a device would have played without
+    /// needing audio hardware.
+    pub fn new_headless(tap: HeadlessAudioTap) -> Self {
+        let (control_tx, control_rx) = std_mpsc::channel();
+        // A failed spawn is only logged; the output is still handed back.
+        let spawned = std::thread::Builder::new()
+            .name("cap-audio-headless".into())
+            .spawn(move || control_thread_headless(control_rx, tap));
+        if let Err(e) = spawned {
+            error!("Failed to spawn headless audio output thread: {e}");
+        }
+
+        Self {
+            control_tx,
+            next_generation: AtomicU64::new(0),
+        }
+    }
+
     /// Opens the output stream ahead of the first play so even the first
     /// press doesn't wait on the device (Bluetooth wake, etc.). Non-blocking.
     pub fn prewarm(&self) {
@@ -210,6 +243,239 @@ fn control_thread(control_rx: std_mpsc::Receiver<ControlMsg>) {
     info!("Audio output thread finished");
 }
 
+/// Drains every queued install/remove command into `active` without blocking.
+/// Common to the live cpal callback and the headless sink.
+fn drain_source_commands<T: FromSampleBytes>(
+    active: &mut Option<ActiveSource<T>>,
+    source_rx: &std_mpsc::Receiver<SourceCommand<T>>,
+) {
+    for command in source_rx.try_iter() {
+        match command {
+            SourceCommand::Install(source) => *active = Some(*source),
+            // Without a generation, whatever is installed gets cleared.
+            SourceCommand::Remove { generation: None } => *active = None,
+            // With one, only the source carrying that generation is cleared.
+            SourceCommand::Remove {
+                generation: Some(wanted),
+            } => {
+                if active.as_ref().is_some_and(|s| s.generation == wanted) {
+                    *active = None;
+                }
+            }
+        }
+    }
+}
+
+/// Produces one output block for `source`. If the video playhead changed,
+/// audio is re-seeked when the video jumped or the two drifted apart; the
+/// buffer is then filled and the pending first-block ack, if any, is sent.
+/// Common to the live cpal callback and the headless sink.
+fn render_source_block<T: FromSampleBytes + cpal::FromSample<f32>>(
+    source: &mut ActiveSource<T>,
+    buffer: &mut [T],
+    latency_secs: f64,
+) {
+    let playhead_moved = source.playhead_rx.has_changed().unwrap_or(false);
+    if playhead_moved {
+        let video = *source.playhead_rx.borrow_and_update();
+        let audible = source.buffer.current_audible_playhead(latency_secs);
+        // Re-seek on a video jump above 50 ms or audio/video drift above 40 ms.
+        let jumped = (video - source.last_video_playhead).abs() > 0.05;
+        let drifted = (video - audible).abs() > 0.04;
+        if jumped || drifted {
+            source.buffer.set_playhead(video + latency_secs);
+        }
+        source.last_video_playhead = video;
+    }
+
+    source.buffer.fill(buffer);
+
+    if let Some(first_block_ack) = source.ack.take() {
+        let _ = first_block_ack.send(());
+    }
+}
+
+/// Unpacks a play spec, builds the pre-rendered buffer positioned at the start
+/// playhead, and sends the resulting source through `install_tx`. Pass
+/// `use_device_latency_hint = false` for the headless sink (zero-latency model).
+fn install_source<T: FromSampleBytes + cpal::FromSample<f32>>(
+    spec: Box<PlaySpec>,
+    generation: u64,
+    ack: std_mpsc::Sender<()>,
+    output_info: AudioInfo,
+    use_device_latency_hint: bool,
+    install_tx: &std_mpsc::Sender<SourceCommand<T>>,
+) -> Result<(), String> {
+    let PlaySpec {
+        segments,
+        music,
+        project,
+        duration_secs,
+        start_playhead_secs,
+        playhead_rx,
+    } = *spec;
+
+    if !(duration_secs.is_finite() && duration_secs > 0.0) { // NaN, infinite or <= 0
+        return Err(format!(
+            "Invalid audio pre-render duration: {duration_secs}"
+        ));
+    }
+
+    #[cfg(not(target_os = "windows"))]
+    let latency_corrector = {
+        let hint = if use_device_latency_hint {
+            default_output_latency_hint(output_info.sample_rate, output_info.buffer_size)
+        } else {
+            None // caller opted out: no device hint is queried
+        };
+        if let Some(hint) = hint
+            && hint.latency_secs > 0.0
+        {
+            if hint.transport.is_wireless() {
+                info!(
+                    "Applying wireless audio output latency hint: {:.1} ms",
+                    hint.latency_secs * 1_000.0
+                );
+            } else {
+                info!(
+                    "Applying audio output latency hint: {:.1} ms",
+                    hint.latency_secs * 1_000.0
+                );
+            }
+        }
+        LatencyCorrector::new(hint, LatencyCorrectionConfig::default())
+    };
+    #[cfg(not(target_os = "windows"))]
+    let initial_latency_secs = latency_corrector.initial_output_latency_secs();
+    #[cfg(target_os = "windows")]
+    let initial_latency_secs = {
+        let _ = use_device_latency_hint; // not otherwise read on Windows
+        0.0
+    };
+
+    let start_playhead = start_playhead_secs + initial_latency_secs;
+    let mut buffer = PrerenderedAudioBuffer::<T>::new(
+        segments,
+        music,
+        &project,
+        output_info,
+        duration_secs,
+        start_playhead,
+    );
+    buffer.set_playhead(start_playhead);
+    // A few ms: guarantees the callback reads real samples at the
+    // playhead, never leading silence.
+    buffer.wait_until_ready(PRERENDER_READY_TIMEOUT);
+
+    install_tx
+        .send(SourceCommand::Install(Box::new(ActiveSource {
+            generation,
+            buffer,
+            playhead_rx,
+            last_video_playhead: start_playhead_secs, // without the latency offset
+            ack: Some(ack),
+            #[cfg(not(target_os = "windows"))]
+            latency_corrector,
+        })))
+        .map_err(|_| "Audio callback channel closed".to_string())
+}
+
+/// Control loop for the headless sink: spawns a pump thread that renders one block per
+/// schedule slot and passes it to `tap`, then serves control messages until shutdown.
+fn control_thread_headless(control_rx: std_mpsc::Receiver<ControlMsg>, mut tap: HeadlessAudioTap) {
+    let output_info = AudioInfo::new_raw(
+        AudioData::SAMPLE_FORMAT,
+        HEADLESS_SAMPLE_RATE,
+        HEADLESS_CHANNELS,
+    );
+
+    let (source_tx, source_rx) = std_mpsc::channel::<SourceCommand<f32>>();
+    let stop = Arc::new(AtomicBool::new(false));
+
+    let pump = {
+        let stop = stop.clone();
+        let channels = usize::from(HEADLESS_CHANNELS);
+        std::thread::Builder::new()
+            .name("cap-audio-headless-pump".into())
+            .spawn(move || {
+                let mut buffer = vec![0.0f32; HEADLESS_BLOCK_FRAMES * channels];
+                let mut active: Option<ActiveSource<f32>> = None;
+                let block = Duration::from_secs_f64(
+                    HEADLESS_BLOCK_FRAMES as f64 / f64::from(HEADLESS_SAMPLE_RATE),
+                );
+                let start = Instant::now();
+                let mut n: u32 = 0; // index of the next block on the schedule
+
+                while !stop.load(Ordering::Acquire) {
+                    // Absolute schedule: a device consumes samples isochronously,
+                    // so late wakeups must not stretch the sample clock.
+                    let deadline = start + block * n;
+                    let now = Instant::now();
+                    if deadline > now {
+                        std::thread::sleep(deadline - now);
+                    }
+
+                    drain_source_commands(&mut active, &source_rx);
+                    match active.as_mut() {
+                        Some(source) => render_source_block(source, &mut buffer, 0.0),
+                        None => buffer.fill(0.0), // silence while nothing is installed
+                    }
+                    tap(&buffer, deadline); // scheduled time, not the actual wake-up
+                    n = n.saturating_add(1);
+                }
+            })
+    };
+    let pump = match pump {
+        Ok(handle) => Some(handle),
+        Err(e) => {
+            error!("Failed to spawn headless audio pump: {e}");
+            None
+        }
+    };
+
+    while let Ok(msg) = control_rx.recv() {
+        match msg {
+            ControlMsg::EnsureStream => {} // nothing to do for the headless sink
+            ControlMsg::Play {
+                spec,
+                generation,
+                result_tx,
+            } => {
+                let (ack_tx, ack_rx) = std_mpsc::channel();
+                let ok = pump.is_some() // the pump is what renders blocks and sends the ack
+                    && match install_source::<f32>(
+                        spec,
+                        generation,
+                        ack_tx,
+                        output_info,
+                        false,
+                        &source_tx,
+                    ) {
+                        Ok(()) => ack_rx.recv_timeout(SOURCE_ACK_TIMEOUT).is_ok(),
+                        Err(e) => {
+                            error!("Failed to install headless audio source: {e}");
+                            false
+                        }
+                    };
+                let _ = result_tx.send(ok);
+            }
+            ControlMsg::StopPlayback { generation } => {
+                let _ = source_tx.send(SourceCommand::Remove {
+                    generation: Some(generation),
+                });
+            }
+            ControlMsg::Shutdown => break,
+        }
+    }
+
+    stop.store(true, Ordering::Release); // pairs with the pump's Acquire load
+    if let Some(pump) = pump {
+        let _ = pump.join();
+    }
+
+    info!("Headless audio output thread finished");
+}
+
 fn handle_play(state: &mut Option<StreamState>, spec: Box<PlaySpec>, generation: u64) -> bool {
     if !ensure_stream(state) {
         return false;
@@ -318,21 +584,7 @@ where
         .build_output_stream(
             &config,
             move |buffer: &mut [T], info| {
-                while let Ok(command) = source_rx.try_recv() {
-                    match command {
-                        SourceCommand::Install(source) => active = Some(*source),
-                        SourceCommand::Remove { generation } => {
-                            let matches = generation.is_none()
-                                || active
-                                    .as_ref()
-                                    .map(|s| Some(s.generation) == generation)
-                                    .unwrap_or(false);
-                            if matches {
-                                active = None;
-                            }
-                        }
-                    }
-                }
+                drain_source_commands(&mut active, &source_rx);
 
                 let Some(source) = active.as_mut() else {
                     buffer.fill(T::EQUILIBRIUM);
@@ -347,24 +599,7 @@ where
                     0.0
                 };
 
-                if source.playhead_rx.has_changed().unwrap_or(false) {
-                    let video_playhead = *source.playhead_rx.borrow_and_update();
-                    let jump = (video_playhead - source.last_video_playhead).abs();
-                    let audible_playhead = source.buffer.current_audible_playhead(latency_secs);
-                    let drift = (video_playhead - audible_playhead).abs();
-
-                    if jump > 0.05 || drift > 0.04 {
-                        source.buffer.set_playhead(video_playhead + latency_secs);
-                    }
-
-                    source.last_video_playhead = video_playhead;
-                }
-
-                source.buffer.fill(buffer);
-
-                if let Some(ack) = source.ack.take() {
-                    let _ = ack.send(());
-                }
+                render_source_block(source, buffer, latency_secs);
             },
             {
                 let failed = failed.clone();
@@ -384,72 +619,7 @@ where
     let install_tx = source_tx.clone();
     let install = Box::new(
         move |spec: Box<PlaySpec>, generation: u64, ack: std_mpsc::Sender<()>| {
-            let PlaySpec {
-                segments,
-                music,
-                project,
-                duration_secs,
-                start_playhead_secs,
-                playhead_rx,
-            } = *spec;
-
-            if !(duration_secs.is_finite() && duration_secs > 0.0) {
-                return Err(format!(
-                    "Invalid audio pre-render duration: {duration_secs}"
-                ));
-            }
-
-            #[cfg(not(target_os = "windows"))]
-            let latency_corrector = {
-                let hint =
-                    default_output_latency_hint(output_info.sample_rate, output_info.buffer_size);
-                if let Some(hint) = hint
-                    && hint.latency_secs > 0.0
-                {
-                    if hint.transport.is_wireless() {
-                        info!(
-                            "Applying wireless audio output latency hint: {:.1} ms",
-                            hint.latency_secs * 1_000.0
-                        );
-                    } else {
-                        info!(
-                            "Applying audio output latency hint: {:.1} ms",
-                            hint.latency_secs * 1_000.0
-                        );
-                    }
-                }
-                LatencyCorrector::new(hint, LatencyCorrectionConfig::default())
-            };
-            #[cfg(not(target_os = "windows"))]
-            let initial_latency_secs = latency_corrector.initial_output_latency_secs();
-            #[cfg(target_os = "windows")]
-            let initial_latency_secs = 0.0;
-
-            let start_playhead = start_playhead_secs + initial_latency_secs;
-            let mut buffer = PrerenderedAudioBuffer::<T>::new(
-                segments,
-                music,
-                &project,
-                output_info,
-                duration_secs,
-                start_playhead,
-            );
-            buffer.set_playhead(start_playhead);
-            // A few ms: guarantees the callback reads real samples at the
-            // playhead, never leading silence.
-            buffer.wait_until_ready(PRERENDER_READY_TIMEOUT);
-
-            install_tx
-                .send(SourceCommand::Install(Box::new(ActiveSource {
-                    generation,
-                    buffer,
-                    playhead_rx,
-                    last_video_playhead: start_playhead_secs,
-                    ack: Some(ack),
-                    #[cfg(not(target_os = "windows"))]
-                    latency_corrector,
-                })))
-                .map_err(|_| "Audio callback channel closed".to_string())
+            install_source::<T>(spec, generation, ack, output_info, true, &install_tx)
         },
     );
 
diff --git a/crates/editor/src/editor_instance.rs b/crates/editor/src/editor_instance.rs
index 5ef4e0a837..c093037846 100644
--- a/crates/editor/src/editor_instance.rs
+++ b/crates/editor/src/editor_instance.rs
@@ -115,6 +115,26 @@ impl EditorInstance {
         on_state_change: impl Fn(&EditorState) + Send + Sync + 'static,
         frame_cb: Box<dyn FnMut(editor::EditorFrameOutput) + Send>,
         shared_device: Option<SharedWgpuDevice>,
+    ) -> Result<Arc<Self>, String> {
+        Self::new_with_audio_output(
+            project_path,
+            on_state_change,
+            frame_cb,
+            shared_device,
+            Arc::new(crate::AudioOutput::new()),
+        )
+        .await
+    }
+
+    /// Like [`EditorInstance::new`] but with a caller-provided audio output,
+    /// letting harnesses substitute a headless sink while everything else
+    /// (decoders, renderer, playback) runs the production path.
+    pub async fn new_with_audio_output(
+        project_path: PathBuf,
+        on_state_change: impl Fn(&EditorState) + Send + Sync + 'static,
+        frame_cb: Box<dyn FnMut(editor::EditorFrameOutput) + Send>,
+        shared_device: Option<SharedWgpuDevice>,
+        audio_output: Arc<crate::AudioOutput>,
     ) -> Result<Arc<Self>, String> {
         if !project_path.exists() {
             return Err(format!("Video path {} not found!", project_path.display()));
@@ -246,9 +266,13 @@ impl EditorInstance {
         // Segment setup (decoder init + kicking off audio decodes) is
         // independent of the GPU/render setup below, so run it concurrently on
         // its own task.
-        let force_ffmpeg_for_editor = cfg!(target_os = "windows");
+        // The env override lets headless harnesses on runners whose
+        // VideoToolbox is too slow for real-time playback fall back to the
+        // FFmpeg decoder.
+        let force_ffmpeg_for_editor = cfg!(target_os = "windows")
+            || std::env::var_os("CAP_EDITOR_FORCE_FFMPEG_DECODER").is_some();
         if force_ffmpeg_for_editor {
-            tracing::info!("Using FFmpeg decoder for Windows editor preview");
+            tracing::info!("Using FFmpeg decoder for editor preview");
         }
 
         let segments_task = tokio::spawn({
@@ -260,7 +284,6 @@ impl EditorInstance {
         // Open the session's audio output stream now (in the background) so
         // the first play press doesn't wait on the device — Bluetooth outputs
         // in particular can take seconds to wake.
-        let audio_output = Arc::new(crate::AudioOutput::new());
         let has_declared_audio = match meta.as_ref() {
             StudioRecordingMeta::SingleSegment { segment } => segment.audio.is_some(),
             StudioRecordingMeta::MultipleSegments { inner } => inner
diff --git a/crates/editor/src/lib.rs b/crates/editor/src/lib.rs
index 1c4d2fd3b3..6b4dfd1bde 100644
--- a/crates/editor/src/lib.rs
+++ b/crates/editor/src/lib.rs
@@ -7,7 +7,9 @@ mod segments;
 mod telemetry;
 
 pub use audio::{AudioRenderer, MusicTracks};
-pub use audio_output::AudioOutput;
+pub use audio_output::{
+    AudioOutput, HEADLESS_BLOCK_FRAMES, HEADLESS_CHANNELS, HEADLESS_SAMPLE_RATE, HeadlessAudioTap,
+};
 pub use editor::{
     EditorFrameOutput, Renderer, RendererHandle, finish_renderer_layers_creation,
     start_renderer_layers_creation,
diff --git a/crates/enc-ffmpeg/src/audio/base.rs b/crates/enc-ffmpeg/src/audio/base.rs
index 5f352188db..44ac33659e 100644
--- a/crates/enc-ffmpeg/src/audio/base.rs
+++ b/crates/enc-ffmpeg/src/audio/base.rs
@@ -24,8 +24,11 @@ impl AudioEncoderBase {
         timestamp: Duration,
         output: &mut format::context::Output,
     ) -> Result<(), ffmpeg::Error> {
+        // Input frames are stamped in input-rate units; BufferedResampler
+        // rescales them to the encoder's output rate.
+        let input_rate = f64::from(self.resampler.input().rate);
         self.inner
-            .update_pts(&mut frame, timestamp, &mut self.encoder);
+            .update_pts_with_rate(&mut frame, timestamp, input_rate);
 
         self.resampler.add_frame(frame);
 
diff --git a/crates/enc-ffmpeg/src/audio/buffered_resampler.rs b/crates/enc-ffmpeg/src/audio/buffered_resampler.rs
index d350920856..34924bb4d6 100644
--- a/crates/enc-ffmpeg/src/audio/buffered_resampler.rs
+++ b/crates/enc-ffmpeg/src/audio/buffered_resampler.rs
@@ -42,8 +42,10 @@ impl BufferedResampler {
         };
 
         for buffer in self.buffer.iter().skip(1) {
-            // fill in gap
-            remaining_samples += (buffer.1 - pts) as usize;
+            // Fill in gaps between buffered frames. Non-integer rate ratios
+            // (44.1k -> 48k) can round consecutive pts to overlap by a sample,
+            // making the difference negative; that is an overlap, not a gap.
+            remaining_samples += (buffer.1 - pts).max(0) as usize;
             remaining_samples += buffer.0.samples();
             pts += buffer.0.samples() as i64;
         }
@@ -55,6 +57,10 @@ impl BufferedResampler {
         *self.resampler.output()
     }
 
+    pub fn input(&self) -> resampling::context::Definition {
+        *self.resampler.input()
+    }
+
     pub fn add_frame(&mut self, mut frame: ffmpeg::frame::Audio) {
         if let Some(min_next_pts) = self.min_next_pts
             && let Some(pts) = frame.pts()
@@ -351,6 +357,61 @@ mod test {
             let last = bufferer.buffer.back().unwrap();
             assert_eq!(last.1 + last.0.samples() as i64, 600);
         }
+
+        #[test]
+        fn overlapping_resampled_pts_treated_as_zero_gap() {
+            // With a non-integer rate ratio (44.1k -> 48k), rounding can put
+            // a resampled frame's pts one sample before the end of the frame
+            // ahead of it. That overlap has to count as a zero-length gap:
+            // the unclamped subtraction used to wrap into a huge unsigned
+            // "gap" and blow up frame retrieval.
+            let mut resampler = create_resampler(IN_RATE);
+
+            // The second frame is queued at pts 99, overlapping the first.
+            for start_pts in [0, 99] {
+                resampler.buffer.push_back((make_input_frame(100, 0), start_pts));
+            }
+            assert_eq!(resampler.remaining_samples(), 200);
+
+            let drained = resampler.get_frame(200).expect("both frames are buffered");
+            assert_eq!((drained.samples(), drained.pts()), (200, Some(0)));
+        }
+
+        #[test]
+        fn non_integer_ratio_stream_preserves_duration() {
+            // A sustained real-world 44.1k -> 48k stream fed in device-sized
+            // buffers: pts rounding must neither panic nor lose samples.
+            let mut resampler = BufferedResampler::new(
+                AudioInfo::new_raw(format::Sample::U8(cap_media_info::Type::Packed), 44_100, 1),
+                AudioInfo::new_raw(format::Sample::U8(cap_media_info::Type::Packed), 48_000, 1),
+            )
+            .unwrap();
+
+            let mut total = 0usize;
+            for index in 0..64i64 {
+                let mut input = ffmpeg::frame::Audio::new(
+                    cap_media_info::Sample::U8(cap_media_info::Type::Packed),
+                    1024,
+                    ChannelLayout::MONO,
+                );
+                input.data_mut(0).fill(69);
+                input.set_rate(44_100);
+                input.set_pts(Some(index * 1024));
+                resampler.add_frame(input);
+                // Drain whatever `get_frame(960)` hands out after this input.
+                total += std::iter::from_fn(|| resampler.get_frame(960))
+                    .map(|out| out.samples())
+                    .sum::<usize>();
+            }
+            // Whatever is still buffered at the end comes out through `flush`.
+            total += std::iter::from_fn(|| resampler.flush(960))
+                .map(|out| out.samples())
+                .sum::<usize>();
+
+            let expected = (64.0 * 1024.0 * 48_000.0 / 44_100.0) as isize;
+            let error = ((total as isize) - expected).abs();
+            assert!(error < 2_000, "drained {total} output samples, expected about {expected}");
+        }
     }
 
     mod get_frame {
diff --git a/crates/enc-ffmpeg/src/audio/opus.rs b/crates/enc-ffmpeg/src/audio/opus.rs
index 1b9c144624..34e1902dae 100644
--- a/crates/enc-ffmpeg/src/audio/opus.rs
+++ b/crates/enc-ffmpeg/src/audio/opus.rs
@@ -67,6 +67,11 @@ impl OpusEncoder {
         let mut output_config = input_config;
         output_config.sample_format = Self::SAMPLE_FORMAT;
         output_config.sample_rate = rate as u32;
+        // libopus rejects surround layouts without an explicit mapping
+        // family; multichannel interfaces (5.1 mics) would fail to start a
+        // recording at all. Voice capture doesn't need surround: downmix to
+        // stereo via the resampler instead.
+        output_config.channels = output_config.channels.min(2);
 
         let resampler = BufferedResampler::new(input_config, output_config)
             .map_err(OpusEncoderError::Resampler)?;
diff --git a/crates/enc-ffmpeg/src/base.rs b/crates/enc-ffmpeg/src/base.rs
index 88ffbef5ba..60179c8fa1 100644
--- a/crates/enc-ffmpeg/src/base.rs
+++ b/crates/enc-ffmpeg/src/base.rs
@@ -66,6 +66,44 @@ impl EncoderBase {
         }
     }
 
+    /// Assigns `frame` a pts measured in ticks of `rate`.
+    ///
+    /// Audio input frames must be stamped in *input sample rate* units — the
+    /// resampler rescales them to the encoder's output rate — whereas
+    /// [`Self::update_pts`] uses the encoder's own (output) time base. Mixing
+    /// the two conventions plays non-48kHz microphones at the wrong speed.
+    ///
+    /// The pts is derived from `timestamp`, or from the frame's existing pts
+    /// when `timestamp` is `Duration::MAX`; a frame with neither is logged
+    /// and left untouched. It is rebased onto the first pts seen and kept
+    /// strictly greater than the previously stamped pts.
+    pub fn update_pts_with_rate(
+        &mut self,
+        frame: &mut frame::Frame,
+        timestamp: Duration,
+        rate: f64,
+    ) {
+        let absolute_pts = if timestamp != Duration::MAX {
+            Some((timestamp.as_secs_f64() * rate).round() as i64)
+        } else {
+            frame.pts()
+        };
+
+        let Some(absolute_pts) = absolute_pts else {
+            tracing::error!("Frame has no pts");
+            return;
+        };
+
+        let anchor = *self.first_pts.get_or_insert(absolute_pts);
+        let rebased = absolute_pts - anchor;
+        let stamped = match self.last_frame_pts {
+            Some(previous) if rebased <= previous => previous + 1,
+            _ => rebased,
+        };
+        self.last_frame_pts = Some(stamped);
+        frame.set_pts(Some(stamped));
+    }
+
     pub fn send_frame(
         &mut self,
         frame: &frame::Frame,
diff --git a/crates/enc-ffmpeg/src/mux/segmented_stream.rs b/crates/enc-ffmpeg/src/mux/segmented_stream.rs
index 5605044b45..f6ec79014a 100644
--- a/crates/enc-ffmpeg/src/mux/segmented_stream.rs
+++ b/crates/enc-ffmpeg/src/mux/segmented_stream.rs
@@ -84,7 +84,6 @@ pub struct SegmentedVideoEncoder {
     segment_start_time: Option<Duration>,
     last_frame_timestamp: Option<Duration>,
     frames_in_segment: u32,
-    encoded_frame_count: u64,
 
     completed_segments: Vec<VideoSegmentInfo>,
 
@@ -281,7 +280,6 @@ impl SegmentedVideoEncoder {
             segment_start_time: None,
             last_frame_timestamp: None,
             frames_in_segment: 0,
-            encoded_frame_count: 0,
             completed_segments: Vec::new(),
             pending_segment_indices: Vec::new(),
             frames_since_pending_flush: 0,
@@ -341,10 +339,12 @@ impl SegmentedVideoEncoder {
 
         self.last_frame_timestamp = Some(timestamp);
 
-        let encoder_timestamp = self.next_encoder_timestamp();
+        // Encode with the frame's real capture-derived timestamp. The encoder
+        // anchors pts at the first frame, so capture gaps (static content,
+        // stream restarts, dropped frames) stay in the timeline instead of
+        // compressing it and drifting video ahead of audio.
         self.encoder
-            .queue_frame(frame, encoder_timestamp, &mut self.output)?;
-        self.encoded_frame_count += 1;
+            .queue_frame(frame, timestamp, &mut self.output)?;
         self.frames_in_segment += 1;
 
         if is_first_frame {
@@ -367,12 +367,6 @@ impl SegmentedVideoEncoder {
         Ok(())
     }
 
-    fn next_encoder_timestamp(&self) -> Duration {
-        let frame_rate_num = self.codec_info.frame_rate_num.max(1) as f64;
-        let frame_rate_den = self.codec_info.frame_rate_den.max(1) as f64;
-        Duration::from_secs_f64(self.encoded_frame_count as f64 * frame_rate_den / frame_rate_num)
-    }
-
     fn notify_segment(&self, event: SegmentCompletedEvent) {
         if let Some(tx) = &self.segment_tx
             && let Err(e) = tx.send(event)
@@ -995,6 +989,88 @@ mod tests {
         assert!(all_video, "all events should be video type");
     }
 
+    #[test]
+    fn encoded_pts_preserve_capture_timestamps_across_gaps() {
+        ffmpeg::init().ok();
+
+        let scratch = tempfile::tempdir().unwrap();
+        let out_dir = scratch.path().to_path_buf();
+
+        let config = SegmentedVideoEncoderConfig {
+            segment_duration: Duration::from_millis(500),
+            ..Default::default()
+        };
+        let mut encoder =
+            SegmentedVideoEncoder::init(out_dir.clone(), test_video_info(), config).unwrap();
+
+        // Three frames at ~30fps, then a ~1.9s hole in capture (static
+        // screen / stream restart), then three more. The encoded pts have to
+        // carry that hole; collapsing onto a frame-counter grid would desync
+        // video from audio on every dropped frame.
+        let timestamps_ms: [u64; 6] = [0, 33, 66, 2000, 2033, 2066];
+        for capture_time in timestamps_ms.map(Duration::from_millis) {
+            encoder
+                .queue_frame(create_test_frame(320, 240), capture_time)
+                .unwrap();
+        }
+
+        encoder.finish().unwrap();
+
+        // Appending the fMP4 media segments to the init segment yields a
+        // valid mp4, which is then demuxed to read the pts back.
+        let mut media_segments = Vec::<PathBuf>::new();
+        for entry in std::fs::read_dir(&out_dir).unwrap().flatten() {
+            let path = entry.path();
+            if path.extension().is_some_and(|ext| ext == "m4s") {
+                media_segments.push(path);
+            }
+        }
+        media_segments.sort();
+        assert!(
+            !media_segments.is_empty(),
+            "encoder should have produced media segments"
+        );
+
+        let mut mp4_bytes = std::fs::read(out_dir.join(INIT_SEGMENT_NAME)).unwrap();
+        for segment in &media_segments {
+            mp4_bytes.extend(std::fs::read(segment).unwrap());
+        }
+        let concat_path = out_dir.join("concat_test.mp4");
+        std::fs::write(&concat_path, mp4_bytes).unwrap();
+
+        let mut input = format::input(&concat_path).unwrap();
+        let stream_index = input
+            .streams()
+            .best(ffmpeg::media::Type::Video)
+            .unwrap()
+            .index();
+        let time_base = input.stream(stream_index).unwrap().time_base();
+        let tb = time_base.numerator() as f64 / time_base.denominator() as f64;
+
+        // Collect the video packets' pts in seconds, then order them.
+        let mut pts_secs = Vec::<f64>::new();
+        for (stream, packet) in input.packets() {
+            if stream.index() != stream_index {
+                continue;
+            }
+            if let Some(pts) = packet.pts() {
+                pts_secs.push(pts as f64 * tb);
+            }
+        }
+        pts_secs.sort_by(|lhs, rhs| lhs.partial_cmp(rhs).unwrap());
+
+        assert_eq!(pts_secs.len(), timestamps_ms.len());
+
+        for (pts, expected_ms) in pts_secs.iter().zip(timestamps_ms) {
+            let expected = expected_ms as f64 / 1000.0;
+            assert!(
+                (pts - expected).abs() < 0.005,
+                "encoded pts {pts:.3}s should match capture timestamp {expected:.3}s \
+                 (all pts: {pts_secs:?})"
+            );
+        }
+    }
+
     #[test]
     fn manifest_updated_on_segment_boundary() {
         ffmpeg::init().ok();
diff --git a/crates/recording/src/output_pipeline/core.rs b/crates/recording/src/output_pipeline/core.rs
index 42390d899a..298724048f 100644
--- a/crates/recording/src/output_pipeline/core.rs
+++ b/crates/recording/src/output_pipeline/core.rs
@@ -620,6 +620,7 @@ fn video_mux_send_error(frame_count: u64, error: anyhow::Error) -> anyhow::Error
 pub(crate) struct AudioTimestampGenerator {
     sample_rate: u32,
     total_samples: u64,
+    clock_samples_advanced: u64,
     master_clock: Option<Arc<MasterClock>>,
 }
 
@@ -631,37 +632,69 @@ impl AudioTimestampGenerator {
         Self {
             sample_rate,
             total_samples: 0,
+            clock_samples_advanced: 0,
             master_clock: None,
         }
     }
 
+    #[cfg(test)]
     fn from_master_clock(master_clock: Arc<MasterClock>) -> Self {
+        let rate = master_clock.sample_rate();
+        Self::from_master_clock_with_rate(master_clock, rate)
+    }
+
+    /// The generator converts counted samples into time, so it must run at
+    /// the audio source's real sample rate. The shared master clock may run
+    /// at a different (default 48kHz) rate: counting a 44.1kHz mic's samples
+    /// against a 48kHz clock makes the audio timeline lag real time and the
+    /// gap tracker "corrects" the difference with bogus silence — the
+    /// recording then plays at the wrong speed.
+    fn from_master_clock_with_rate(master_clock: Arc<MasterClock>, sample_rate: u32) -> Self {
         Self {
-            sample_rate: master_clock.sample_rate(),
+            sample_rate: if sample_rate > 0 {
+                sample_rate
+            } else {
+                master_clock.sample_rate()
+            },
             total_samples: 0,
+            clock_samples_advanced: 0,
             master_clock: Some(master_clock),
         }
     }
 
+    fn advance_clock(&mut self) {
+        if let Some(clock) = self.master_clock.as_ref() {
+            let clock_rate = clock.sample_rate();
+            // Express the cumulative source-rate sample count in clock-rate
+            // samples so the clock advances by real time whatever the
+            // source's rate. Working from the running total matters: a
+            // per-buffer conversion truncates up to one clock sample per
+            // call and drifts on non-integer ratios (44.1k -> 48k).
+            let target = if clock_rate != self.sample_rate {
+                let source_rate = u128::from(self.sample_rate.max(1));
+                (u128::from(self.total_samples) * u128::from(clock_rate) / source_rate) as u64
+            } else {
+                self.total_samples
+            };
+            let pending = target.saturating_sub(self.clock_samples_advanced);
+            self.clock_samples_advanced = target;
+            if pending != 0 {
+                clock.advance_samples(pending);
+            }
+        }
+    }
+
     fn next_timestamp(&mut self, frame_samples: u64) -> Duration {
         let timestamp_nanos = samples_to_nanos(self.total_samples, self.sample_rate);
         self.total_samples += frame_samples;
-        if let Some(clock) = &self.master_clock
-            && frame_samples > 0
-        {
-            clock.advance_samples(frame_samples);
-        }
+        self.advance_clock();
         Duration::from_nanos(timestamp_nanos)
     }
 
     fn advance_by_duration(&mut self, duration: Duration) -> u64 {
         let samples = (duration.as_secs_f64() * self.sample_rate as f64).round() as u64;
         self.total_samples += samples;
-        if let Some(clock) = &self.master_clock
-            && samples > 0
-        {
-            clock.advance_samples(samples);
-        }
+        self.advance_clock();
         samples
     }
 }
@@ -1134,21 +1167,20 @@ impl TimestampAnomalyTracker {
             self.max_forward_skew_secs = jump_secs;
         }
 
-        let expected_increment = Duration::from_millis(33);
-        let adjusted = last.saturating_add(expected_increment);
-
-        let compensation_secs = current.as_secs_f64() - adjusted.as_secs_f64();
-        self.accumulated_compensation_secs -= compensation_secs;
-        self.resync_count += 1;
-        self.did_resync = true;
-
         if wall_clock_confirmed {
+            // Frame delivery paused for about as long as the timestamp jump:
+            // this is a real gap (static screen, stream restart, sleep/wake),
+            // not a source-clock glitch. The gap must stay in the timeline —
+            // collapsing it desyncs video from audio whenever it happens
+            // before the wall-clock anchor exists to re-expand it.
             let wall_clock_gap_secs = self
                 .last_valid_wall_clock
                 .map(|wc| now.duration_since(wc).as_secs_f64())
                 .unwrap_or(0.0);
 
             self.wall_clock_confirmed_jumps += 1;
+            self.consecutive_anomalies = 0;
+            self.last_valid_duration = Some(current);
 
             info!(
                 stream = self.stream_name,
@@ -1158,9 +1190,21 @@ impl TimestampAnomalyTracker {
                 current_ms = current.as_millis(),
                 resync_count = self.resync_count,
                 confirmed_jumps = self.wall_clock_confirmed_jumps,
-                "Wall-clock-confirmed forward jump (system sleep/wake), accepting new baseline"
+                "Wall-clock-confirmed forward jump (gap in frame delivery), accepting new baseline"
             );
-        } else {
+
+            return Ok(current);
+        }
+
+        let expected_increment = Duration::from_millis(33);
+        let adjusted = last.saturating_add(expected_increment);
+
+        let compensation_secs = current.as_secs_f64() - adjusted.as_secs_f64();
+        self.accumulated_compensation_secs -= compensation_secs;
+        self.resync_count += 1;
+        self.did_resync = true;
+
+        {
             self.anomaly_count += 1;
 
             let wall_clock_gap_secs = self
@@ -1595,6 +1639,7 @@ impl<TVideo: VideoSource> OutputPipelineBuilder<HasVideo<TVideo>> {
 
         let shared_pause = SharedWallClockPause::new(build_ctx.pause_flag.clone());
         let video_frame_count = Arc::new(AtomicU64::new(0));
+        let video_timestamp_span = Arc::new(VideoTimestampSpan::default());
 
         let video_start_gate = has_audio_sources.then(VideoStartGate::new);
 
@@ -1608,6 +1653,7 @@ impl<TVideo: VideoSource> OutputPipelineBuilder<HasVideo<TVideo>> {
             timestamps,
             shared_pause.clone(),
             video_frame_count.clone(),
+            video_timestamp_span.clone(),
             master_clock.clone(),
             video_info,
             video_start_gate.clone(),
@@ -1641,6 +1687,7 @@ impl<TVideo: VideoSource> OutputPipelineBuilder<HasVideo<TVideo>> {
             pause_flag: build_ctx.pause_flag,
             cancel_token: build_ctx.stop_token,
             video_frame_count,
+            video_timestamp_span,
             health_rx: Some(build_ctx.health_rx),
             audio_gap_summary,
         })
@@ -1724,6 +1771,7 @@ impl OutputPipelineBuilder<NoVideo> {
             pause_flag: build_ctx.pause_flag,
             cancel_token: build_ctx.stop_token,
             video_frame_count: Arc::new(AtomicU64::new(0)),
+            video_timestamp_span: Arc::new(VideoTimestampSpan::default()),
             health_rx: Some(build_ctx.health_rx),
             audio_gap_summary,
         })
@@ -1890,6 +1938,42 @@ fn estimate_video_frame_duration_ns(video_info: &VideoInfo) -> u64 {
     1_000_000_000 / fps as u64
 }
 
+/// Earliest and latest video timestamps actually sent to the muxer, used to
+/// report the real encoded media duration. Capture is VFR (static screens,
+/// dropped frames), so `frame_count / fps` under-reports the duration by the
+/// length of every gap.
+#[derive(Debug)]
+pub struct VideoTimestampSpan {
+    first_ns: AtomicU64,
+    last_ns: AtomicU64,
+}
+
+impl Default for VideoTimestampSpan {
+    // `first_ns == u64::MAX` is how `get` recognises an empty span.
+    fn default() -> Self {
+        let first_ns = AtomicU64::new(u64::MAX);
+        let last_ns = AtomicU64::new(0);
+        Self { first_ns, last_ns }
+    }
+}
+
+impl VideoTimestampSpan {
+    fn record(&self, timestamp: Duration) {
+        let ns = u64::try_from(timestamp.as_nanos()).unwrap_or(u64::MAX);
+        self.first_ns.fetch_min(ns, Ordering::AcqRel);
+        self.last_ns.fetch_max(ns, Ordering::AcqRel);
+    }
+
+    pub fn get(&self) -> Option<(Duration, Duration)> {
+        let first = self.first_ns.load(Ordering::Acquire);
+        // `last` is clamped up to `first` so the pair is never inverted.
+        (first != u64::MAX).then(|| {
+            let last = self.last_ns.load(Ordering::Acquire).max(first);
+            (Duration::from_nanos(first), Duration::from_nanos(last))
+        })
+    }
+}
+
 #[allow(clippy::too_many_arguments)]
 fn spawn_video_encoder<TMutex: VideoMuxer<VideoFrame = TVideo::Frame>, TVideo: VideoSource>(
     setup_ctx: &mut SetupCtx,
@@ -1901,6 +1985,7 @@ fn spawn_video_encoder<TMutex: VideoMuxer<VideoFrame = TVideo::Frame>, TVideo: V
     timestamps: Timestamps,
     shared_pause: SharedWallClockPause,
     frame_counter: Arc<AtomicU64>,
+    timestamp_span: Arc<VideoTimestampSpan>,
     master_clock: Arc<MasterClock>,
     video_info: VideoInfo,
     video_start_gate: Option<VideoStartGate>,
@@ -1975,8 +2060,16 @@ fn spawn_video_encoder<TMutex: VideoMuxer<VideoFrame = TVideo::Frame>, TVideo: V
                         );
                     }
 
+                    // Excise accumulated pause time from the content timeline
+                    // before anomaly tracking. Audio already excises pauses
+                    // (paused frames are dropped and sample counting carries
+                    // on), and wall_clock_elapsed below subtracts pauses too;
+                    // leaving the pause in the video timestamps would make a
+                    // resume look like a wall-clock-confirmed capture gap and
+                    // poison the drift anchor with pause-inflated time.
                     let remapped_ts = Timestamp::Instant(
-                        timestamps.instant() + remap.duration(),
+                        timestamps.instant()
+                            + remap.duration().saturating_sub(total_pause_duration),
                     );
 
                     let raw_duration = match anomaly_tracker.process_timestamp(remapped_ts, timestamps) {
@@ -1999,6 +2092,7 @@ fn spawn_video_encoder<TMutex: VideoMuxer<VideoFrame = TVideo::Frame>, TVideo: V
                     let raw_wall_clock = timestamps.instant().elapsed();
                     let wall_clock_elapsed = raw_wall_clock.saturating_sub(total_pause_duration);
                     let duration = drift_tracker.calculate_timestamp(raw_duration, wall_clock_elapsed);
+                    timestamp_span.record(duration);
 
                     if frame_count.is_multiple_of(300) {
                         let drift_ratio = if raw_duration.as_secs_f64() > 0.0 {
@@ -2071,8 +2165,13 @@ fn spawn_video_encoder<TMutex: VideoMuxer<VideoFrame = TVideo::Frame>, TVideo: V
                                 "Published video start timestamp to encoder-pair gate (drain path)"
                             );
                         }
+                        // Excise pauses exactly like the main loop above, so
+                        // drained tail frames stay on the same content timeline.
                         let remapped_ts = Timestamp::Instant(
-                            timestamps.instant() + remap.duration(),
+                            timestamps.instant()
+                                + remap
+                                    .duration()
+                                    .saturating_sub(shared_pause.total_pause_duration()),
                         );
 
                         let raw_duration =
@@ -2092,6 +2191,7 @@ fn spawn_video_encoder<TMutex: VideoMuxer<VideoFrame = TVideo::Frame>, TVideo: V
                         let wall_clock_elapsed = raw_wall_clock.saturating_sub(total_pause);
                         let duration =
                             drift_tracker.calculate_timestamp(raw_duration, wall_clock_elapsed);
+                        timestamp_span.record(duration);
 
                         match muxer.lock().await.send_video_frame(frame, duration) {
                             Ok(()) => {}
@@ -2191,8 +2291,10 @@ impl PreparedAudioSources {
             let stop_token = stop_token.child_token();
             let muxer = muxer.clone();
             async move {
-                let mut timestamp_generator =
-                    AudioTimestampGenerator::from_master_clock(master_clock.clone());
+                let mut timestamp_generator = AudioTimestampGenerator::from_master_clock_with_rate(
+                    master_clock.clone(),
+                    audio_info.sample_rate,
+                );
                 let sample_rate = audio_info.sample_rate;
                 let mut dropped_during_pause: u64 = 0;
                 let mut frame_count: u64 = 0;
@@ -2741,6 +2843,7 @@ pub struct OutputPipeline {
     pause_flag: Arc<AtomicBool>,
     cancel_token: CancellationToken,
     video_frame_count: Arc<AtomicU64>,
+    video_timestamp_span: Arc<VideoTimestampSpan>,
     health_rx: Option<HealthReceiver>,
     audio_gap_summary: Arc<OnceLock<AudioGapSummary>>,
 }
@@ -2750,6 +2853,9 @@ pub struct FinishedOutputPipeline {
     pub first_timestamp: Timestamp,
     pub video_info: Option<VideoInfo>,
     pub video_frame_count: u64,
+    /// First and last video timestamps sent to the muxer; the real encoded
+    /// media span for VFR content.
+    pub video_timestamp_span: Option<(Duration, Duration)>,
     pub audio_gap_summary: Option<AudioGapSummary>,
 }
 
@@ -2842,6 +2948,7 @@ impl OutputPipeline {
             first_timestamp,
             video_info: self.video_info,
             video_frame_count: self.video_frame_count.load(Ordering::Acquire),
+            video_timestamp_span: self.video_timestamp_span.get(),
             audio_gap_summary: self.audio_gap_summary.get().copied(),
         })
     }
@@ -3805,11 +3912,17 @@ mod tests {
             tracker.last_valid_wall_clock = Instant::now().checked_sub(Duration::from_secs(3));
 
             let jump_ts = make_timestamp(timestamps, Duration::from_millis(4 * 33 + 3000));
-            tracker.process_timestamp(jump_ts, timestamps).unwrap();
+            let accepted = tracker.process_timestamp(jump_ts, timestamps).unwrap();
 
+            // A wall-clock-confirmed jump is a real gap in frame delivery and
+            // passes through unmodified — it is not a resync.
             assert!(
-                tracker.take_resync_flag(),
-                "Resync flag should be set after wall-clock-confirmed jump"
+                !tracker.take_resync_flag(),
+                "Confirmed gap must not be treated as a timeline resync"
+            );
+            assert!(
+                (accepted.as_secs_f64() - (4.0 * 0.033 + 3.0)).abs() < 0.05,
+                "confirmed gap must pass through, got {accepted:?}"
             );
 
             let next_ts =
@@ -3851,7 +3964,10 @@ mod tests {
 
             assert_eq!(tracker.anomaly_count, 0);
             assert_eq!(tracker.wall_clock_confirmed_jumps, 2);
-            assert_eq!(tracker.resync_count, 2);
+            assert_eq!(
+                tracker.resync_count, 0,
+                "confirmed gaps pass through; they are not timeline resyncs"
+            );
         }
 
         #[test]
@@ -4365,10 +4481,14 @@ mod tests {
 
         #[test]
         fn returns_timeout_when_thread_does_not_exit_in_time() {
-            let handle = std::thread::spawn(|| {
-                std::thread::sleep(Duration::from_millis(100));
+            // The worker blocks until released, so it can never beat the
+            // timeout however unfairly a loaded machine schedules threads.
+            let (release_tx, release_rx) = std::sync::mpsc::channel::<()>();
+            let handle = std::thread::spawn(move || {
+                let _ = release_rx.recv();
                 Ok(())
             });
+            let _release_tx = release_tx;
 
             match wait_for_blocking_thread_finish(handle, Duration::from_millis(5), "test-worker") {
                 BlockingThreadFinish::TimedOut(error) => {
@@ -5279,5 +5399,53 @@ mod tests {
                 "video drifted from the wall clock by {max_skew:?} (correction failed)"
             );
         }
+
+        // A static screen (or a capture-stream restart) stops frame delivery
+        // entirely. The gap must survive into the output timeline: collapsing
+        // it compresses video relative to audio and desyncs the recording.
+        #[test]
+        fn video_timeline_preserves_capture_gaps() {
+            const RUN_FRAMES: usize = 150;
+            let frame_interval = 1.0 / 30.0;
+            let mut tracker = VideoDriftTracker::new();
+
+            // Two runs of steady 30fps delivery split by 4s with no frames
+            // delivered; timestamps after the resume include the gap.
+            let timeline: Vec<_> = (0..2 * RUN_FRAMES)
+                .map(|frame| {
+                    let hold = if frame < RUN_FRAMES { 0.0 } else { 4.0 };
+                    let at = Duration::from_secs_f64(frame as f64 * frame_interval + hold);
+                    tracker.calculate_timestamp(at, at)
+                })
+                .collect();
+
+            // The first frame after the hole must land a full gap later.
+            let gap = timeline[RUN_FRAMES].saturating_sub(timeline[RUN_FRAMES - 1]);
+            assert!(
+                gap >= Duration::from_secs_f64(3.5),
+                "capture gap collapsed to {gap:?} in the output timeline"
+            );
+
+            let span = timeline[2 * RUN_FRAMES - 1].saturating_sub(timeline[0]);
+            let real = 299.0 * frame_interval + 4.0;
+            assert!(
+                (span.as_secs_f64() - real).abs() < 0.3,
+                "output span {span:?} does not match real elapsed time {real:.2}s"
+            );
+        }
+
+        #[test]
+        fn video_timestamp_span_reports_first_and_last_sent() {
+            let tracked = VideoTimestampSpan::default();
+            assert!(tracked.get().is_none(), "unset span must be None");
+
+            // The last timestamp is sent across a capture gap.
+            for sent_ms in [100, 133, 4000] {
+                tracked.record(Duration::from_millis(sent_ms));
+            }
+            let (earliest, latest) = tracked.get().expect("span should be set");
+            assert_eq!(earliest, Duration::from_millis(100));
+            assert_eq!(latest, Duration::from_millis(4000));
+        }
     }
 }
diff --git a/crates/recording/src/output_validation.rs b/crates/recording/src/output_validation.rs
index 72da56a4d9..3e045b3a41 100644
--- a/crates/recording/src/output_validation.rs
+++ b/crates/recording/src/output_validation.rs
@@ -103,3 +103,49 @@ pub fn validate_instant_recording(
         output_duration,
     }
 }
+
+/// Tolerated shortfall of the display track's container duration against the
+/// media span the recorder persisted, before the recording is flagged as
+/// having suspicious sync. The effective tolerance is the larger of this
+/// absolute floor (seconds) and `SYNC_SPAN_TOLERANCE_RATIO` of the expected
+/// span; it is meant to absorb muxer rounding and trailing keyframe padding.
+const SYNC_SPAN_TOLERANCE_SECS: f64 = 0.5;
+const SYNC_SPAN_TOLERANCE_RATIO: f64 = 0.03;
+
+/// Cross-checks a finalized display track against the media duration the
+/// recorder derived from the capture timestamps it actually muxed.
+///
+/// The two are produced independently: the expected duration comes from the
+/// pipeline's timestamp span, the container duration from what the encoder and
+/// muxer wrote. A container SHORTER than the span means timestamps were mangled
+/// between the pipeline and the file — the class of bug that silently desyncs
+/// audio/video. A LONGER container is legitimate for VFR content: muxers extend
+/// the final frame through any trailing static-screen hold (AVFoundation ends the
+/// session at the wall-clock stop time), so it is only logged at debug level.
+/// Non-fatal: a shortfall beyond tolerance logs a structured error and is returned
+/// in seconds; otherwise, or if `get_media_duration` yields nothing, returns `None`.
+pub fn check_display_sync_span(display_path: &Path, expected: Duration) -> Option<f64> {
+    let container = get_media_duration(display_path)?;
+    let shortfall = expected.as_secs_f64() - container.as_secs_f64();
+    let tolerance =
+        (expected.as_secs_f64() * SYNC_SPAN_TOLERANCE_RATIO).max(SYNC_SPAN_TOLERANCE_SECS);
+    if shortfall > tolerance {
+        tracing::error!(
+            path = %display_path.display(),
+            container_secs = container.as_secs_f64(),
+            expected_secs = expected.as_secs_f64(),
+            delta_secs = shortfall,
+            "SYNC INVARIANT VIOLATION: display track duration is shorter than \
+             the muxed timestamp span; this recording may have desynced audio/video"
+        );
+        Some(shortfall)
+    } else {
+        debug!(
+            path = %display_path.display(),
+            container_secs = container.as_secs_f64(),
+            expected_secs = expected.as_secs_f64(),
+            "display track duration consistent with muxed timestamp span"
+        );
+        None
+    }
+}
diff --git a/crates/recording/src/recovery.rs b/crates/recording/src/recovery.rs
index e542718b08..572d4b10f7 100644
--- a/crates/recording/src/recovery.rs
+++ b/crates/recording/src/recovery.rs
@@ -757,6 +757,28 @@ impl RecoveryManager {
         if total.is_zero() { None } else { Some(total) }
     }
 
+    /// Reads the display media duration the recorder persisted into the project's default
+    /// timeline, to cross-check the remux; `None` if absent, non-positive or out of range.
+    fn expected_display_duration_from_config(
+        project_path: &Path,
+        segment_index: u32,
+    ) -> Option<std::time::Duration> {
+        let config = std::fs::read_to_string(project_path.join("project-config.json")).ok()?;
+        let value: serde_json::Value = serde_json::from_str(&config).ok()?;
+        let segments = value.get("timeline")?.get("segments")?.as_array()?;
+        let segment = segments.iter().find(|s| {
+            s.get("recordingSegment")
+                .and_then(serde_json::Value::as_u64)
+                == Some(u64::from(segment_index))
+        })?;
+        let end = segment.get("end")?.as_f64()?;
+        let start = segment
+            .get("start")
+            .and_then(serde_json::Value::as_f64)
+            .unwrap_or(0.0);
+        (end > start).then(|| std::time::Duration::try_from_secs_f64(end - start).ok())?
+    }
+
     pub fn recover(recording: &IncompleteRecording) -> Result<RecoveredRecording, RecoveryError> {
         Self::finalize_with_purpose(recording, RecoveryPurpose::Recover)
     }
@@ -828,6 +850,17 @@ impl RecoveryManager {
                 }
             }
 
+            // Sync invariant: the remuxed display track must match the media
+            // span the recorder persisted from its capture timestamps.
+            if display_output.is_file()
+                && let Some(expected) = Self::expected_display_duration_from_config(
+                    &recording.project_path,
+                    segment.index,
+                )
+            {
+                crate::output_validation::check_display_sync_span(&display_output, expected);
+            }
+
             if let Some(camera_frags) = &segment.camera_fragments {
                 let camera_output = segment_dir.join("camera.mp4");
                 let camera_dir = segment_dir.join("camera");
diff --git a/crates/recording/src/studio_recording.rs b/crates/recording/src/studio_recording.rs
index 30352fb085..b79278a414 100644
--- a/crates/recording/src/studio_recording.rs
+++ b/crates/recording/src/studio_recording.rs
@@ -1002,17 +1002,39 @@ async fn stop_recording(
                     );
                     DEFAULT_FPS
                 });
-            // Use the encoded display-media duration (frame_count / fps), not the wall-clock
-            // recording span which includes pipeline-drain latency. This is the timeline the
-            // recorder persists to project-config.json, so it is what un-edited recordings use; the
-            // editor/export fallbacks only synthesize a timeline when none is present and read the
-            // muxed container duration, which this closely (not bit-exactly) matches.
-            let display_media_duration = if display_fps > 0 {
-                s.pipeline.screen.video_frame_count as f64 / f64::from(display_fps)
-            } else {
-                0.0
+            // Use the encoded display-media span (first to last muxed timestamp plus one
+            // nominal frame), not the wall-clock recording span which includes
+            // pipeline-drain latency, and not frame_count / fps, which under-reports VFR
+            // content by the length of every capture gap (static screens, dropped frames).
+            // This is the timeline the recorder persists to project-config.json, so it is
+            // what un-edited recordings use.
+            let display_media_duration = match s.pipeline.screen.video_timestamp_span {
+                Some((first, last)) if display_fps > 0 => {
+                    (last - first).as_secs_f64() + 1.0 / f64::from(display_fps)
+                }
+                _ if display_fps > 0 => {
+                    s.pipeline.screen.video_frame_count as f64 / f64::from(display_fps)
+                }
+                _ => 0.0,
             };
 
+            // Non-fragmented recordings have their final display file already;
+            // verify the muxed container matches the timestamps we sent it.
+            // Fragmented recordings get the same check after remux in recovery.
+            if s.pipeline
+                .screen
+                .path
+                .extension()
+                .is_some_and(|e| e == "mp4")
+                && s.pipeline.screen.path.is_file()
+                && display_media_duration > 0.0
+            {
+                crate::output_validation::check_display_sync_span(
+                    &s.pipeline.screen.path,
+                    Duration::from_secs_f64(display_media_duration),
+                );
+            }
+
             SegmentOutput {
                 meta: MultipleSegment {
                     display: VideoMeta {
@@ -1809,6 +1831,7 @@ mod tests {
             first_timestamp,
             video_info,
             video_frame_count,
+            video_timestamp_span: None,
             audio_gap_summary: None,
         }
     }
diff --git a/crates/recording/tests/sync_matrix.rs b/crates/recording/tests/sync_matrix.rs
new file mode 100644
index 0000000000..d7b08a3d19
--- /dev/null
+++ b/crates/recording/tests/sync_matrix.rs
@@ -0,0 +1,905 @@
+//! Synthetic device matrix for A/V sync.
+//!
+//! Drives the real recording pipeline (sources -> mux loop -> encoders ->
+//! containers) with synthetic video and audio across sample rates, channel
+//! counts, frame rates and delivery pathologies (jitter, drops, gaps), then
+//! verifies the muxed output preserves real time. No capture hardware is
+//! required, so this runs identically on macOS, Windows and Linux CI.
+//!
+//! Frames are emitted in real time because the pipeline pins video
+//! timestamps to the wall clock; each case therefore costs its content
+//! duration. Keep cases short.
+//!
+//! Set `CAP_SYNC_MATRIX_REPORT` to write a JSON report of every case.
+
+use std::{
+    path::{Path, PathBuf},
+    time::Duration,
+};
+
+use cap_media_info::{AudioInfo, Sample, Type, VideoInfo};
+use cap_recording::{
+    AudioFrame, ChannelAudioSource, ChannelAudioSourceConfig, ChannelVideoSource,
+    ChannelVideoSourceConfig, OutputPipeline,
+    ffmpeg::{
+        FFmpegVideoFrame, Mp4Muxer, OggMuxer, SegmentedVideoMuxer, SegmentedVideoMuxerConfig,
+    },
+};
+use cap_timestamp::{Timestamp, Timestamps};
+use serde::Serialize;
+
+const CONTENT_SECS: f64 = 4.0;
+/// Absolute tolerance for a muxed pts vs the sent capture timestamp. Covers
+/// warmup anchoring, emission jitter and encoder rounding, plus scheduler
+/// noise on shared CI runners.
+const ABS_TOLERANCE_SECS: f64 = 0.25;
+/// Tolerance for the relative structure (pts deltas vs sent deltas), which is
+/// what actually determines sync drift. The bug class this guards against
+/// produces errors of a second or more.
+const REL_TOLERANCE_SECS: f64 = 0.15;
+/// Tolerance for decoded audio duration vs generated duration.
+const AUDIO_DURATION_TOLERANCE_SECS: f64 = 0.15;
+
+/// Delivery pattern a synthetic video source follows; `timestamps` turns it
+/// into the concrete capture times a case sends.
+#[derive(Debug, Clone, Copy)]
+enum VideoScenario {
+    /// One frame on every nominal period.
+    Steady,
+    /// Frames offset from their nominal time by deterministic pseudo-jitter.
+    Jitter,
+    /// Every 4th and 7th frame is never delivered.
+    Drops,
+    /// Delivery pauses for 2s once 1.5s of frames have been sent.
+    Gap,
+}
+
+impl VideoScenario {
+    /// Short lowercase label for the scenario.
+    fn name(self) -> &'static str {
+        match self {
+            Self::Steady => "steady",
+            Self::Jitter => "jitter",
+            Self::Drops => "drops",
+            Self::Gap => "gap",
+        }
+    }
+
+    /// Builds the scenario's capture timestamps in seconds for `fps` frames
+    /// per second over `CONTENT_SECS` of nominal content: deterministic,
+    /// sorted ascending, near-duplicates (under a quarter period) removed.
+    fn timestamps(self, fps: u32) -> Vec<f64> {
+        let period = 1.0 / f64::from(fps);
+        let frame_count = (CONTENT_SECS * f64::from(fps)) as u64;
+        let mut stamps: Vec<f64> = (0..frame_count)
+            .filter_map(|k| {
+                let nominal = k as f64 * period;
+                match self {
+                    Self::Steady => Some(nominal),
+                    // Deterministic pseudo-jitter within ±40% of the period,
+                    // clamped so the first frame cannot go negative.
+                    Self::Jitter => {
+                        let phase = (k as f64 * 0.7368).fract() - 0.5;
+                        Some((nominal + phase * period * 0.8).max(0.0))
+                    }
+                    // Drop every 4th and 7th frame: the capture stream
+                    // simply never delivers them.
+                    Self::Drops => (k % 4 != 3 && k % 7 != 6).then_some(nominal),
+                    // 1.5s of frames, a 2s static-screen gap, then the rest.
+                    Self::Gap if nominal < 1.5 => Some(nominal),
+                    Self::Gap => Some(nominal + 2.0),
+                }
+            })
+            .collect();
+        stamps.sort_by(|a, b| a.partial_cmp(b).unwrap());
+        stamps.dedup_by(|a, b| (*a - *b).abs() < period * 0.25);
+        stamps
+    }
+}
+
+#[derive(Serialize)]
+struct CaseResult {
+    name: String,
+    pass: bool,
+    detail: String,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq)]
+enum Content {
+    /// Flat color; encodes trivially.
+    Flat,
+    /// Per-frame pseudo-random noise: worst-case encoder load, exercising
+    /// backpressure the way dense real screen content does.
+    Noise,
+    /// A moving bar over a gradient: typical screen-content motion.
+    Motion,
+}
+
+/// Renders one synthetic BGRA frame of `width` x `height` for `content`.
+/// `frame_index` drives the flat shade and the bar position; `rng` is only
+/// consumed by the noise pattern.
+fn make_video_frame(
+    width: u32,
+    height: u32,
+    frame_index: u64,
+    content: Content,
+    rng: &mut Rng,
+) -> ffmpeg::frame::Video {
+    let mut frame = ffmpeg::frame::Video::new(ffmpeg::format::Pixel::BGRA, width, height);
+    let stride = frame.stride(0);
+    let data = frame.data_mut(0);
+    match content {
+        Content::Flat => data.fill(((frame_index * 7) % 200) as u8),
+        Content::Noise => {
+            // Fresh pseudo-random bytes on every frame, so no two frames are
+            // alike and inter prediction gets no free lunch.
+            for chunk in data.chunks_mut(8) {
+                let random = rng.next().to_le_bytes();
+                let take = chunk.len();
+                chunk.copy_from_slice(&random[..take]);
+            }
+        }
+        Content::Motion => {
+            // Horizontal ramp with a bright bar that advances 6px per frame,
+            // wrapping at the frame width.
+            let (cols, rows) = (width as usize, height as usize);
+            let bar = ((frame_index * 6) % u64::from(width)) as usize;
+            for y in 0..rows {
+                let line = &mut data[y * stride..y * stride + cols * 4];
+                for (x, px) in line.chunks_mut(4).enumerate() {
+                    let ramp = ((x * 255) / cols) as u8;
+                    let lit = if x.abs_diff(bar) < 12 { 255 } else { ramp };
+                    px.copy_from_slice(&[lit, lit ^ 0x55, ramp, 255]);
+                }
+            }
+        }
+    }
+    frame
+}
+
+/// splitmix64: tiny, dependency-free, deterministic PRNG. Every randomized
+/// case is fully reproducible from the printed seed.
+struct Rng(u64);
+
+impl Rng {
+    fn next(&mut self) -> u64 {
+        self.0 = self.0.wrapping_add(0x9E37_79B9_7F4A_7C15);
+        let z = (self.0 ^ (self.0 >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
+        let z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
+        z ^ (z >> 31)
+    }
+
+    /// Draws from `lo..=hi`; the span saturates so a full-range request cannot divide by zero.
+    fn range(&mut self, lo: u64, hi: u64) -> u64 {
+        lo + self.next() % (hi - lo).saturating_add(1)
+    }
+
+    fn f64(&mut self) -> f64 {
+        (self.next() >> 11) as f64 / (1u64 << 53) as f64
+    }
+
+    fn pick<T: Copy>(&mut self, items: &[T]) -> T {
+        items[(self.next() % items.len() as u64) as usize]
+    }
+}
+
+#[derive(Clone)]
+struct VideoCase {
+    fps: u32,
+    sent: Vec<f64>,
+    fragmented: bool,
+    width: u32,
+    height: u32,
+    content: Content,
+    rng_seed: u64,
+}
+
+impl VideoCase {
+    fn curated(fps: u32, scenario: VideoScenario, fragmented: bool) -> Self {
+        Self {
+            fps,
+            sent: scenario.timestamps(fps),
+            fragmented,
+            width: 160,
+            height: 120,
+            content: Content::Flat,
+            rng_seed: 1,
+        }
+    }
+}
+
+async fn run_video_case(case: VideoCase) -> Result<String, String> {
+    let temp = tempfile::tempdir().map_err(|e| format!("tempdir: {e}"))?;
+    let out_path = if case.fragmented {
+        temp.path().join("display")
+    } else {
+        temp.path().join("display.mp4")
+    };
+    let fragmented = case.fragmented;
+
+    let info = VideoInfo::from_raw(
+        cap_media_info::RawVideoFormat::Bgra,
+        case.width,
+        case.height,
+        case.fps,
+    );
+    let (tx, rx) = flume::bounded::<FFmpegVideoFrame>(32);
+    let timestamps = Timestamps::now();
+
+    let sent = case.sent.clone();
+    let emit = {
+        let sent = sent.clone();
+        let base = timestamps.instant();
+        let (width, height, content) = (case.width, case.height, case.content);
+        let mut rng = Rng(case.rng_seed);
+        tokio::spawn(async move {
+            for (i, &ts) in sent.iter().enumerate() {
+                tokio::time::sleep_until((base + Duration::from_secs_f64(ts)).into()).await;
+                let frame = FFmpegVideoFrame {
+                    inner: make_video_frame(width, height, i as u64, content, &mut rng),
+                    timestamp: Timestamp::Instant(base + Duration::from_secs_f64(ts)),
+                };
+                if tx.send_async(frame).await.is_err() {
+                    break;
+                }
+            }
+            // Sender drops here, ending the stream.
+        })
+    };
+
+    let builder = OutputPipeline::builder(out_path.clone())
+        .with_video::<ChannelVideoSource<FFmpegVideoFrame>>(ChannelVideoSourceConfig::new(info, rx))
+        .with_timestamps(timestamps);
+
+    let pipeline = if fragmented {
+        builder
+            .build::<SegmentedVideoMuxer>(SegmentedVideoMuxerConfig {
+                segment_duration: Duration::from_secs(2),
+                ..Default::default()
+            })
+            .await
+    } else {
+        builder.build::<Mp4Muxer>(()).await
+    }
+    .map_err(|e| format!("pipeline build: {e}"))?;
+
+    emit.await.map_err(|e| format!("emit join: {e}"))?;
+    // The verification below assumes frames were emitted in real time; when a
+    // saturated runner (or a software encoder drowning in worst-case content)
+    // stalls emission for seconds, pts-vs-wall comparisons are meaningless.
+    // Skip loudly instead of failing on an environment artifact.
+    let emit_lag =
+        timestamps.instant().elapsed().as_secs_f64() - sent.last().copied().unwrap_or(0.0);
+    let finished = {
+        // Allow the tail of the stream to flush through the encoder.
+        tokio::time::sleep(Duration::from_millis(500)).await;
+        pipeline.stop().await.map_err(|e| format!("stop: {e}"))?
+    };
+    if emit_lag > 1.5 {
+        return Ok(format!(
+            "skipped: runner fell {emit_lag:.1}s behind real-time emission"
+        ));
+    }
+
+    // Read back the muxed pts.
+    let playable = if fragmented {
+        concat_fmp4(&out_path, temp.path())?
+    } else {
+        out_path.clone()
+    };
+    let pts = read_video_pts(&playable)?;
+
+    if pts.len() != sent.len() {
+        return Err(format!(
+            "frame count mismatch: sent {} frames, container has {}",
+            sent.len(),
+            pts.len()
+        ));
+    }
+
+    let mut max_abs: f64 = 0.0;
+    let mut max_rel: f64 = 0.0;
+    // At low frame rates the fixed tolerance is only a frame or two of
+    // budget, so scheduler jitter on shared runners trips it; express the
+    // floor in frames as well. The bug class this guards produces errors of
+    // a second or more either way.
+    let rel_tolerance = REL_TOLERANCE_SECS.max(2.5 / f64::from(case.fps));
+    for (i, (&p, &s)) in pts.iter().zip(&sent).enumerate() {
+        max_abs = max_abs.max((p - s).abs());
+        let rel = ((p - pts[0]) - (s - sent[0])).abs();
+        max_rel = max_rel.max(rel);
+        if rel > rel_tolerance {
+            return Err(format!(
+                "frame {i}: relative pts error {rel:.3}s (pts {p:.3}s vs sent {s:.3}s)"
+            ));
+        }
+    }
+    if max_abs > ABS_TOLERANCE_SECS {
+        return Err(format!(
+            "absolute pts error {max_abs:.3}s exceeds tolerance"
+        ));
+    }
+
+    // The span the recorder would persist must match the sent span.
+    if let Some((first, last)) = finished.video_timestamp_span {
+        let span = (last - first).as_secs_f64();
+        let expected = sent.last().unwrap() - sent[0];
+        if (span - expected).abs() > 0.25 {
+            return Err(format!(
+                "video_timestamp_span {span:.3}s does not match sent span {expected:.3}s"
+            ));
+        }
+    } else {
+        return Err("video_timestamp_span missing".to_string());
+    }
+
+    // Gap preservation is the regression that desynced 0.5.4: every gap in
+    // the sent timeline must survive into the container.
+    let max_sent_gap = sent.windows(2).map(|w| w[1] - w[0]).fold(0.0, f64::max);
+    if max_sent_gap > 1.0 {
+        let max_pts_gap = pts.windows(2).map(|w| w[1] - w[0]).fold(0.0, f64::max);
+        if max_pts_gap < max_sent_gap * 0.9 {
+            return Err(format!(
+                "{max_sent_gap:.2}s capture gap collapsed to {max_pts_gap:.3}s in the container"
+            ));
+        }
+    }
+
+    Ok(format!(
+        "{} frames, max abs err {:.0} ms, max rel err {:.0} ms",
+        pts.len(),
+        max_abs * 1000.0,
+        max_rel * 1000.0
+    ))
+}
+
+/// A mid-recording pause (instant mode): emission continues in real time but
+/// the pipeline drops frames while paused. The pause must be EXCISED from the
+/// output timeline — video pts must stay continuous across it (matching how
+/// audio drops paused samples and how the wall clock subtracts pauses), and
+/// the container must contain only the unpaused content. A regression here
+/// previously poisoned the drift anchor with pause-inflated time whenever the
+/// pause began before the ~2s warmup anchor existed.
+async fn run_video_pause_case() -> Result<String, String> {
+    const PRE_PAUSE_SECS: f64 = 1.0;
+    const PAUSE_SECS: f64 = 2.5;
+    const POST_PAUSE_SECS: f64 = 2.0;
+    const FPS: u32 = 30;
+
+    let temp = tempfile::tempdir().map_err(|e| format!("tempdir: {e}"))?;
+    let out_path = temp.path().join("display.mp4");
+
+    let info = VideoInfo::from_raw(cap_media_info::RawVideoFormat::Bgra, 160, 120, FPS);
+    let (tx, rx) = flume::bounded::<FFmpegVideoFrame>(32);
+    let timestamps = Timestamps::now();
+
+    let total_secs = PRE_PAUSE_SECS + PAUSE_SECS + POST_PAUSE_SECS;
+    let emit = {
+        let base = timestamps.instant();
+        let mut rng = Rng(7);
+        tokio::spawn(async move {
+            let period = 1.0 / f64::from(FPS);
+            let count = (total_secs * f64::from(FPS)) as u64;
+            for k in 0..count {
+                let ts = k as f64 * period;
+                tokio::time::sleep_until((base + Duration::from_secs_f64(ts)).into()).await;
+                let frame = FFmpegVideoFrame {
+                    inner: make_video_frame(160, 120, k, Content::Flat, &mut rng),
+                    timestamp: Timestamp::Instant(base + Duration::from_secs_f64(ts)),
+                };
+                if tx.send_async(frame).await.is_err() {
+                    break;
+                }
+            }
+        })
+    };
+
+    let pipeline = OutputPipeline::builder(out_path.clone())
+        .with_video::<ChannelVideoSource<FFmpegVideoFrame>>(ChannelVideoSourceConfig::new(info, rx))
+        .with_timestamps(timestamps)
+        .build::<Mp4Muxer>(())
+        .await
+        .map_err(|e| format!("pipeline build: {e}"))?;
+
+    tokio::time::sleep(Duration::from_secs_f64(PRE_PAUSE_SECS)).await;
+    let pause_started = std::time::Instant::now();
+    pipeline.pause();
+    tokio::time::sleep(Duration::from_secs_f64(PAUSE_SECS)).await;
+    pipeline.resume();
+    let actual_pause = pause_started.elapsed().as_secs_f64();
+
+    emit.await.map_err(|e| format!("emit join: {e}"))?;
+    let emit_lag = timestamps.instant().elapsed().as_secs_f64() - total_secs;
+    tokio::time::sleep(Duration::from_millis(500)).await;
+    pipeline.stop().await.map_err(|e| format!("stop: {e}"))?;
+    // The assertions below compare the muxed span against the intended
+    // pause/content windows; a runner too stalled to hit those windows
+    // invalidates the comparison, not the pipeline.
+    if emit_lag > 1.5 || (actual_pause - PAUSE_SECS).abs() > 0.5 {
+        return Ok(format!(
+            "skipped: runner too slow (emission lag {emit_lag:.1}s, pause window {actual_pause:.2}s)"
+        ));
+    }
+
+    let pts = read_video_pts(&out_path)?;
+    if pts.len() < 8 {
+        return Err(format!("only {} frames muxed", pts.len()));
+    }
+
+    // The pause is excised: the muxed span must cover roughly the unpaused
+    // content, not the wall-clock run.
+    let span = pts.last().unwrap() - pts[0];
+    let expected = PRE_PAUSE_SECS + POST_PAUSE_SECS;
+    if (span - expected).abs() > 0.6 {
+        return Err(format!(
+            "muxed span {span:.2}s should be about the unpaused content {expected:.2}s \
+             (pause leaked into the timeline)"
+        ));
+    }
+
+    // And no single pts step may contain the pause.
+    let max_gap = pts.windows(2).map(|w| w[1] - w[0]).fold(0.0, f64::max);
+    if max_gap > PAUSE_SECS * 0.8 {
+        return Err(format!(
+            "pause survived as a {max_gap:.2}s pts gap in the container"
+        ));
+    }
+
+    // Post-resume continuity is the discriminating check: without the pause
+    // excision the anomaly tracker accepts the pause as a confirmed jump and
+    // the drift tracker's wall cap re-pins the post-resume segment ~one
+    // tolerance late (measured +0.13s vs +0.03s with the fix). The median
+    // over the whole segment is immune to per-frame scheduler jitter.
+    let period = 1.0 / f64::from(FPS);
+    let split = pts
+        .windows(2)
+        .position(|w| w[1] - w[0] == max_gap)
+        .unwrap_or(0);
+    let pre_last = pts[split];
+    let mut post_offsets: Vec<f64> = pts[split + 1..]
+        .iter()
+        .enumerate()
+        .map(|(k, &p)| p - (pre_last + (k as f64 + 1.0) * period))
+        .collect();
+    post_offsets.sort_by(|a, b| a.partial_cmp(b).unwrap());
+    let continuity = post_offsets
+        .get(post_offsets.len() / 2)
+        .copied()
+        .unwrap_or(0.0);
+    if continuity.abs() > 0.08 {
+        return Err(format!(
+            "post-resume frames resume {continuity:+.3}s off the pre-pause timeline \
+             (pause bled into the drift anchor)"
+        ));
+    }
+
+    Ok(format!(
+        "{} frames, span {span:.2}s (expected ~{expected:.2}s), max pts gap {max_gap:.2}s, \
+         post-resume continuity {continuity:+.3}s",
+        pts.len()
+    ))
+}
+
+#[derive(Clone, Copy)]
+struct AudioCase {
+    rate: u32,
+    channels: u16,
+    /// Device buffer size in milliseconds; real hardware spans ~3-90ms.
+    chunk_ms: f64,
+    /// Source clock drift factor: samples arrive slightly faster or slower
+    /// than their nominal rate, as real device crystals do.
+    drift: f64,
+}
+
+impl AudioCase {
+    fn curated(rate: u32, channels: u16) -> Self {
+        Self {
+            rate,
+            channels,
+            chunk_ms: 20.0,
+            drift: 1.0,
+        }
+    }
+}
+
+async fn run_audio_case(case: AudioCase) -> Result<String, String> {
+    let AudioCase {
+        rate,
+        channels,
+        chunk_ms,
+        drift,
+    } = case;
+    let temp = tempfile::tempdir().map_err(|e| format!("tempdir: {e}"))?;
+    let out_path = temp.path().join("audio.ogg");
+
+    let info = AudioInfo::new(Sample::F32(Type::Packed), rate, channels)
+        .map_err(|e| format!("audio info: {e:?}"))?;
+    let (tx, rx) = futures::channel::mpsc::channel::<AudioFrame>(32);
+    let timestamps = Timestamps::now();
+
+    let chunk_frames = ((f64::from(rate) * chunk_ms / 1000.0) as usize).max(16);
+    let chunk_secs = chunk_frames as f64 / f64::from(rate);
+    let total_chunks = (CONTENT_SECS / chunk_secs).ceil() as usize;
+
+    let emit = {
+        let base = timestamps.instant();
+        let mut tx = tx;
+        let info = info;
+        tokio::spawn(async move {
+            use futures::SinkExt;
+            for k in 0..total_chunks {
+                let real_t = k as f64 * chunk_secs;
+                let ts = real_t * drift;
+                tokio::time::sleep_until((base + Duration::from_secs_f64(real_t)).into()).await;
+                let mut frame = ffmpeg::frame::Audio::new(
+                    ffmpeg::format::Sample::F32(ffmpeg::format::sample::Type::Packed),
+                    chunk_frames,
+                    info.channel_layout(),
+                );
+                frame.set_rate(rate);
+                let data = frame.data_mut(0);
+                for (i, sample) in bytemuck_cast_f32(data).iter_mut().enumerate() {
+                    let n = (k * chunk_frames + i / channels as usize) as f32;
+                    *sample = (n * 440.0 * 2.0 * std::f32::consts::PI / rate as f32).sin() * 0.4;
+                }
+                let frame = AudioFrame::new(
+                    frame,
+                    Timestamp::Instant(base + Duration::from_secs_f64(ts)),
+                );
+                if tx.send(frame).await.is_err() {
+                    break;
+                }
+            }
+        })
+    };
+
+    let pipeline = OutputPipeline::builder(out_path.clone())
+        .with_audio_source::<ChannelAudioSource>(ChannelAudioSourceConfig::new(info, rx))
+        .with_timestamps(timestamps)
+        .build::<OggMuxer>(())
+        .await
+        .map_err(|e| format!("pipeline build: {e}"))?;
+
+    emit.await.map_err(|e| format!("emit join: {e}"))?;
+    let emit_lag = timestamps.instant().elapsed().as_secs_f64() - CONTENT_SECS;
+    tokio::time::sleep(Duration::from_millis(300)).await;
+    pipeline.stop().await.map_err(|e| format!("stop: {e}"))?;
+    if emit_lag > 1.5 {
+        return Ok(format!(
+            "skipped: runner fell {emit_lag:.1}s behind real-time emission"
+        ));
+    }
+
+    let (duration, decoded_channels, energy) = read_audio_stats(&out_path)?;
+    if (duration - CONTENT_SECS).abs() > AUDIO_DURATION_TOLERANCE_SECS {
+        return Err(format!(
+            "decoded duration {duration:.3}s vs expected {CONTENT_SECS:.3}s \
+             (rate handling error: content plays at the wrong speed)"
+        ));
+    }
+    if energy < 0.01 {
+        return Err(format!(
+            "decoded audio is nearly silent (rms {energy:.4}); samples were lost or zeroed"
+        ));
+    }
+
+    Ok(format!(
+        "duration {duration:.3}s, {decoded_channels} ch decoded, rms {energy:.3}"
+    ))
+}
+
+/// Reinterprets a byte buffer as a mutable slice of `f32` samples.
+///
+/// Trailing bytes that do not fill a whole `f32` are left out of the returned
+/// slice.
+///
+/// # Panics
+///
+/// Panics if `data` does not start on an `f32`-aligned address; building the
+/// slice from a misaligned pointer would be undefined behaviour.
+fn bytemuck_cast_f32(data: &mut [u8]) -> &mut [f32] {
+    let samples = data.as_mut_ptr().cast::<f32>();
+    assert!(
+        samples.is_aligned(),
+        "audio plane is not aligned for f32 access"
+    );
+    let len = data.len() / std::mem::size_of::<f32>();
+    // SAFETY: `samples` is non-null and aligned (checked above), the `len`
+    // floats lie entirely inside `data`, every bit pattern is a valid `f32`,
+    // and the returned slice inherits the exclusive borrow of `data`.
+    unsafe { std::slice::from_raw_parts_mut(samples, len) }
+}
+
+/// Concatenates a fragmented-mp4 segment directory (init.mp4 + *.m4s) into a
+/// single playable file.
+///
+/// Segments are appended after `init.mp4` in lexicographic path order and the
+/// result is written to `concat.mp4` inside `scratch`; that path is returned.
+///
+/// # Errors
+///
+/// Returns a message when `init.mp4` or any segment cannot be read, when the
+/// directory or one of its entries cannot be listed, when no `.m4s` segment
+/// exists, or when the output file cannot be written.
+fn concat_fmp4(dir: &Path, scratch: &Path) -> Result<PathBuf, String> {
+    let init = dir.join("init.mp4");
+    let mut bytes = std::fs::read(&init).map_err(|e| format!("read init.mp4: {e}"))?;
+
+    let mut segments = Vec::new();
+    for entry in std::fs::read_dir(dir).map_err(|e| format!("read segment dir: {e}"))? {
+        // An unreadable entry could be a media segment; skipping it would
+        // leave a silent hole in the concatenated timeline.
+        let path = entry
+            .map_err(|e| format!("read segment dir entry: {e}"))?
+            .path();
+        if path.extension().is_some_and(|ext| ext == "m4s") {
+            segments.push(path);
+        }
+    }
+    if segments.is_empty() {
+        return Err("no media segments produced".to_string());
+    }
+    // NOTE(review): lexicographic order only matches playback order when the
+    // muxer zero-pads segment numbers — confirm against the segment naming.
+    segments.sort();
+
+    for segment in &segments {
+        let data = std::fs::read(segment)
+            .map_err(|e| format!("read segment {}: {e}", segment.display()))?;
+        bytes.extend(data);
+    }
+
+    let out = scratch.join("concat.mp4");
+    std::fs::write(&out, bytes).map_err(|e| format!("write concat: {e}"))?;
+    Ok(out)
+}
+
+/// Reads the presentation timestamps, in seconds, of every packet in the best
+/// video stream of `path`, sorted ascending.
+///
+/// Packets that carry no pts are skipped.
+///
+/// # Errors
+///
+/// Returns a message when the file cannot be opened, when it has no video
+/// stream, or when the stream's time base has a zero denominator.
+fn read_video_pts(path: &Path) -> Result<Vec<f64>, String> {
+    let mut ictx = ffmpeg::format::input(&path).map_err(|e| format!("open {e}"))?;
+    let stream = ictx
+        .streams()
+        .best(ffmpeg::media::Type::Video)
+        .ok_or("no video stream")?;
+    let index = stream.index();
+    let tb = stream.time_base();
+    // A zero denominator would turn every timestamp into inf/NaN and make the
+    // results meaningless; report it instead of propagating garbage.
+    if tb.denominator() == 0 {
+        return Err(format!(
+            "video stream has an invalid time base {}/0",
+            tb.numerator()
+        ));
+    }
+    let seconds_per_tick = f64::from(tb.numerator()) / f64::from(tb.denominator());
+    let mut pts: Vec<f64> = ictx
+        .packets()
+        .filter_map(|(s, p)| (s.index() == index).then_some(p.pts()).flatten())
+        .map(|ticks| ticks as f64 * seconds_per_tick)
+        .collect();
+    // Packets arrive in decode order; sort to get presentation order.
+    // `total_cmp` is a total order, so sorting can never panic.
+    pts.sort_by(f64::total_cmp);
+    Ok(pts)
+}
+
+/// Decodes the best audio stream of `path` and summarises it.
+///
+/// Returns `(duration_secs, channels, rms)` where:
+/// - `duration_secs` is the total decoded sample count divided by the sample
+///   rate of the last decoded frame,
+/// - `channels` is the channel count of the last decoded frame,
+/// - `rms` is the root-mean-square of plane 0, measured only over frames the
+///   decoder produced as planar `f32`; it is `0.0` when no such frame exists.
+///
+/// Frames drained after end-of-stream are folded in exactly like frames
+/// produced while packets are being fed.
+///
+/// # Errors
+///
+/// Returns a message when the file cannot be opened, has no audio stream, the
+/// decoder cannot be created, or no frame was decoded at all.
+fn read_audio_stats(path: &Path) -> Result<(f64, u16, f64), String> {
+    let mut ictx = ffmpeg::format::input(&path).map_err(|e| format!("open {e}"))?;
+    let stream = ictx
+        .streams()
+        .best(ffmpeg::media::Type::Audio)
+        .ok_or("no audio stream")?;
+    let index = stream.index();
+    let ctx = ffmpeg::codec::context::Context::from_parameters(stream.parameters())
+        .map_err(|e| format!("params: {e}"))?;
+    let mut decoder = ctx.decoder().audio().map_err(|e| format!("decoder: {e}"))?;
+
+    let mut samples = 0u64;
+    let mut rate = 0u32;
+    let mut channels = 0u16;
+    let mut sum_sq = 0.0f64;
+    let mut counted = 0u64;
+    // Folds one decoded frame into the running totals. Shared by the packet
+    // loop and the end-of-stream drain so both treat frames identically.
+    let mut tally = |decoded: &ffmpeg::frame::Audio| {
+        samples += decoded.samples() as u64;
+        rate = decoded.rate();
+        channels = decoded.channels();
+        // NOTE(review): other sample formats contribute to the duration but
+        // not to the energy estimate — confirm the decoder in use emits
+        // planar f32.
+        if let ffmpeg::format::Sample::F32(ffmpeg::format::sample::Type::Planar) =
+            decoded.format()
+        {
+            for &v in &decoded.plane::<f32>(0)[..decoded.samples()] {
+                sum_sq += f64::from(v) * f64::from(v);
+                counted += 1;
+            }
+        }
+    };
+
+    let mut frame = ffmpeg::frame::Audio::empty();
+    for (s, packet) in ictx.packets() {
+        if s.index() != index {
+            continue;
+        }
+        // Best effort: a packet the decoder rejects is skipped, not fatal.
+        if decoder.send_packet(&packet).is_ok() {
+            while decoder.receive_frame(&mut frame).is_ok() {
+                tally(&frame);
+            }
+        }
+    }
+    // Flush: the decoder may still hold buffered frames.
+    let _ = decoder.send_eof();
+    while decoder.receive_frame(&mut frame).is_ok() {
+        tally(&frame);
+    }
+
+    if rate == 0 {
+        return Err("no audio decoded".to_string());
+    }
+    let rms = if counted > 0 {
+        (sum_sq / counted as f64).sqrt()
+    } else {
+        0.0
+    };
+    Ok((samples as f64 / f64::from(rate), channels, rms))
+}
+
+/// Logs one case outcome to stderr and appends it to `results`.
+///
+/// The stored `detail` is the `Ok` text for a passing case and the `Err` text
+/// for a failing one; `pass` tells the two apart.
+fn record(results: &mut Vec<CaseResult>, name: String, outcome: Result<String, String>) {
+    let (pass, detail) = match outcome {
+        Ok(summary) => (true, summary),
+        Err(reason) => (false, reason),
+    };
+    if pass {
+        eprintln!("{name}: ok ({detail})");
+    } else {
+        eprintln!("{name}: FAIL ({detail})");
+    }
+    results.push(CaseResult { name, pass, detail });
+}
+
+/// Builds a randomized video case from `rng`: frame rate, resolution, content
+/// kind, fragmented-or-not, timestamp jitter, random frame drops, and 0-2
+/// gaps at random positions (including inside the first two seconds, where
+/// the drift anchor does not exist yet).
+///
+/// If fewer than 8 timestamps survive, the case falls back to the full,
+/// unperturbed steady timeline.
+fn random_video_case(rng: &mut Rng) -> VideoCase {
+    // The order of the draws below is significant: a given seed must always
+    // reproduce the same case.
+    let fps = rng.range(10, 120) as u32;
+    let (width, height) = rng.pick(&[(160u32, 120u32), (320, 240), (640, 360)]);
+    let content = rng.pick(&[Content::Flat, Content::Noise, Content::Motion]);
+    let fragmented = rng.f64() < 0.75;
+
+    let frame_period = 1.0 / f64::from(fps);
+    let jitter_scale = rng.f64() * 0.45;
+    let drop_chance = rng.f64() * 0.25;
+
+    // (start, length) of each gap, ordered by start time.
+    let mut gaps: Vec<(f64, f64)> = Vec::new();
+    for _ in 0..rng.range(0, 2) {
+        let start = 0.4 + rng.f64() * (CONTENT_SECS - 1.0);
+        let length = 1.2 + rng.f64() * 2.0;
+        gaps.push((start, length));
+    }
+    gaps.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
+
+    let frame_count = (CONTENT_SECS * f64::from(fps)) as u64;
+    let mut sent: Vec<f64> = (0..frame_count)
+        .filter_map(|k| {
+            // Drop lottery first: a dropped frame consumes no jitter draw.
+            if rng.f64() < drop_chance {
+                return None;
+            }
+            let nominal = k as f64 * frame_period;
+            let jittered =
+                (nominal + (rng.f64() - 0.5) * frame_period * jitter_scale).max(0.0);
+            // Each gap the timestamp has reached pushes it later; gaps are
+            // tested in start order against the already-shifted value.
+            let shifted = gaps.iter().fold(jittered, |ts, &(at, len)| {
+                if ts >= at { ts + len } else { ts }
+            });
+            Some(shifted)
+        })
+        .collect();
+    sent.sort_by(|a, b| a.partial_cmp(b).unwrap());
+    // Collapse timestamps closer together than a quarter of a frame period.
+    sent.dedup_by(|a, b| (*a - *b).abs() < frame_period * 0.25);
+    // Guarantee at least a handful of frames survive the drop lottery.
+    if sent.len() < 8 {
+        sent = (0..frame_count).map(|k| k as f64 * frame_period).collect();
+    }
+
+    let rng_seed = rng.next();
+    VideoCase {
+        fps,
+        sent,
+        fragmented,
+        width,
+        height,
+        content,
+        rng_seed,
+    }
+}
+
+/// Builds a randomized audio case from `rng`: a sample rate picked from a
+/// fixed list of rates real devices negotiate, a channel count drawn with
+/// `rng.range(1, 8)`, a chunk length of 3 ms up to 88 ms, and a small
+/// crystal drift factor within +-0.1% of 1.0.
+fn random_audio_case(rng: &mut Rng) -> AudioCase {
+    const RATES: [u32; 13] = [
+        8_000, 11_025, 12_000, 16_000, 22_050, 24_000, 32_000, 44_100, 48_000, 88_200, 96_000,
+        176_400, 192_000,
+    ];
+
+    // Struct fields are evaluated top to bottom, which fixes the draw order:
+    // rate, channels, chunk length, drift.
+    AudioCase {
+        rate: rng.pick(&RATES),
+        channels: rng.range(1, 8) as u16,
+        chunk_ms: 3.0 + rng.f64() * 85.0,
+        drift: 1.0 + (rng.f64() - 0.5) * 0.002,
+    }
+}
+
+/// Runs the whole synthetic device matrix and fails if any case fails.
+///
+/// The matrix has three parts: curated video cases (plus one pause/resume
+/// case), curated audio cases, and randomized combined audio+video cases.
+///
+/// Environment variables:
+/// - `CAP_SYNC_MATRIX_SEED`: seed for the randomized cases; when unset or
+///   unparsable, the current time in nanoseconds is used. The seed in use is
+///   always printed so a run can be reproduced.
+/// - `CAP_SYNC_MATRIX_RANDOM_CASES`: number of randomized cases (default 6).
+/// - `CAP_SYNC_MATRIX_REPORT`: when set, a JSON report with the seed and every
+///   case result is written to this path before the final assertion.
+#[tokio::test(flavor = "multi_thread")]
+async fn synthetic_device_matrix_preserves_sync() {
+    let mut results: Vec<CaseResult> = Vec::new();
+
+    // Randomized cases are reproducible: rerun with CAP_SYNC_MATRIX_SEED=<seed>.
+    let seed: u64 = std::env::var("CAP_SYNC_MATRIX_SEED")
+        .ok()
+        .and_then(|s| s.parse().ok())
+        .unwrap_or_else(|| {
+            std::time::SystemTime::now()
+                .duration_since(std::time::UNIX_EPOCH)
+                .map(|d| d.as_nanos() as u64)
+                .unwrap_or(0x5EED)
+        });
+    let random_cases: usize = std::env::var("CAP_SYNC_MATRIX_RANDOM_CASES")
+        .ok()
+        .and_then(|s| s.parse().ok())
+        .unwrap_or(6);
+    eprintln!("randomized cases: {random_cases}, CAP_SYNC_MATRIX_SEED={seed}");
+
+    // (fps, scenario, fragmented)
+    let video_cases: Vec<(u32, VideoScenario, bool)> = vec![
+        (15, VideoScenario::Steady, true),
+        (30, VideoScenario::Steady, true),
+        (60, VideoScenario::Steady, true),
+        (120, VideoScenario::Steady, true),
+        (30, VideoScenario::Jitter, true),
+        (60, VideoScenario::Jitter, true),
+        (30, VideoScenario::Drops, true),
+        (60, VideoScenario::Drops, true),
+        (30, VideoScenario::Gap, true),
+        (60, VideoScenario::Gap, true),
+        (30, VideoScenario::Steady, false),
+        (30, VideoScenario::Gap, false),
+    ];
+
+    for (fps, scenario, fragmented) in video_cases {
+        let name = format!(
+            "video/{}fps/{}/{}",
+            fps,
+            scenario.name(),
+            if fragmented { "fragmented" } else { "mp4" }
+        );
+        let outcome = run_video_case(VideoCase::curated(fps, scenario, fragmented)).await;
+        record(&mut results, name, outcome);
+    }
+
+    record(
+        &mut results,
+        "video/30fps/pause-resume/mp4".to_string(),
+        run_video_pause_case().await,
+    );
+
+    // (sample rate, channels)
+    let audio_cases: Vec<(u32, u16)> = vec![
+        (8_000, 2),
+        (16_000, 2),
+        (22_050, 2),
+        (44_100, 2),
+        (48_000, 2),
+        (96_000, 2),
+        (48_000, 1),
+        (44_100, 1),
+        (48_000, 6),
+    ];
+
+    for (rate, channels) in audio_cases {
+        let name = format!("audio/{rate}hz/{channels}ch");
+        let outcome = run_audio_case(AudioCase::curated(rate, channels)).await;
+        record(&mut results, name, outcome);
+    }
+
+    // Non-predetermined coverage: random device shapes and delivery
+    // pathologies, combined audio+video like a real studio recording.
+    let mut rng = Rng(seed);
+    for i in 0..random_cases {
+        let video = random_video_case(&mut rng);
+        let audio = random_audio_case(&mut rng);
+        let name = format!(
+            "random/{i}/video-{}fps-{}x{}-{:?}-{}ts/audio-{}hz-{}ch-{:.0}ms-drift{:+.2}%",
+            video.fps,
+            video.width,
+            video.height,
+            video.content,
+            video.sent.len(),
+            audio.rate,
+            audio.channels,
+            audio.chunk_ms,
+            (audio.drift - 1.0) * 100.0,
+        );
+        // Run both legs concurrently, as a real recording does.
+        let (video_outcome, audio_outcome) =
+            tokio::join!(run_video_case(video), run_audio_case(audio));
+        let outcome = match (video_outcome, audio_outcome) {
+            (Ok(v), Ok(a)) => Ok(format!("video: {v}; audio: {a}")),
+            // Report both legs when both fail so neither error is lost.
+            (Err(v), Err(a)) => Err(format!("video leg: {v}; audio leg: {a}")),
+            (Err(e), _) => Err(format!("video leg: {e}")),
+            (_, Err(e)) => Err(format!("audio leg: {e}")),
+        };
+        record(&mut results, name, outcome);
+    }
+
+    // Written before the assertion so a failing run still leaves a report.
+    if let Ok(report_path) = std::env::var("CAP_SYNC_MATRIX_REPORT") {
+        #[derive(Serialize)]
+        struct Report<'a> {
+            seed: u64,
+            cases: &'a [CaseResult],
+        }
+        let json = serde_json::to_string_pretty(&Report {
+            seed,
+            cases: &results,
+        })
+        .expect("serialize report");
+        std::fs::write(&report_path, json).expect("write report");
+        eprintln!("report written to {report_path}");
+    }
+
+    let failures: Vec<&CaseResult> = results.iter().filter(|r| !r.pass).collect();
+    assert!(
+        failures.is_empty(),
+        "{} of {} matrix cases failed:\n{}",
+        failures.len(),
+        results.len(),
+        failures
+            .iter()
+            .map(|r| format!("  {} — {}", r.name, r.detail))
+            .collect::<Vec<_>>()
+            .join("\n")
+    );
+}
diff --git a/crates/rendering/src/decoder/avassetreader.rs b/crates/rendering/src/decoder/avassetreader.rs
index 26fd3f08c1..c450b31e7c 100644
--- a/crates/rendering/src/decoder/avassetreader.rs
+++ b/crates/rendering/src/decoder/avassetreader.rs
@@ -17,7 +17,10 @@ use crate::{DecodedFrame, PixelFormat};
 
 use super::frame_converter::{copy_bgra_to_rgba, copy_rgba_plane};
 use super::multi_position::{DecoderPoolManager, MultiPositionDecoderConfig, ScrubDetector};
-use super::{DecoderInitResult, DecoderType, FRAME_CACHE_SIZE, VideoDecoderMessage, pts_to_frame};
+use super::{
+    DecoderInitResult, DecoderType, FRAME_CACHE_SIZE, VideoDecoderMessage, pts_to_frame,
+    record_pts_hole,
+};
 
 const MAX_RELAXED_FALLBACK_DISTANCE: u32 = 8;
 const SCRUB_REUSE_THRESHOLD_SECS: f32 = 0.5;
@@ -31,7 +34,7 @@ struct FrameData {
 
 #[derive(Clone)]
 struct ProcessedFrame {
-    _number: u32,
+    number: u32,
     width: u32,
     height: u32,
     format: PixelFormat,
@@ -218,7 +221,7 @@ impl CachedFrame {
                 let mut img = image_buf;
                 let (data, fmt, y_str, uv_str) = processor.extract_raw(&mut img);
                 Self(ProcessedFrame {
-                    _number: number,
+                    number,
                     width,
                     height,
                     format: fmt,
@@ -232,7 +235,7 @@ impl CachedFrame {
             _ => {
                 let black_frame = vec![0u8; (width * height * 4) as usize];
                 Self(ProcessedFrame {
-                    _number: number,
+                    number,
                     width,
                     height,
                     format: PixelFormat::Rgba,
@@ -341,6 +344,10 @@ struct DecoderInstance {
     path: PathBuf,
     tokio_handle: TokioHandle,
     keyframe_index: Option<Arc<cap_video_decode::avassetreader::KeyframeIndex>>,
+    /// Previous frame number this instance vended, for pts-hole discovery.
+    /// Lives on the instance because the reader vends in pts order between
+    /// resets, but request batches may be served by different pool decoders.
+    prev_vended: Option<u32>,
 }
 
 impl DecoderInstance {
@@ -363,6 +370,7 @@ impl DecoderInstance {
             path,
             tokio_handle,
             keyframe_index,
+            prev_vended: None,
         })
     }
 
@@ -372,6 +380,7 @@ impl DecoderInstance {
                 self.is_done = false;
                 self.frames_iter_valid = true;
                 self.health.reset_counters();
+                self.prev_vended = None;
             }
             Err(e) => {
                 tracing::error!(
@@ -403,6 +412,7 @@ impl DecoderInstance {
         self.is_done = false;
         self.frames_iter_valid = true;
         self.health = DecoderHealth::new();
+        self.prev_vended = None;
         Ok(())
     }
 
@@ -571,6 +581,16 @@ impl AVAssetReaderDecoder {
         let mut last_active_frame = None::<u32>;
         let last_sent_frame = Rc::new(RefCell::new(None::<ProcessedFrame>));
         let first_ever_frame = Rc::new(RefCell::new(None::<ProcessedFrame>));
+        // pts holes (start frame -> first frame after the hole) discovered
+        // from decode-order jumps. These are facts about the file — decoders
+        // vend samples in pts order, so a jump between consecutive vends can
+        // only mean no samples exist in between — and therefore survive
+        // resets and cache eviction.
+        let mut pts_holes = BTreeMap::<u32, u32>::new();
+        // Content of the most recently served VFR hold, kept across request
+        // batches so a hole keeps rendering its true frame even after the
+        // pre-hole frame leaves the cache.
+        let mut gap_hold: Option<ProcessedFrame> = None;
 
         let processor = ImageBufProcessor::new();
 
@@ -639,6 +659,40 @@ impl AVAssetReaderDecoder {
             }
             pending_requests = unfulfilled;
 
+            // Requests inside a KNOWN pts hole are answered with the hole's
+            // start frame — the true VFR hold (the frame simply stayed on
+            // screen) — without touching the decoder. This keeps the
+            // post-hole frames cached and the reader parked, however long the
+            // hole runs; decoding ahead here would evict the very frames the
+            // requests are marching towards. Only recorded holes qualify: a
+            // bare "some cached frame lies beyond the request" test would
+            // also match disjoint cache islands left by seeks, and serving
+            // stale content there would freeze playback on old frames.
+            let mut still_unfulfilled = Vec::with_capacity(pending_requests.len());
+            for req in pending_requests.drain(..) {
+                let hole_start = pts_holes
+                    .range(..=req.frame)
+                    .next_back()
+                    .filter(|&(_, &end)| req.frame < end)
+                    .map(|(&start, _)| start);
+                let Some(hole_start) = hole_start else {
+                    still_unfulfilled.push(req);
+                    continue;
+                };
+                let data = cache
+                    .get(&hole_start)
+                    .map(|c| c.data().clone())
+                    .or_else(|| gap_hold.clone().filter(|h| h.number == hole_start));
+                if let Some(data) = data {
+                    gap_hold = Some(data.clone());
+                    *last_sent_frame.borrow_mut() = Some(data.clone());
+                    let _ = req.sender.send(data.to_decoded_frame());
+                } else {
+                    still_unfulfilled.push(req);
+                }
+            }
+            pending_requests = still_unfulfilled;
+
             if pending_requests.is_empty() {
                 continue;
             }
@@ -676,6 +730,10 @@ impl AVAssetReaderDecoder {
             let mut exit = false;
             let mut frames_iterated = 0u32;
             let mut last_decoded_position: Option<f32> = None;
+            // Newest vended frame below the fallback floor: after a seek the
+            // reader re-vends from the keyframe at-or-before the request, and
+            // that frame is the true VFR hold for requests inside a pts hole.
+            let mut hold_candidate: Option<(u32, R<cv::ImageBuf>)> = None;
 
             {
                 let decoder = &mut this.decoders[decoder_idx];
@@ -691,6 +749,10 @@ impl AVAssetReaderDecoder {
                                 error = %e,
                                 "Failed to read frame, skipping"
                             );
+                            // A skipped frame breaks the vend continuity that
+                            // hole discovery relies on; a jump across it is
+                            // not evidence of a hole.
+                            decoder.prev_vended = None;
                             continue;
                         }
                     };
@@ -699,11 +761,24 @@ impl AVAssetReaderDecoder {
                     let current_frame =
                         pts_to_frame(frame.pts().value, Rational::new(1, frame.pts().scale), fps);
 
+                    if let Some(prev) = decoder.prev_vended
+                        && current_frame > prev + 1
+                    {
+                        record_pts_hole(&mut pts_holes, prev, current_frame);
+                    }
+                    decoder.prev_vended = Some(current_frame);
+
                     let position_secs = current_frame as f32 / fps as f32;
                     last_decoded_position = Some(position_secs);
                     decoder.is_done = false;
 
                     if current_frame < minimum_fallback_frame {
+                        // Keep a handle to it instead of discarding it: if the
+                        // requests land inside a pts hole this is the only
+                        // at-or-before content the reader will ever vend.
+                        if let Some(buf) = frame.image_buf() {
+                            hold_candidate = Some((current_frame, buf.retained()));
+                        }
                         continue;
                     }
 
@@ -756,17 +831,55 @@ impl AVAssetReaderDecoder {
                                     *last_sent_frame.borrow_mut() = Some(data.clone());
                                     let _ = req.sender.send(data.to_decoded_frame());
                                 } else {
-                                    let nearest = cache
+                                    // Always answer. Prefer the newest frame at-or-before
+                                    // the request — from the cache, the hold candidate the
+                                    // seek re-vended, or the persistent gap hold — as the
+                                    // true VFR hold content (a pts gap means the frame
+                                    // stayed on screen). A later frame is the last resort;
+                                    // leaving the request unanswered would wedge the render
+                                    // loop.
+                                    let cached_before = cache
                                         .range(..=req.frame)
                                         .next_back()
-                                        .or_else(|| cache.range(req.frame..).next());
-
-                                    if let Some((&frame_num, cached)) = nearest {
-                                        let distance = req.frame.abs_diff(frame_num);
-                                        if distance <= req.max_fallback_distance {
-                                            let _ =
-                                                req.sender.send(cached.data().to_decoded_frame());
-                                        }
+                                        .map(|(_, c)| c.data().clone());
+                                    let hold_before = gap_hold.clone().filter(|h| {
+                                        pts_holes.get(&h.number).is_some_and(|&end| {
+                                            h.number <= req.frame && req.frame < end
+                                        })
+                                    });
+                                    let candidate_before = hold_candidate
+                                        .as_ref()
+                                        .filter(|(n, _)| *n <= req.frame)
+                                        .map(|(n, buf)| {
+                                            CachedFrame::new(&processor, buf.retained(), *n)
+                                                .data()
+                                                .clone()
+                                        });
+                                    let best_before =
+                                        [cached_before, hold_before, candidate_before]
+                                            .into_iter()
+                                            .flatten()
+                                            .max_by_key(|d| d.number);
+
+                                    if let Some(data) = best_before {
+                                        gap_hold = Some(data.clone());
+                                        *last_sent_frame.borrow_mut() = Some(data.clone());
+                                        let _ = req.sender.send(data.to_decoded_frame());
+                                    } else if let Some((&frame_num, cached)) =
+                                        cache.range(req.frame..).next()
+                                    {
+                                        tracing::debug!(
+                                            req_frame = req.frame,
+                                            nearest_frame = frame_num,
+                                            "serving forward frame across pts gap"
+                                        );
+                                        let _ = req.sender.send(cached.data().to_decoded_frame());
+                                    } else {
+                                        tracing::warn!(
+                                            req_frame = req.frame,
+                                            current_frame,
+                                            "dropping overshot request: cache empty"
+                                        );
                                     }
                                 }
                             } else {
@@ -895,26 +1008,46 @@ impl AVAssetReaderDecoder {
                         req.max_fallback_distance
                     };
 
-                    let nearest = cache
+                    // Always answer with the newest frame at-or-before the request —
+                    // from the cache, the hold candidate a seek re-vended, or the
+                    // persistent gap hold — as the true VFR hold content. A later frame
+                    // is the best remaining answer; dropping the request instead
+                    // starves the render loop and wedges gap playback/export.
+                    let cached_before = cache
                         .range(..=req.frame)
                         .next_back()
-                        .or_else(|| cache.range(req.frame..).next());
-
-                    if let Some((&frame_num, cached)) = nearest {
-                        let distance = req.frame.abs_diff(frame_num);
-                        if distance <= fallback_distance {
-                            let _ = req.sender.send(cached.data().to_decoded_frame());
-                        } else if allow_relaxed_fallback
-                            && let Some(ref last) = *last_sent_frame.borrow()
-                        {
-                            let _ = req.sender.send(last.to_decoded_frame());
-                        } else if allow_relaxed_fallback
-                            && let Some(ref first) = *first_ever_frame.borrow()
-                        {
-                            let _ = req.sender.send(first.to_decoded_frame());
-                        } else {
-                            unfulfilled_count += 1;
+                        .map(|(_, c)| c.data().clone());
+                    let hold_before = gap_hold.clone().filter(|h| {
+                        pts_holes
+                            .get(&h.number)
+                            .is_some_and(|&end| h.number <= req.frame && req.frame < end)
+                    });
+                    let candidate_before = hold_candidate
+                        .as_ref()
+                        .filter(|(n, _)| *n <= req.frame)
+                        .map(|(n, buf)| {
+                            CachedFrame::new(&processor, buf.retained(), *n)
+                                .data()
+                                .clone()
+                        });
+                    let best_before = [cached_before, hold_before, candidate_before]
+                        .into_iter()
+                        .flatten()
+                        .max_by_key(|d| d.number);
+
+                    if let Some(data) = best_before {
+                        gap_hold = Some(data.clone());
+                        *last_sent_frame.borrow_mut() = Some(data.clone());
+                        let _ = req.sender.send(data.to_decoded_frame());
+                    } else if let Some((&frame_num, cached)) = cache.range(req.frame..).next() {
+                        if req.frame.abs_diff(frame_num) > fallback_distance {
+                            tracing::debug!(
+                                req_frame = req.frame,
+                                nearest_frame = frame_num,
+                                "serving forward frame across pts gap"
+                            );
                         }
+                        let _ = req.sender.send(cached.data().to_decoded_frame());
                     } else if allow_relaxed_fallback
                         && let Some(ref last) = *last_sent_frame.borrow()
                     {
diff --git a/crates/rendering/src/decoder/ffmpeg.rs b/crates/rendering/src/decoder/ffmpeg.rs
index 1e26eeb25d..b703a76d94 100644
--- a/crates/rendering/src/decoder/ffmpeg.rs
+++ b/crates/rendering/src/decoder/ffmpeg.rs
@@ -19,7 +19,7 @@ use cap_video_decode::FrameTextures;
 
 use super::{
     DecoderInitResult, DecoderType, FRAME_CACHE_SIZE, VideoDecoderMessage,
-    frame_converter::FrameConverter, pts_to_frame,
+    frame_converter::FrameConverter, pts_to_frame, record_pts_hole,
 };
 
 #[derive(Clone)]
@@ -70,7 +70,6 @@ struct PendingRequest {
 }
 
 const MAX_FRAME_LOOKBACK_TOLERANCE: u32 = 2;
-const MAX_FRAME_FALLBACK_DISTANCE: u32 = 90;
 
 fn extract_yuv_planes(frame: &frame::Video) -> Option<(Vec<u8>, PixelFormat, u32, u32)> {
     let height = frame.height();
@@ -250,6 +249,17 @@ impl FfmpegDecoder {
 
             let mut cache = BTreeMap::<u32, CachedFrame>::new();
             let mut last_active_frame = None::<u32>;
+            // pts holes (start frame -> first frame after the hole) discovered
+            // from decode-order jumps; facts about the file, so they survive
+            // resets and cache eviction.
+            let mut pts_holes = BTreeMap::<u32, u32>::new();
+            // Content of the most recently served VFR hold, kept across
+            // requests so a hole keeps rendering its true frame even after
+            // the pre-hole frame leaves the cache.
+            let mut gap_hold: Option<OutputFrame> = None;
+            // Previous vended frame number since the last reset, for pts-hole
+            // discovery (the reader vends in pts order between resets).
+            let mut prev_vended: Option<u32> = None;
 
             let last_sent_frame = Rc::new(RefCell::new(None::<OutputFrame>));
             let first_ever_frame = Rc::new(RefCell::new(None::<OutputFrame>));
@@ -273,6 +283,14 @@ impl FfmpegDecoder {
 
                 let mut sw_cache = BTreeMap::<u32, CachedFrame>::new();
                 let mut sw_last_active_frame = None::<u32>;
+                // pts holes (start frame -> first frame after the hole) from
+                // decode-order jumps; facts about the file that survive resets
+                // and cache eviction.
+                let mut sw_pts_holes = BTreeMap::<u32, u32>::new();
+                // Content of the most recently served VFR hold.
+                let mut sw_gap_hold: Option<OutputFrame> = None;
+                // Previous vended frame number since the last reset.
+                let mut sw_prev_vended: Option<u32> = None;
                 let sw_last_sent_frame = Rc::new(RefCell::new(None::<OutputFrame>));
                 let sw_first_ever_frame = Rc::new(RefCell::new(None::<OutputFrame>));
                 let mut sw_frames = sw_this.frames();
@@ -289,6 +307,9 @@ impl FfmpegDecoder {
                     sw_cache.insert(current_frame, cache_frame);
                     *sw_first_ever_frame.borrow_mut() = Some(output.clone());
                     *sw_last_sent_frame.borrow_mut() = Some(output);
+                    // The pre-decoded frame is a real vend: without seeding
+                    // this, an opening static hold would never be recorded.
+                    sw_prev_vended = Some(current_frame);
                 }
 
                 let sw_decoder_type = DecoderType::FFmpegSoftware;
@@ -391,6 +412,29 @@ impl FfmpegDecoder {
                             sw_frames = sw_this.frames();
                             *sw_last_sent_frame.borrow_mut() = None;
                             sw_cache.clear();
+                            sw_prev_vended = None;
+                        }
+
+                        // Requests inside a KNOWN pts hole are answered with the
+                        // hole's start frame — the true VFR hold — without
+                        // touching the decoder; see the hardware path above.
+                        if !is_backward_seek
+                            && let Some(hole_start) = sw_pts_holes
+                                .range(..=requested_frame)
+                                .next_back()
+                                .filter(|&(_, &end)| requested_frame < end)
+                                .map(|(&start, _)| start)
+                        {
+                            let data = sw_cache
+                                .get_mut(&hole_start)
+                                .map(|c| c.produce(&mut sw_converter))
+                                .or_else(|| sw_gap_hold.clone().filter(|h| h.number == hole_start));
+                            if let Some(data) = data {
+                                sw_gap_hold = Some(data.clone());
+                                *sw_last_sent_frame.borrow_mut() = Some(data.clone());
+                                let _ = reply.send(data.frame);
+                                continue;
+                            }
                         }
 
                         if reply.is_closed() {
@@ -443,26 +487,47 @@ impl FfmpegDecoder {
                             sw_frames = sw_this.frames();
                             *sw_last_sent_frame.borrow_mut() = None;
                             sw_cache.clear();
+                            sw_prev_vended = None;
                         }
 
                         let mut exit = false;
+                        // Newest vended frame below the cache window: after a reset
+                        // the reader re-vends from the keyframe before the request,
+                        // and that frame is the true VFR hold for a request inside
+                        // a pts hole.
+                        let mut hold_candidate: Option<(u32, CachedFrame)> = None;
 
                         for frame in &mut sw_frames {
                             if reply_cell.borrow().as_ref().is_none_or(|r| r.is_closed()) {
                                 respond.take();
+                                // The frame just pulled is discarded: it breaks
+                                // the vend continuity that hole discovery
+                                // relies on.
+                                sw_prev_vended = None;
                                 break;
                             }
 
                             let Ok(frame) = frame.map_err(|e| format!("read frame / {e}")) else {
+                                // A skipped frame breaks the vend continuity
+                                // that hole discovery relies on.
+                                sw_prev_vended = None;
                                 continue;
                             };
 
                             let Some(pts) = frame.pts() else {
+                                sw_prev_vended = None;
                                 continue;
                             };
                             let current_frame =
                                 pts_to_frame(pts - sw_start_time, sw_time_base, fps);
 
+                            if let Some(prev) = sw_prev_vended
+                                && current_frame > prev + 1
+                            {
+                                record_pts_hole(&mut sw_pts_holes, prev, current_frame);
+                            }
+                            sw_prev_vended = Some(current_frame);
+
                             let mut cache_frame = CachedFrame::Raw {
                                 frame,
                                 number: current_frame,
@@ -476,16 +541,19 @@ impl FfmpegDecoder {
                             let exceeds_cache_bounds = current_frame > cache_max;
                             let too_small_for_cache_bounds = current_frame < cache_min;
 
-                            let cache_frame = if !too_small_for_cache_bounds {
-                                cache_frame.produce(&mut sw_converter);
-
-                                if current_frame == requested_frame
-                                    && let Some(respond) = respond.take()
-                                {
-                                    let output = cache_frame.produce(&mut sw_converter);
-                                    (respond)(output);
-                                    break;
+                            if too_small_for_cache_bounds {
+                                // Keep the newest pre-request frame as the VFR hold
+                                // candidate instead of discarding it; a frame below
+                                // the cache window can never exceed the request, so
+                                // nothing else in this iteration applies to it.
+                                if current_frame <= requested_frame {
+                                    hold_candidate = Some((current_frame, cache_frame));
                                 }
+                                continue;
+                            }
+
+                            {
+                                cache_frame.produce(&mut sw_converter);
 
                                 if sw_cache.len() >= FRAME_CACHE_SIZE {
                                     if let Some(last_active_frame) = &sw_last_active_frame {
@@ -505,28 +573,90 @@ impl FfmpegDecoder {
                                 }
 
                                 sw_cache.insert(current_frame, cache_frame);
-                                sw_cache.get_mut(&current_frame).unwrap()
-                            } else {
-                                &mut cache_frame
-                            };
 
-                            if current_frame > requested_frame && respond.is_some() {
-                                let last_sent_frame_clone = sw_last_sent_frame.borrow().clone();
+                                // Serve exact matches from the cache so
+                                // sequentially played frames stay available as
+                                // at-or-before holds for later gap requests.
+                                if current_frame == requested_frame
+                                    && let Some(respond) = respond.take()
+                                {
+                                    let output = sw_cache
+                                        .get_mut(&current_frame)
+                                        .unwrap()
+                                        .produce(&mut sw_converter);
+                                    (respond)(output);
+                                    break;
+                                }
+                            }
 
-                                if let Some((respond, last_frame)) = last_sent_frame_clone
-                                    .filter(|l| {
-                                        l.number <= requested_frame
-                                            && requested_frame.saturating_sub(l.number)
-                                                <= MAX_FRAME_FALLBACK_DISTANCE
+                            if current_frame > requested_frame && respond.is_some() {
+                                // A frame at-or-before the request is the true content for
+                                // that time in a VFR recording: a pts gap means the frame
+                                // stayed on screen, however long the gap is. Prefer the
+                                // newest such frame among the cache, the hold candidate a
+                                // reset re-vended, and the persistent gap hold.
+                                let cached_before = sw_cache
+                                    .range(..=requested_frame)
+                                    .next_back()
+                                    .map(|(&n, _)| n);
+                                let candidate_number = hold_candidate
+                                    .as_ref()
+                                    .map(|(n, _)| *n)
+                                    .filter(|&n| n <= requested_frame);
+                                let hold_before = sw_gap_hold.clone().filter(|h| {
+                                    sw_pts_holes.get(&h.number).is_some_and(|&end| {
+                                        h.number <= requested_frame && requested_frame < end
                                     })
-                                    .and_then(|l| Some((respond.take()?, l)))
+                                });
+                                let last_before = sw_last_sent_frame
+                                    .borrow()
+                                    .as_ref()
+                                    .filter(|l| l.number <= requested_frame)
+                                    .cloned();
+                                let best_number = [
+                                    cached_before,
+                                    candidate_number,
+                                    hold_before.as_ref().map(|h| h.number),
+                                    last_before.as_ref().map(|l| l.number),
+                                ]
+                                .into_iter()
+                                .flatten()
+                                .max();
+
+                                if let Some(best_number) = best_number
+                                    && let Some(respond) = respond.take()
                                 {
-                                    (respond)(last_frame);
+                                    let output = if cached_before == Some(best_number) {
+                                        sw_cache
+                                            .get_mut(&best_number)
+                                            .unwrap()
+                                            .produce(&mut sw_converter)
+                                    } else if candidate_number == Some(best_number) {
+                                        let (_, mut candidate) = hold_candidate.take().unwrap();
+                                        candidate.produce(&mut sw_converter)
+                                    } else if hold_before.as_ref().map(|h| h.number)
+                                        == Some(best_number)
+                                    {
+                                        hold_before.unwrap()
+                                    } else {
+                                        last_before.unwrap()
+                                    };
+                                    sw_gap_hold = Some(output.clone());
+                                    (respond)(output);
                                 } else if let Some(respond) = respond.take() {
-                                    let output = cache_frame.produce(&mut sw_converter);
-                                    *sw_last_sent_frame.borrow_mut() = Some(output.clone());
+                                    let output = sw_cache
+                                        .get_mut(&current_frame)
+                                        .unwrap()
+                                        .produce(&mut sw_converter);
                                     (respond)(output);
                                 }
+
+                                // The request is answered; stop here so the
+                                // next sample stays in the decoder for the
+                                // next request. Pulling it now would discard
+                                // it at the reply guard and poison the vend
+                                // continuity that hole discovery relies on.
+                                break;
                             }
 
                             exit = exit || exceeds_cache_bounds;
@@ -539,18 +669,55 @@ impl FfmpegDecoder {
                         sw_last_active_frame = Some(requested_frame);
 
                         if let Some(respond) = respond.take() {
-                            let best_cached = sw_cache
+                            // The newest frame at-or-before the request is always a
+                            // valid VFR hold, regardless of how far back it is —
+                            // whether it is cached, was re-vended below the cache
+                            // window by a reset, or is the persistent gap hold.
+                            let cached_before = sw_cache
                                 .range(..=requested_frame)
                                 .next_back()
-                                .filter(|(k, _)| {
-                                    requested_frame.saturating_sub(**k)
-                                        <= MAX_FRAME_FALLBACK_DISTANCE
+                                .map(|(&n, _)| n);
+                            let candidate_number = hold_candidate
+                                .as_ref()
+                                .map(|(n, _)| *n)
+                                .filter(|&n| n <= requested_frame);
+                            let hold_before = sw_gap_hold.clone().filter(|h| {
+                                sw_pts_holes.get(&h.number).is_some_and(|&end| {
+                                    h.number <= requested_frame && requested_frame < end
                                 })
-                                .map(|(_, v)| v);
-
-                            if let Some(cached) = best_cached {
-                                let output = cached.clone().produce(&mut sw_converter);
-                                *sw_last_sent_frame.borrow_mut() = Some(output.clone());
+                            });
+                            let last_before = sw_last_sent_frame
+                                .borrow()
+                                .as_ref()
+                                .filter(|l| l.number <= requested_frame)
+                                .cloned();
+                            let best_number = [
+                                cached_before,
+                                candidate_number,
+                                hold_before.as_ref().map(|h| h.number),
+                                last_before.as_ref().map(|l| l.number),
+                            ]
+                            .into_iter()
+                            .flatten()
+                            .max();
+
+                            if let Some(best_number) = best_number {
+                                let output = if cached_before == Some(best_number) {
+                                    sw_cache
+                                        .get_mut(&best_number)
+                                        .unwrap()
+                                        .produce(&mut sw_converter)
+                                } else if candidate_number == Some(best_number) {
+                                    let (_, mut candidate) = hold_candidate.take().unwrap();
+                                    candidate.produce(&mut sw_converter)
+                                } else if hold_before.as_ref().map(|h| h.number)
+                                    == Some(best_number)
+                                {
+                                    hold_before.unwrap()
+                                } else {
+                                    last_before.unwrap()
+                                };
+                                sw_gap_hold = Some(output.clone());
                                 (respond)(output);
                             } else {
                                 let last_frame_clone = sw_last_sent_frame.borrow().clone();
@@ -592,6 +759,9 @@ impl FfmpegDecoder {
                 cache.insert(current_frame, cache_frame);
                 *first_ever_frame.borrow_mut() = Some(output.clone());
                 *last_sent_frame.borrow_mut() = Some(output);
+                // The pre-decoded frame is a real vend: without seeding this,
+                // an opening static hold would never be recorded.
+                prev_vended = Some(current_frame);
                 info!(
                     "FFmpeg decoder '{}': pre-decoded first frame {} ({}x{})",
                     name, current_frame, video_width, video_height
@@ -701,6 +871,34 @@ impl FfmpegDecoder {
                         frames = this.frames();
                         *last_sent_frame.borrow_mut() = None;
                         cache.clear();
+                        prev_vended = None;
+                    }
+
+                    // Requests inside a KNOWN pts hole are answered with the
+                    // hole's start frame — the true VFR hold (the frame simply
+                    // stayed on screen) — without touching the decoder. This
+                    // keeps the post-hole frames cached and the reader parked,
+                    // however long the hole runs. Only recorded holes qualify: a
+                    // bare "some cached frame lies beyond the request" test would
+                    // also match disjoint cache islands left by seeks, and
+                    // serving stale content there would freeze playback.
+                    if !is_backward_seek
+                        && let Some(hole_start) = pts_holes
+                            .range(..=requested_frame)
+                            .next_back()
+                            .filter(|&(_, &end)| requested_frame < end)
+                            .map(|(&start, _)| start)
+                    {
+                        let data = cache
+                            .get_mut(&hole_start)
+                            .map(|c| c.produce(&mut converter))
+                            .or_else(|| gap_hold.clone().filter(|h| h.number == hole_start));
+                        if let Some(data) = data {
+                            gap_hold = Some(data.clone());
+                            *last_sent_frame.borrow_mut() = Some(data.clone());
+                            let _ = reply.send(data.frame);
+                            continue;
+                        }
                     }
 
                     if reply.is_closed() {
@@ -752,25 +950,45 @@ impl FfmpegDecoder {
                         frames = this.frames();
                         *last_sent_frame.borrow_mut() = None;
                         cache.clear();
+                        prev_vended = None;
                     }
 
                     let mut exit = false;
+                    // Newest vended frame below the cache window: after a reset
+                    // the reader re-vends from the keyframe before the request,
+                    // and that frame is the true VFR hold for a request inside
+                    // a pts hole.
+                    let mut hold_candidate: Option<(u32, CachedFrame)> = None;
 
                     for frame in &mut frames {
                         if reply_cell.borrow().as_ref().is_none_or(|r| r.is_closed()) {
                             respond.take();
+                            // The frame just pulled is discarded: it breaks the
+                            // vend continuity that hole discovery relies on.
+                            prev_vended = None;
                             break;
                         }
 
                         let Ok(frame) = frame.map_err(|e| format!("read frame / {e}")) else {
+                            // A skipped frame breaks the vend continuity that
+                            // hole discovery relies on.
+                            prev_vended = None;
                             continue;
                         };
 
                         let Some(pts) = frame.pts() else {
+                            prev_vended = None;
                             continue;
                         };
                         let current_frame = pts_to_frame(pts - start_time, time_base, fps);
 
+                        if let Some(prev) = prev_vended
+                            && current_frame > prev + 1
+                        {
+                            record_pts_hole(&mut pts_holes, prev, current_frame);
+                        }
+                        prev_vended = Some(current_frame);
+
                         let mut cache_frame = CachedFrame::Raw {
                             frame,
                             number: current_frame,
@@ -784,17 +1002,19 @@ impl FfmpegDecoder {
                         let exceeds_cache_bounds = current_frame > cache_max;
                         let too_small_for_cache_bounds = current_frame < cache_min;
 
-                        let cache_frame = if !too_small_for_cache_bounds {
-                            cache_frame.produce(&mut converter);
-
-                            if current_frame == requested_frame
-                                && let Some(respond) = respond.take()
-                            {
-                                let output = cache_frame.produce(&mut converter);
-                                (respond)(output);
-
-                                break;
+                        if too_small_for_cache_bounds {
+                            // Keep the newest pre-request frame as the VFR hold
+                            // candidate instead of discarding it; a frame below
+                            // the cache window can never exceed the request, so
+                            // nothing else in this iteration applies to it.
+                            if current_frame <= requested_frame {
+                                hold_candidate = Some((current_frame, cache_frame));
                             }
+                            continue;
+                        }
+
+                        {
+                            cache_frame.produce(&mut converter);
 
                             if cache.len() >= FRAME_CACHE_SIZE {
                                 if let Some(last_active_frame) = &last_active_frame {
@@ -816,28 +1036,85 @@ impl FfmpegDecoder {
                             }
 
                             cache.insert(current_frame, cache_frame);
-                            cache.get_mut(&current_frame).unwrap()
-                        } else {
-                            &mut cache_frame
-                        };
 
-                        if current_frame > requested_frame && respond.is_some() {
-                            let last_sent_frame_clone = last_sent_frame.borrow().clone();
+                            // Serve exact matches from the cache so
+                            // sequentially played frames stay available as
+                            // at-or-before holds for later gap requests.
+                            if current_frame == requested_frame
+                                && let Some(respond) = respond.take()
+                            {
+                                let output = cache
+                                    .get_mut(&current_frame)
+                                    .unwrap()
+                                    .produce(&mut converter);
+                                (respond)(output);
+                                break;
+                            }
+                        }
 
-                            if let Some((respond, last_frame)) = last_sent_frame_clone
-                                .filter(|l| {
-                                    l.number <= requested_frame
-                                        && requested_frame.saturating_sub(l.number)
-                                            <= MAX_FRAME_FALLBACK_DISTANCE
+                        if current_frame > requested_frame && respond.is_some() {
+                            // A frame at-or-before the request is the true content for
+                            // that time in a VFR recording: a pts gap means the frame
+                            // stayed on screen, however long the gap is. Prefer the
+                            // newest such frame among the cache, the hold candidate a
+                            // reset re-vended, and the persistent gap hold.
+                            let cached_before =
+                                cache.range(..=requested_frame).next_back().map(|(&n, _)| n);
+                            let candidate_number = hold_candidate
+                                .as_ref()
+                                .map(|(n, _)| *n)
+                                .filter(|&n| n <= requested_frame);
+                            let hold_before = gap_hold.clone().filter(|h| {
+                                pts_holes.get(&h.number).is_some_and(|&end| {
+                                    h.number <= requested_frame && requested_frame < end
                                 })
-                                .and_then(|l| Some((respond.take()?, l)))
+                            });
+                            let last_before = last_sent_frame
+                                .borrow()
+                                .as_ref()
+                                .filter(|l| l.number <= requested_frame)
+                                .cloned();
+                            let best_number = [
+                                cached_before,
+                                candidate_number,
+                                hold_before.as_ref().map(|h| h.number),
+                                last_before.as_ref().map(|l| l.number),
+                            ]
+                            .into_iter()
+                            .flatten()
+                            .max();
+
+                            if let Some(best_number) = best_number
+                                && let Some(respond) = respond.take()
                             {
-                                (respond)(last_frame);
+                                let output = if cached_before == Some(best_number) {
+                                    cache.get_mut(&best_number).unwrap().produce(&mut converter)
+                                } else if candidate_number == Some(best_number) {
+                                    let (_, mut candidate) = hold_candidate.take().unwrap();
+                                    candidate.produce(&mut converter)
+                                } else if hold_before.as_ref().map(|h| h.number)
+                                    == Some(best_number)
+                                {
+                                    hold_before.unwrap()
+                                } else {
+                                    last_before.unwrap()
+                                };
+                                gap_hold = Some(output.clone());
+                                (respond)(output);
                             } else if let Some(respond) = respond.take() {
-                                let output = cache_frame.produce(&mut converter);
-                                *last_sent_frame.borrow_mut() = Some(output.clone());
+                                let output = cache
+                                    .get_mut(&current_frame)
+                                    .unwrap()
+                                    .produce(&mut converter);
                                 (respond)(output);
                             }
+
+                            // The request is answered; stop here so the next
+                            // sample stays in the decoder for the next
+                            // request. Pulling it now would discard it at the
+                            // reply guard and poison the vend continuity that
+                            // hole discovery relies on.
+                            break;
                         }
 
                         exit = exit || exceeds_cache_bounds;
@@ -850,17 +1127,48 @@ impl FfmpegDecoder {
                     last_active_frame = Some(requested_frame);
 
                     if let Some(respond) = respond.take() {
-                        let best_cached = cache
-                            .range(..=requested_frame)
-                            .next_back()
-                            .filter(|(k, _)| {
-                                requested_frame.saturating_sub(**k) <= MAX_FRAME_FALLBACK_DISTANCE
+                        // The newest frame at-or-before the request is always a valid
+                        // VFR hold, regardless of how far back it is — whether it is
+                        // cached, was re-vended below the cache window by a reset, or
+                        // is the persistent gap hold.
+                        let cached_before =
+                            cache.range(..=requested_frame).next_back().map(|(&n, _)| n);
+                        let candidate_number = hold_candidate
+                            .as_ref()
+                            .map(|(n, _)| *n)
+                            .filter(|&n| n <= requested_frame);
+                        let hold_before = gap_hold.clone().filter(|h| {
+                            pts_holes.get(&h.number).is_some_and(|&end| {
+                                h.number <= requested_frame && requested_frame < end
                             })
-                            .map(|(_, v)| v);
-
-                        if let Some(cached) = best_cached {
-                            let output = cached.clone().produce(&mut converter);
-                            *last_sent_frame.borrow_mut() = Some(output.clone());
+                        });
+                        let last_before = last_sent_frame
+                            .borrow()
+                            .as_ref()
+                            .filter(|l| l.number <= requested_frame)
+                            .cloned();
+                        let best_number = [
+                            cached_before,
+                            candidate_number,
+                            hold_before.as_ref().map(|h| h.number),
+                            last_before.as_ref().map(|l| l.number),
+                        ]
+                        .into_iter()
+                        .flatten()
+                        .max();
+
+                        if let Some(best_number) = best_number {
+                            let output = if cached_before == Some(best_number) {
+                                cache.get_mut(&best_number).unwrap().produce(&mut converter)
+                            } else if candidate_number == Some(best_number) {
+                                let (_, mut candidate) = hold_candidate.take().unwrap();
+                                candidate.produce(&mut converter)
+                            } else if hold_before.as_ref().map(|h| h.number) == Some(best_number) {
+                                hold_before.unwrap()
+                            } else {
+                                last_before.unwrap()
+                            };
+                            gap_hold = Some(output.clone());
                             (respond)(output);
                         } else {
                             let last_frame_clone = last_sent_frame.borrow().clone();
diff --git a/crates/rendering/src/decoder/mod.rs b/crates/rendering/src/decoder/mod.rs
index 44456cea07..8cf323adc8 100644
--- a/crates/rendering/src/decoder/mod.rs
+++ b/crates/rendering/src/decoder/mod.rs
@@ -449,6 +449,27 @@ pub fn pts_to_frame(pts: i64, time_base: Rational, fps: u32) -> u32 {
 pub const FRAME_CACHE_SIZE: usize = 90;
 const DEFAULT_MAX_FALLBACK_DISTANCE: u32 = 90;
 
+/// Stores the pts hole `start -> end` implied by a jump between two
+/// consecutive decode-order vends (frames vend in pts order, so no samples
+/// exist in between). The map is capped at `MAX_TRACKED_HOLES`: on overflow
+/// the narrowest hole is evicted, as wide static-screen holds matter most.
+pub(super) fn record_pts_hole(
+    holes: &mut std::collections::BTreeMap<u32, u32>,
+    start: u32,
+    end: u32,
+) {
+    const MAX_TRACKED_HOLES: usize = 64;
+    holes.insert(start, end);
+    if holes.len() <= MAX_TRACKED_HOLES {
+        return;
+    }
+    // Ties resolve to the first minimum in key order, as `min_by_key` does.
+    let narrowest = holes.iter().min_by_key(|&(&lo, &hi)| hi.saturating_sub(lo));
+    if let Some(evict) = narrowest.map(|(&lo, _)| lo) {
+        holes.remove(&evict);
+    }
+}
+
 #[derive(Clone)]
 pub struct AsyncVideoDecoderHandle {
     sender: mpsc::Sender<VideoDecoderMessage>,
@@ -505,6 +526,10 @@ impl AsyncVideoDecoderHandle {
             ))
             .is_err()
         {
+            tracing::warn!(
+                time = adjusted_time,
+                "decoder thread is gone; frame request dropped"
+            );
             return None;
         }
 
diff --git a/crates/rendering/src/lib.rs b/crates/rendering/src/lib.rs
index e46b0f68ef..f5b3edb9c5 100644
--- a/crates/rendering/src/lib.rs
+++ b/crates/rendering/src/lib.rs
@@ -393,6 +393,10 @@ impl RecordingSegmentDecoders {
 
             let camera_frame = camera.flatten();
 
+            if screen.is_none() {
+                tracing::warn!(segment_time, "screen decoder returned no frame");
+            }
+
             Some(DecodedSegmentFrames {
                 screen_frame: Some(screen?),
                 camera_frame,