diff --git a/.github/workflows/sync-tests.yml b/.github/workflows/sync-tests.yml new file mode 100644 index 0000000000..2197eb5751 --- /dev/null +++ b/.github/workflows/sync-tests.yml @@ -0,0 +1,161 @@ +name: A/V Sync Tests + +# Verifies audio/video sync correctness on every platform: +# 1. Unit + property tests for the timestamp pipeline (encoders, drift +# trackers, muxers). +# 2. The synthetic device matrix: fake cameras/screens/microphones across +# frame rates, sample rates, channel counts and delivery pathologies +# (jitter, drops, static-screen gaps), driven through the real recording +# pipeline and verified at the container level. No capture hardware or +# GPU required, so results are deterministic on hosted runners. +# +# Findings are published as a job-summary table and a JSON artifact per OS. + +on: + workflow_dispatch: + schedule: + - cron: "0 5 * * *" + pull_request: + paths: + - "crates/recording/**" + - "crates/enc-ffmpeg/**" + - "crates/enc-avfoundation/**" + - "crates/enc-mediafoundation/**" + - "crates/timestamp/**" + - "crates/rendering/**" + - "crates/media-info/**" + - ".github/workflows/sync-tests.yml" + +concurrency: + group: sync-tests-${{ github.head_ref || github.ref_name }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + sync-tests: + strategy: + fail-fast: false + matrix: + runner: + - macos-latest + - windows-2022 + - ubuntu-24.04 + runs-on: ${{ matrix.runner }} + permissions: + contents: read + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Rust setup + uses: dtolnay/rust-toolchain@1.88.0 + + - name: Rust cache + uses: ./.github/actions/setup-rust-cache + with: + target: ${{ runner.os == 'Windows' && 'x86_64-pc-windows-msvc' || runner.os == 'macOS' && 'aarch64-apple-darwin' || 'x86_64-unknown-linux-gnu' }} + + - name: Install desktop dependencies + uses: ./.github/actions/install-desktop-deps + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: 24 + + - name: Native 
dependencies + env: + RUST_TARGET_TRIPLE: ${{ runner.os == 'Linux' && 'x86_64-unknown-linux-gnu' || runner.os == 'Windows' && 'x86_64-pc-windows-msvc' || 'aarch64-apple-darwin' }} + run: node scripts/setup.js + + - name: Add FFmpeg DLLs to PATH + if: runner.os == 'Windows' + shell: pwsh + run: Add-Content -Path $env:GITHUB_PATH -Value "${{ github.workspace }}\\target\\ffmpeg\\bin" + + - name: Install software Vulkan driver (Linux) + if: runner.os == 'Linux' + run: | + sudo apt-get update + sudo apt-get install -y mesa-vulkan-drivers libvulkan1 + + - name: Timestamp pipeline unit + property tests + shell: bash + run: | + cargo test --locked -p cap-timestamp -p cap-enc-ffmpeg + cargo test --locked -p cap-recording --lib + cargo test --locked -p cap-rendering + + - name: Synthetic device matrix + id: matrix + continue-on-error: true + shell: bash + env: + CAP_SYNC_MATRIX_REPORT: ${{ github.workspace }}/sync-matrix-${{ matrix.runner }}.json + CAP_SYNC_MATRIX_RANDOM_CASES: ${{ github.event_name == 'schedule' && '40' || '6' }} + run: | + cargo test --locked -p cap-recording --test sync_matrix -- --nocapture + + # Verifies the editor's playback machinery (decoders, frame scheduling, + # audio pipeline) preserves sync. The fixture recording is generated + # through the real recording pipeline, so no capture hardware is needed; + # rendering uses the platform's software adapter where no GPU exists. + # 30s of pattern stabilizes the drift slope against frame-quantization + # noise. Playback runs at the default 30 fps: lower rates trip the audio + # sync policy's drift-correction threshold every few frames and accrue + # real (policy-induced) drift that fails the gate. + # + # Linux-only: the Windows WARP adapter composites blank frames and the + # macOS runners' paravirtualized Metal collapses to ~2 fps presentation + # regardless of decoder, so neither can sustain a wall-clock playback + # measurement. 
The decoder logic under test (FFmpeg gap holds) is fully + # exercised here; the macOS AVAssetReader path is covered by running + # `cap selftest playback` locally on real hardware. + - name: Editor playback sync harness + if: runner.os == 'Linux' + shell: bash + run: | + cargo run --locked -p cap -- --log-level info selftest playback --duration 30 --json + + - name: Report findings + if: always() + shell: bash + run: | + REPORT="${{ github.workspace }}/sync-matrix-${{ matrix.runner }}.json" + { + echo "## A/V sync matrix — ${{ matrix.runner }}" + echo "" + if [ -f "$REPORT" ]; then + PYTHONIOENCODING=utf-8 python3 - "$REPORT" << 'PYEOF' + import json, sys + report = json.load(open(sys.argv[1])) + print(f"Randomized seed: `{report.get('seed')}` (rerun with CAP_SYNC_MATRIX_SEED)") + print() + print("| Case | Result | Detail |") + print("| --- | --- | --- |") + for case in report.get("cases", []): + verdict = "PASS" if case["pass"] else "FAIL" + detail = case["detail"].replace("|", "\\|") + print(f"| {case['name']} | {verdict} | {detail} |") + PYEOF + else + echo "No report produced — the matrix crashed before writing results." + fi + } >> "$GITHUB_STEP_SUMMARY" + + - name: Upload findings + if: always() + uses: actions/upload-artifact@v4 + with: + name: sync-matrix-${{ matrix.runner }} + path: ${{ github.workspace }}/sync-matrix-${{ matrix.runner }}.json + if-no-files-found: ignore + + - name: Fail on matrix failures + if: steps.matrix.outcome == 'failure' + shell: bash + run: | + echo "Synthetic sync matrix reported failures; see the job summary." 
>&2 + exit 1 diff --git a/Cargo.lock b/Cargo.lock index 9d7cbc65d1..33d20ab582 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,22 @@ version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" +[[package]] +name = "ab_glyph" +version = "0.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01c0457472c38ea5bd1c3b5ada5e368271cb550be7a4ca4a0b4634e9913f6cc2" +dependencies = [ + "ab_glyph_rasterizer", + "owned_ttf_parser", +] + +[[package]] +name = "ab_glyph_rasterizer" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "366ffbaa4442f4684d91e2cd7c5ea7c4ed8add41959a31447066e279e432b618" + [[package]] name = "actix-codec" version = "0.5.2" @@ -250,6 +266,31 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "android-activity" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f2a1bb052857d5dd49572219344a7332b31b76405648eabac5bc68978251bcd" +dependencies = [ + "android-properties", + "bitflags 2.9.4", + "cc", + "jni 0.22.4", + "libc", + "log", + "ndk 0.9.0", + "ndk-context", + "ndk-sys 0.6.0+11769913", + "num_enum", + "thiserror 2.0.16", +] + +[[package]] +name = "android-properties" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7eb209b1518d6bb87b283c20095f5228ecda460da70b44f0802523dea6da04" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -354,7 +395,7 @@ dependencies = [ "image 0.25.8", "log", "objc2 0.6.2", - "objc2-app-kit", + "objc2-app-kit 0.3.1", "objc2-core-foundation", "objc2-core-graphics", "objc2-foundation 0.3.1", @@ -388,6 +429,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +[[package]] +name = "as-raw-xcb-connection" +version = 
"1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175571dd1d178ced59193a6fc02dde1b972eb0bc56c892cde9beeceac5bf0f6b" + [[package]] name = "ash" version = "0.38.0+1.3.281" @@ -1076,6 +1123,32 @@ dependencies = [ "system-deps 6.2.2", ] +[[package]] +name = "calloop" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b99da2f8558ca23c71f4fd15dc57c906239752dd27ff3c00a1d56b685b7cbfec" +dependencies = [ + "bitflags 2.9.4", + "log", + "polling", + "rustix 0.38.44", + "slab", + "thiserror 1.0.69", +] + +[[package]] +name = "calloop-wayland-source" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95a66a987056935f7efce4ab5668920b5d0dac4a7c99991a67395f13702ddd20" +dependencies = [ + "calloop", + "rustix 0.38.44", + "wayland-backend", + "wayland-client", +] + [[package]] name = "camino" version = "1.2.0" @@ -1092,13 +1165,17 @@ dependencies = [ "cap-automation", "cap-camera", "cap-cli-install", + "cap-editor", "cap-export", + "cap-media-info", "cap-project", "cap-recording", + "cap-timestamp", "chrono", "cidre", "clap", "clap_complete", + "cpal 0.15.3 (git+https://github.com/CapSoftware/cpal?rev=3cc779a7b4ca)", "dirs 6.0.0", "ffmpeg-next", "flume", @@ -1106,11 +1183,13 @@ dependencies = [ "image 0.25.8", "kameo", "libc", + "relative-path", "reqwest 0.12.24", "scap-screencapturekit", "scap-targets", "serde", "serde_json", + "softbuffer", "tempfile", "tokio", "tokio-util", @@ -1118,6 +1197,7 @@ dependencies = [ "tracing-subscriber", "uuid", "windows 0.60.0", + "winit", "workspace-hack", ] @@ -1294,7 +1374,7 @@ version = "0.0.0" dependencies = [ "hex", "objc2 0.6.2", - "objc2-app-kit", + "objc2-app-kit 0.3.1", "serde", "sha2", "specta", @@ -1366,7 +1446,7 @@ dependencies = [ "nix 0.29.0", "objc", "objc2 0.6.2", - "objc2-app-kit", + "objc2-app-kit 0.3.1", "objc2-foundation 0.3.1", "opentelemetry", "opentelemetry-otlp", @@ -1739,7 +1819,7 @@ 
dependencies = [ "libproc", "objc", "objc2 0.6.2", - "objc2-app-kit", + "objc2-app-kit 0.3.1", "parking_lot", "pipewire", "relative-path", @@ -2145,7 +2225,7 @@ dependencies = [ "clipboard-win", "image 0.25.8", "objc2 0.6.2", - "objc2-app-kit", + "objc2-app-kit 0.3.1", "objc2-foundation 0.3.1", "windows 0.59.0", "x11rb", @@ -2374,6 +2454,19 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core-graphics" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c07782be35f9e1140080c6b96f0d44b739e2278479f64e02fdab4e32dfd8b081" +dependencies = [ + "bitflags 1.3.2", + "core-foundation 0.9.4", + "core-graphics-types 0.1.3", + "foreign-types 0.5.0", + "libc", +] + [[package]] name = "core-graphics" version = "0.24.0" @@ -2520,7 +2613,7 @@ dependencies = [ "core-foundation-sys", "coreaudio-rs", "dasp_sample", - "jni", + "jni 0.21.1", "js-sys", "libc", "mach2", @@ -2542,7 +2635,7 @@ dependencies = [ "core-foundation-sys", "coreaudio-rs", "dasp_sample", - "jni", + "jni 0.21.1", "js-sys", "libc", "mach2", @@ -2709,6 +2802,21 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "ctor" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83cf0d42651b16c6dfe68685716d18480d18a9c39c62d76e8cf3eb6ed5d8bcbf" +dependencies = [ + "dtor", +] + +[[package]] +name = "cursor-icon" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27ae1dd37df86211c42e150270f82743308803d90a6f6e6651cd730d5e1732f" + [[package]] name = "darling" version = "0.20.11" @@ -3101,6 +3209,45 @@ dependencies = [ "serde", ] +[[package]] +name = "drm" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98888c4bbd601524c11a7ed63f814b8825f420514f78e96f752c437ae9cbb5d1" +dependencies = [ + "bitflags 
2.9.4", + "bytemuck", + "drm-ffi", + "drm-fourcc", + "rustix 0.38.44", +] + +[[package]] +name = "drm-ffi" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97c98727e48b7ccb4f4aea8cfe881e5b07f702d17b7875991881b41af7278d53" +dependencies = [ + "drm-sys", + "rustix 0.38.44", +] + +[[package]] +name = "drm-fourcc" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0aafbcdb8afc29c1a7ee5fbe53b5d62f4565b35a042a662ca9fecd0b54dae6f4" + +[[package]] +name = "drm-sys" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd39dde40b6e196c2e8763f23d119ddb1a8714534bf7d77fa97a65b0feda3986" +dependencies = [ + "libc", + "linux-raw-sys 0.6.5", +] + [[package]] name = "dtoa" version = "1.0.10" @@ -3116,6 +3263,12 @@ dependencies = [ "dtoa", ] +[[package]] +name = "dtor" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edf234dd1594d6dd434a8fb8cada51ddbbc593e40e4a01556a0b31c62da2775b" + [[package]] name = "dunce" version = "1.0.5" @@ -4042,7 +4195,7 @@ dependencies = [ "crossbeam-channel", "keyboard-types", "objc2 0.6.2", - "objc2-app-kit", + "objc2-app-kit 0.3.1", "once_cell", "serde", "thiserror 2.0.16", @@ -5003,19 +5156,68 @@ dependencies = [ "cesu8", "cfg-if", "combine", - "jni-sys", + "jni-sys 0.3.0", "log", "thiserror 1.0.69", "walkdir", "windows-sys 0.45.0", ] +[[package]] +name = "jni" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efd9a482cf3a427f00d6b35f14332adc7902ce91efb778580e180ff90fa3498" +dependencies = [ + "cfg-if", + "combine", + "jni-macros", + "jni-sys 0.4.1", + "log", + "simd_cesu8", + "thiserror 2.0.16", + "walkdir", + "windows-link 0.2.0", +] + +[[package]] +name = "jni-macros" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a00109accc170f0bdb141fed3e393c565b6f5e072365c3bd58f5b062591560a3" +dependencies = [ + "proc-macro2", + "quote", + "rustc_version", + "simd_cesu8", + "syn 2.0.106", +] + [[package]] name = "jni-sys" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" +[[package]] +name = "jni-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6377a88cb3910bee9b0fa88d4f42e1d2da8e79915598f65fb0c7ee14c878af2" +dependencies = [ + "jni-sys-macros", +] + +[[package]] +name = "jni-sys-macros" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264" +dependencies = [ + "quote", + "syn 2.0.106", +] + [[package]] name = "jobserver" version = "0.1.34" @@ -5301,7 +5503,7 @@ checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb" dependencies = [ "bitflags 2.9.4", "libc", - "redox_syscall", + "redox_syscall 0.5.17", ] [[package]] @@ -5346,6 +5548,12 @@ version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +[[package]] +name = "linux-raw-sys" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a385b1be4e5c3e362ad2ffa73c392e53f031eaa5b7d648e64cd87f27f6063d7" + [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -5830,7 +6038,7 @@ dependencies = [ "gtk", "keyboard-types", "objc2 0.6.2", - "objc2-app-kit", + "objc2-app-kit 0.3.1", "objc2-core-foundation", "objc2-foundation 0.3.1", "once_cell", @@ -5934,7 +6142,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2076a31b7010b17a38c01907c45b945e8f11495ee4dd588309718901b1f7a5b7" dependencies = [ "bitflags 2.9.4", - "jni-sys", + "jni-sys 0.3.0", "log", "ndk-sys 
0.5.0+25.2.9519653", "num_enum", @@ -5948,7 +6156,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3f42e7bbe13d351b6bead8286a43aac9534b82bd3cc43e47037f012ebfd62d4" dependencies = [ "bitflags 2.9.4", - "jni-sys", + "jni-sys 0.3.0", "log", "ndk-sys 0.6.0+11769913", "num_enum", @@ -5968,7 +6176,7 @@ version = "0.5.0+25.2.9519653" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c196769dd60fd4f363e11d948139556a344e79d451aeb2fa2fd040738ef7691" dependencies = [ - "jni-sys", + "jni-sys 0.3.0", ] [[package]] @@ -5977,7 +6185,7 @@ version = "0.6.0+11769913" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee6cda3051665f1fb8d9e08fc35c96d5a244fb1be711a03b71118828afc9a873" dependencies = [ - "jni-sys", + "jni-sys 0.3.0", ] [[package]] @@ -6247,6 +6455,22 @@ dependencies = [ "objc2-exception-helper", ] +[[package]] +name = "objc2-app-kit" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4e89ad9e3d7d297152b17d39ed92cd50ca8063a89a9fa569046d41568891eff" +dependencies = [ + "bitflags 2.9.4", + "block2 0.5.1", + "libc", + "objc2 0.5.2", + "objc2-core-data 0.2.2", + "objc2-core-image 0.2.2", + "objc2-foundation 0.2.2", + "objc2-quartz-core 0.2.2", +] + [[package]] name = "objc2-app-kit" version = "0.3.1" @@ -6257,11 +6481,11 @@ dependencies = [ "block2 0.6.1", "libc", "objc2 0.6.2", - "objc2-cloud-kit", - "objc2-core-data", + "objc2-cloud-kit 0.3.1", + "objc2-core-data 0.3.1", "objc2-core-foundation", "objc2-core-graphics", - "objc2-core-image", + "objc2-core-image 0.3.1", "objc2-foundation 0.3.1", "objc2-quartz-core 0.3.1", ] @@ -6279,7 +6503,7 @@ dependencies = [ "objc2-avf-audio", "objc2-core-foundation", "objc2-core-graphics", - "objc2-core-image", + "objc2-core-image 0.3.1", "objc2-core-media", "objc2-core-video", "objc2-foundation 0.3.1", @@ -6296,6 +6520,19 @@ dependencies = [ "objc2-foundation 0.3.1", ] +[[package]] +name = 
"objc2-cloud-kit" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74dd3b56391c7a0596a295029734d3c1c5e7e510a4cb30245f8221ccea96b009" +dependencies = [ + "bitflags 2.9.4", + "block2 0.5.1", + "objc2 0.5.2", + "objc2-core-location", + "objc2-foundation 0.2.2", +] + [[package]] name = "objc2-cloud-kit" version = "0.3.1" @@ -6307,6 +6544,17 @@ dependencies = [ "objc2-foundation 0.3.1", ] +[[package]] +name = "objc2-contacts" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5ff520e9c33812fd374d8deecef01d4a840e7b41862d849513de77e44aa4889" +dependencies = [ + "block2 0.5.1", + "objc2 0.5.2", + "objc2-foundation 0.2.2", +] + [[package]] name = "objc2-core-audio" version = "0.3.1" @@ -6329,6 +6577,18 @@ dependencies = [ "objc2 0.6.2", ] +[[package]] +name = "objc2-core-data" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "617fbf49e071c178c0b24c080767db52958f716d9eabdf0890523aeae54773ef" +dependencies = [ + "bitflags 2.9.4", + "block2 0.5.1", + "objc2 0.5.2", + "objc2-foundation 0.2.2", +] + [[package]] name = "objc2-core-data" version = "0.3.1" @@ -6364,6 +6624,18 @@ dependencies = [ "objc2-io-surface", ] +[[package]] +name = "objc2-core-image" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55260963a527c99f1819c4f8e3b47fe04f9650694ef348ffd2227e8196d34c80" +dependencies = [ + "block2 0.5.1", + "objc2 0.5.2", + "objc2-foundation 0.2.2", + "objc2-metal", +] + [[package]] name = "objc2-core-image" version = "0.3.1" @@ -6374,6 +6646,18 @@ dependencies = [ "objc2-foundation 0.3.1", ] +[[package]] +name = "objc2-core-location" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "000cfee34e683244f284252ee206a27953279d370e309649dc3ee317b37e5781" +dependencies = [ + "block2 0.5.1", + "objc2 0.5.2", + "objc2-contacts", + "objc2-foundation 
0.2.2", +] + [[package]] name = "objc2-core-media" version = "0.3.1" @@ -6425,6 +6709,7 @@ checksum = "0ee638a5da3799329310ad4cfa62fbf045d5f56e3ef5ba4149e7452dcf89d5a8" dependencies = [ "bitflags 2.9.4", "block2 0.5.1", + "dispatch", "libc", "objc2 0.5.2", ] @@ -6473,6 +6758,18 @@ dependencies = [ "objc2-core-foundation", ] +[[package]] +name = "objc2-link-presentation" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1a1ae721c5e35be65f01a03b6d2ac13a54cb4fa70d8a5da293d7b0020261398" +dependencies = [ + "block2 0.5.1", + "objc2 0.5.2", + "objc2-app-kit 0.2.2", + "objc2-foundation 0.2.2", +] + [[package]] name = "objc2-metal" version = "0.2.2" @@ -6493,7 +6790,7 @@ checksum = "26bb88504b5a050dbba515d2414607bf5e57dd56b107bc5f0351197a3e7bdc5d" dependencies = [ "bitflags 2.9.4", "objc2 0.6.2", - "objc2-app-kit", + "objc2-app-kit 0.3.1", "objc2-foundation 0.3.1", ] @@ -6532,6 +6829,37 @@ dependencies = [ "objc2-core-foundation", ] +[[package]] +name = "objc2-symbols" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a684efe3dec1b305badae1a28f6555f6ddd3bb2c2267896782858d5a78404dc" +dependencies = [ + "objc2 0.5.2", + "objc2-foundation 0.2.2", +] + +[[package]] +name = "objc2-ui-kit" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8bb46798b20cd6b91cbd113524c490f1686f4c4e8f49502431415f3512e2b6f" +dependencies = [ + "bitflags 2.9.4", + "block2 0.5.1", + "objc2 0.5.2", + "objc2-cloud-kit 0.2.2", + "objc2-core-data 0.2.2", + "objc2-core-image 0.2.2", + "objc2-core-location", + "objc2-foundation 0.2.2", + "objc2-link-presentation", + "objc2-quartz-core 0.2.2", + "objc2-symbols", + "objc2-uniform-type-identifiers", + "objc2-user-notifications", +] + [[package]] name = "objc2-ui-kit" version = "0.3.1" @@ -6544,6 +6872,30 @@ dependencies = [ "objc2-foundation 0.3.1", ] +[[package]] +name = "objc2-uniform-type-identifiers" +version = 
"0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44fa5f9748dbfe1ca6c0b79ad20725a11eca7c2218bceb4b005cb1be26273bfe" +dependencies = [ + "block2 0.5.1", + "objc2 0.5.2", + "objc2-foundation 0.2.2", +] + +[[package]] +name = "objc2-user-notifications" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76cfcbf642358e8689af64cee815d139339f3ed8ad05103ed5eaf73db8d84cb3" +dependencies = [ + "bitflags 2.9.4", + "block2 0.5.1", + "objc2 0.5.2", + "objc2-core-location", + "objc2-foundation 0.2.2", +] + [[package]] name = "objc2-web-kit" version = "0.3.1" @@ -6553,7 +6905,7 @@ dependencies = [ "bitflags 2.9.4", "block2 0.6.1", "objc2 0.6.2", - "objc2-app-kit", + "objc2-app-kit 0.3.1", "objc2-core-foundation", "objc2-foundation 0.3.1", "objc2-javascript-core", @@ -6593,7 +6945,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8b61bebd49e5d43f5f8cc7ee2891c16e0f41ec7954d36bcb6c14c5e0de867fb" dependencies = [ - "jni", + "jni 0.21.1", "ndk 0.8.0", "ndk-context", "num-derive", @@ -6795,6 +7147,16 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c04f5d74368e4d0dfe06c45c8627c81bd7c317d52762d118fb9b3076f6420fd" +[[package]] +name = "orbclient" +version = "0.3.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5df339f526ea9a60e371768d50efc2f2508c7203290731565d1f7a6f71d21747" +dependencies = [ + "libc", + "libredox", +] + [[package]] name = "ordered-channel" version = "1.2.0" @@ -6894,6 +7256,15 @@ dependencies = [ "thiserror 2.0.16", ] +[[package]] +name = "owned_ttf_parser" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36820e9051aca1014ddc75770aab4d68bc1e9e632f0f5627c4086bc216fb583b" +dependencies = [ + "ttf-parser 0.25.1", +] + [[package]] name = "pango" version = "0.18.3" @@ -6959,7 +7330,7 @@ checksum = 
"bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.5.17", "smallvec", "windows-targets 0.52.6", ] @@ -7913,6 +8284,15 @@ dependencies = [ "rustfft", ] +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "redox_syscall" version = "0.5.17" @@ -8162,7 +8542,7 @@ dependencies = [ "js-sys", "log", "objc2 0.6.2", - "objc2-app-kit", + "objc2-app-kit 0.3.1", "objc2-core-foundation", "objc2-foundation 0.3.1", "raw-window-handle", @@ -8492,7 +8872,7 @@ dependencies = [ "futures", "inquire", "objc2 0.6.2", - "objc2-app-kit", + "objc2-app-kit 0.3.1", "objc2-foundation 0.3.1", "scap-targets", "tokio", @@ -8637,6 +9017,19 @@ dependencies = [ "untrusted", ] +[[package]] +name = "sctk-adwaita" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6277f0217056f77f1d8f49f2950ac6c278c0d607c45f5ee99328d792ede24ec" +dependencies = [ + "ab_glyph", + "log", + "memmap2", + "smithay-client-toolkit", + "tiny-skia", +] + [[package]] name = "security-framework" version = "2.11.1" @@ -9124,6 +9517,16 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +[[package]] +name = "simd_cesu8" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94f90157bb87cddf702797c5dadfa0be7d266cdf49e22da2fcaa32eff75b2c33" +dependencies = [ + "rustc_version", + "simdutf8", +] + [[package]] name = "simd_helpers" version = "0.1.0" @@ -9133,6 +9536,12 @@ dependencies = [ "quote", ] +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + [[package]] name = "simplecss" version = "0.2.2" @@ -9224,11 +9633,39 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "smithay-client-toolkit" +version = "0.19.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3457dea1f0eb631b4034d61d4d8c32074caa6cd1ab2d59f2327bd8461e2c0016" +dependencies = [ + "bitflags 2.9.4", + "calloop", + "calloop-wayland-source", + "cursor-icon", + "libc", + "log", + "memmap2", + "rustix 0.38.44", + "thiserror 1.0.69", + "wayland-backend", + "wayland-client", + "wayland-csd-frame", + "wayland-cursor", + "wayland-protocols", + "wayland-protocols-wlr", + "wayland-scanner", + "xkeysym", +] + [[package]] name = "smol_str" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd538fb6910ac1099850255cf94a94df6551fbdd602454387d0adb2d1ca6dead" +dependencies = [ + "serde", +] [[package]] name = "socket2" @@ -9267,20 +9704,30 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18051cdd562e792cad055119e0cdb2cfc137e44e3987532e0f9659a77931bb08" dependencies = [ + "as-raw-xcb-connection", "bytemuck", "cfg_aliases 0.2.1", "core-graphics 0.24.0", + "drm", + "fastrand", "foreign-types 0.5.0", "js-sys", "log", + "memmap2", "objc2 0.5.2", "objc2-foundation 0.2.2", "objc2-quartz-core 0.2.2", "raw-window-handle", - "redox_syscall", + "redox_syscall 0.5.17", + "rustix 0.38.44", + "tiny-xlib", "wasm-bindgen", + "wayland-backend", + "wayland-client", + "wayland-sys", "web-sys", "windows-sys 0.59.0", + "x11rb", ] [[package]] @@ -9748,7 +10195,7 @@ dependencies = [ "gdkwayland-sys", "gdkx11-sys", "gtk", - "jni", + "jni 0.21.1", "lazy_static", "libc", "log", @@ -9756,7 +10203,7 @@ dependencies = [ "ndk-context", "ndk-sys 0.6.0+11769913", "objc2 0.6.2", - "objc2-app-kit", + "objc2-app-kit 0.3.1", "objc2-foundation 0.3.1", "once_cell", "parking_lot", @@ -9824,15 +10271,15 @@ 
dependencies = [ "http 1.3.1", "http-range", "image 0.25.8", - "jni", + "jni 0.21.1", "libc", "log", "mime", "muda", "objc2 0.6.2", - "objc2-app-kit", + "objc2-app-kit 0.3.1", "objc2-foundation 0.3.1", - "objc2-ui-kit", + "objc2-ui-kit 0.3.1", "objc2-web-kit", "percent-encoding", "plist", @@ -10112,7 +10559,7 @@ checksum = "786156aa8e89e03d271fbd3fe642207da8e65f3c961baa9e2930f332bf80a1f5" dependencies = [ "dunce", "glob", - "objc2-app-kit", + "objc2-app-kit 0.3.1", "objc2-foundation 0.3.1", "open", "schemars 0.8.22", @@ -10295,9 +10742,9 @@ dependencies = [ "dpi", "gtk", "http 1.3.1", - "jni", + "jni 0.21.1", "objc2 0.6.2", - "objc2-ui-kit", + "objc2-ui-kit 0.3.1", "objc2-web-kit", "raw-window-handle", "serde", @@ -10318,10 +10765,10 @@ checksum = "c1fe9d48bd122ff002064e88cfcd7027090d789c4302714e68fcccba0f4b7807" dependencies = [ "gtk", "http 1.3.1", - "jni", + "jni 0.21.1", "log", "objc2 0.6.2", - "objc2-app-kit", + "objc2-app-kit 0.3.1", "objc2-foundation 0.3.1", "once_cell", "percent-encoding", @@ -10374,7 +10821,7 @@ dependencies = [ "anyhow", "brotli", "cargo_metadata", - "ctor", + "ctor 0.2.9", "dunce", "glob", "html5ever", @@ -10603,6 +11050,19 @@ dependencies = [ "strict-num", ] +[[package]] +name = "tiny-xlib" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a90a0ca3ee6a69f2ad28fd11621a4c3f03b371f366be500b64df260c4ffbafb4" +dependencies = [ + "as-raw-xcb-connection", + "ctor 0.10.1", + "libloading 0.8.8", + "pkg-config", + "tracing", +] + [[package]] name = "tinystr" version = "0.8.1" @@ -11084,7 +11544,7 @@ dependencies = [ "libappindicator", "muda", "objc2 0.6.2", - "objc2-app-kit", + "objc2-app-kit 0.3.1", "objc2-core-foundation", "objc2-core-graphics", "objc2-foundation 0.3.1", @@ -11710,6 +12170,28 @@ dependencies = [ "wayland-scanner", ] +[[package]] +name = "wayland-csd-frame" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"625c5029dbd43d25e6aa9615e88b829a5cad13b2819c4ae129fdbb7c31ab4c7e" +dependencies = [ + "bitflags 2.9.4", + "cursor-icon", + "wayland-backend", +] + +[[package]] +name = "wayland-cursor" +version = "0.31.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "447ccc440a881271b19e9989f75726d60faa09b95b0200a9b7eb5cc47c3eeb29" +dependencies = [ + "rustix 1.1.2", + "wayland-client", + "xcursor", +] + [[package]] name = "wayland-protocols" version = "0.32.9" @@ -11722,6 +12204,19 @@ dependencies = [ "wayland-scanner", ] +[[package]] +name = "wayland-protocols-plasma" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a07a14257c077ab3279987c4f8bb987851bf57081b93710381daea94f2c2c032" +dependencies = [ + "bitflags 2.9.4", + "wayland-backend", + "wayland-client", + "wayland-protocols", + "wayland-scanner", +] + [[package]] name = "wayland-protocols-wlr" version = "0.3.9" @@ -11754,6 +12249,7 @@ checksum = "34949b42822155826b41db8e5d0c1be3a2bd296c747577a43a3e6daefc296142" dependencies = [ "dlib", "log", + "once_cell", "pkg-config", ] @@ -12112,7 +12608,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9bec5a31f3f9362f2258fd0e9c9dd61a9ca432e7306cc78c444258f0dce9a9c" dependencies = [ "objc2 0.6.2", - "objc2-app-kit", + "objc2-app-kit 0.3.1", "objc2-core-foundation", "objc2-foundation 0.3.1", "raw-window-handle", @@ -12763,6 +13259,58 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" +[[package]] +name = "winit" +version = "0.30.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6755fa58a9f8350bd1e472d4c3fcc25f824ec358933bba33306d0b63df5978d" +dependencies = [ + "ahash", + "android-activity", + "atomic-waker", + "bitflags 2.9.4", + "block2 0.5.1", + "bytemuck", + "calloop", + "cfg_aliases 0.2.1", + "concurrent-queue", + 
"core-foundation 0.9.4", + "core-graphics 0.23.2", + "cursor-icon", + "dpi", + "js-sys", + "libc", + "memmap2", + "ndk 0.9.0", + "objc2 0.5.2", + "objc2-app-kit 0.2.2", + "objc2-foundation 0.2.2", + "objc2-ui-kit 0.2.2", + "orbclient", + "percent-encoding", + "pin-project", + "raw-window-handle", + "redox_syscall 0.4.1", + "rustix 0.38.44", + "sctk-adwaita", + "smithay-client-toolkit", + "smol_str", + "tracing", + "unicode-segmentation", + "wasm-bindgen", + "wasm-bindgen-futures", + "wayland-backend", + "wayland-client", + "wayland-protocols", + "wayland-protocols-plasma", + "web-sys", + "web-time", + "windows-sys 0.52.0", + "x11-dl", + "x11rb", + "xkbcommon-dl", +] + [[package]] name = "winnow" version = "0.5.40" @@ -12925,15 +13473,15 @@ dependencies = [ "html5ever", "http 1.3.1", "javascriptcore-rs", - "jni", + "jni 0.21.1", "kuchikiki", "libc", "ndk 0.9.0", "objc2 0.6.2", - "objc2-app-kit", + "objc2-app-kit 0.3.1", "objc2-core-foundation", "objc2-foundation 0.3.1", - "objc2-ui-kit", + "objc2-ui-kit 0.3.1", "objc2-web-kit", "once_cell", "percent-encoding", @@ -12979,7 +13527,11 @@ version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9993aa5be5a26815fe2c3eacfc1fde061fc1a1f094bf1ad2a18bf9c495dd7414" dependencies = [ + "as-raw-xcb-connection", "gethostname", + "libc", + "libloading 0.8.8", + "once_cell", "rustix 1.1.2", "x11rb-protocol", ] @@ -13000,6 +13552,25 @@ dependencies = [ "rustix 1.1.2", ] +[[package]] +name = "xcursor" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec9e4a500ca8864c5b47b8b482a73d62e4237670e5b5f1d6b9e3cae50f28f2b" + +[[package]] +name = "xkbcommon-dl" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d039de8032a9a8856a6be89cea3e5d12fdd82306ab7c94d74e6deab2460651c5" +dependencies = [ + "bitflags 2.9.4", + "dlib", + "log", + "once_cell", + "xkeysym", +] + [[package]] name = "xkeysym" version = "0.2.1" 
diff --git a/apps/cli/Cargo.toml b/apps/cli/Cargo.toml index 9d7f7d024b..082df3cb90 100644 --- a/apps/cli/Cargo.toml +++ b/apps/cli/Cargo.toml @@ -8,7 +8,11 @@ clap = { version = "4.5.23", features = ["derive"] } clap_complete = "4.5.38" cap-project = { path = "../../crates/project" } cap-recording = { path = "../../crates/recording" } +cap-editor = { path = "../../crates/editor" } cap-export = { path = "../../crates/export" } +cap-media-info = { path = "../../crates/media-info" } +cap-timestamp = { path = "../../crates/timestamp" } +relative-path = "1.9.3" cap-automation = { path = "../../crates/automation" } cap-camera = { path = "../../crates/camera" } cap-cli-install = { path = "../../crates/cli-install" } @@ -28,6 +32,9 @@ image = "0.25.2" chrono = "0.4.31" tracing.workspace = true tracing-subscriber = "0.3.19" +cpal = { workspace = true } +winit = "0.30" +softbuffer = "0.4" workspace-hack = { version = "0.1", path = "../../crates/workspace-hack" } [target.'cfg(unix)'.dependencies] diff --git a/apps/cli/src/main.rs b/apps/cli/src/main.rs index 0782da5b36..af1f9e4f24 100644 --- a/apps/cli/src/main.rs +++ b/apps/cli/src/main.rs @@ -7,6 +7,7 @@ mod project; mod record; mod recordings; mod screenshot; +mod selftest; mod session; mod targets; mod update; @@ -196,6 +197,8 @@ enum Commands { Targets(TargetsArgs), /// Report CLI environment and capture-readiness diagnostics Doctor(FormatArgs), + /// Run end-to-end diagnostics that verify Cap works on this machine + Selftest(selftest::SelftestArgs), /// Print CLI version and execution context Version(FormatArgs), /// Inspect or manage the desktop-installed `cap` shim @@ -210,7 +213,12 @@ enum Commands { impl Commands { fn exit_after_success(&self) -> bool { - matches!(self, Self::Export(_) | Self::ExportPreview(_)) + // Selftest runs an export, so it shares export's teardown-crash + // avoidance on Windows. 
+ matches!( + self, + Self::Export(_) | Self::ExportPreview(_) | Self::Selftest(_) + ) } } @@ -425,6 +433,12 @@ fn main() { let exit_after_success = cli.exit_after_success(); + // The self-test opens a window, which AppKit requires to live on the real + // process main thread — so the main thread stays here to serve pattern + // requests while the command itself runs on the runtime thread. + let pattern_rx = matches!(cli.command, Some(Commands::Selftest(_))) + .then(selftest::pattern::install_main_thread_runner); + // Windows export exercises deep WGPU/MediaFoundation/FFmpeg stacks. Running the CLI runtime // on an explicitly large stack is what stopped the export worker from overflowing before // the first frame; keep the sidecar and desktop runtimes in sync. @@ -432,6 +446,17 @@ fn main() { .name("cap-cli-runtime".to_string()) .stack_size(TOKIO_WORKER_THREAD_STACK_SIZE) .spawn(move || -> Result<(), String> { + // serve_main_thread blocks the main thread until this shutdown + // runs; a drop guard keeps that true on every exit path, including + // the runtime failing to build and panics unwinding out of run(). + struct PatternShutdown; + impl Drop for PatternShutdown { + fn drop(&mut self) { + selftest::pattern::shutdown_main_thread_runner(); + } + } + let _pattern_shutdown = PatternShutdown; + let runtime = tokio::runtime::Builder::new_multi_thread() .enable_all() .thread_stack_size(TOKIO_WORKER_THREAD_STACK_SIZE) @@ -451,17 +476,25 @@ fn main() { result }); - // Surface failures as a clean, unquoted `error: ...` line on stderr (the default - // `Result`-returning main prints `Error: "debug-quoted"`, which is noisy for humans and brittle - // for agents scraping stderr). clap already exits 2 for usage/parse errors before we get here. - let outcome = match runtime_thread { - Ok(handle) => handle.join(), + // A failed spawn means nothing will ever call shutdown_main_thread_runner, so the + // pattern server below would block forever — bail out before serving. 
+ let runtime_thread = match runtime_thread { + Ok(handle) => handle, Err(e) => { eprintln!("error: Failed to spawn CLI runtime thread: {e}"); std::process::exit(1); } }; + if let Some(rx) = pattern_rx { + selftest::pattern::serve_main_thread(rx); + } + + // Surface failures as a clean, unquoted `error: ...` line on stderr (the default + // `Result`-returning main prints `Error: "debug-quoted"`, which is noisy for humans and brittle + // for agents scraping stderr). clap already exits 2 for usage/parse errors before we get here. + let outcome = runtime_thread.join(); + match outcome { Ok(Ok(())) => {} Ok(Err(message)) => { @@ -484,6 +517,7 @@ async fn run(cli: Cli) -> Result<(), String> { match command { Commands::Export(e) => e.run(json).await, Commands::ExportPreview(e) => e.run().await, + Commands::Selftest(args) => args.run(json).await, Commands::Project(args) => args.run(json), Commands::Record(RecordArgs { command, args }) => match command { Some(RecordCommands::Start(args)) => args.run(json).await, diff --git a/apps/cli/src/selftest/measure.rs b/apps/cli/src/selftest/measure.rs new file mode 100644 index 0000000000..defc65cc25 --- /dev/null +++ b/apps/cli/src/selftest/measure.rs @@ -0,0 +1,528 @@ +//! Content-level A/V sync measurement for the self-test. +//! +//! Extracts flash onsets from a video track (mean luma over the frame +//! center with adaptive hysteresis) and beep onsets from an audio track +//! (RMS envelope with an adaptive threshold), pairs them, and computes +//! robust offset and drift statistics. Positive offset means audio is late. + +use std::path::Path; + +use ffmpeg::{codec, format, frame, media}; +use serde::Serialize; + +/// Fraction of the frame (centered) used for luma measurement, avoiding +/// menu bars, notches and window chrome at the edges. +const CENTER_CROP: f64 = 0.5; +/// Minimum spacing between onsets, guarding against double-triggers within +/// a single tone burst. 
+const MIN_ONSET_GAP_SECS: f64 = 0.5; + +pub fn video_flash_onsets(path: &Path) -> Result, String> { + let mut ictx = + format::input(&path).map_err(|e| format!("open video {}: {e}", path.display()))?; + let stream = ictx + .streams() + .best(media::Type::Video) + .ok_or("no video stream")?; + let stream_index = stream.index(); + let time_base = stream.time_base(); + let tb = f64::from(time_base.numerator()) / f64::from(time_base.denominator()); + + let ctx = codec::context::Context::from_parameters(stream.parameters()) + .map_err(|e| format!("video codec params: {e}"))?; + let mut decoder = ctx + .decoder() + .video() + .map_err(|e| format!("video decoder: {e}"))?; + + let mut samples: Vec<(f64, f64)> = Vec::new(); + let mut take_frame = |decoded: &frame::Video| { + let Some(pts) = decoded.pts() else { return }; + let t = pts as f64 * tb; + if let Some(luma) = mean_center_luma(decoded) { + samples.push((t, luma)); + } + }; + + let mut decoded = frame::Video::empty(); + for (s, packet) in ictx.packets() { + if s.index() != stream_index { + continue; + } + if decoder.send_packet(&packet).is_ok() { + while decoder.receive_frame(&mut decoded).is_ok() { + take_frame(&decoded); + } + } + } + let _ = decoder.send_eof(); + while decoder.receive_frame(&mut decoded).is_ok() { + take_frame(&decoded); + } + + flash_onsets_from_luma(&samples) +} + +/// Flash onsets from a time-ordered `(seconds, mean luma)` series, shared by +/// the file analyzers and the playback harness (which samples luma at the +/// renderer's presentation boundary). +pub fn flash_onsets_from_luma(samples: &[(f64, f64)]) -> Result, String> { + if samples.len() < 10 { + return Err(format!( + "only {} video frames decoded; recording too short to analyze", + samples.len() + )); + } + + // Adaptive hysteresis from the observed luma range so exact black/white + // levels (color range, HDR tone mapping) don't matter. The high anchor is + // the peak, not a percentile: flashes are a small duty cycle of frames. 
+ let mut lumas: Vec = samples.iter().map(|(_, l)| *l).collect(); + lumas.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); + let lo = percentile_sorted(&lumas, 0.10); + let hi = percentile_sorted(&lumas, 0.998); + if hi - lo < 40.0 { + return Err(format!( + "test pattern not visible in the recording (luma range {lo:.0}..{hi:.0}); \ + the test window may have been covered or moved" + )); + } + let on = lo + 0.7 * (hi - lo); + let off = lo + 0.3 * (hi - lo); + + let mut onsets = Vec::new(); + let mut armed = true; + for (t, luma) in samples { + if *luma >= on && armed { + onsets.push(*t); + armed = false; + } else if *luma <= off { + armed = true; + } + } + Ok(onsets) +} + +fn mean_center_luma(frame: &frame::Video) -> Option { + // Plane 0 is luma for all YUV formats the decoders produce. Reject + // non-planar/packed formats rather than misreading them. + use ffmpeg::format::Pixel; + if !matches!( + frame.format(), + Pixel::YUV420P | Pixel::NV12 | Pixel::YUV422P | Pixel::YUV444P | Pixel::YUVJ420P + ) { + return None; + } + let width = frame.width() as usize; + let height = frame.height() as usize; + let stride = frame.stride(0); + let data = frame.data(0); + + let x0 = (width as f64 * (0.5 - CENTER_CROP / 2.0)) as usize; + let x1 = (width as f64 * (0.5 + CENTER_CROP / 2.0)) as usize; + let y0 = (height as f64 * (0.5 - CENTER_CROP / 2.0)) as usize; + let y1 = (height as f64 * (0.5 + CENTER_CROP / 2.0)) as usize; + + let mut sum = 0u64; + let mut count = 0u64; + let mut y = y0; + while y < y1.min(height) { + let row = &data[y * stride..y * stride + width]; + let mut x = x0; + while x < x1.min(width) { + sum += u64::from(row[x]); + count += 1; + x += 4; + } + y += 4; + } + (count > 0).then(|| sum as f64 / count as f64) +} + +pub struct AudioOnsets { + pub onsets: Vec, + /// Ratio of tone peak to noise floor; low values mean the beep was not + /// reliably captured (e.g. muted output). 
+ pub snr: f64, +} + +pub fn audio_beep_onsets(path: &Path) -> Result { + let mut ictx = + format::input(&path).map_err(|e| format!("open audio {}: {e}", path.display()))?; + let stream = ictx + .streams() + .best(media::Type::Audio) + .ok_or("no audio stream")?; + let stream_index = stream.index(); + + let ctx = codec::context::Context::from_parameters(stream.parameters()) + .map_err(|e| format!("audio codec params: {e}"))?; + let mut decoder = ctx + .decoder() + .audio() + .map_err(|e| format!("audio decoder: {e}"))?; + + let mut mono: Vec = Vec::new(); + let mut sample_rate = 0u32; + let mut take_frame = |decoded: &frame::Audio| { + sample_rate = decoded.rate(); + append_mono(decoded, &mut mono); + }; + + let mut decoded = frame::Audio::empty(); + for (s, packet) in ictx.packets() { + if s.index() != stream_index { + continue; + } + if decoder.send_packet(&packet).is_ok() { + while decoder.receive_frame(&mut decoded).is_ok() { + take_frame(&decoded); + } + } + } + let _ = decoder.send_eof(); + while decoder.receive_frame(&mut decoded).is_ok() { + take_frame(&decoded); + } + + beep_onsets_from_mono(mono, sample_rate) +} + +/// Beep onsets from a mono sample stream, shared by the file analyzers and +/// the playback harness (which taps samples at the device handoff). +pub fn beep_onsets_from_mono(mut mono: Vec, sample_rate: u32) -> Result { + if sample_rate == 0 || mono.len() < sample_rate as usize { + return Err("audio track too short to analyze".to_string()); + } + + bandpass_1khz_in_place(&mut mono, sample_rate); + + // 1 ms RMS envelope. 
+ let chunk = (sample_rate / 1000).max(1) as usize; + let mut env: Vec = mono + .chunks(chunk) + .map(|c| (c.iter().map(|s| s * s).sum::() / c.len() as f32).sqrt()) + .collect(); + let chunk_secs = chunk as f64 / f64::from(sample_rate); + + let mut sorted = env.clone(); + sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); + let noise_floor = f64::from(percentile_sorted_f32(&sorted, 0.20)).max(1e-6); + let peak = f64::from(percentile_sorted_f32(&sorted, 0.999)); + let snr = peak / noise_floor; + // Edge-triggered hysteresis: after bandpassing, the tone bursts dominate + // the envelope peak while background audio (music, speech) sits well + // below it. A rising edge through the high threshold marks an onset; the + // detector re-arms only after the envelope falls back through the low + // threshold, so sustained background level cannot mask or spam onsets. + let hi = ((noise_floor * 8.0).max(peak * 0.35)) as f32; + let lo = hi * 0.5; + + let mut onsets = Vec::new(); + let mut armed = true; + let mut last = f64::NEG_INFINITY; + for (i, value) in env.drain(..).enumerate() { + let t = i as f64 * chunk_secs; + if value >= hi && armed && t - last >= MIN_ONSET_GAP_SECS { + onsets.push(t); + last = t; + armed = false; + } else if value < lo { + armed = true; + } + } + + Ok(AudioOnsets { onsets, snr }) +} + +/// Second-order (RBJ) bandpass centered on the 1 kHz test tone. Real +/// machines play music/speech during a self-test; narrowband filtering lets +/// the constant-frequency beep dominate the envelope regardless. 
+fn bandpass_1khz_in_place(samples: &mut [f32], sample_rate: u32) { + let f0 = 1000.0f64; + let q = 8.0f64; + let w0 = 2.0 * std::f64::consts::PI * f0 / f64::from(sample_rate.max(2001)); + let alpha = w0.sin() / (2.0 * q); + let cos_w0 = w0.cos(); + let a0 = 1.0 + alpha; + let b0 = (alpha / a0) as f32; + let b2 = (-alpha / a0) as f32; + let a1 = (-2.0 * cos_w0 / a0) as f32; + let a2 = ((1.0 - alpha) / a0) as f32; + + let (mut x1, mut x2, mut y1, mut y2) = (0.0f32, 0.0f32, 0.0f32, 0.0f32); + for sample in samples.iter_mut() { + let x0 = *sample; + let y0 = b0 * x0 + b2 * x2 - a1 * y1 - a2 * y2; + x2 = x1; + x1 = x0; + y2 = y1; + y1 = y0; + *sample = y0; + } +} + +fn append_mono(decoded: &frame::Audio, out: &mut Vec) { + use ffmpeg::format::Sample; + use ffmpeg::format::sample::Type; + let samples = decoded.samples(); + let channels = decoded.channels() as usize; + if samples == 0 || channels == 0 { + return; + } + match decoded.format() { + Sample::F32(Type::Planar) => { + let planes: Vec<&[f32]> = (0..channels.min(decoded.planes())) + .map(|p| &decoded.plane::(p)[..samples]) + .collect(); + for i in 0..samples { + let sum: f32 = planes.iter().map(|p| p[i]).sum(); + out.push(sum / planes.len() as f32); + } + } + Sample::F32(Type::Packed) => { + let data = &decoded.plane::(0)[..samples * channels]; + for frame in data.chunks_exact(channels) { + out.push(frame.iter().sum::() / channels as f32); + } + } + Sample::I16(Type::Planar) => { + let planes: Vec<&[i16]> = (0..channels.min(decoded.planes())) + .map(|p| &decoded.plane::(p)[..samples]) + .collect(); + for i in 0..samples { + let sum: f32 = planes.iter().map(|p| f32::from(p[i])).sum(); + out.push(sum / (planes.len() as f32 * f32::from(i16::MAX))); + } + } + Sample::I16(Type::Packed) => { + let data = &decoded.plane::(0)[..samples * channels]; + for frame in data.chunks_exact(channels) { + let sum: f32 = frame.iter().map(|s| f32::from(*s)).sum(); + out.push(sum / (channels as f32 * f32::from(i16::MAX))); + } + } + _ 
=> { + // Unknown format; skip frame rather than misread it. + } + } +} + +#[derive(Debug, Clone, Serialize)] +pub struct SyncMeasurement { + pub paired_events: usize, + pub inlier_events: usize, + pub median_offset_ms: f64, + pub mad_ms: f64, + pub drift_ms_per_min: f64, + /// Drift accumulated across the observed window (slope × span). More + /// robust to slope noise on short runs than the per-minute rate. + pub total_drift_ms: f64, + pub span_secs: f64, + pub min_offset_ms: f64, + pub max_offset_ms: f64, + /// (flash time, offset ms) per inlier event. + pub events: Vec<(f64, f64)>, +} + +/// Pairs flash onsets with the nearest beep onset (both on the same clock) +/// and computes robust statistics. The first event after settle is dropped: +/// window creation/compositor transitions make it unrepresentative. +pub fn measure_sync( + flash_onsets: &[f64], + beep_onsets: &[f64], + min_events: usize, +) -> Result { + if flash_onsets.len() < 2 { + return Err(format!( + "only {} flash events detected in the recording", + flash_onsets.len() + )); + } + if beep_onsets.is_empty() { + return Err("no beeps detected in the recording".to_string()); + } + + let mut pairs: Vec<(f64, f64)> = Vec::new(); + for flash in flash_onsets { + let Some(beep) = beep_onsets + .iter() + .min_by(|a, b| { + (*a - flash) + .abs() + .partial_cmp(&(*b - flash).abs()) + .unwrap_or(std::cmp::Ordering::Equal) + }) + .copied() + else { + continue; + }; + let offset = beep - flash; + if offset.abs() <= 0.9 { + pairs.push((*flash, offset * 1000.0)); + } + } + // Drop the first event: window-creation transitions make it noisy. 
+ if pairs.len() > min_events { + pairs.remove(0); + } + if pairs.len() < min_events { + return Err(format!( + "only {} usable flash/beep pairs (need {min_events}; detected {} flashes, {} beeps); \ + the test window may have been covered or the beeps too quiet", + pairs.len(), + flash_onsets.len(), + beep_onsets.len() + )); + } + + // Anchor on the densest offset cluster: mispaired events (a flash + // matching the wrong beep because the true one was masked) land seconds + // away, and with enough of them the median itself becomes junk. The true + // pairs all share one physical offset, so they form the tightest cluster. + let cluster_center = { + let mut best_center = 0.0; + let mut best_count = 0usize; + for (_, candidate) in &pairs { + let count = pairs + .iter() + .filter(|(_, o)| (o - candidate).abs() <= 60.0) + .count(); + if count > best_count { + best_count = count; + best_center = *candidate; + } + } + best_center + }; + + let mut inliers: Vec<(f64, f64)> = pairs + .iter() + .filter(|(_, o)| (o - cluster_center).abs() <= 90.0) + .copied() + .collect(); + if inliers.len() < min_events { + // No dominant cluster: report statistics over every pair and let the + // caller's thresholds judge them, rather than discarding the run. + inliers = pairs.clone(); + } + + let mut offsets: Vec = inliers.iter().map(|(_, o)| *o).collect(); + offsets.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); + let median = percentile_sorted(&offsets, 0.5); + let mut deviations: Vec = offsets.iter().map(|o| (o - median).abs()).collect(); + deviations.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); + let mad = percentile_sorted(&deviations, 0.5); + + // Least-squares slope of offset over time = drift. 
+ let n = inliers.len() as f64; + let mean_t = inliers.iter().map(|(t, _)| t).sum::() / n; + let mean_o = inliers.iter().map(|(_, o)| o).sum::() / n; + let mut num = 0.0; + let mut den = 0.0; + for (t, o) in &inliers { + num += (t - mean_t) * (o - mean_o); + den += (t - mean_t) * (t - mean_t); + } + let slope_ms_per_sec = if den > 0.0 { num / den } else { 0.0 }; + + let min = inliers + .iter() + .map(|(_, o)| *o) + .fold(f64::INFINITY, f64::min); + let max = inliers + .iter() + .map(|(_, o)| *o) + .fold(f64::NEG_INFINITY, f64::max); + let span_secs = inliers.last().map(|(t, _)| *t).unwrap_or(0.0) + - inliers.first().map(|(t, _)| *t).unwrap_or(0.0); + + Ok(SyncMeasurement { + paired_events: pairs.len(), + inlier_events: inliers.len(), + median_offset_ms: median, + mad_ms: mad, + drift_ms_per_min: slope_ms_per_sec * 60.0, + total_drift_ms: slope_ms_per_sec * span_secs, + span_secs, + min_offset_ms: min, + max_offset_ms: max, + events: inliers, + }) +} + +fn percentile_sorted(sorted: &[f64], q: f64) -> f64 { + if sorted.is_empty() { + return 0.0; + } + let idx = ((sorted.len() - 1) as f64 * q).round() as usize; + sorted[idx.min(sorted.len() - 1)] +} + +fn percentile_sorted_f32(sorted: &[f32], q: f64) -> f32 { + if sorted.is_empty() { + return 0.0; + } + let idx = ((sorted.len() - 1) as f64 * q).round() as usize; + sorted[idx.min(sorted.len() - 1)] +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn measure_sync_detects_constant_offset() { + let flashes: Vec = (1..12).map(|k| k as f64 * 2.0).collect(); + let beeps: Vec = flashes.iter().map(|f| f + 0.030).collect(); + let m = measure_sync(&flashes, &beeps, 6).unwrap(); + assert!((m.median_offset_ms - 30.0).abs() < 1.0); + assert!(m.total_drift_ms.abs() < 1.0); + } + + #[test] + fn measure_sync_detects_drift() { + // 5 ms/s of drift = 300 ms/min. 
+ let flashes: Vec = (1..12).map(|k| k as f64 * 2.0).collect(); + let beeps: Vec = flashes.iter().map(|f| f + 0.005 * f).collect(); + let m = measure_sync(&flashes, &beeps, 6).unwrap(); + assert!( + (m.drift_ms_per_min - 300.0).abs() < 30.0, + "drift {}", + m.drift_ms_per_min + ); + assert!(m.total_drift_ms > 50.0, "total {}", m.total_drift_ms); + } + + #[test] + fn measure_sync_rejects_outliers() { + let mut flashes: Vec = (1..12).map(|k| k as f64 * 2.0).collect(); + let beeps: Vec = flashes.iter().map(|f| f + 0.020).collect(); + // A wild first event, like a window-transition artifact. + flashes[0] -= 0.6; + let m = measure_sync(&flashes, &beeps, 6).unwrap(); + assert!((m.median_offset_ms - 20.0).abs() < 2.0); + assert!(m.inlier_events >= 9); + } + + #[test] + fn measure_sync_scattered_offsets_still_report_stats() { + // Half the events displaced by 60ms: no clean inlier set exists, but + // the caller still needs numbers (large MAD) to fail on, not an error. + let flashes: Vec = (1..12).map(|k| k as f64 * 2.0).collect(); + let beeps: Vec = flashes + .iter() + .enumerate() + .map(|(i, f)| f + if i % 2 == 0 { 0.010 } else { 0.070 }) + .collect(); + let m = measure_sync(&flashes, &beeps, 6).unwrap(); + assert!(m.mad_ms >= 20.0, "mad {}", m.mad_ms); + } + + #[test] + fn measure_sync_fails_with_too_few_events() { + let flashes = vec![2.0, 4.0]; + let beeps = vec![2.02, 4.02]; + assert!(measure_sync(&flashes, &beeps, 6).is_err()); + } +} diff --git a/apps/cli/src/selftest/mod.rs b/apps/cli/src/selftest/mod.rs new file mode 100644 index 0000000000..4edb0b19ce --- /dev/null +++ b/apps/cli/src/selftest/mod.rs @@ -0,0 +1,713 @@ +//! `cap selftest` — diagnostics that verify Cap works correctly on this +//! machine, starting with an end-to-end A/V sync test: record a known +//! flash+beep pattern through the real capture pipeline, then measure the +//! flash-to-beep offset in both the raw recording and an export of it. 
+ +pub mod measure; +pub mod pattern; +pub mod playback; + +use std::{ + path::{Path, PathBuf}, + time::Duration, +}; + +use cap_project::{RecordingMeta, RecordingMetaInner, StudioRecordingMeta}; +use clap::{Args, Subcommand}; +use serde::Serialize; + +use measure::SyncMeasurement; +use pattern::PatternSpec; + +const DEFAULT_PATTERN_SECS: u64 = 20; +const EVENT_PERIOD: Duration = Duration::from_secs(2); +const FLASH_LEN: Duration = Duration::from_millis(120); +const SETTLE: Duration = Duration::from_secs(2); +const MIN_EVENTS: usize = 6; + +const PASS_OFFSET_MS: f64 = 80.0; +const PASS_TOTAL_DRIFT_MS: f64 = 20.0; +const PASS_MAD_MS: f64 = 20.0; +const WARN_OFFSET_MS: f64 = 120.0; +const WARN_TOTAL_DRIFT_MS: f64 = 40.0; +const WARN_MAD_MS: f64 = 40.0; +const MAX_RAW_EXPORT_DELTA_MS: f64 = 25.0; +const MIN_BEEP_SNR: f64 = 8.0; +/// Extra offset budget for the acoustic microphone path: sound flight time +/// plus input device latency. +const MIC_EXTRA_OFFSET_MS: f64 = 60.0; +/// Acoustic pickup competes with room noise; a lower SNR still yields sharp +/// onsets for a 1 kHz tone. 
+const MIN_MIC_SNR: f64 = 4.0; + +#[derive(Args)] +pub struct SelftestArgs { + #[command(subcommand)] + pub command: SelftestCommands, +} + +#[derive(Subcommand)] +pub enum SelftestCommands { + /// Record a test pattern and verify audio/video sync end-to-end + #[command(name = "av-sync")] + AvSync(AvSyncArgs), + /// Verify the editor playback path preserves audio/video sync + #[command(name = "playback")] + Playback(playback::PlaybackArgs), + /// Internal: measure flash/beep onsets in an existing recording or export + #[command(name = "analyze", hide = true)] + Analyze(AnalyzeArgs), +} + +#[derive(Args)] +pub struct AnalyzeArgs { + /// Video file (or file containing both tracks) + video: PathBuf, + /// Separate audio file (defaults to the video file's audio track) + #[arg(long)] + audio: Option, + /// Added to flash times (track start offset) + #[arg(long, default_value_t = 0.0)] + voffset: f64, + /// Added to beep times (track start offset) + #[arg(long, default_value_t = 0.0)] + aoffset: f64, +} + +#[derive(Args)] +pub struct AvSyncArgs { + /// Seconds of test pattern to record (longer = more sensitive to drift) + #[arg(long, default_value_t = DEFAULT_PATTERN_SECS)] + duration: u64, + /// Maximum fps to record at (defaults to the standard recording fps) + #[arg(long)] + fps: Option, + /// Also record a microphone and verify its sync acoustically (the mic + /// must be able to hear the test beeps through your speakers) + #[arg(long)] + mic: bool, + /// Microphone device name to use with --mic (defaults to the default mic) + #[arg(long)] + mic_name: Option, + /// Skip exporting the recording (tests only the recording stage) + #[arg(long)] + skip_export: bool, + /// Keep the recorded project on disk for inspection + #[arg(long)] + keep: bool, +} + +#[derive(Serialize, Clone, Copy, PartialEq, Eq, Debug)] +#[serde(rename_all = "lowercase")] +enum Verdict { + Pass, + Warn, + Fail, + Inconclusive, +} + +#[derive(Serialize)] +struct Thresholds { + pass_offset_ms: f64, + 
pass_total_drift_ms: f64, + warn_offset_ms: f64, + warn_total_drift_ms: f64, + max_raw_export_delta_ms: f64, +} + +#[derive(Serialize)] +struct Diagnostics { + beep_snr: Option, + audio_output_latency_ms: Option, + /// Median (beep DAC time − flash present time) at emission; the part of + /// the measured offset contributed by the test rig itself. + emission_skew_ms: Option, + project_path: Option, +} + +#[derive(Serialize)] +struct AvSyncReport { + verdict: Verdict, + summary: String, + recording: Option, + microphone: Option, + export: Option, + thresholds: Thresholds, + diagnostics: Diagnostics, +} + +impl SelftestArgs { + pub async fn run(self, json: bool) -> Result<(), String> { + match self.command { + SelftestCommands::AvSync(args) => run_av_sync(args, json).await, + SelftestCommands::Playback(args) => playback::run_playback(args, json).await, + SelftestCommands::Analyze(args) => run_analyze(args), + } + } +} + +fn run_analyze(args: AnalyzeArgs) -> Result<(), String> { + let flashes: Vec = measure::video_flash_onsets(&args.video)? + .into_iter() + .map(|t| t + args.voffset) + .collect(); + let audio_path = args.audio.as_ref().unwrap_or(&args.video); + let audio = measure::audio_beep_onsets(audio_path)?; + let beeps: Vec = audio.onsets.iter().map(|t| t + args.aoffset).collect(); + eprintln!( + "flashes: {} beeps: {} (snr {:.1})", + flashes.len(), + beeps.len(), + audio.snr + ); + let measurement = measure::measure_sync(&flashes, &beeps, MIN_EVENTS)?; + println!( + "{}", + serde_json::to_string_pretty(&measurement) + .map_err(|e| format!("failed to serialize: {e}"))? + ); + Ok(()) +} + +fn progress(json: bool, msg: &str) { + if !json { + eprintln!("{msg}"); + } +} + +async fn run_av_sync(args: AvSyncArgs, json: bool) -> Result<(), String> { + // ffmpeg's own stderr chatter (muxer segment writes, codec notes) drowns + // the progress output; measurement errors are surfaced through Results. 
+ ffmpeg::util::log::set_level(ffmpeg::util::log::Level::Quiet); + + // The floor guarantees enough events for measure_sync's minimum after the + // first event is dropped: 14s -> 7 events -> 6 usable pairs. + let pattern_secs = args.duration.clamp(14, 120); + let events = (pattern_secs / EVENT_PERIOD.as_secs()).max(3) as u32; + let spec = PatternSpec { + settle: SETTLE, + events, + period: EVENT_PERIOD, + flash_len: FLASH_LEN, + }; + + // Rough wall-clock estimate: settle + pattern + finalize + analysis (+ export). + let estimate_secs = spec.total_runtime().as_secs() + 4 + if args.skip_export { 0 } else { 6 }; + + progress(json, "Cap A/V sync self-test"); + progress( + json, + &format!( + "This will take about {} seconds.", + (estimate_secs as f64 / 10.0).round() as u64 * 10 + ), + ); + progress( + json, + "A black window will appear with brief white flashes and short beeps.\n\ + Leave the window visible and make sure output volume is not muted.\n", + ); + + let project_path = + std::env::temp_dir().join(format!("cap-selftest-{}.cap", uuid::Uuid::new_v4())); + + progress( + json, + &format!("[1/4] Recording test pattern ({pattern_secs}s)..."), + ); + let mic_name = + if args.mic || args.mic_name.is_some() { + match args.mic_name.clone().or_else(|| { + cap_recording::MicrophoneFeed::default_device().map(|(label, _, _)| label) + }) { + Some(label) => { + progress(json, &format!("Including microphone: {label}")); + Some(label) + } + None => return Err("no microphone available for --mic".to_string()), + } + } else { + None + }; + + let handle = start_recording(&project_path, args.fps, mic_name.clone()).await?; + + // Give capture a moment to deliver first frames before the pattern starts. 
+ tokio::time::sleep(Duration::from_millis(500)).await; + + let pattern_result = pattern::request_pattern(spec).await; + + let report = match pattern_result { + Ok(report) => report, + Err(e) => { + let _ = handle.stop().await; + let _ = std::fs::remove_dir_all(&project_path); + if e == "cancelled" { + return Err("self-test cancelled".to_string()); + } + return Err(format!("test pattern failed: {e}")); + } + }; + + // Let the tail of the last beep land in the recording. + tokio::time::sleep(Duration::from_secs(1)).await; + + progress(json, "[2/4] Finalizing recording..."); + let completed = handle + .stop() + .await + .map_err(|e| format!("failed to stop recording: {e}"))?; + let project_path = completed.project_path.clone(); + + // Fragmented recordings need the shared remux step before their segment + // files are directly readable (the same step the desktop app runs). + { + let project_path = project_path.clone(); + tokio::task::spawn_blocking(move || { + cap_recording::recovery::RecoveryManager::remux_if_needed(&project_path) + }) + .await + .map_err(|e| format!("remux task join error: {e}"))? 
+ .map_err(|e| format!("failed to finalize recording segments: {e}"))?; + } + + let emission_skew_ms = median_emission_skew_ms(&report); + + progress(json, "[3/4] Analyzing recording..."); + let raw = analyze_raw(&project_path); + let mic = mic_name.is_some().then(|| analyze_mic(&project_path)); + + let export = if args.skip_export { + Ok(None) + } else { + progress(json, "[4/4] Exporting and verifying the export..."); + match crate::export::export_project_default(project_path.clone()).await { + Ok(output) => analyze_export(&output).map(Some), + Err(e) => Err(format!("export failed: {e}")), + } + }; + + let (verdict, summary, raw_m, mic_m, export_m, snr) = evaluate(raw, mic, export); + + let keep = args.keep || verdict != Verdict::Pass; + if keep { + progress( + json, + &format!("Recorded project kept at {}", project_path.display()), + ); + } else { + let _ = std::fs::remove_dir_all(&project_path); + } + + let report = AvSyncReport { + verdict, + summary: summary.clone(), + recording: raw_m, + microphone: mic_m, + export: export_m, + thresholds: Thresholds { + pass_offset_ms: PASS_OFFSET_MS, + pass_total_drift_ms: PASS_TOTAL_DRIFT_MS, + warn_offset_ms: WARN_OFFSET_MS, + warn_total_drift_ms: WARN_TOTAL_DRIFT_MS, + max_raw_export_delta_ms: MAX_RAW_EXPORT_DELTA_MS, + }, + diagnostics: Diagnostics { + beep_snr: snr, + audio_output_latency_ms: report.audio_latency_ms, + emission_skew_ms, + project_path: keep.then(|| project_path.display().to_string()), + }, + }; + + if json { + println!( + "{}", + serde_json::to_string_pretty(&report) + .map_err(|e| format!("failed to serialize report: {e}"))? 
+ ); + } else { + print_human(&report); + } + + match verdict { + Verdict::Pass | Verdict::Warn => Ok(()), + Verdict::Fail => Err(format!("A/V sync check failed: {summary}")), + Verdict::Inconclusive => Err(format!("A/V sync check inconclusive: {summary}")), + } +} + +fn print_human(report: &AvSyncReport) { + println!(); + if let Some(m) = &report.recording { + println!( + "Recording: offset {:+.0} ms (median), drift {:+.0} ms over {:.0}s, {} events (spread ±{:.0} ms)", + m.median_offset_ms, m.total_drift_ms, m.span_secs, m.inlier_events, m.mad_ms + ); + } + if let Some(m) = &report.microphone { + println!( + "Microphone: offset {:+.0} ms (median), drift {:+.0} ms over {:.0}s, {} events (spread ±{:.0} ms)", + m.median_offset_ms, m.total_drift_ms, m.span_secs, m.inlier_events, m.mad_ms + ); + } + if let Some(m) = &report.export { + println!( + "Export: offset {:+.0} ms (median), drift {:+.0} ms over {:.0}s, {} events (spread ±{:.0} ms)", + m.median_offset_ms, m.total_drift_ms, m.span_secs, m.inlier_events, m.mad_ms + ); + } + let label = match report.verdict { + Verdict::Pass => "PASS", + Verdict::Warn => "WARN", + Verdict::Fail => "FAIL", + Verdict::Inconclusive => "INCONCLUSIVE", + }; + println!("\nResult: {label} — {}", report.summary); +} + +async fn start_recording( + path: &Path, + fps: Option, + mic_name: Option, +) -> Result { + use cap_recording::{ + MicrophoneFeed, feeds::microphone, screen_capture::ScreenCaptureTarget, studio_recording, + }; + use kameo::Actor as _; + + let display = scap_targets::Display::primary(); + let target = ScreenCaptureTarget::Display { id: display.id() }; + + let mut builder = + studio_recording::Actor::builder(path.to_path_buf(), target).with_system_audio(true); + + if let Some(label) = mic_name { + let (error_tx, _error_rx) = flume::bounded(16); + let mic_feed = MicrophoneFeed::spawn(MicrophoneFeed::new(error_tx)); + mic_feed + .ask(microphone::SetInput { + label: label.clone(), + settings: None, + }) + .await + .map_err(|e| 
/// Locations and start offsets of the raw tracks belonging to the first
/// segment of a recorded project, as resolved by `locate_raw_tracks`.
struct RawTracks {
    /// Display video file. When the meta path points at a directory, this is
    /// the same path with an `.mp4` extension.
    display: PathBuf,
    /// System-audio track file.
    system_audio: PathBuf,
    /// Start time recorded for the display track in the project meta, or 0.0
    /// when the meta has none. Added to every flash onset before pairing
    /// (presumably seconds — confirm against `measure`).
    display_start: f64,
    /// Start time recorded for the system-audio track, or 0.0 when absent.
    /// Added to every beep onset before pairing.
    audio_start: f64,
    /// Microphone track path and its start time (0.0 when absent), if the
    /// segment has a microphone track at all.
    mic: Option<(PathBuf, f64)>,
}
+ let mut display = meta.path(&segment.display.path); + if display.is_dir() { + display = display.with_extension("mp4"); + } + if !display.is_file() { + return Err(format!("display track not found at {}", display.display())); + } + + let mic = segment + .mic + .as_ref() + .map(|mic| (meta.path(&mic.path), mic.start_time.unwrap_or(0.0))); + + Ok(RawTracks { + display, + system_audio: meta.path(&audio.path), + display_start: segment.display.start_time.unwrap_or(0.0), + audio_start: audio.start_time.unwrap_or(0.0), + mic, + }) +} + +type MeasureOutcome = Result<(SyncMeasurement, f64), String>; + +fn analyze_raw(project_path: &Path) -> MeasureOutcome { + let tracks = locate_raw_tracks(project_path)?; + + let flashes: Vec = measure::video_flash_onsets(&tracks.display)? + .into_iter() + .map(|t| t + tracks.display_start) + .collect(); + let audio = measure::audio_beep_onsets(&tracks.system_audio)?; + let beeps: Vec = audio + .onsets + .iter() + .map(|t| t + tracks.audio_start) + .collect(); + + if audio.snr < MIN_BEEP_SNR { + return Err(format!( + "test tone barely audible in the recording (SNR {:.1}); \ + check that output volume is not muted", + audio.snr + )); + } + + measure::measure_sync(&flashes, &beeps, MIN_EVENTS).map(|m| (m, audio.snr)) +} + +/// Measures the microphone track against the display flashes. The beeps +/// reach the mic acoustically, so this validates the real input-device path +/// end to end (device rate, resampling, timestamping). +fn analyze_mic(project_path: &Path) -> MeasureOutcome { + let tracks = locate_raw_tracks(project_path)?; + let (mic_path, mic_start) = tracks + .mic + .ok_or("recording has no microphone track despite --mic")?; + + let flashes: Vec = measure::video_flash_onsets(&tracks.display)? 
+ .into_iter() + .map(|t| t + tracks.display_start) + .collect(); + let audio = measure::audio_beep_onsets(&mic_path)?; + let beeps: Vec = audio.onsets.iter().map(|t| t + mic_start).collect(); + + if audio.snr < MIN_MIC_SNR { + return Err(format!( + "test tone barely audible through the microphone (SNR {:.1}); \ + raise the output volume or move the mic closer to the speakers", + audio.snr + )); + } + + measure::measure_sync(&flashes, &beeps, MIN_EVENTS).map(|m| (m, audio.snr)) +} + +fn analyze_export(output: &Path) -> MeasureOutcome { + let flashes = measure::video_flash_onsets(output)?; + let audio = measure::audio_beep_onsets(output)?; + measure::measure_sync(&flashes, &audio.onsets, MIN_EVENTS).map(|m| (m, audio.snr)) +} + +fn median_emission_skew_ms(report: &pattern::PatternReport) -> Option { + let mut skews: Vec = report + .flash_presents + .iter() + .filter_map(|(event, flash)| { + let (_, beep) = report.beep_outputs.iter().find(|(e, _)| e == event)?; + Some(if beep >= flash { + (*beep - *flash).as_secs_f64() * 1000.0 + } else { + -((*flash - *beep).as_secs_f64() * 1000.0) + }) + }) + .collect(); + if skews.is_empty() { + return None; + } + skews.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); + Some(skews[skews.len() / 2]) +} + +fn classify(m: &SyncMeasurement) -> Verdict { + let offset = m.median_offset_ms.abs(); + let drift = m.total_drift_ms.abs(); + if offset <= PASS_OFFSET_MS && drift <= PASS_TOTAL_DRIFT_MS && m.mad_ms <= PASS_MAD_MS { + Verdict::Pass + } else if offset <= WARN_OFFSET_MS && drift <= WARN_TOTAL_DRIFT_MS && m.mad_ms <= WARN_MAD_MS { + Verdict::Warn + } else { + Verdict::Fail + } +} + +#[allow(clippy::type_complexity)] +/// Classifies the acoustic microphone measurement: same drift/spread rules +/// as the digital path, with extra offset budget for sound flight time and +/// input device latency. 
+fn classify_mic(m: &SyncMeasurement) -> Verdict { + let offset = m.median_offset_ms.abs(); + let drift = m.total_drift_ms.abs(); + if offset <= PASS_OFFSET_MS + MIC_EXTRA_OFFSET_MS + && drift <= PASS_TOTAL_DRIFT_MS + && m.mad_ms <= PASS_MAD_MS + { + Verdict::Pass + } else if offset <= WARN_OFFSET_MS + MIC_EXTRA_OFFSET_MS + && drift <= WARN_TOTAL_DRIFT_MS + && m.mad_ms <= WARN_MAD_MS + { + Verdict::Warn + } else { + Verdict::Fail + } +} + +/// Merges verdicts: Fail dominates everything, otherwise the worse one wins. +fn merge_verdicts(a: Verdict, b: Verdict) -> Verdict { + if a == Verdict::Fail || b == Verdict::Fail { + Verdict::Fail + } else { + a.max(b) + } +} + +#[allow(clippy::type_complexity)] +fn evaluate( + raw: MeasureOutcome, + mic: Option, + export: Result, String>, +) -> ( + Verdict, + String, + Option, + Option, + Option, + Option, +) { + let (raw_m, snr) = match raw { + Ok((m, snr)) => (m, snr), + Err(reason) => { + return (Verdict::Inconclusive, reason, None, None, None, None); + } + }; + + let export_m = match export { + Ok(Some((m, _))) => Some(m), + Ok(None) => None, + Err(reason) => { + // A recording that measures fine but cannot be exported is a hard + // failure: the export path is part of the product. 
+ return (Verdict::Fail, reason, Some(raw_m), None, None, Some(snr)); + } + }; + + let mut verdict = classify(&raw_m); + let mut reasons: Vec = Vec::new(); + + if verdict != Verdict::Pass { + reasons.push(format!( + "recording offset {:+.0} ms / drift {:+.0} ms over {:.0}s", + raw_m.median_offset_ms, raw_m.total_drift_ms, raw_m.span_secs + )); + } + + let mic_m = match mic { + None => None, + Some(Ok((m, _))) => { + let mic_verdict = classify_mic(&m); + if mic_verdict != Verdict::Pass { + reasons.push(format!( + "microphone offset {:+.0} ms / drift {:+.0} ms over {:.0}s", + m.median_offset_ms, m.total_drift_ms, m.span_secs + )); + } + verdict = merge_verdicts(verdict, mic_verdict); + Some(m) + } + Some(Err(reason)) => { + // The mic leg was explicitly requested; not being able to measure + // it makes the run inconclusive (unless something already failed). + verdict = merge_verdicts(verdict, Verdict::Inconclusive); + reasons.push(reason); + None + } + }; + + if let Some(export_m) = &export_m { + let export_verdict = classify(export_m); + if export_verdict != Verdict::Pass { + reasons.push(format!( + "export offset {:+.0} ms / drift {:+.0} ms over {:.0}s", + export_m.median_offset_ms, export_m.total_drift_ms, export_m.span_secs + )); + } + verdict = merge_verdicts(verdict, export_verdict); + let delta = (export_m.median_offset_ms - raw_m.median_offset_ms).abs(); + if delta > MAX_RAW_EXPORT_DELTA_MS { + verdict = Verdict::Fail; + reasons.push(format!( + "export changes sync by {delta:.0} ms vs the recording" + )); + } + } + + let summary = match verdict { + Verdict::Pass => format!( + "audio/video sync is healthy (offset {:+.0} ms, drift {:+.0} ms over {:.0}s)", + raw_m.median_offset_ms, raw_m.total_drift_ms, raw_m.span_secs + ), + Verdict::Warn => format!( + "sync is within tolerance but not ideal: {}", + reasons.join("; ") + ), + Verdict::Fail => format!("sync problem detected: {}", reasons.join("; ")), + Verdict::Inconclusive => reasons.join("; "), + }; + + 
(verdict, summary, Some(raw_m), mic_m, export_m, Some(snr)) +} + +impl PartialOrd for Verdict { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Verdict { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + fn rank(v: &Verdict) -> u8 { + match v { + Verdict::Pass => 0, + Verdict::Warn => 1, + Verdict::Fail => 2, + Verdict::Inconclusive => 3, + } + } + rank(self).cmp(&rank(other)) + } +} diff --git a/apps/cli/src/selftest/pattern.rs b/apps/cli/src/selftest/pattern.rs new file mode 100644 index 0000000000..d96b3037bc --- /dev/null +++ b/apps/cli/src/selftest/pattern.rs @@ -0,0 +1,530 @@ +//! Main-thread test pattern for the A/V sync self-test. +//! +//! Renders a fullscreen black window that flashes white at a fixed period +//! while playing a 1 kHz beep through the default audio output at the same +//! scheduled instants. The window must run on the process main thread +//! (required by AppKit); the async side of the self-test requests a pattern +//! run through [`request_pattern`] and the real main thread services it via +//! [`serve_main_thread`]. + +use std::{ + num::NonZeroU32, + sync::{ + Arc, Mutex, OnceLock, + atomic::{AtomicBool, Ordering}, + mpsc, + }, + time::{Duration, Instant}, +}; + +use cpal::traits::{DeviceTrait, HostTrait, StreamTrait}; +use winit::{ + application::ApplicationHandler, + event::{ElementState, WindowEvent}, + event_loop::{ActiveEventLoop, ControlFlow, EventLoop}, + keyboard::{Key, NamedKey}, + platform::run_on_demand::EventLoopExtRunOnDemand, + window::{Fullscreen, Window, WindowId, WindowLevel}, +}; + +#[derive(Clone, Copy, Debug)] +pub struct PatternSpec { + /// Time to sit black before the first event, letting capture settle. + pub settle: Duration, + /// Number of flash+beep events. + pub events: u32, + /// Time between event onsets. + pub period: Duration, + /// Duration of each flash / beep. 
+ pub flash_len: Duration, +} + +impl PatternSpec { + pub fn total_runtime(&self) -> Duration { + self.settle + self.period * self.events + Duration::from_millis(500) + } + + /// Event onsets relative to the pattern epoch. Nominally periodic, with a + /// deterministic per-event jitter of up to ±300 ms: a perfectly periodic + /// schedule would let an A/V shift of exactly one period pair every flash + /// with the wrong beep and alias to a zero measured offset. + pub fn event_offsets_secs(&self) -> Vec { + (0..self.events) + .map(|k| { + let jitter = (u64::from(k).wrapping_mul(2_654_435_761) % 601) as f64 / 1000.0 - 0.3; + (f64::from(k) * self.period.as_secs_f64() + jitter).max(0.0) + }) + .collect() + } +} + +#[derive(Debug)] +pub struct PatternReport { + /// Instants at which each flash was actually presented (post-present). + pub flash_presents: Vec<(u32, Instant)>, + /// Estimated instants at which each beep hit the output (DAC time). + pub beep_outputs: Vec<(u32, Instant)>, + /// Mean reported output latency of the audio stream, if available. + pub audio_latency_ms: Option, +} + +pub struct PatternRequest { + pub spec: PatternSpec, + pub reply: mpsc::Sender>, +} + +static PATTERN_TX: OnceLock>>> = OnceLock::new(); + +/// Called from `main()` before the runtime thread spawns, when the parsed +/// command is a self-test. Returns the receiver the main thread must serve. +pub fn install_main_thread_runner() -> mpsc::Receiver { + let (tx, rx) = mpsc::channel(); + let _ = PATTERN_TX.set(Mutex::new(Some(tx))); + rx +} + +/// Called by the runtime thread once the command finishes, releasing the main +/// thread from its serve loop. +pub fn shutdown_main_thread_runner() { + if let Some(slot) = PATTERN_TX.get() { + slot.lock().unwrap().take(); + } +} + +/// Runs pattern requests on the main thread until the sender is dropped via +/// [`shutdown_main_thread_runner`]. 
+pub fn serve_main_thread(rx: mpsc::Receiver) { + while let Ok(request) = rx.recv() { + let result = run_pattern(request.spec); + let _ = request.reply.send(result); + } +} + +/// Called from the async side; blocks the calling task until the pattern +/// window has run to completion on the main thread. +pub async fn request_pattern(spec: PatternSpec) -> Result { + let tx = PATTERN_TX + .get() + .and_then(|slot| slot.lock().unwrap().clone()) + .ok_or("self-test pattern runner is not installed on the main thread")?; + tokio::task::spawn_blocking(move || { + let (reply_tx, reply_rx) = mpsc::channel(); + tx.send(PatternRequest { + spec, + reply: reply_tx, + }) + .map_err(|_| "main thread pattern runner is gone".to_string())?; + reply_rx + .recv() + .map_err(|_| "main thread pattern runner dropped the request".to_string())? + }) + .await + .map_err(|e| format!("pattern task join error: {e}"))? +} + +struct BeepState { + epoch: Instant, + /// Sorted (start, end) sample windows of each beep, relative to epoch. + event_windows: Vec<(u64, u64)>, + sample_rate: u32, + channels: usize, + /// Absolute sample index of pattern epoch, fixed on the first callback. 
impl BeepState {
    /// Fills one output buffer with silence or the beep tone and records the
    /// estimated output instant of each beep's first sample.
    ///
    /// `data` is written as interleaved frames of `self.channels` samples,
    /// every channel of a frame receiving the same value. `info` supplies the
    /// callback/playback timestamps used to estimate output latency.
    fn fill(&self, data: &mut [f32], info: &cpal::OutputCallbackInfo) {
        let now = Instant::now();
        // Held for the whole call; the running frame count is advanced at
        // the end.
        let mut written = self.samples_written.lock().unwrap();
        let buffer_start_sample = *written as i64;

        // On the first callback only, pin the pattern epoch to an absolute
        // sample index: this buffer's first sample plus the (possibly
        // negative) time remaining until the epoch, converted to samples.
        let mut epoch_sample = self.epoch_sample.lock().unwrap();
        let epoch_sample = *epoch_sample.get_or_insert_with(|| {
            let until_epoch = if self.epoch > now {
                (self.epoch - now).as_secs_f64()
            } else {
                -(now - self.epoch).as_secs_f64()
            };
            buffer_start_sample + (until_epoch * self.sample_rate as f64) as i64
        });

        // Reported latency: playback timestamp minus callback timestamp.
        // Accumulated as (sum in ms, count) so a mean can be taken later.
        let latency = info
            .timestamp()
            .playback
            .duration_since(&info.timestamp().callback);
        if let Some(latency) = latency {
            let mut acc = self.latency_sum_ms.lock().unwrap();
            acc.0 += latency.as_secs_f64() * 1000.0;
            acc.1 += 1;
        }

        let frames = data.len() / self.channels.max(1);
        for frame_idx in 0..frames {
            let abs_sample = buffer_start_sample + frame_idx as i64;
            // Sample position relative to the pattern epoch; negative while
            // still before the epoch.
            let rel = abs_sample - epoch_sample;
            let mut value = 0.0f32;
            if rel >= 0 {
                let rel = rel as u64;
                // Number of windows whose start is <= rel, so `idx - 1` is
                // the most recently started window (relies on the windows
                // being sorted by start).
                let idx = self
                    .event_windows
                    .partition_point(|&(start, _)| start <= rel);
                if idx > 0 {
                    let (start, end) = self.event_windows[idx - 1];
                    if rel < end {
                        // 1 kHz tone with a 2 ms fade-in/out to avoid clicks while
                        // keeping the onset sharp for detection.
                        let within = rel - start;
                        let t = within as f32 / self.sample_rate as f32;
                        let fade_len = 0.002 * self.sample_rate as f32;
                        let fade_in = (within as f32 / fade_len).min(1.0);
                        let remaining = (end - rel) as f32;
                        let fade_out = (remaining / fade_len).min(1.0);
                        value = 0.4
                            * fade_in
                            * fade_out
                            * (t * 1000.0 * 2.0 * std::f32::consts::PI).sin();

                        // First sample of this beep: estimate when it leaves
                        // the output as callback time + reported latency
                        // (zero if unavailable) + its offset into this buffer.
                        if within == 0 {
                            let dac = now
                                + latency.unwrap_or_default()
                                + Duration::from_secs_f64(
                                    frame_idx as f64 / self.sample_rate as f64,
                                );
                            self.beep_outputs
                                .lock()
                                .unwrap()
                                .push(((idx - 1) as u32, dac));
                        }
                    }
                }
            }
            // Same sample on every channel of the interleaved frame.
            for ch in 0..self.channels {
                data[frame_idx * self.channels + ch] = value;
            }
        }

        *written += frames as u64;
    }
}
&cpal::OutputCallbackInfo| { + state.fill(data, info); + }, + err_fn, + None, + ) + .map_err(|e| format!("failed to build audio output stream: {e}"))? + } + cpal::SampleFormat::I16 => { + let state = state.clone(); + let mut scratch = Vec::new(); + device + .build_output_stream( + &stream_config, + move |data: &mut [i16], info: &cpal::OutputCallbackInfo| { + scratch.clear(); + scratch.resize(data.len(), 0.0f32); + state.fill(&mut scratch, info); + for (dst, src) in data.iter_mut().zip(&scratch) { + *dst = (src * f32::from(i16::MAX)) as i16; + } + }, + err_fn, + None, + ) + .map_err(|e| format!("failed to build audio output stream: {e}"))? + } + other => { + return Err(format!( + "unsupported audio output sample format for the sync test: {other:?}" + )); + } + }; + + Ok((stream, state)) +} + +struct PatternApp { + spec: PatternSpec, + /// Event onsets in seconds from epoch, from `PatternSpec::event_offsets_secs`. + event_offsets: Vec, + run_start: Instant, + epoch: Instant, + window: Option>, + surface: Option, Arc>>, + size: (u32, u32), + last_drawn_white: bool, + flash_presents: Vec<(u32, Instant)>, + aborted: Arc, + error: Option, +} + +impl PatternApp { + /// Returns whether the pattern should currently show white, and the event + /// index if so. 
+ fn desired_state(&self, now: Instant) -> Option { + if now < self.epoch { + return None; + } + let rel = (now - self.epoch).as_secs_f64(); + let flash = self.spec.flash_len.as_secs_f64(); + self.event_offsets + .iter() + .position(|&start| rel >= start && rel < start + flash) + .map(|idx| idx as u32) + } + + fn next_transition(&self, now: Instant) -> Instant { + if now < self.epoch { + return self.epoch; + } + let rel = (now - self.epoch).as_secs_f64(); + let flash = self.spec.flash_len.as_secs_f64(); + let next_rel = self + .event_offsets + .iter() + .flat_map(|&start| [start, start + flash]) + .filter(|&boundary| boundary > rel) + .fold(f64::INFINITY, f64::min); + if next_rel.is_finite() { + self.epoch + Duration::from_secs_f64(next_rel) + } else { + self.done_at() + } + } + + fn done_at(&self) -> Instant { + self.run_start + self.spec.total_runtime() + } + + fn draw(&mut self, event_loop: &ActiveEventLoop) { + let now = Instant::now(); + let desired = self.desired_state(now); + let white = desired.is_some(); + + let Some(surface) = self.surface.as_mut() else { + return; + }; + let (w, h) = self.size; + if w == 0 || h == 0 { + return; + } + if surface + .resize(NonZeroU32::new(w).unwrap(), NonZeroU32::new(h).unwrap()) + .is_err() + { + return; + } + let Ok(mut buffer) = surface.buffer_mut() else { + return; + }; + // Keep the high byte opaque: some softbuffer backends (macOS layers) + // treat it as alpha rather than ignoring it. 
+ let color: u32 = if white { 0xFFFF_FFFF } else { 0xFF00_0000 }; + buffer.fill(color); + let presented = buffer.present().is_ok(); + + if presented && white && !self.last_drawn_white { + let event = desired.unwrap_or(0); + if self + .flash_presents + .last() + .is_none_or(|(last, _)| *last != event) + { + self.flash_presents.push((event, Instant::now())); + } + } + self.last_drawn_white = white; + + if Instant::now() >= self.done_at() { + event_loop.exit(); + } + } +} + +impl ApplicationHandler for PatternApp { + fn resumed(&mut self, event_loop: &ActiveEventLoop) { + if self.window.is_some() { + return; + } + let attrs = Window::default_attributes() + .with_title("Cap Sync Test") + .with_fullscreen(Some(Fullscreen::Borderless(None))) + .with_window_level(WindowLevel::AlwaysOnTop); + let window = match event_loop.create_window(attrs) { + Ok(w) => Arc::new(w), + Err(e) => { + self.error = Some(format!("failed to create test window: {e}")); + event_loop.exit(); + return; + } + }; + let size = window.inner_size(); + self.size = (size.width, size.height); + + let context = match softbuffer::Context::new(window.clone()) { + Ok(c) => c, + Err(e) => { + self.error = Some(format!("failed to create draw context: {e}")); + event_loop.exit(); + return; + } + }; + match softbuffer::Surface::new(&context, window.clone()) { + Ok(s) => self.surface = Some(s), + Err(e) => { + self.error = Some(format!("failed to create draw surface: {e}")); + event_loop.exit(); + return; + } + } + window.request_redraw(); + self.window = Some(window); + } + + fn window_event(&mut self, event_loop: &ActiveEventLoop, _id: WindowId, event: WindowEvent) { + match event { + WindowEvent::CloseRequested => { + self.aborted.store(true, Ordering::Release); + event_loop.exit(); + } + WindowEvent::KeyboardInput { event, .. 
} => { + if event.state == ElementState::Pressed + && event.logical_key == Key::Named(NamedKey::Escape) + { + self.aborted.store(true, Ordering::Release); + event_loop.exit(); + } + } + WindowEvent::Resized(size) => { + self.size = (size.width, size.height); + } + WindowEvent::RedrawRequested => { + self.draw(event_loop); + } + _ => {} + } + } + + fn about_to_wait(&mut self, event_loop: &ActiveEventLoop) { + let now = Instant::now(); + if now >= self.done_at() { + event_loop.exit(); + return; + } + let next = self.next_transition(now).min(self.done_at()); + if let Some(window) = &self.window { + // Redraw slightly eagerly so the flip lands at (not after) the + // scheduled transition. + window.request_redraw(); + } + event_loop.set_control_flow(ControlFlow::WaitUntil(next)); + } +} + +fn run_pattern(spec: PatternSpec) -> Result { + #[allow(unused_mut)] + let mut builder = EventLoop::builder(); + #[cfg(target_os = "macos")] + { + use winit::platform::macos::{ActivationPolicy, EventLoopBuilderExtMacOS}; + builder + .with_activation_policy(ActivationPolicy::Regular) + .with_activate_ignoring_other_apps(true); + } + let mut event_loop = builder + .build() + .map_err(|e| format!("failed to create event loop: {e}"))?; + + let run_start = Instant::now(); + let epoch = run_start + spec.settle; + + let (stream, beep_state) = build_beep_stream(epoch, &spec)?; + stream + .play() + .map_err(|e| format!("failed to start audio output: {e}"))?; + + let mut app = PatternApp { + event_offsets: spec.event_offsets_secs(), + spec, + run_start, + epoch, + window: None, + surface: None, + size: (0, 0), + last_drawn_white: false, + flash_presents: Vec::new(), + aborted: Arc::new(AtomicBool::new(false)), + error: None, + }; + + event_loop + .run_app_on_demand(&mut app) + .map_err(|e| format!("event loop error: {e}"))?; + + drop(stream); + + if let Some(error) = app.error { + return Err(error); + } + if app.aborted.load(Ordering::Acquire) { + return Err("cancelled".to_string()); + } + + 
let latency = { + let acc = beep_state.latency_sum_ms.lock().unwrap(); + (acc.1 > 0).then(|| acc.0 / acc.1 as f64) + }; + let beep_outputs = beep_state.beep_outputs.lock().unwrap().clone(); + + Ok(PatternReport { + flash_presents: app.flash_presents, + beep_outputs, + audio_latency_ms: latency, + }) +} diff --git a/apps/cli/src/selftest/playback.rs b/apps/cli/src/selftest/playback.rs new file mode 100644 index 0000000000..44ec082931 --- /dev/null +++ b/apps/cli/src/selftest/playback.rs @@ -0,0 +1,846 @@ +//! `cap selftest playback` — verifies A/V sync of the editor's playback +//! path: what the renderer presents vs what the audio output plays. +//! +//! The harness opens a flash+beep recording with the real editor machinery +//! (`EditorInstance`: real decoders, real frame scheduling, real audio +//! pipeline) and taps both presentation boundaries — the renderer's frame +//! callback and a headless audio sink that pulls blocks on a device-like +//! real-time schedule. Flash/beep onsets measured in those taps are compared +//! against the same onsets measured in the recording's raw tracks; playback +//! must reproduce the recording's sync within one frame and without drift. +//! +//! Without `--project` the fixture is generated through the real recording +//! pipeline (the same channel-source path the sync matrix uses), so the test +//! runs headless on CI where no capture hardware exists. + +use std::{ + path::{Path, PathBuf}, + sync::{Arc, Mutex}, + time::{Duration, Instant}, +}; + +use cap_editor::{EditorFrameOutput, EditorInstance, HEADLESS_CHANNELS, HEADLESS_SAMPLE_RATE}; +use cap_project::XY; +use clap::Args; +use serde::Serialize; + +use super::measure::{self, SyncMeasurement}; + +/// Flash/beep schedule of the generated fixture. Mirrors the av-sync pattern: +/// events every two seconds after a settle period, 120 ms flash+beep each. 
+const FIXTURE_SETTLE_SECS: f64 = 2.0; +const FIXTURE_PERIOD_SECS: f64 = 2.0; +/// Longer than the live pattern's 120 ms so CI runners with slow virtualized +/// GPUs still present at least one frame inside every flash window; onset +/// detection is edge-triggered, so the extra length does not blur the onset. +const FIXTURE_FLASH_SECS: f64 = 0.36; +const FIXTURE_TAIL_SECS: f64 = 1.0; +const FIXTURE_FPS: u32 = 30; +const FIXTURE_WIDTH: u32 = 320; +const FIXTURE_HEIGHT: u32 = 240; +/// A video emission gap after the second event: the screen is static, no +/// frames are captured, and playback/export must hold the last frame (the +/// VFR hold path) without disturbing audio sync. Longer than the decoders' +/// FRAME_CACHE_SIZE (90 frames = 3s at 30fps) so the pre-gap hold frame is +/// guaranteed to face cache eviction while requests march through the hole — +/// the regression class where post-gap content got served mid-hold. Events +/// whose flashes fall inside the gap are still beeped; their unpaired beeps +/// are rejected by the measurement's pairing window. +const FIXTURE_GAP_START_SECS: f64 = FIXTURE_SETTLE_SECS + FIXTURE_PERIOD_SECS + 0.4; +const FIXTURE_GAP_LEN_SECS: f64 = 4.2; +/// A second, narrow gap (~30 frames) between the fifth and sixth events. +/// Narrower than the decoders' cache read-ahead window, so it exercises the +/// in-loop narrow-hole answer paths that the long gap's cache-bounds exit +/// never reaches. +const FIXTURE_GAP2_START_SECS: f64 = FIXTURE_SETTLE_SECS + 4.0 * FIXTURE_PERIOD_SECS + 0.4; +const FIXTURE_GAP2_LEN_SECS: f64 = 1.0; + +/// The playback-vs-raw delta window is asymmetric because every presentation +/// boundary in the harness shifts it the same way: video content appears at +/// the first playback frame tick at-or-after its pts (0..1 frame late), the +/// renderer adds its render latency, and the zero-latency headless sink +/// consumes audio up to one block before the video clock starts. 
Audio can +/// therefore legitimately read EARLY by up to a frame plus a block plus a +/// render margin, but reading LATE (or early beyond that window) means the +/// editor's playback mapping itself is off. +const RENDER_MARGIN_MS: f64 = 35.0; +const DELTA_LATE_TOLERANCE_MS: f64 = 15.0; +/// Gated on the DIFFERENCE from the raw recording's drift: the fixture's own +/// emission jitter shows up identically in both legs and must not count +/// against playback. +const PASS_TOTAL_DRIFT_MS: f64 = 40.0; +const PASS_MAD_MS: f64 = 25.0; +/// The export decodes the same tracks offline, so its sync must match the +/// raw recording almost exactly (same budget as the av-sync selftest). +const EXPORT_DELTA_TOLERANCE_MS: f64 = 25.0; +/// Ceiling for waiting on playback to finish beyond the timeline duration. +const PLAYBACK_EXTRA_TIMEOUT: Duration = Duration::from_secs(30); + +#[derive(Args)] +pub struct PlaybackArgs { + /// Existing flash+beep .cap project to measure (defaults to generating a + /// synthetic fixture through the real recording pipeline) + #[arg(long)] + project: Option, + /// Seconds of synthetic fixture pattern to generate + #[arg(long, default_value_t = 20)] + duration: u64, + /// Frame rate to drive editor playback at + #[arg(long, default_value_t = 30)] + fps: u32, + /// Skip exporting the project (tests only the playback stage) + #[arg(long)] + skip_export: bool, + /// Keep the generated fixture project on disk for inspection + #[arg(long)] + keep: bool, +} + +#[derive(Serialize, Clone, Copy, PartialEq, Eq, Debug)] +#[serde(rename_all = "lowercase")] +enum Verdict { + Pass, + Fail, + Inconclusive, +} + +#[derive(Serialize)] +struct PlaybackReport { + verdict: Verdict, + summary: String, + /// Sync measured in the recording's raw tracks (ground truth). + raw: Option, + /// Sync measured at the editor playback presentation boundaries. + playback: Option, + /// Sync measured in an export of the same project. 
+ export: Option, + /// playback median offset − raw median offset. Negative = audio early. + delta_ms: Option, + delta_early_tolerance_ms: f64, + delta_late_tolerance_ms: f64, + pass_total_drift_ms: f64, + frames_presented: usize, + project_path: Option, +} + +/// How early audio may legitimately read at the presentation taps: one video +/// frame (content quantization) + one audio block (sink start quantization) +/// + the render margin. +fn delta_early_tolerance_ms(fps: u32) -> f64 { + 1000.0 / f64::from(fps) + + 1000.0 * cap_editor::HEADLESS_BLOCK_FRAMES as f64 / f64::from(HEADLESS_SAMPLE_RATE) + + RENDER_MARGIN_MS +} + +pub async fn run_playback(args: PlaybackArgs, json: bool) -> Result<(), String> { + ffmpeg::util::log::set_level(ffmpeg::util::log::Level::Quiet); + + if !(1..=240).contains(&args.fps) { + return Err(format!("invalid playback fps: {}", args.fps)); + } + + let progress = |msg: &str| { + if !json { + eprintln!("{msg}"); + } + }; + + let (project_path, generated) = match &args.project { + Some(path) => (path.clone(), false), + None => { + let path = std::env::temp_dir().join(format!( + "cap-selftest-playback-{}.cap", + uuid::Uuid::new_v4() + )); + // The floor guarantees enough events for measure_sync's minimum + // after the first event is dropped AND the two events whose + // flashes fall inside the video gap: 18s -> 9 events -> 7 visible + // -> 6 pairs. + let pattern_secs = args.duration.clamp(18, 120) as f64; + progress(&format!( + "[1/3] Generating synthetic flash+beep recording ({pattern_secs:.0}s, real-time)..." 
+ )); + fixture::generate(&path, pattern_secs).await?; + (path, true) + } + }; + + progress("[2/3] Measuring the raw recording..."); + let raw = super::analyze_raw(&project_path); + + progress("[3/3] Playing back through the editor and measuring what it presents..."); + let playback = measure_playback(&project_path, args.fps).await; + + // The export drives the same decoders and timeline mapping as playback + // through the offline path; on CI this is the only place the export-side + // VFR gap handling is exercised at all. + let export = if args.skip_export { + Ok(None) + } else { + progress("Exporting and verifying the export..."); + match crate::export::export_project_default(project_path.clone()).await { + Ok(output) => super::analyze_export(&output).map(Some), + Err(e) => Err(format!("export failed: {e}")), + } + }; + + let (verdict, summary, raw_m, playback_m, export_m, delta_ms, frames_presented) = + evaluate(&args, raw, playback, export); + + let keep = args.keep || (generated && verdict != Verdict::Pass); + if generated { + if keep { + progress(&format!( + "Fixture project kept at {}", + project_path.display() + )); + } else { + let _ = std::fs::remove_dir_all(&project_path); + } + } + + let report = PlaybackReport { + verdict, + summary: summary.clone(), + raw: raw_m, + playback: playback_m, + export: export_m, + delta_ms, + delta_early_tolerance_ms: delta_early_tolerance_ms(args.fps), + delta_late_tolerance_ms: DELTA_LATE_TOLERANCE_MS, + pass_total_drift_ms: PASS_TOTAL_DRIFT_MS, + frames_presented, + project_path: (keep || !generated).then(|| project_path.display().to_string()), + }; + + if json { + println!( + "{}", + serde_json::to_string_pretty(&report) + .map_err(|e| format!("failed to serialize report: {e}"))? 
+ ); + } else { + if let Some(m) = &report.raw { + println!( + "\nRecording: offset {:+.0} ms (median), drift {:+.0} ms over {:.0}s, {} events", + m.median_offset_ms, m.total_drift_ms, m.span_secs, m.inlier_events + ); + } + if let Some(m) = &report.playback { + println!( + "Playback: offset {:+.0} ms (median), drift {:+.0} ms over {:.0}s, {} events", + m.median_offset_ms, m.total_drift_ms, m.span_secs, m.inlier_events + ); + } + if let Some(m) = &report.export { + println!( + "Export: offset {:+.0} ms (median), drift {:+.0} ms over {:.0}s, {} events", + m.median_offset_ms, m.total_drift_ms, m.span_secs, m.inlier_events + ); + } + let label = match verdict { + Verdict::Pass => "PASS", + Verdict::Fail => "FAIL", + Verdict::Inconclusive => "INCONCLUSIVE", + }; + println!("\nResult: {label} — {summary}"); + } + + match verdict { + Verdict::Pass => Ok(()), + Verdict::Fail => Err(format!("editor playback sync check failed: {summary}")), + Verdict::Inconclusive => Err(format!( + "editor playback sync check inconclusive: {summary}" + )), + } +} + +#[allow(clippy::type_complexity)] +fn evaluate( + args: &PlaybackArgs, + raw: Result<(SyncMeasurement, f64), String>, + playback: Result<(SyncMeasurement, usize), String>, + export: Result, String>, +) -> ( + Verdict, + String, + Option, + Option, + Option, + Option, + usize, +) { + let (raw_m, _snr) = match raw { + Ok(v) => v, + Err(reason) => { + return ( + Verdict::Inconclusive, + format!("could not measure the raw recording: {reason}"), + None, + None, + None, + None, + 0, + ); + } + }; + + let (playback_m, frames_presented) = match playback { + Ok(v) => v, + Err(reason) => { + return ( + Verdict::Fail, + format!("editor playback could not be measured: {reason}"), + Some(raw_m), + None, + None, + None, + 0, + ); + } + }; + + let export_m = match export { + Ok(v) => v.map(|(m, _)| m), + Err(reason) => { + // A project that plays back but cannot be exported is a hard + // failure: the export path is part of the product. 
+ return ( + Verdict::Fail, + reason, + Some(raw_m), + Some(playback_m), + None, + None, + frames_presented, + ); + } + }; + + let delta = playback_m.median_offset_ms - raw_m.median_offset_ms; + let early_tolerance = delta_early_tolerance_ms(args.fps); + + let mut reasons = Vec::new(); + if delta < -early_tolerance || delta > DELTA_LATE_TOLERANCE_MS { + reasons.push(format!( + "playback shifts sync by {delta:+.0} ms vs the recording \ + (allowed -{early_tolerance:.0}..+{DELTA_LATE_TOLERANCE_MS:.0} ms)" + )); + } + let drift_delta = playback_m.total_drift_ms - raw_m.total_drift_ms; + if drift_delta.abs() > PASS_TOTAL_DRIFT_MS { + reasons.push(format!( + "playback adds {drift_delta:+.0} ms of drift over {:.0}s vs the recording", + playback_m.span_secs + )); + } + if playback_m.mad_ms > PASS_MAD_MS { + reasons.push(format!( + "playback offsets are unstable (spread ±{:.0} ms)", + playback_m.mad_ms + )); + } + if let Some(export_m) = &export_m { + let export_delta = (export_m.median_offset_ms - raw_m.median_offset_ms).abs(); + if export_delta > EXPORT_DELTA_TOLERANCE_MS { + reasons.push(format!( + "export changes sync by {export_delta:.0} ms vs the recording" + )); + } + let export_drift_delta = export_m.total_drift_ms - raw_m.total_drift_ms; + if export_drift_delta.abs() > PASS_TOTAL_DRIFT_MS { + reasons.push(format!( + "export adds {export_drift_delta:+.0} ms of drift over {:.0}s vs the recording", + export_m.span_secs + )); + } + } + + let verdict = if reasons.is_empty() { + Verdict::Pass + } else { + Verdict::Fail + }; + let summary = if reasons.is_empty() { + format!( + "editor playback preserves sync (playback {:+.0} ms vs recording {:+.0} ms, drift {:+.0} ms)", + playback_m.median_offset_ms, raw_m.median_offset_ms, playback_m.total_drift_ms + ) + } else { + reasons.join("; ") + }; + + ( + verdict, + summary, + Some(raw_m), + Some(playback_m), + export_m, + Some(delta), + frames_presented, + ) +} + +/// Drives the editor's real playback over the project and 
measures +/// flash-vs-beep alignment in what it presents. Returns the measurement and +/// the number of frames the renderer actually presented. +async fn measure_playback( + project_path: &Path, + fps: u32, +) -> Result<(SyncMeasurement, usize), String> { + // Wall-clock epoch shared by both presentation taps. + let epoch = Instant::now(); + + let video_events: Arc>> = Arc::new(Mutex::new(Vec::new())); + let frame_cb: Box = Box::new({ + let video_events = video_events.clone(); + move |output| { + let now = Instant::now(); + if let EditorFrameOutput::Rgba(frame) = output { + if let Some(luma) = mean_center_luma_rgba( + &frame.data, + frame.width, + frame.height, + frame.padded_bytes_per_row, + ) && let Ok(mut events) = video_events.lock() + { + events.push((now.duration_since(epoch).as_secs_f64(), luma)); + } + } + } + }); + + struct AudioTapState { + base_secs: Option, + mono: Vec, + } + let audio_tap_state = Arc::new(Mutex::new(AudioTapState { + base_secs: None, + mono: Vec::new(), + })); + let audio_tap: cap_editor::HeadlessAudioTap = Box::new({ + let state = audio_tap_state.clone(); + move |block: &[f32], deadline: Instant| { + let Ok(mut state) = state.lock() else { + return; + }; + if state.base_secs.is_none() { + // The pump's schedule is absolute, so the first block deadline + // anchors an exact sample-index -> wall-time mapping. 
+ state.base_secs = Some( + deadline + .checked_duration_since(epoch) + .map(|d| d.as_secs_f64()) + .unwrap_or_else(|| -epoch.duration_since(deadline).as_secs_f64()), + ); + } + for frame in block.chunks_exact(usize::from(HEADLESS_CHANNELS)) { + state + .mono + .push(frame.iter().sum::() / frame.len() as f32); + } + } + }); + + let audio_output = Arc::new(cap_editor::AudioOutput::new_headless(audio_tap)); + + let instance = EditorInstance::new_with_audio_output( + project_path.to_path_buf(), + |_| {}, + frame_cb, + None, + audio_output, + ) + .await + .map_err(|e| format!("failed to open the project in the editor: {e}"))?; + + let resolution_base = { + let display = &instance.recordings.segments[0].display; + XY::new(display.width, display.height) + }; + + let total_frames = instance.get_total_frames(fps); + let expected_duration = Duration::from_secs_f64(f64::from(total_frames) / f64::from(fps)); + + instance.start_playback(fps, resolution_base).await; + + let mut handle = instance + .state + .lock() + .await + .playback_task + .clone() + .ok_or("editor playback did not start")?; + + let wait = tokio::time::timeout(expected_duration + PLAYBACK_EXTRA_TIMEOUT, async { + loop { + let event = *handle.receive_event().await; + if matches!(event, cap_editor::PlaybackEvent::Stop) { + break; + } + } + }) + .await; + + instance.dispose().await; + + if wait.is_err() { + return Err(format!( + "playback did not finish within {:?}", + expected_duration + PLAYBACK_EXTRA_TIMEOUT + )); + } + + let video_samples = video_events + .lock() + .map_err(|_| "video tap poisoned".to_string())? 
+ .clone(); + let frames_presented = video_samples.len(); + let (audio_base_secs, mono) = { + let mut state = audio_tap_state + .lock() + .map_err(|_| "audio tap poisoned".to_string())?; + ( + state.base_secs.unwrap_or(0.0), + std::mem::take(&mut state.mono), + ) + }; + + let flashes = measure::flash_onsets_from_luma(&video_samples) + .map_err(|e| format!("playback video ({frames_presented} frames presented): {e}"))?; + let audio = measure::beep_onsets_from_mono(mono, HEADLESS_SAMPLE_RATE) + .map_err(|e| format!("playback audio: {e}"))?; + let beeps: Vec = audio.onsets.iter().map(|t| t + audio_base_secs).collect(); + + measure::measure_sync(&flashes, &beeps, super::MIN_EVENTS) + .map(|m| (m, frames_presented)) + .map_err(|e| format!("playback pairing ({frames_presented} frames presented): {e}")) +} + +/// Mean luma over the center crop of an RGBA/BGRA presentation frame. +/// Channel order doesn't matter for the black/white test pattern. +fn mean_center_luma_rgba( + data: &[u8], + width: u32, + height: u32, + padded_bytes_per_row: u32, +) -> Option { + let width = width as usize; + let height = height as usize; + let stride = padded_bytes_per_row as usize; + if width == 0 || height == 0 || stride < width * 4 || data.len() < stride * height { + return None; + } + + let x0 = width / 4; + let x1 = width * 3 / 4; + let y0 = height / 4; + let y1 = height * 3 / 4; + + let mut sum = 0u64; + let mut count = 0u64; + let mut y = y0; + while y < y1 { + let row = &data[y * stride..y * stride + width * 4]; + let mut x = x0; + while x < x1 { + let px = &row[x * 4..x * 4 + 3]; + sum += u64::from(px[0]) + u64::from(px[1]) + u64::from(px[2]); + count += 3; + x += 4; + } + y += 4; + } + (count > 0).then(|| sum as f64 / count as f64) +} + +/// Generates a real `.cap` studio project containing a flash+beep pattern by +/// driving the production recording pipeline with synthetic sources — the +/// same real-time channel-source path the sync matrix uses. 
Only the media +/// origin is synthetic; encoding, muxing and metadata are the real product +/// code paths. +mod fixture { + use std::{path::Path, time::Duration}; + + use cap_media_info::{AudioInfo, RawVideoFormat, Sample, Type, VideoInfo}; + use cap_project::{ + AudioMeta, ClipConfiguration, MultipleSegment, MultipleSegments, Platform, + ProjectConfiguration, RecordingMeta, RecordingMetaInner, StudioRecordingMeta, + StudioRecordingStatus, TimelineConfiguration, TimelineSegment, VideoMeta, + }; + use cap_recording::{ + AudioFrame, ChannelAudioSource, ChannelAudioSourceConfig, ChannelVideoSource, + ChannelVideoSourceConfig, OutputPipeline, + ffmpeg::{FFmpegVideoFrame, Mp4Muxer, OggMuxer}, + }; + use cap_timestamp::{Timestamp, Timestamps}; + use relative_path::RelativePathBuf; + + use super::{ + FIXTURE_FLASH_SECS, FIXTURE_FPS, FIXTURE_GAP_LEN_SECS, FIXTURE_GAP_START_SECS, + FIXTURE_GAP2_LEN_SECS, FIXTURE_GAP2_START_SECS, FIXTURE_HEIGHT, FIXTURE_PERIOD_SECS, + FIXTURE_SETTLE_SECS, FIXTURE_TAIL_SECS, FIXTURE_WIDTH, + }; + + const AUDIO_RATE: u32 = 48_000; + const AUDIO_CHUNK_SECS: f64 = 0.02; + const BEEP_FREQ: f32 = 1_000.0; + const BEEP_AMPLITUDE: f32 = 0.5; + + struct Pattern { + events: Vec, + total_secs: f64, + } + + fn pattern(pattern_secs: f64) -> Pattern { + let events = ((pattern_secs / FIXTURE_PERIOD_SECS) as u32).max(4); + // Same deterministic anti-aliasing jitter as the live pattern window + // (PatternSpec::event_offsets_secs): a perfectly periodic schedule + // would let a one-period A/V shift alias to a zero measured offset. 
+ let events: Vec = (0..events) + .map(|k| { + let jitter = (u64::from(k).wrapping_mul(2_654_435_761) % 601) as f64 / 1000.0 - 0.3; + FIXTURE_SETTLE_SECS + (f64::from(k) * FIXTURE_PERIOD_SECS + jitter).max(0.0) + }) + .collect(); + let total_secs = + events.last().copied().unwrap_or(0.0) + FIXTURE_FLASH_SECS + FIXTURE_TAIL_SECS; + Pattern { events, total_secs } + } + + fn in_flash(events: &[f64], t: f64) -> bool { + events.iter().any(|&e| t >= e && t < e + FIXTURE_FLASH_SECS) + } + + fn in_video_gap(t: f64) -> bool { + (FIXTURE_GAP_START_SECS..FIXTURE_GAP_START_SECS + FIXTURE_GAP_LEN_SECS).contains(&t) + || (FIXTURE_GAP2_START_SECS..FIXTURE_GAP2_START_SECS + FIXTURE_GAP2_LEN_SECS) + .contains(&t) + } + + pub async fn generate(project_dir: &Path, pattern_secs: f64) -> Result<(), String> { + let pattern = pattern(pattern_secs); + + let segment_dir = project_dir.join("content/segments/segment-0"); + std::fs::create_dir_all(&segment_dir) + .map_err(|e| format!("failed to create fixture directories: {e}"))?; + let display_path = segment_dir.join("display.mp4"); + let audio_path = segment_dir.join("system_audio.ogg"); + + let timestamps = Timestamps::now(); + + // Video leg: black frames with white flashes; nothing is emitted + // inside the gap window, like a static screen under VFR capture. 
+ let video_info = VideoInfo::from_raw( + RawVideoFormat::Bgra, + FIXTURE_WIDTH, + FIXTURE_HEIGHT, + FIXTURE_FPS, + ); + let (video_tx, video_rx) = flume::bounded::(32); + let video_emit = { + let events = pattern.events.clone(); + let total_secs = pattern.total_secs; + let base = timestamps.instant(); + tokio::spawn(async move { + let period = 1.0 / f64::from(FIXTURE_FPS); + let frame_count = (total_secs * f64::from(FIXTURE_FPS)) as u64; + for k in 0..frame_count { + let t = k as f64 * period; + if in_video_gap(t) { + continue; + } + tokio::time::sleep_until((base + Duration::from_secs_f64(t)).into()).await; + let mut frame = ffmpeg::frame::Video::new( + ffmpeg::format::Pixel::BGRA, + FIXTURE_WIDTH, + FIXTURE_HEIGHT, + ); + let shade = if in_flash(&events, t) { 0xFF } else { 0x00 }; + frame.data_mut(0).fill(shade); + let frame = FFmpegVideoFrame { + inner: frame, + timestamp: Timestamp::Instant(base + Duration::from_secs_f64(t)), + }; + if video_tx.send_async(frame).await.is_err() { + break; + } + } + }) + }; + + // Audio leg: silence with 1 kHz beep bursts aligned to the flashes. 
+ let audio_info = AudioInfo::new(Sample::F32(Type::Packed), AUDIO_RATE, 2) + .map_err(|e| format!("audio info: {e:?}"))?; + let (audio_tx, audio_rx) = futures::channel::mpsc::channel::(32); + let audio_emit = { + let events = pattern.events.clone(); + let total_secs = pattern.total_secs; + let base = timestamps.instant(); + let mut tx = audio_tx; + let info = audio_info; + tokio::spawn(async move { + use futures::SinkExt; + let chunk_frames = (f64::from(AUDIO_RATE) * AUDIO_CHUNK_SECS) as usize; + let total_chunks = (total_secs / AUDIO_CHUNK_SECS).ceil() as usize; + for k in 0..total_chunks { + let chunk_t = k as f64 * AUDIO_CHUNK_SECS; + tokio::time::sleep_until((base + Duration::from_secs_f64(chunk_t)).into()) + .await; + let mut frame = ffmpeg::frame::Audio::new( + ffmpeg::format::Sample::F32(ffmpeg::format::sample::Type::Packed), + chunk_frames, + info.channel_layout(), + ); + frame.set_rate(AUDIO_RATE); + let data = frame.data_mut(0); + let samples = unsafe { + std::slice::from_raw_parts_mut( + data.as_mut_ptr().cast::(), + data.len() / 4, + ) + }; + for (i, sample) in samples.iter_mut().enumerate() { + let n = (k * chunk_frames + i / 2) as f64; + let t = n / f64::from(AUDIO_RATE); + *sample = if in_flash(&events, t) { + (t as f32 * BEEP_FREQ * 2.0 * std::f32::consts::PI).sin() + * BEEP_AMPLITUDE + } else { + 0.0 + }; + } + let frame = AudioFrame::new( + frame, + Timestamp::Instant(base + Duration::from_secs_f64(chunk_t)), + ); + if tx.send(frame).await.is_err() { + break; + } + } + }) + }; + + let video_pipeline = OutputPipeline::builder(display_path.clone()) + .with_video::>(ChannelVideoSourceConfig::new( + video_info, video_rx, + )) + .with_timestamps(timestamps) + .build::(()) + .await + .map_err(|e| format!("video pipeline: {e}"))?; + let audio_pipeline = OutputPipeline::builder(audio_path.clone()) + .with_audio_source::(ChannelAudioSourceConfig::new( + audio_info, audio_rx, + )) + .with_timestamps(timestamps) + .build::(()) + .await + .map_err(|e| 
format!("audio pipeline: {e}"))?; + + video_emit + .await + .map_err(|e| format!("video emit join: {e}"))?; + audio_emit + .await + .map_err(|e| format!("audio emit join: {e}"))?; + // Let the stream tails flush through the encoders. + tokio::time::sleep(Duration::from_millis(500)).await; + + let finished_video = video_pipeline + .stop() + .await + .map_err(|e| format!("video pipeline stop: {e}"))?; + let finished_audio = audio_pipeline + .stop() + .await + .map_err(|e| format!("audio pipeline stop: {e}"))?; + + // Persist metadata the way the studio recorder does: start times are + // each track's first timestamp on the shared clock, and the timeline + // covers the real muxed video span. + let display_start = finished_video + .first_timestamp + .signed_duration_since_secs(timestamps); + let audio_start = finished_audio + .first_timestamp + .signed_duration_since_secs(timestamps); + let display_duration = finished_video + .video_timestamp_span + .map(|(first, last)| (last - first).as_secs_f64() + 1.0 / f64::from(FIXTURE_FPS)) + .ok_or("fixture video reported no timestamp span")?; + + let meta = StudioRecordingMeta::MultipleSegments { + inner: MultipleSegments { + segments: vec![MultipleSegment { + display: VideoMeta { + path: RelativePathBuf::from("content/segments/segment-0/display.mp4"), + fps: FIXTURE_FPS, + start_time: Some(display_start), + device_id: None, + }, + camera: None, + mic: None, + system_audio: Some(AudioMeta { + path: RelativePathBuf::from("content/segments/segment-0/system_audio.ogg"), + start_time: Some(audio_start), + device_id: None, + gap_summary: finished_audio.audio_gap_summary.map(|s| { + cap_project::AudioGapSummary { + total_overlap_trimmed_ms: s.total_overlap_trimmed_ms, + startup_overlap_trimmed_ms: s.startup_overlap_trimmed_ms, + overlap_dropped_frames: s.overlap_dropped_frames, + startup_overlap_drops: s.startup_overlap_drops, + } + }), + }), + cursor: None, + keyboard: None, + }], + cursors: Default::default(), + status: 
Some(StudioRecordingStatus::Complete), + }, + }; + + let recording_meta = RecordingMeta { + platform: Some(Platform::default()), + project_path: project_dir.to_path_buf(), + pretty_name: "Cap Playback Selftest Fixture".to_string(), + sharing: None, + inner: RecordingMetaInner::Studio(Box::new(meta)), + upload: None, + }; + recording_meta + .save_for_project() + .map_err(|e| format!("failed to write recording meta: {e:?}"))?; + + let project_config = ProjectConfiguration { + timeline: Some(TimelineConfiguration { + segments: vec![TimelineSegment { + recording_clip: 0, + start: 0.0, + end: display_duration, + timescale: 1.0, + name: None, + }], + zoom_segments: Vec::new(), + scene_segments: Vec::new(), + mask_segments: Vec::new(), + text_segments: Vec::new(), + caption_segments: Vec::new(), + keyboard_segments: Vec::new(), + audio_segments: Vec::new(), + }), + clips: vec![ClipConfiguration { + index: 0, + offsets: Default::default(), + }], + ..Default::default() + }; + project_config + .write(project_dir) + .map_err(|e| format!("failed to write project config: {e}"))?; + + Ok(()) + } +} diff --git a/apps/desktop/src-tauri/src/lib.rs b/apps/desktop/src-tauri/src/lib.rs index 61b3006196..2e30e93847 100644 --- a/apps/desktop/src-tauri/src/lib.rs +++ b/apps/desktop/src-tauri/src/lib.rs @@ -115,9 +115,11 @@ use tracing::*; use upload::{create_or_get_video, upload_screenshot_bytes, upload_screenshot_file, upload_video}; use web_api::AuthedApiError; use web_api::ManagerExt as WebManagerExt; +#[cfg(target_os = "macos")] +use windows::hide_overlay; use windows::{ CapWindowId, EditorRecordingTarget, EditorWindowIds, ScreenshotEditorWindowIds, ShowCapWindow, - hide_overlay, set_window_transparent, show_overlay, + set_window_transparent, show_overlay, }; use crate::{recording::start_recording, upload::build_video_meta}; diff --git a/apps/desktop/src-tauri/src/logging.rs b/apps/desktop/src-tauri/src/logging.rs index 64c690c3ef..f70d042c83 100644 --- 
a/apps/desktop/src-tauri/src/logging.rs +++ b/apps/desktop/src-tauri/src/logging.rs @@ -211,7 +211,7 @@ pub async fn upload_log_file(app: &AppHandle) -> Result<(), String> { .path() .app_data_dir() .map_err(|e| format!("Failed to get app data dir: {e}"))?; - let recordings_dir = GeneralSettingsStore::recordings_dir(&app); + let recordings_dir = GeneralSettingsStore::recordings_dir(app); let is_recording = { let app_lock = app.state::>(); diff --git a/crates/editor/src/audio_output.rs b/crates/editor/src/audio_output.rs index 080ffdc030..655003ecb6 100644 --- a/crates/editor/src/audio_output.rs +++ b/crates/editor/src/audio_output.rs @@ -13,10 +13,10 @@ use std::{ atomic::{AtomicBool, AtomicU64, Ordering}, mpsc as std_mpsc, }, - time::Duration, + time::{Duration, Instant}, }; -use cap_audio::FromSampleBytes; +use cap_audio::{AudioData, FromSampleBytes}; #[cfg(not(target_os = "windows"))] use cap_audio::{LatencyCorrectionConfig, LatencyCorrector, default_output_latency_hint}; use cap_media_info::AudioInfo; @@ -77,6 +77,18 @@ impl Default for AudioOutput { } } +/// Sample rate of the headless sink; matches the pipeline's master clock. +pub const HEADLESS_SAMPLE_RATE: u32 = 48_000; +/// Channel count of the headless sink. +pub const HEADLESS_CHANNELS: u16 = 2; +/// Frames per pulled block in the headless sink (a typical device period). +pub const HEADLESS_BLOCK_FRAMES: usize = 512; + +/// Receives every interleaved f32 block the headless sink pulls, together +/// with the deadline at which a real output device would start playing the +/// block's first sample. +pub type HeadlessAudioTap = Box; + impl AudioOutput { pub fn new() -> Self { let (control_tx, control_rx) = std_mpsc::channel(); @@ -96,6 +108,27 @@ impl AudioOutput { } } + /// An output that renders into `tap` instead of a device, pulling blocks + /// on a real-time schedule the way a sound card would. 
Runs the exact + /// production source pipeline (pre-render buffer, playhead sync policy), + /// so sync harnesses can observe what a device would have played without + /// needing audio hardware. + pub fn new_headless(tap: HeadlessAudioTap) -> Self { + let (control_tx, control_rx) = std_mpsc::channel(); + + if let Err(e) = std::thread::Builder::new() + .name("cap-audio-headless".into()) + .spawn(move || control_thread_headless(control_rx, tap)) + { + error!("Failed to spawn headless audio output thread: {e}"); + } + + Self { + control_tx, + next_generation: AtomicU64::new(0), + } + } + /// Opens the output stream ahead of the first play so even the first /// press doesn't wait on the device (Bluetooth wake, etc.). Non-blocking. pub fn prewarm(&self) { @@ -210,6 +243,239 @@ fn control_thread(control_rx: std_mpsc::Receiver) { info!("Audio output thread finished"); } +/// Applies pending install/remove commands to the active source. Shared by +/// the live cpal callback and the headless sink. +fn drain_source_commands( + active: &mut Option>, + source_rx: &std_mpsc::Receiver>, +) { + while let Ok(command) = source_rx.try_recv() { + match command { + SourceCommand::Install(source) => *active = Some(*source), + SourceCommand::Remove { generation } => { + let matches = generation.is_none() + || active + .as_ref() + .map(|s| Some(s.generation) == generation) + .unwrap_or(false); + if matches { + *active = None; + } + } + } + } +} + +/// Renders one output block from the active source: applies the video +/// playhead sync policy, fills the buffer and acknowledges the first +/// consumed block. Shared by the live cpal callback and the headless sink so +/// harnesses exercise the exact production logic. 
+fn render_source_block>( + source: &mut ActiveSource, + buffer: &mut [T], + latency_secs: f64, +) { + if source.playhead_rx.has_changed().unwrap_or(false) { + let video_playhead = *source.playhead_rx.borrow_and_update(); + let jump = (video_playhead - source.last_video_playhead).abs(); + let audible_playhead = source.buffer.current_audible_playhead(latency_secs); + let drift = (video_playhead - audible_playhead).abs(); + + if jump > 0.05 || drift > 0.04 { + source.buffer.set_playhead(video_playhead + latency_secs); + } + + source.last_video_playhead = video_playhead; + } + + source.buffer.fill(buffer); + + if let Some(ack) = source.ack.take() { + let _ = ack.send(()); + } +} + +/// Builds the per-playback source from a play spec and hands it to the +/// output via `install_tx`. `use_device_latency_hint` is false for the +/// headless sink, which models a zero-latency device. +fn install_source>( + spec: Box, + generation: u64, + ack: std_mpsc::Sender<()>, + output_info: AudioInfo, + use_device_latency_hint: bool, + install_tx: &std_mpsc::Sender>, +) -> Result<(), String> { + let PlaySpec { + segments, + music, + project, + duration_secs, + start_playhead_secs, + playhead_rx, + } = *spec; + + if !(duration_secs.is_finite() && duration_secs > 0.0) { + return Err(format!( + "Invalid audio pre-render duration: {duration_secs}" + )); + } + + #[cfg(not(target_os = "windows"))] + let latency_corrector = { + let hint = if use_device_latency_hint { + default_output_latency_hint(output_info.sample_rate, output_info.buffer_size) + } else { + None + }; + if let Some(hint) = hint + && hint.latency_secs > 0.0 + { + if hint.transport.is_wireless() { + info!( + "Applying wireless audio output latency hint: {:.1} ms", + hint.latency_secs * 1_000.0 + ); + } else { + info!( + "Applying audio output latency hint: {:.1} ms", + hint.latency_secs * 1_000.0 + ); + } + } + LatencyCorrector::new(hint, LatencyCorrectionConfig::default()) + }; + #[cfg(not(target_os = "windows"))] + let 
initial_latency_secs = latency_corrector.initial_output_latency_secs(); + #[cfg(target_os = "windows")] + let initial_latency_secs = { + let _ = use_device_latency_hint; + 0.0 + }; + + let start_playhead = start_playhead_secs + initial_latency_secs; + let mut buffer = PrerenderedAudioBuffer::::new( + segments, + music, + &project, + output_info, + duration_secs, + start_playhead, + ); + buffer.set_playhead(start_playhead); + // A few ms: guarantees the callback reads real samples at the + // playhead, never leading silence. + buffer.wait_until_ready(PRERENDER_READY_TIMEOUT); + + install_tx + .send(SourceCommand::Install(Box::new(ActiveSource { + generation, + buffer, + playhead_rx, + last_video_playhead: start_playhead_secs, + ack: Some(ack), + #[cfg(not(target_os = "windows"))] + latency_corrector, + }))) + .map_err(|_| "Audio callback channel closed".to_string()) +} + +/// Control loop for the headless sink: a pump thread pulls blocks on a +/// real-time schedule (as a device would) and hands every block to `tap`. +fn control_thread_headless(control_rx: std_mpsc::Receiver, mut tap: HeadlessAudioTap) { + let output_info = AudioInfo::new_raw( + AudioData::SAMPLE_FORMAT, + HEADLESS_SAMPLE_RATE, + HEADLESS_CHANNELS, + ); + + let (source_tx, source_rx) = std_mpsc::channel::>(); + let stop = Arc::new(AtomicBool::new(false)); + + let pump = { + let stop = stop.clone(); + let channels = usize::from(HEADLESS_CHANNELS); + std::thread::Builder::new() + .name("cap-audio-headless-pump".into()) + .spawn(move || { + let mut buffer = vec![0.0f32; HEADLESS_BLOCK_FRAMES * channels]; + let mut active: Option> = None; + let block = Duration::from_secs_f64( + HEADLESS_BLOCK_FRAMES as f64 / f64::from(HEADLESS_SAMPLE_RATE), + ); + let start = Instant::now(); + let mut n: u32 = 0; + + while !stop.load(Ordering::Acquire) { + // Absolute schedule: a device consumes samples isochronously, + // so late wakeups must not stretch the sample clock. 
+ let deadline = start + block * n; + let now = Instant::now(); + if deadline > now { + std::thread::sleep(deadline - now); + } + + drain_source_commands(&mut active, &source_rx); + match active.as_mut() { + Some(source) => render_source_block(source, &mut buffer, 0.0), + None => buffer.fill(0.0), + } + tap(&buffer, deadline); + n = n.saturating_add(1); + } + }) + }; + let pump = match pump { + Ok(handle) => Some(handle), + Err(e) => { + error!("Failed to spawn headless audio pump: {e}"); + None + } + }; + + while let Ok(msg) = control_rx.recv() { + match msg { + ControlMsg::EnsureStream => {} + ControlMsg::Play { + spec, + generation, + result_tx, + } => { + let (ack_tx, ack_rx) = std_mpsc::channel(); + let ok = pump.is_some() + && match install_source::( + spec, + generation, + ack_tx, + output_info, + false, + &source_tx, + ) { + Ok(()) => ack_rx.recv_timeout(SOURCE_ACK_TIMEOUT).is_ok(), + Err(e) => { + error!("Failed to install headless audio source: {e}"); + false + } + }; + let _ = result_tx.send(ok); + } + ControlMsg::StopPlayback { generation } => { + let _ = source_tx.send(SourceCommand::Remove { + generation: Some(generation), + }); + } + ControlMsg::Shutdown => break, + } + } + + stop.store(true, Ordering::Release); + if let Some(pump) = pump { + let _ = pump.join(); + } + + info!("Headless audio output thread finished"); +} + fn handle_play(state: &mut Option, spec: Box, generation: u64) -> bool { if !ensure_stream(state) { return false; @@ -318,21 +584,7 @@ where .build_output_stream( &config, move |buffer: &mut [T], info| { - while let Ok(command) = source_rx.try_recv() { - match command { - SourceCommand::Install(source) => active = Some(*source), - SourceCommand::Remove { generation } => { - let matches = generation.is_none() - || active - .as_ref() - .map(|s| Some(s.generation) == generation) - .unwrap_or(false); - if matches { - active = None; - } - } - } - } + drain_source_commands(&mut active, &source_rx); let Some(source) = active.as_mut() else 
{ buffer.fill(T::EQUILIBRIUM); @@ -347,24 +599,7 @@ where 0.0 }; - if source.playhead_rx.has_changed().unwrap_or(false) { - let video_playhead = *source.playhead_rx.borrow_and_update(); - let jump = (video_playhead - source.last_video_playhead).abs(); - let audible_playhead = source.buffer.current_audible_playhead(latency_secs); - let drift = (video_playhead - audible_playhead).abs(); - - if jump > 0.05 || drift > 0.04 { - source.buffer.set_playhead(video_playhead + latency_secs); - } - - source.last_video_playhead = video_playhead; - } - - source.buffer.fill(buffer); - - if let Some(ack) = source.ack.take() { - let _ = ack.send(()); - } + render_source_block(source, buffer, latency_secs); }, { let failed = failed.clone(); @@ -384,72 +619,7 @@ where let install_tx = source_tx.clone(); let install = Box::new( move |spec: Box, generation: u64, ack: std_mpsc::Sender<()>| { - let PlaySpec { - segments, - music, - project, - duration_secs, - start_playhead_secs, - playhead_rx, - } = *spec; - - if !(duration_secs.is_finite() && duration_secs > 0.0) { - return Err(format!( - "Invalid audio pre-render duration: {duration_secs}" - )); - } - - #[cfg(not(target_os = "windows"))] - let latency_corrector = { - let hint = - default_output_latency_hint(output_info.sample_rate, output_info.buffer_size); - if let Some(hint) = hint - && hint.latency_secs > 0.0 - { - if hint.transport.is_wireless() { - info!( - "Applying wireless audio output latency hint: {:.1} ms", - hint.latency_secs * 1_000.0 - ); - } else { - info!( - "Applying audio output latency hint: {:.1} ms", - hint.latency_secs * 1_000.0 - ); - } - } - LatencyCorrector::new(hint, LatencyCorrectionConfig::default()) - }; - #[cfg(not(target_os = "windows"))] - let initial_latency_secs = latency_corrector.initial_output_latency_secs(); - #[cfg(target_os = "windows")] - let initial_latency_secs = 0.0; - - let start_playhead = start_playhead_secs + initial_latency_secs; - let mut buffer = PrerenderedAudioBuffer::::new( - 
segments, - music, - &project, - output_info, - duration_secs, - start_playhead, - ); - buffer.set_playhead(start_playhead); - // A few ms: guarantees the callback reads real samples at the - // playhead, never leading silence. - buffer.wait_until_ready(PRERENDER_READY_TIMEOUT); - - install_tx - .send(SourceCommand::Install(Box::new(ActiveSource { - generation, - buffer, - playhead_rx, - last_video_playhead: start_playhead_secs, - ack: Some(ack), - #[cfg(not(target_os = "windows"))] - latency_corrector, - }))) - .map_err(|_| "Audio callback channel closed".to_string()) + install_source::(spec, generation, ack, output_info, true, &install_tx) }, ); diff --git a/crates/editor/src/editor_instance.rs b/crates/editor/src/editor_instance.rs index 5ef4e0a837..c093037846 100644 --- a/crates/editor/src/editor_instance.rs +++ b/crates/editor/src/editor_instance.rs @@ -115,6 +115,26 @@ impl EditorInstance { on_state_change: impl Fn(&EditorState) + Send + Sync + 'static, frame_cb: Box, shared_device: Option, + ) -> Result, String> { + Self::new_with_audio_output( + project_path, + on_state_change, + frame_cb, + shared_device, + Arc::new(crate::AudioOutput::new()), + ) + .await + } + + /// Like [`EditorInstance::new`] but with a caller-provided audio output, + /// letting harnesses substitute a headless sink while everything else + /// (decoders, renderer, playback) runs the production path. + pub async fn new_with_audio_output( + project_path: PathBuf, + on_state_change: impl Fn(&EditorState) + Send + Sync + 'static, + frame_cb: Box, + shared_device: Option, + audio_output: Arc, ) -> Result, String> { if !project_path.exists() { return Err(format!("Video path {} not found!", project_path.display())); @@ -246,9 +266,13 @@ impl EditorInstance { // Segment setup (decoder init + kicking off audio decodes) is // independent of the GPU/render setup below, so run it concurrently on // its own task. 
- let force_ffmpeg_for_editor = cfg!(target_os = "windows"); + // The env override lets headless harnesses on runners whose + // VideoToolbox is too slow for real-time playback fall back to the + // FFmpeg decoder. + let force_ffmpeg_for_editor = cfg!(target_os = "windows") + || std::env::var_os("CAP_EDITOR_FORCE_FFMPEG_DECODER").is_some(); if force_ffmpeg_for_editor { - tracing::info!("Using FFmpeg decoder for Windows editor preview"); + tracing::info!("Using FFmpeg decoder for editor preview"); } let segments_task = tokio::spawn({ @@ -260,7 +284,6 @@ impl EditorInstance { // Open the session's audio output stream now (in the background) so // the first play press doesn't wait on the device — Bluetooth outputs // in particular can take seconds to wake. - let audio_output = Arc::new(crate::AudioOutput::new()); let has_declared_audio = match meta.as_ref() { StudioRecordingMeta::SingleSegment { segment } => segment.audio.is_some(), StudioRecordingMeta::MultipleSegments { inner } => inner diff --git a/crates/editor/src/lib.rs b/crates/editor/src/lib.rs index 1c4d2fd3b3..6b4dfd1bde 100644 --- a/crates/editor/src/lib.rs +++ b/crates/editor/src/lib.rs @@ -7,7 +7,9 @@ mod segments; mod telemetry; pub use audio::{AudioRenderer, MusicTracks}; -pub use audio_output::AudioOutput; +pub use audio_output::{ + AudioOutput, HEADLESS_BLOCK_FRAMES, HEADLESS_CHANNELS, HEADLESS_SAMPLE_RATE, HeadlessAudioTap, +}; pub use editor::{ EditorFrameOutput, Renderer, RendererHandle, finish_renderer_layers_creation, start_renderer_layers_creation, diff --git a/crates/enc-ffmpeg/src/audio/base.rs b/crates/enc-ffmpeg/src/audio/base.rs index 5f352188db..44ac33659e 100644 --- a/crates/enc-ffmpeg/src/audio/base.rs +++ b/crates/enc-ffmpeg/src/audio/base.rs @@ -24,8 +24,11 @@ impl AudioEncoderBase { timestamp: Duration, output: &mut format::context::Output, ) -> Result<(), ffmpeg::Error> { + // Input frames are stamped in input-rate units; BufferedResampler + // rescales them to the encoder's output 
rate. + let input_rate = f64::from(self.resampler.input().rate); self.inner - .update_pts(&mut frame, timestamp, &mut self.encoder); + .update_pts_with_rate(&mut frame, timestamp, input_rate); self.resampler.add_frame(frame); diff --git a/crates/enc-ffmpeg/src/audio/buffered_resampler.rs b/crates/enc-ffmpeg/src/audio/buffered_resampler.rs index d350920856..34924bb4d6 100644 --- a/crates/enc-ffmpeg/src/audio/buffered_resampler.rs +++ b/crates/enc-ffmpeg/src/audio/buffered_resampler.rs @@ -42,8 +42,10 @@ impl BufferedResampler { }; for buffer in self.buffer.iter().skip(1) { - // fill in gap - remaining_samples += (buffer.1 - pts) as usize; + // Fill in gaps between buffered frames. Non-integer rate ratios + // (44.1k -> 48k) can round consecutive pts to overlap by a sample, + // making the difference negative; that is an overlap, not a gap. + remaining_samples += (buffer.1 - pts).max(0) as usize; remaining_samples += buffer.0.samples(); pts += buffer.0.samples() as i64; } @@ -55,6 +57,10 @@ impl BufferedResampler { *self.resampler.output() } + pub fn input(&self) -> resampling::context::Definition { + *self.resampler.input() + } + pub fn add_frame(&mut self, mut frame: ffmpeg::frame::Audio) { if let Some(min_next_pts) = self.min_next_pts && let Some(pts) = frame.pts() @@ -351,6 +357,61 @@ mod test { let last = bufferer.buffer.back().unwrap(); assert_eq!(last.1 + last.0.samples() as i64, 600); } + + #[test] + fn overlapping_resampled_pts_treated_as_zero_gap() { + // Non-integer rate ratios (44.1k -> 48k) round consecutive + // resampled pts so a frame can start a sample before the previous + // frame ends. The overlap must count as zero gap — the unclamped + // subtraction used to wrap to a huge unsigned "gap" and blow up + // frame retrieval. 
+ let mut bufferer = create_resampler(IN_RATE); + + bufferer.buffer.push_back((make_input_frame(100, 0), 0)); + bufferer.buffer.push_back((make_input_frame(100, 0), 99)); + + assert_eq!(bufferer.remaining_samples(), 200); + + let out_frame = bufferer.get_frame(200).expect("both frames are buffered"); + assert_eq!(out_frame.samples(), 200); + assert_eq!(out_frame.pts(), Some(0)); + } + + #[test] + fn non_integer_ratio_stream_preserves_duration() { + // Real-world 44.1k -> 48k with device-sized buffers: pts rounding + // must neither panic nor lose samples over a sustained stream. + let mut bufferer = BufferedResampler::new( + AudioInfo::new_raw(format::Sample::U8(cap_media_info::Type::Packed), 44_100, 1), + AudioInfo::new_raw(format::Sample::U8(cap_media_info::Type::Packed), 48_000, 1), + ) + .unwrap(); + + let mut total = 0usize; + for k in 0..64i64 { + let mut frame = ffmpeg::frame::Audio::new( + cap_media_info::Sample::U8(cap_media_info::Type::Packed), + 1024, + ChannelLayout::MONO, + ); + frame.data_mut(0).fill(69); + frame.set_rate(44_100); + frame.set_pts(Some(k * 1024)); + bufferer.add_frame(frame); + while let Some(out) = bufferer.get_frame(960) { + total += out.samples(); + } + } + while let Some(out) = bufferer.flush(960) { + total += out.samples(); + } + + let expected = (64.0 * 1024.0 * 48_000.0 / 44_100.0) as isize; + assert!( + ((total as isize) - expected).abs() < 2_000, + "drained {total} output samples, expected about {expected}" + ); + } } mod get_frame { diff --git a/crates/enc-ffmpeg/src/audio/opus.rs b/crates/enc-ffmpeg/src/audio/opus.rs index 1b9c144624..34e1902dae 100644 --- a/crates/enc-ffmpeg/src/audio/opus.rs +++ b/crates/enc-ffmpeg/src/audio/opus.rs @@ -67,6 +67,11 @@ impl OpusEncoder { let mut output_config = input_config; output_config.sample_format = Self::SAMPLE_FORMAT; output_config.sample_rate = rate as u32; + // libopus rejects surround layouts without an explicit mapping + // family; multichannel interfaces (5.1 mics) would fail to 
start a + // recording at all. Voice capture doesn't need surround: downmix to + // stereo via the resampler instead. + output_config.channels = output_config.channels.min(2); let resampler = BufferedResampler::new(input_config, output_config) .map_err(OpusEncoderError::Resampler)?; diff --git a/crates/enc-ffmpeg/src/base.rs b/crates/enc-ffmpeg/src/base.rs index 88ffbef5ba..60179c8fa1 100644 --- a/crates/enc-ffmpeg/src/base.rs +++ b/crates/enc-ffmpeg/src/base.rs @@ -66,6 +66,44 @@ impl EncoderBase { } } + /// Stamps the frame's pts from its capture timestamp using an explicit + /// tick rate. Audio input frames must be stamped in *input sample rate* + /// units — the resampler rescales them to the encoder's output rate — + /// whereas [`Self::update_pts`] uses the encoder's own (output) time + /// base. Mixing the two conventions plays non-48kHz microphones at the + /// wrong speed. + pub fn update_pts_with_rate( + &mut self, + frame: &mut frame::Frame, + timestamp: Duration, + rate: f64, + ) { + if timestamp != Duration::MAX { + let pts = (timestamp.as_secs_f64() * rate).round() as i64; + let first_pts = *self.first_pts.get_or_insert(pts); + let mut pts = pts - first_pts; + if let Some(last) = self.last_frame_pts + && pts <= last + { + pts = last + 1; + } + self.last_frame_pts = Some(pts); + frame.set_pts(Some(pts)); + } else if let Some(pts) = frame.pts() { + let first_pts = *self.first_pts.get_or_insert(pts); + let mut pts = pts - first_pts; + if let Some(last) = self.last_frame_pts + && pts <= last + { + pts = last + 1; + } + self.last_frame_pts = Some(pts); + frame.set_pts(Some(pts)); + } else { + tracing::error!("Frame has no pts"); + } + } + pub fn send_frame( &mut self, frame: &frame::Frame, diff --git a/crates/enc-ffmpeg/src/mux/segmented_stream.rs b/crates/enc-ffmpeg/src/mux/segmented_stream.rs index 5605044b45..f6ec79014a 100644 --- a/crates/enc-ffmpeg/src/mux/segmented_stream.rs +++ b/crates/enc-ffmpeg/src/mux/segmented_stream.rs @@ -84,7 +84,6 @@ pub 
struct SegmentedVideoEncoder { segment_start_time: Option, last_frame_timestamp: Option, frames_in_segment: u32, - encoded_frame_count: u64, completed_segments: Vec, @@ -281,7 +280,6 @@ impl SegmentedVideoEncoder { segment_start_time: None, last_frame_timestamp: None, frames_in_segment: 0, - encoded_frame_count: 0, completed_segments: Vec::new(), pending_segment_indices: Vec::new(), frames_since_pending_flush: 0, @@ -341,10 +339,12 @@ impl SegmentedVideoEncoder { self.last_frame_timestamp = Some(timestamp); - let encoder_timestamp = self.next_encoder_timestamp(); + // Encode with the frame's real capture-derived timestamp. The encoder + // anchors pts at the first frame, so capture gaps (static content, + // stream restarts, dropped frames) stay in the timeline instead of + // compressing it and drifting video ahead of audio. self.encoder - .queue_frame(frame, encoder_timestamp, &mut self.output)?; - self.encoded_frame_count += 1; + .queue_frame(frame, timestamp, &mut self.output)?; self.frames_in_segment += 1; if is_first_frame { @@ -367,12 +367,6 @@ impl SegmentedVideoEncoder { Ok(()) } - fn next_encoder_timestamp(&self) -> Duration { - let frame_rate_num = self.codec_info.frame_rate_num.max(1) as f64; - let frame_rate_den = self.codec_info.frame_rate_den.max(1) as f64; - Duration::from_secs_f64(self.encoded_frame_count as f64 * frame_rate_den / frame_rate_num) - } - fn notify_segment(&self, event: SegmentCompletedEvent) { if let Some(tx) = &self.segment_tx && let Err(e) = tx.send(event) @@ -995,6 +989,88 @@ mod tests { assert!(all_video, "all events should be video type"); } + #[test] + fn encoded_pts_preserve_capture_timestamps_across_gaps() { + ffmpeg::init().ok(); + + let temp = tempfile::tempdir().unwrap(); + let base_path = temp.path().to_path_buf(); + + let mut encoder = SegmentedVideoEncoder::init( + base_path.clone(), + test_video_info(), + SegmentedVideoEncoderConfig { + segment_duration: Duration::from_millis(500), + ..Default::default() + }, + ) + 
.unwrap(); + + // Three frames at ~30fps, a 1.9s capture gap (static screen / + // stream restart), then three more frames. The encoded pts must + // reflect the gap instead of collapsing to a frame-counter grid, + // otherwise every dropped frame desyncs video from audio. + let timestamps_ms: [u64; 6] = [0, 33, 66, 2000, 2033, 2066]; + for ts_ms in timestamps_ms { + let frame = create_test_frame(320, 240); + encoder + .queue_frame(frame, Duration::from_millis(ts_ms)) + .unwrap(); + } + + encoder.finish().unwrap(); + + // fMP4 segments concatenated after the init segment form a valid mp4. + let mut segment_paths: Vec = std::fs::read_dir(&base_path) + .unwrap() + .filter_map(|e| e.ok().map(|e| e.path())) + .filter(|p| p.extension().is_some_and(|ext| ext == "m4s")) + .collect(); + segment_paths.sort(); + assert!( + !segment_paths.is_empty(), + "encoder should have produced media segments" + ); + + let concat_path = base_path.join("concat_test.mp4"); + let mut concatenated = std::fs::read(base_path.join(INIT_SEGMENT_NAME)).unwrap(); + for segment in &segment_paths { + concatenated.extend(std::fs::read(segment).unwrap()); + } + std::fs::write(&concat_path, concatenated).unwrap(); + + let mut input = format::input(&concat_path).unwrap(); + let stream_index = input + .streams() + .best(ffmpeg::media::Type::Video) + .unwrap() + .index(); + let time_base = input.stream(stream_index).unwrap().time_base(); + let tb = time_base.numerator() as f64 / time_base.denominator() as f64; + + let mut pts_secs: Vec = input + .packets() + .filter_map(|(stream, packet)| { + (stream.index() == stream_index) + .then_some(packet.pts()) + .flatten() + }) + .map(|pts| pts as f64 * tb) + .collect(); + pts_secs.sort_by(|a, b| a.partial_cmp(b).unwrap()); + + assert_eq!(pts_secs.len(), timestamps_ms.len()); + + for (pts, expected_ms) in pts_secs.iter().zip(timestamps_ms) { + let expected = expected_ms as f64 / 1000.0; + assert!( + (pts - expected).abs() < 0.005, + "encoded pts {pts:.3}s should 
match capture timestamp {expected:.3}s \ + (all pts: {pts_secs:?})" + ); + } + } + #[test] fn manifest_updated_on_segment_boundary() { ffmpeg::init().ok(); diff --git a/crates/recording/src/output_pipeline/core.rs b/crates/recording/src/output_pipeline/core.rs index 42390d899a..298724048f 100644 --- a/crates/recording/src/output_pipeline/core.rs +++ b/crates/recording/src/output_pipeline/core.rs @@ -620,6 +620,7 @@ fn video_mux_send_error(frame_count: u64, error: anyhow::Error) -> anyhow::Error pub(crate) struct AudioTimestampGenerator { sample_rate: u32, total_samples: u64, + clock_samples_advanced: u64, master_clock: Option>, } @@ -631,37 +632,69 @@ impl AudioTimestampGenerator { Self { sample_rate, total_samples: 0, + clock_samples_advanced: 0, master_clock: None, } } + #[cfg(test)] fn from_master_clock(master_clock: Arc) -> Self { + let rate = master_clock.sample_rate(); + Self::from_master_clock_with_rate(master_clock, rate) + } + + /// The generator converts counted samples into time, so it must run at + /// the audio source's real sample rate. The shared master clock may run + /// at a different (default 48kHz) rate: counting a 44.1kHz mic's samples + /// against a 48kHz clock makes the audio timeline lag real time and the + /// gap tracker "corrects" the difference with bogus silence — the + /// recording then plays at the wrong speed. + fn from_master_clock_with_rate(master_clock: Arc, sample_rate: u32) -> Self { Self { - sample_rate: master_clock.sample_rate(), + sample_rate: if sample_rate > 0 { + sample_rate + } else { + master_clock.sample_rate() + }, total_samples: 0, + clock_samples_advanced: 0, master_clock: Some(master_clock), } } + fn advance_clock(&mut self) { + let Some(clock) = &self.master_clock else { + return; + }; + // Convert source-rate samples into clock-rate samples so the shared + // clock advances by real time regardless of the source's rate. 
The + // conversion runs on the cumulative total: converting each buffer + // independently truncates up to one clock sample per call, which + // accumulates into real drift for non-integer ratios (44.1k -> 48k). + let target = if clock.sample_rate() == self.sample_rate { + self.total_samples + } else { + (self.total_samples as u128 * clock.sample_rate() as u128 + / u128::from(self.sample_rate.max(1))) as u64 + }; + let delta = target.saturating_sub(self.clock_samples_advanced); + self.clock_samples_advanced = target; + if delta > 0 { + clock.advance_samples(delta); + } + } + fn next_timestamp(&mut self, frame_samples: u64) -> Duration { let timestamp_nanos = samples_to_nanos(self.total_samples, self.sample_rate); self.total_samples += frame_samples; - if let Some(clock) = &self.master_clock - && frame_samples > 0 - { - clock.advance_samples(frame_samples); - } + self.advance_clock(); Duration::from_nanos(timestamp_nanos) } fn advance_by_duration(&mut self, duration: Duration) -> u64 { let samples = (duration.as_secs_f64() * self.sample_rate as f64).round() as u64; self.total_samples += samples; - if let Some(clock) = &self.master_clock - && samples > 0 - { - clock.advance_samples(samples); - } + self.advance_clock(); samples } } @@ -1134,21 +1167,20 @@ impl TimestampAnomalyTracker { self.max_forward_skew_secs = jump_secs; } - let expected_increment = Duration::from_millis(33); - let adjusted = last.saturating_add(expected_increment); - - let compensation_secs = current.as_secs_f64() - adjusted.as_secs_f64(); - self.accumulated_compensation_secs -= compensation_secs; - self.resync_count += 1; - self.did_resync = true; - if wall_clock_confirmed { + // Frame delivery paused for about as long as the timestamp jump: + // this is a real gap (static screen, stream restart, sleep/wake), + // not a source-clock glitch. 
The gap must stay in the timeline — + // collapsing it desyncs video from audio whenever it happens + // before the wall-clock anchor exists to re-expand it. let wall_clock_gap_secs = self .last_valid_wall_clock .map(|wc| now.duration_since(wc).as_secs_f64()) .unwrap_or(0.0); self.wall_clock_confirmed_jumps += 1; + self.consecutive_anomalies = 0; + self.last_valid_duration = Some(current); info!( stream = self.stream_name, @@ -1158,9 +1190,21 @@ impl TimestampAnomalyTracker { current_ms = current.as_millis(), resync_count = self.resync_count, confirmed_jumps = self.wall_clock_confirmed_jumps, - "Wall-clock-confirmed forward jump (system sleep/wake), accepting new baseline" + "Wall-clock-confirmed forward jump (gap in frame delivery), accepting new baseline" ); - } else { + + return Ok(current); + } + + let expected_increment = Duration::from_millis(33); + let adjusted = last.saturating_add(expected_increment); + + let compensation_secs = current.as_secs_f64() - adjusted.as_secs_f64(); + self.accumulated_compensation_secs -= compensation_secs; + self.resync_count += 1; + self.did_resync = true; + + { self.anomaly_count += 1; let wall_clock_gap_secs = self @@ -1595,6 +1639,7 @@ impl OutputPipelineBuilder> { let shared_pause = SharedWallClockPause::new(build_ctx.pause_flag.clone()); let video_frame_count = Arc::new(AtomicU64::new(0)); + let video_timestamp_span = Arc::new(VideoTimestampSpan::default()); let video_start_gate = has_audio_sources.then(VideoStartGate::new); @@ -1608,6 +1653,7 @@ impl OutputPipelineBuilder> { timestamps, shared_pause.clone(), video_frame_count.clone(), + video_timestamp_span.clone(), master_clock.clone(), video_info, video_start_gate.clone(), @@ -1641,6 +1687,7 @@ impl OutputPipelineBuilder> { pause_flag: build_ctx.pause_flag, cancel_token: build_ctx.stop_token, video_frame_count, + video_timestamp_span, health_rx: Some(build_ctx.health_rx), audio_gap_summary, }) @@ -1724,6 +1771,7 @@ impl OutputPipelineBuilder { pause_flag: 
build_ctx.pause_flag, cancel_token: build_ctx.stop_token, video_frame_count: Arc::new(AtomicU64::new(0)), + video_timestamp_span: Arc::new(VideoTimestampSpan::default()), health_rx: Some(build_ctx.health_rx), audio_gap_summary, }) @@ -1890,6 +1938,42 @@ fn estimate_video_frame_duration_ns(video_info: &VideoInfo) -> u64 { 1_000_000_000 / fps as u64 } +/// Span of the video timestamps actually sent to the muxer, used to report +/// the real encoded media duration. Capture is VFR (static screens, dropped +/// frames), so `frame_count / fps` under-reports the duration by the length +/// of every gap. +#[derive(Debug)] +pub struct VideoTimestampSpan { + first_ns: AtomicU64, + last_ns: AtomicU64, +} + +impl Default for VideoTimestampSpan { + fn default() -> Self { + Self { + first_ns: AtomicU64::new(u64::MAX), + last_ns: AtomicU64::new(0), + } + } +} + +impl VideoTimestampSpan { + fn record(&self, timestamp: Duration) { + let ns = timestamp.as_nanos().min(u64::MAX as u128) as u64; + self.first_ns.fetch_min(ns, Ordering::AcqRel); + self.last_ns.fetch_max(ns, Ordering::AcqRel); + } + + pub fn get(&self) -> Option<(Duration, Duration)> { + let first = self.first_ns.load(Ordering::Acquire); + if first == u64::MAX { + return None; + } + let last = self.last_ns.load(Ordering::Acquire).max(first); + Some((Duration::from_nanos(first), Duration::from_nanos(last))) + } +} + #[allow(clippy::too_many_arguments)] fn spawn_video_encoder, TVideo: VideoSource>( setup_ctx: &mut SetupCtx, @@ -1901,6 +1985,7 @@ fn spawn_video_encoder, TVideo: V timestamps: Timestamps, shared_pause: SharedWallClockPause, frame_counter: Arc, + timestamp_span: Arc, master_clock: Arc, video_info: VideoInfo, video_start_gate: Option, @@ -1975,8 +2060,16 @@ fn spawn_video_encoder, TVideo: V ); } + // Excise accumulated pause time from the content timeline + // before anomaly tracking. 
Audio already excises pauses + // (paused frames are dropped and sample counting carries + // on), and wall_clock_elapsed below subtracts pauses too; + // leaving the pause in the video timestamps would make a + // resume look like a wall-clock-confirmed capture gap and + // poison the drift anchor with pause-inflated time. let remapped_ts = Timestamp::Instant( - timestamps.instant() + remap.duration(), + timestamps.instant() + + remap.duration().saturating_sub(total_pause_duration), ); let raw_duration = match anomaly_tracker.process_timestamp(remapped_ts, timestamps) { @@ -1999,6 +2092,7 @@ fn spawn_video_encoder, TVideo: V let raw_wall_clock = timestamps.instant().elapsed(); let wall_clock_elapsed = raw_wall_clock.saturating_sub(total_pause_duration); let duration = drift_tracker.calculate_timestamp(raw_duration, wall_clock_elapsed); + timestamp_span.record(duration); if frame_count.is_multiple_of(300) { let drift_ratio = if raw_duration.as_secs_f64() > 0.0 { @@ -2071,8 +2165,13 @@ fn spawn_video_encoder, TVideo: V "Published video start timestamp to encoder-pair gate (drain path)" ); } + // Excise pauses exactly like the main loop above, so + // drained tail frames stay on the same content timeline. 
let remapped_ts = Timestamp::Instant( - timestamps.instant() + remap.duration(), + timestamps.instant() + + remap + .duration() + .saturating_sub(shared_pause.total_pause_duration()), ); let raw_duration = @@ -2092,6 +2191,7 @@ fn spawn_video_encoder, TVideo: V let wall_clock_elapsed = raw_wall_clock.saturating_sub(total_pause); let duration = drift_tracker.calculate_timestamp(raw_duration, wall_clock_elapsed); + timestamp_span.record(duration); match muxer.lock().await.send_video_frame(frame, duration) { Ok(()) => {} @@ -2191,8 +2291,10 @@ impl PreparedAudioSources { let stop_token = stop_token.child_token(); let muxer = muxer.clone(); async move { - let mut timestamp_generator = - AudioTimestampGenerator::from_master_clock(master_clock.clone()); + let mut timestamp_generator = AudioTimestampGenerator::from_master_clock_with_rate( + master_clock.clone(), + audio_info.sample_rate, + ); let sample_rate = audio_info.sample_rate; let mut dropped_during_pause: u64 = 0; let mut frame_count: u64 = 0; @@ -2741,6 +2843,7 @@ pub struct OutputPipeline { pause_flag: Arc, cancel_token: CancellationToken, video_frame_count: Arc, + video_timestamp_span: Arc, health_rx: Option, audio_gap_summary: Arc>, } @@ -2750,6 +2853,9 @@ pub struct FinishedOutputPipeline { pub first_timestamp: Timestamp, pub video_info: Option, pub video_frame_count: u64, + /// First and last video timestamps sent to the muxer; the real encoded + /// media span for VFR content. 
+ pub video_timestamp_span: Option<(Duration, Duration)>, pub audio_gap_summary: Option, } @@ -2842,6 +2948,7 @@ impl OutputPipeline { first_timestamp, video_info: self.video_info, video_frame_count: self.video_frame_count.load(Ordering::Acquire), + video_timestamp_span: self.video_timestamp_span.get(), audio_gap_summary: self.audio_gap_summary.get().copied(), }) } @@ -3805,11 +3912,17 @@ mod tests { tracker.last_valid_wall_clock = Instant::now().checked_sub(Duration::from_secs(3)); let jump_ts = make_timestamp(timestamps, Duration::from_millis(4 * 33 + 3000)); - tracker.process_timestamp(jump_ts, timestamps).unwrap(); + let accepted = tracker.process_timestamp(jump_ts, timestamps).unwrap(); + // A wall-clock-confirmed jump is a real gap in frame delivery and + // passes through unmodified — it is not a resync. assert!( - tracker.take_resync_flag(), - "Resync flag should be set after wall-clock-confirmed jump" + !tracker.take_resync_flag(), + "Confirmed gap must not be treated as a timeline resync" + ); + assert!( + (accepted.as_secs_f64() - (4.0 * 0.033 + 3.0)).abs() < 0.05, + "confirmed gap must pass through, got {accepted:?}" ); let next_ts = @@ -3851,7 +3964,10 @@ mod tests { assert_eq!(tracker.anomaly_count, 0); assert_eq!(tracker.wall_clock_confirmed_jumps, 2); - assert_eq!(tracker.resync_count, 2); + assert_eq!( + tracker.resync_count, 0, + "confirmed gaps pass through; they are not timeline resyncs" + ); } #[test] @@ -4365,10 +4481,14 @@ mod tests { #[test] fn returns_timeout_when_thread_does_not_exit_in_time() { - let handle = std::thread::spawn(|| { - std::thread::sleep(Duration::from_millis(100)); + // The worker blocks until released, so it can never beat the + // timeout however unfairly a loaded machine schedules threads. 
+ let (release_tx, release_rx) = std::sync::mpsc::channel::<()>(); + let handle = std::thread::spawn(move || { + let _ = release_rx.recv(); Ok(()) }); + let _release_tx = release_tx; match wait_for_blocking_thread_finish(handle, Duration::from_millis(5), "test-worker") { BlockingThreadFinish::TimedOut(error) => { @@ -5279,5 +5399,53 @@ mod tests { "video drifted from the wall clock by {max_skew:?} (correction failed)" ); } + + // A static screen (or a capture-stream restart) stops frame delivery + // entirely. The gap must survive into the output timeline: collapsing + // it compresses video relative to audio and desyncs the recording. + #[test] + fn video_timeline_preserves_capture_gaps() { + let mut video = VideoDriftTracker::new(); + let interval = 1.0 / 30.0; + + let mut outs = Vec::new(); + for v in 0..150u64 { + let t = Duration::from_secs_f64(v as f64 * interval); + outs.push(video.calculate_timestamp(t, t)); + } + // 4s with no frames delivered, then delivery resumes with + // timestamps that include the gap. 
+ for v in 150..300u64 { + let t = Duration::from_secs_f64(v as f64 * interval + 4.0); + outs.push(video.calculate_timestamp(t, t)); + } + + let gap = outs[150].saturating_sub(outs[149]); + assert!( + gap >= Duration::from_secs_f64(3.5), + "capture gap collapsed to {gap:?} in the output timeline" + ); + + let span = outs[299].saturating_sub(outs[0]); + let real = 299.0 * interval + 4.0; + assert!( + (span.as_secs_f64() - real).abs() < 0.3, + "output span {span:?} does not match real elapsed time {real:.2}s" + ); + } + + #[test] + fn video_timestamp_span_reports_first_and_last_sent() { + let span = VideoTimestampSpan::default(); + assert!(span.get().is_none(), "unset span must be None"); + + span.record(Duration::from_millis(100)); + span.record(Duration::from_millis(133)); + span.record(Duration::from_millis(4000)); // across a capture gap + + let (first, last) = span.get().expect("span should be set"); + assert_eq!(first, Duration::from_millis(100)); + assert_eq!(last, Duration::from_millis(4000)); + } } } diff --git a/crates/recording/src/output_validation.rs b/crates/recording/src/output_validation.rs index 72da56a4d9..3e045b3a41 100644 --- a/crates/recording/src/output_validation.rs +++ b/crates/recording/src/output_validation.rs @@ -103,3 +103,49 @@ pub fn validate_instant_recording( output_duration, } } + +/// Tolerated difference between the display track's container duration and the +/// media span the recorder persisted, before the recording is flagged as +/// having suspicious sync. Generous enough for muxer rounding and trailing +/// keyframe padding; far below the hundreds of milliseconds a real timestamp +/// bug produces. +const SYNC_SPAN_TOLERANCE_SECS: f64 = 0.5; +const SYNC_SPAN_TOLERANCE_RATIO: f64 = 0.03; + +/// Cross-checks a finalized display track against the media duration the +/// recorder derived from the capture timestamps it actually muxed. 
+/// +/// The two are produced independently: the expected duration comes from the +/// pipeline's timestamp span, the container duration from what the encoder +/// and muxer wrote. A container SHORTER than the span means timestamps were +/// mangled between the pipeline and the file — the class of bug that +/// silently desyncs audio/video. A LONGER container is legitimate for VFR +/// content: muxers extend the final frame through any trailing static-screen +/// hold (AVFoundation ends the session at the wall-clock stop time), so that +/// direction is only noted at debug level. Non-fatal: logs a structured +/// error and returns the shortfall in seconds so callers can surface it. +pub fn check_display_sync_span(display_path: &Path, expected: Duration) -> Option { + let container = get_media_duration(display_path)?; + let shortfall = expected.as_secs_f64() - container.as_secs_f64(); + let tolerance = + (expected.as_secs_f64() * SYNC_SPAN_TOLERANCE_RATIO).max(SYNC_SPAN_TOLERANCE_SECS); + if shortfall > tolerance { + tracing::error!( + path = %display_path.display(), + container_secs = container.as_secs_f64(), + expected_secs = expected.as_secs_f64(), + delta_secs = shortfall, + "SYNC INVARIANT VIOLATION: display track duration is shorter than \ + the muxed timestamp span; this recording may have desynced audio/video" + ); + Some(shortfall) + } else { + debug!( + path = %display_path.display(), + container_secs = container.as_secs_f64(), + expected_secs = expected.as_secs_f64(), + "display track duration consistent with muxed timestamp span" + ); + None + } +} diff --git a/crates/recording/src/recovery.rs b/crates/recording/src/recovery.rs index e542718b08..572d4b10f7 100644 --- a/crates/recording/src/recovery.rs +++ b/crates/recording/src/recovery.rs @@ -757,6 +757,28 @@ impl RecoveryManager { if total.is_zero() { None } else { Some(total) } } + /// Reads the display media duration the recorder persisted into the + /// project's default timeline, used to cross-check the
remuxed container. + fn expected_display_duration_from_config( + project_path: &Path, + segment_index: u32, + ) -> Option { + let config = std::fs::read_to_string(project_path.join("project-config.json")).ok()?; + let value: serde_json::Value = serde_json::from_str(&config).ok()?; + let segments = value.get("timeline")?.get("segments")?.as_array()?; + let segment = segments.iter().find(|s| { + s.get("recordingSegment") + .and_then(serde_json::Value::as_u64) + == Some(u64::from(segment_index)) + })?; + let end = segment.get("end")?.as_f64()?; + let start = segment + .get("start") + .and_then(serde_json::Value::as_f64) + .unwrap_or(0.0); + (end > start && end.is_finite()).then(|| std::time::Duration::from_secs_f64(end - start)) + } + pub fn recover(recording: &IncompleteRecording) -> Result { Self::finalize_with_purpose(recording, RecoveryPurpose::Recover) } @@ -828,6 +850,17 @@ impl RecoveryManager { } } + // Sync invariant: the remuxed display track must match the media + // span the recorder persisted from its capture timestamps. + if display_output.is_file() + && let Some(expected) = Self::expected_display_duration_from_config( + &recording.project_path, + segment.index, + ) + { + crate::output_validation::check_display_sync_span(&display_output, expected); + } + if let Some(camera_frags) = &segment.camera_fragments { let camera_output = segment_dir.join("camera.mp4"); let camera_dir = segment_dir.join("camera"); diff --git a/crates/recording/src/studio_recording.rs b/crates/recording/src/studio_recording.rs index 30352fb085..b79278a414 100644 --- a/crates/recording/src/studio_recording.rs +++ b/crates/recording/src/studio_recording.rs @@ -1002,17 +1002,39 @@ async fn stop_recording( ); DEFAULT_FPS }); - // Use the encoded display-media duration (frame_count / fps), not the wall-clock - // recording span which includes pipeline-drain latency. 
This is the timeline the - // recorder persists to project-config.json, so it is what un-edited recordings use; the - // editor/export fallbacks only synthesize a timeline when none is present and read the - // muxed container duration, which this closely (not bit-exactly) matches. - let display_media_duration = if display_fps > 0 { - s.pipeline.screen.video_frame_count as f64 / f64::from(display_fps) - } else { - 0.0 + // Use the encoded display-media span (first to last muxed timestamp plus one + // nominal frame), not the wall-clock recording span which includes + // pipeline-drain latency, and not frame_count / fps, which under-reports VFR + // content by the length of every capture gap (static screens, dropped frames). + // This is the timeline the recorder persists to project-config.json, so it is + // what un-edited recordings use. + let display_media_duration = match s.pipeline.screen.video_timestamp_span { + Some((first, last)) if display_fps > 0 => { + (last - first).as_secs_f64() + 1.0 / f64::from(display_fps) + } + _ if display_fps > 0 => { + s.pipeline.screen.video_frame_count as f64 / f64::from(display_fps) + } + _ => 0.0, }; + // Non-fragmented recordings have their final display file already; + // verify the muxed container matches the timestamps we sent it. + // Fragmented recordings get the same check after remux in recovery. 
+ if s.pipeline + .screen + .path + .extension() + .is_some_and(|e| e == "mp4") + && s.pipeline.screen.path.is_file() + && display_media_duration > 0.0 + { + crate::output_validation::check_display_sync_span( + &s.pipeline.screen.path, + Duration::from_secs_f64(display_media_duration), + ); + } + SegmentOutput { meta: MultipleSegment { display: VideoMeta { @@ -1809,6 +1831,7 @@ mod tests { first_timestamp, video_info, video_frame_count, + video_timestamp_span: None, audio_gap_summary: None, } } diff --git a/crates/recording/tests/sync_matrix.rs b/crates/recording/tests/sync_matrix.rs new file mode 100644 index 0000000000..d7b08a3d19 --- /dev/null +++ b/crates/recording/tests/sync_matrix.rs @@ -0,0 +1,905 @@ +//! Synthetic device matrix for A/V sync. +//! +//! Drives the real recording pipeline (sources -> mux loop -> encoders -> +//! containers) with synthetic video and audio across sample rates, channel +//! counts, frame rates and delivery pathologies (jitter, drops, gaps), then +//! verifies the muxed output preserves real time. No capture hardware is +//! required, so this runs identically on macOS, Windows and Linux CI. +//! +//! Frames are emitted in real time because the pipeline pins video +//! timestamps to the wall clock; each case therefore costs its content +//! duration. Keep cases short. +//! +//! Set `CAP_SYNC_MATRIX_REPORT` to write a JSON report of every case. + +use std::{ + path::{Path, PathBuf}, + time::Duration, +}; + +use cap_media_info::{AudioInfo, Sample, Type, VideoInfo}; +use cap_recording::{ + AudioFrame, ChannelAudioSource, ChannelAudioSourceConfig, ChannelVideoSource, + ChannelVideoSourceConfig, OutputPipeline, + ffmpeg::{ + FFmpegVideoFrame, Mp4Muxer, OggMuxer, SegmentedVideoMuxer, SegmentedVideoMuxerConfig, + }, +}; +use cap_timestamp::{Timestamp, Timestamps}; +use serde::Serialize; + +const CONTENT_SECS: f64 = 4.0; +/// Absolute tolerance for a muxed pts vs the sent capture timestamp. 
Covers +/// warmup anchoring, emission jitter and encoder rounding, plus scheduler +/// noise on shared CI runners. +const ABS_TOLERANCE_SECS: f64 = 0.25; +/// Tolerance for the relative structure (pts deltas vs sent deltas), which is +/// what actually determines sync drift. The bug class this guards against +/// produces errors of a second or more. +const REL_TOLERANCE_SECS: f64 = 0.15; +/// Tolerance for decoded audio duration vs generated duration. +const AUDIO_DURATION_TOLERANCE_SECS: f64 = 0.15; + +#[derive(Debug, Clone, Copy)] +enum VideoScenario { + Steady, + Jitter, + Drops, + Gap, +} + +impl VideoScenario { + fn name(self) -> &'static str { + match self { + Self::Steady => "steady", + Self::Jitter => "jitter", + Self::Drops => "drops", + Self::Gap => "gap", + } + } + + /// Deterministic capture timestamps (seconds) for the scenario. + fn timestamps(self, fps: u32) -> Vec { + let period = 1.0 / f64::from(fps); + let total = (CONTENT_SECS * f64::from(fps)) as u64; + let mut out = Vec::new(); + for k in 0..total { + let base = k as f64 * period; + match self { + Self::Steady => out.push(base), + Self::Jitter => { + // Deterministic pseudo-jitter, +-40% of the period, kept + // monotonic (and non-negative) by construction. + let phase = (k as f64 * 0.7368).fract() - 0.5; + out.push((base + phase * period * 0.8).max(0.0)); + } + Self::Drops => { + // Drop every 4th and 7th frame: the capture stream simply + // never delivers them. + if k % 4 != 3 && k % 7 != 6 { + out.push(base); + } + } + Self::Gap => { + // 1.5s of frames, a 2s static-screen gap, then the rest. + let t = base; + if t < 1.5 { + out.push(t); + } else { + out.push(t + 2.0); + } + } + } + } + out.sort_by(|a, b| a.partial_cmp(b).unwrap()); + out.dedup_by(|a, b| (*a - *b).abs() < period * 0.25); + out + } +} + +#[derive(Serialize)] +struct CaseResult { + name: String, + pass: bool, + detail: String, +} + +#[derive(Debug, Clone, Copy, PartialEq)] +enum Content { + /// Flat color; encodes trivially. 
+ Flat, + /// Per-frame pseudo-random noise: worst-case encoder load, exercising + /// backpressure the way dense real screen content does. + Noise, + /// A moving bar over a gradient: typical screen-content motion. + Motion, +} + +fn make_video_frame( + width: u32, + height: u32, + frame_index: u64, + content: Content, + rng: &mut Rng, +) -> ffmpeg::frame::Video { + let mut frame = ffmpeg::frame::Video::new(ffmpeg::format::Pixel::BGRA, width, height); + let stride = frame.stride(0); + let data = frame.data_mut(0); + match content { + Content::Flat => { + let shade = ((frame_index * 7) % 200) as u8; + data.fill(shade); + } + Content::Noise => { + // Refresh a pseudo-random buffer per frame so no two frames are + // alike and inter prediction gets no free lunch. + for chunk in data.chunks_mut(8) { + let v = rng.next().to_le_bytes(); + let n = chunk.len(); + chunk.copy_from_slice(&v[..n]); + } + } + Content::Motion => { + let bar = ((frame_index * 6) % u64::from(width)) as usize; + for y in 0..height as usize { + let row = &mut data[y * stride..y * stride + width as usize * 4]; + for (x, px) in row.chunks_mut(4).enumerate() { + let base = ((x * 255) / width as usize) as u8; + let v = if x.abs_diff(bar) < 12 { 255 } else { base }; + px[0] = v; + px[1] = v ^ 0x55; + px[2] = base; + px[3] = 255; + } + } + } + } + frame +} + +/// splitmix64: tiny, dependency-free, deterministic PRNG. Every randomized +/// case is fully reproducible from the printed seed. 
+struct Rng(u64); + +impl Rng { + fn next(&mut self) -> u64 { + self.0 = self.0.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut z = self.0; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + z ^ (z >> 31) + } + + fn range(&mut self, lo: u64, hi: u64) -> u64 { + lo + self.next() % (hi - lo + 1) + } + + fn f64(&mut self) -> f64 { + (self.next() >> 11) as f64 / (1u64 << 53) as f64 + } + + fn pick(&mut self, items: &[T]) -> T { + items[(self.next() % items.len() as u64) as usize] + } +} + +#[derive(Clone)] +struct VideoCase { + fps: u32, + sent: Vec, + fragmented: bool, + width: u32, + height: u32, + content: Content, + rng_seed: u64, +} + +impl VideoCase { + fn curated(fps: u32, scenario: VideoScenario, fragmented: bool) -> Self { + Self { + fps, + sent: scenario.timestamps(fps), + fragmented, + width: 160, + height: 120, + content: Content::Flat, + rng_seed: 1, + } + } +} + +async fn run_video_case(case: VideoCase) -> Result { + let temp = tempfile::tempdir().map_err(|e| format!("tempdir: {e}"))?; + let out_path = if case.fragmented { + temp.path().join("display") + } else { + temp.path().join("display.mp4") + }; + let fragmented = case.fragmented; + + let info = VideoInfo::from_raw( + cap_media_info::RawVideoFormat::Bgra, + case.width, + case.height, + case.fps, + ); + let (tx, rx) = flume::bounded::(32); + let timestamps = Timestamps::now(); + + let sent = case.sent.clone(); + let emit = { + let sent = sent.clone(); + let base = timestamps.instant(); + let (width, height, content) = (case.width, case.height, case.content); + let mut rng = Rng(case.rng_seed); + tokio::spawn(async move { + for (i, &ts) in sent.iter().enumerate() { + tokio::time::sleep_until((base + Duration::from_secs_f64(ts)).into()).await; + let frame = FFmpegVideoFrame { + inner: make_video_frame(width, height, i as u64, content, &mut rng), + timestamp: Timestamp::Instant(base + Duration::from_secs_f64(ts)), + }; + if 
tx.send_async(frame).await.is_err() { + break; + } + } + // Sender drops here, ending the stream. + }) + }; + + let builder = OutputPipeline::builder(out_path.clone()) + .with_video::>(ChannelVideoSourceConfig::new(info, rx)) + .with_timestamps(timestamps); + + let pipeline = if fragmented { + builder + .build::(SegmentedVideoMuxerConfig { + segment_duration: Duration::from_secs(2), + ..Default::default() + }) + .await + } else { + builder.build::(()).await + } + .map_err(|e| format!("pipeline build: {e}"))?; + + emit.await.map_err(|e| format!("emit join: {e}"))?; + // The verification below assumes frames were emitted in real time; when a + // saturated runner (or a software encoder drowning in worst-case content) + // stalls emission for seconds, pts-vs-wall comparisons are meaningless. + // Skip loudly instead of failing on an environment artifact. + let emit_lag = + timestamps.instant().elapsed().as_secs_f64() - sent.last().copied().unwrap_or(0.0); + let finished = { + // Allow the tail of the stream to flush through the encoder. + tokio::time::sleep(Duration::from_millis(500)).await; + pipeline.stop().await.map_err(|e| format!("stop: {e}"))? + }; + if emit_lag > 1.5 { + return Ok(format!( + "skipped: runner fell {emit_lag:.1}s behind real-time emission" + )); + } + + // Read back the muxed pts. + let playable = if fragmented { + concat_fmp4(&out_path, temp.path())? + } else { + out_path.clone() + }; + let pts = read_video_pts(&playable)?; + + if pts.len() != sent.len() { + return Err(format!( + "frame count mismatch: sent {} frames, container has {}", + sent.len(), + pts.len() + )); + } + + let mut max_abs: f64 = 0.0; + let mut max_rel: f64 = 0.0; + // At low frame rates the fixed tolerance is only a frame or two of + // budget, so scheduler jitter on shared runners trips it; express the + // floor in frames as well. The bug class this guards produces errors of + // a second or more either way. 
+ let rel_tolerance = REL_TOLERANCE_SECS.max(2.5 / f64::from(case.fps)); + for (i, (&p, &s)) in pts.iter().zip(&sent).enumerate() { + max_abs = max_abs.max((p - s).abs()); + let rel = ((p - pts[0]) - (s - sent[0])).abs(); + max_rel = max_rel.max(rel); + if rel > rel_tolerance { + return Err(format!( + "frame {i}: relative pts error {rel:.3}s (pts {p:.3}s vs sent {s:.3}s)" + )); + } + } + if max_abs > ABS_TOLERANCE_SECS { + return Err(format!( + "absolute pts error {max_abs:.3}s exceeds tolerance" + )); + } + + // The span the recorder would persist must match the sent span. + if let Some((first, last)) = finished.video_timestamp_span { + let span = (last - first).as_secs_f64(); + let expected = sent.last().unwrap() - sent[0]; + if (span - expected).abs() > 0.25 { + return Err(format!( + "video_timestamp_span {span:.3}s does not match sent span {expected:.3}s" + )); + } + } else { + return Err("video_timestamp_span missing".to_string()); + } + + // Gap preservation is the regression that desynced 0.5.4: every gap in + // the sent timeline must survive into the container. + let max_sent_gap = sent.windows(2).map(|w| w[1] - w[0]).fold(0.0, f64::max); + if max_sent_gap > 1.0 { + let max_pts_gap = pts.windows(2).map(|w| w[1] - w[0]).fold(0.0, f64::max); + if max_pts_gap < max_sent_gap * 0.9 { + return Err(format!( + "{max_sent_gap:.2}s capture gap collapsed to {max_pts_gap:.3}s in the container" + )); + } + } + + Ok(format!( + "{} frames, max abs err {:.0} ms, max rel err {:.0} ms", + pts.len(), + max_abs * 1000.0, + max_rel * 1000.0 + )) +} + +/// A mid-recording pause (instant mode): emission continues in real time but +/// the pipeline drops frames while paused. The pause must be EXCISED from the +/// output timeline — video pts must stay continuous across it (matching how +/// audio drops paused samples and how the wall clock subtracts pauses), and +/// the container must contain only the unpaused content. 
A regression here +/// previously poisoned the drift anchor with pause-inflated time whenever the +/// pause began before the ~2s warmup anchor existed. +async fn run_video_pause_case() -> Result { + const PRE_PAUSE_SECS: f64 = 1.0; + const PAUSE_SECS: f64 = 2.5; + const POST_PAUSE_SECS: f64 = 2.0; + const FPS: u32 = 30; + + let temp = tempfile::tempdir().map_err(|e| format!("tempdir: {e}"))?; + let out_path = temp.path().join("display.mp4"); + + let info = VideoInfo::from_raw(cap_media_info::RawVideoFormat::Bgra, 160, 120, FPS); + let (tx, rx) = flume::bounded::(32); + let timestamps = Timestamps::now(); + + let total_secs = PRE_PAUSE_SECS + PAUSE_SECS + POST_PAUSE_SECS; + let emit = { + let base = timestamps.instant(); + let mut rng = Rng(7); + tokio::spawn(async move { + let period = 1.0 / f64::from(FPS); + let count = (total_secs * f64::from(FPS)) as u64; + for k in 0..count { + let ts = k as f64 * period; + tokio::time::sleep_until((base + Duration::from_secs_f64(ts)).into()).await; + let frame = FFmpegVideoFrame { + inner: make_video_frame(160, 120, k, Content::Flat, &mut rng), + timestamp: Timestamp::Instant(base + Duration::from_secs_f64(ts)), + }; + if tx.send_async(frame).await.is_err() { + break; + } + } + }) + }; + + let pipeline = OutputPipeline::builder(out_path.clone()) + .with_video::>(ChannelVideoSourceConfig::new(info, rx)) + .with_timestamps(timestamps) + .build::(()) + .await + .map_err(|e| format!("pipeline build: {e}"))?; + + tokio::time::sleep(Duration::from_secs_f64(PRE_PAUSE_SECS)).await; + let pause_started = std::time::Instant::now(); + pipeline.pause(); + tokio::time::sleep(Duration::from_secs_f64(PAUSE_SECS)).await; + pipeline.resume(); + let actual_pause = pause_started.elapsed().as_secs_f64(); + + emit.await.map_err(|e| format!("emit join: {e}"))?; + let emit_lag = timestamps.instant().elapsed().as_secs_f64() - total_secs; + tokio::time::sleep(Duration::from_millis(500)).await; + pipeline.stop().await.map_err(|e| format!("stop: 
{e}"))?; + // The assertions below compare the muxed span against the intended + // pause/content windows; a runner too stalled to hit those windows + // invalidates the comparison, not the pipeline. + if emit_lag > 1.5 || (actual_pause - PAUSE_SECS).abs() > 0.5 { + return Ok(format!( + "skipped: runner too slow (emission lag {emit_lag:.1}s, pause window {actual_pause:.2}s)" + )); + } + + let pts = read_video_pts(&out_path)?; + if pts.len() < 8 { + return Err(format!("only {} frames muxed", pts.len())); + } + + // The pause is excised: the muxed span must cover roughly the unpaused + // content, not the wall-clock run. + let span = pts.last().unwrap() - pts[0]; + let expected = PRE_PAUSE_SECS + POST_PAUSE_SECS; + if (span - expected).abs() > 0.6 { + return Err(format!( + "muxed span {span:.2}s should be about the unpaused content {expected:.2}s \ + (pause leaked into the timeline)" + )); + } + + // And no single pts step may contain the pause. + let max_gap = pts.windows(2).map(|w| w[1] - w[0]).fold(0.0, f64::max); + if max_gap > PAUSE_SECS * 0.8 { + return Err(format!( + "pause survived as a {max_gap:.2}s pts gap in the container" + )); + } + + // Post-resume continuity is the discriminating check: without the pause + // excision the anomaly tracker accepts the pause as a confirmed jump and + // the drift tracker's wall cap re-pins the post-resume segment ~one + // tolerance late (measured +0.13s vs +0.03s with the fix). The median + // over the whole segment is immune to per-frame scheduler jitter. + let period = 1.0 / f64::from(FPS); + let split = pts + .windows(2) + .position(|w| w[1] - w[0] == max_gap) + .unwrap_or(0); + let pre_last = pts[split]; + let mut post_offsets: Vec = pts[split + 1..] 
+ .iter() + .enumerate() + .map(|(k, &p)| p - (pre_last + (k as f64 + 1.0) * period)) + .collect(); + post_offsets.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let continuity = post_offsets + .get(post_offsets.len() / 2) + .copied() + .unwrap_or(0.0); + if continuity.abs() > 0.08 { + return Err(format!( + "post-resume frames resume {continuity:+.3}s off the pre-pause timeline \ + (pause bled into the drift anchor)" + )); + } + + Ok(format!( + "{} frames, span {span:.2}s (expected ~{expected:.2}s), max pts gap {max_gap:.2}s, \ + post-resume continuity {continuity:+.3}s", + pts.len() + )) +} + +#[derive(Clone, Copy)] +struct AudioCase { + rate: u32, + channels: u16, + /// Device buffer size in milliseconds; real hardware spans ~3-90ms. + chunk_ms: f64, + /// Source clock drift factor: samples arrive slightly faster or slower + /// than their nominal rate, as real device crystals do. + drift: f64, +} + +impl AudioCase { + fn curated(rate: u32, channels: u16) -> Self { + Self { + rate, + channels, + chunk_ms: 20.0, + drift: 1.0, + } + } +} + +async fn run_audio_case(case: AudioCase) -> Result { + let AudioCase { + rate, + channels, + chunk_ms, + drift, + } = case; + let temp = tempfile::tempdir().map_err(|e| format!("tempdir: {e}"))?; + let out_path = temp.path().join("audio.ogg"); + + let info = AudioInfo::new(Sample::F32(Type::Packed), rate, channels) + .map_err(|e| format!("audio info: {e:?}"))?; + let (tx, rx) = futures::channel::mpsc::channel::(32); + let timestamps = Timestamps::now(); + + let chunk_frames = ((f64::from(rate) * chunk_ms / 1000.0) as usize).max(16); + let chunk_secs = chunk_frames as f64 / f64::from(rate); + let total_chunks = (CONTENT_SECS / chunk_secs).ceil() as usize; + + let emit = { + let base = timestamps.instant(); + let mut tx = tx; + let info = info; + tokio::spawn(async move { + use futures::SinkExt; + for k in 0..total_chunks { + let real_t = k as f64 * chunk_secs; + let ts = real_t * drift; + tokio::time::sleep_until((base + 
Duration::from_secs_f64(real_t)).into()).await; + let mut frame = ffmpeg::frame::Audio::new( + ffmpeg::format::Sample::F32(ffmpeg::format::sample::Type::Packed), + chunk_frames, + info.channel_layout(), + ); + frame.set_rate(rate); + let data = frame.data_mut(0); + for (i, sample) in bytemuck_cast_f32(data).iter_mut().enumerate() { + let n = (k * chunk_frames + i / channels as usize) as f32; + *sample = (n * 440.0 * 2.0 * std::f32::consts::PI / rate as f32).sin() * 0.4; + } + let frame = AudioFrame::new( + frame, + Timestamp::Instant(base + Duration::from_secs_f64(ts)), + ); + if tx.send(frame).await.is_err() { + break; + } + } + }) + }; + + let pipeline = OutputPipeline::builder(out_path.clone()) + .with_audio_source::(ChannelAudioSourceConfig::new(info, rx)) + .with_timestamps(timestamps) + .build::(()) + .await + .map_err(|e| format!("pipeline build: {e}"))?; + + emit.await.map_err(|e| format!("emit join: {e}"))?; + let emit_lag = timestamps.instant().elapsed().as_secs_f64() - CONTENT_SECS; + tokio::time::sleep(Duration::from_millis(300)).await; + pipeline.stop().await.map_err(|e| format!("stop: {e}"))?; + if emit_lag > 1.5 { + return Ok(format!( + "skipped: runner fell {emit_lag:.1}s behind real-time emission" + )); + } + + let (duration, decoded_channels, energy) = read_audio_stats(&out_path)?; + if (duration - CONTENT_SECS).abs() > AUDIO_DURATION_TOLERANCE_SECS { + return Err(format!( + "decoded duration {duration:.3}s vs expected {CONTENT_SECS:.3}s \ + (rate handling error: content plays at the wrong speed)" + )); + } + if energy < 0.01 { + return Err(format!( + "decoded audio is nearly silent (rms {energy:.4}); samples were lost or zeroed" + )); + } + + Ok(format!( + "duration {duration:.3}s, {decoded_channels} ch decoded, rms {energy:.3}" + )) +} + +fn bytemuck_cast_f32(data: &mut [u8]) -> &mut [f32] { + let len = data.len() / 4; + unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr().cast::(), len) } +} + +/// Concatenates a fragmented-mp4 segment 
directory (init.mp4 + *.m4s) into a +/// single playable file. +fn concat_fmp4(dir: &Path, scratch: &Path) -> Result { + let init = dir.join("init.mp4"); + let mut bytes = std::fs::read(&init).map_err(|e| format!("read init.mp4: {e}"))?; + let mut segments: Vec = std::fs::read_dir(dir) + .map_err(|e| format!("read segment dir: {e}"))? + .filter_map(|e| e.ok().map(|e| e.path())) + .filter(|p| p.extension().is_some_and(|ext| ext == "m4s")) + .collect(); + segments.sort(); + if segments.is_empty() { + return Err("no media segments produced".to_string()); + } + for segment in &segments { + bytes.extend(std::fs::read(segment).map_err(|e| format!("read segment: {e}"))?); + } + let out = scratch.join("concat.mp4"); + std::fs::write(&out, bytes).map_err(|e| format!("write concat: {e}"))?; + Ok(out) +} + +fn read_video_pts(path: &Path) -> Result, String> { + let mut ictx = ffmpeg::format::input(&path).map_err(|e| format!("open {e}"))?; + let stream = ictx + .streams() + .best(ffmpeg::media::Type::Video) + .ok_or("no video stream")?; + let index = stream.index(); + let tb = stream.time_base(); + let tb = f64::from(tb.numerator()) / f64::from(tb.denominator()); + let mut pts: Vec = ictx + .packets() + .filter_map(|(s, p)| (s.index() == index).then_some(p.pts()).flatten()) + .map(|p| p as f64 * tb) + .collect(); + pts.sort_by(|a, b| a.partial_cmp(b).unwrap()); + Ok(pts) +} + +fn read_audio_stats(path: &Path) -> Result<(f64, u16, f64), String> { + let mut ictx = ffmpeg::format::input(&path).map_err(|e| format!("open {e}"))?; + let stream = ictx + .streams() + .best(ffmpeg::media::Type::Audio) + .ok_or("no audio stream")?; + let index = stream.index(); + let ctx = ffmpeg::codec::context::Context::from_parameters(stream.parameters()) + .map_err(|e| format!("params: {e}"))?; + let mut decoder = ctx.decoder().audio().map_err(|e| format!("decoder: {e}"))?; + + let mut samples = 0u64; + let mut rate = 0u32; + let mut channels = 0u16; + let mut sum_sq = 0.0f64; + let mut counted = 
0u64; + let mut frame = ffmpeg::frame::Audio::empty(); + for (s, packet) in ictx.packets() { + if s.index() != index { + continue; + } + if decoder.send_packet(&packet).is_ok() { + while decoder.receive_frame(&mut frame).is_ok() { + samples += frame.samples() as u64; + rate = frame.rate(); + channels = frame.channels(); + if let ffmpeg::format::Sample::F32(ffmpeg::format::sample::Type::Planar) = + frame.format() + { + for &v in &frame.plane::(0)[..frame.samples()] { + sum_sq += f64::from(v) * f64::from(v); + counted += 1; + } + } + } + } + } + let _ = decoder.send_eof(); + while decoder.receive_frame(&mut frame).is_ok() { + samples += frame.samples() as u64; + } + + if rate == 0 { + return Err("no audio decoded".to_string()); + } + let rms = if counted > 0 { + (sum_sq / counted as f64).sqrt() + } else { + 0.0 + }; + Ok((samples as f64 / f64::from(rate), channels, rms)) +} + +fn record(results: &mut Vec, name: String, outcome: Result) { + eprintln!( + "{name}: {}", + match &outcome { + Ok(d) => format!("ok ({d})"), + Err(e) => format!("FAIL ({e})"), + } + ); + results.push(CaseResult { + name, + pass: outcome.is_ok(), + detail: outcome.unwrap_or_else(|e| e), + }); +} + +/// A fully random capture shape: arbitrary fps, resolution, encoder-load +/// content, timestamp jitter, random drops, and 0-2 gaps at random positions +/// (including inside the first two seconds, where the drift anchor does not +/// exist yet). 
+fn random_video_case(rng: &mut Rng) -> VideoCase { + let fps = rng.range(10, 120) as u32; + let (width, height) = rng.pick(&[(160u32, 120u32), (320, 240), (640, 360)]); + let content = rng.pick(&[Content::Flat, Content::Noise, Content::Motion]); + let fragmented = rng.f64() < 0.75; + + let period = 1.0 / f64::from(fps); + let jitter = rng.f64() * 0.45; + let drop_prob = rng.f64() * 0.25; + let gap_count = rng.range(0, 2); + let mut gaps: Vec<(f64, f64)> = (0..gap_count) + .map(|_| { + let at = 0.4 + rng.f64() * (CONTENT_SECS - 1.0); + let len = 1.2 + rng.f64() * 2.0; + (at, len) + }) + .collect(); + gaps.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap()); + + let total = (CONTENT_SECS * f64::from(fps)) as u64; + let mut sent = Vec::new(); + for k in 0..total { + if rng.f64() < drop_prob { + continue; + } + let base = k as f64 * period; + let mut ts = (base + (rng.f64() - 0.5) * period * jitter).max(0.0); + for &(at, len) in &gaps { + if ts >= at { + ts += len; + } + } + sent.push(ts); + } + sent.sort_by(|a, b| a.partial_cmp(b).unwrap()); + sent.dedup_by(|a, b| (*a - *b).abs() < period * 0.25); + // Guarantee at least a handful of frames survive the drop lottery. + if sent.len() < 8 { + sent = (0..total).map(|k| k as f64 * period).collect(); + } + + VideoCase { + fps, + sent, + fragmented, + width, + height, + content, + rng_seed: rng.next(), + } +} + +/// A random audio device shape: any rate from the set real devices negotiate, +/// 1-8 channels, real-world buffer sizes, and a small crystal drift. 
+fn random_audio_case(rng: &mut Rng) -> AudioCase { + let rate = rng.pick(&[ + 8_000u32, 11_025, 12_000, 16_000, 22_050, 24_000, 32_000, 44_100, 48_000, 88_200, 96_000, + 176_400, 192_000, + ]); + let channels = rng.range(1, 8) as u16; + let chunk_ms = 3.0 + rng.f64() * 85.0; + let drift = 1.0 + (rng.f64() - 0.5) * 0.002; // +-0.1% + + AudioCase { + rate, + channels, + chunk_ms, + drift, + } +} + +#[tokio::test(flavor = "multi_thread")] +async fn synthetic_device_matrix_preserves_sync() { + let mut results: Vec = Vec::new(); + + // Randomized cases are reproducible: rerun with CAP_SYNC_MATRIX_SEED=. + let seed: u64 = std::env::var("CAP_SYNC_MATRIX_SEED") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or_else(|| { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos() as u64) + .unwrap_or(0x5EED) + }); + let random_cases: usize = std::env::var("CAP_SYNC_MATRIX_RANDOM_CASES") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(6); + eprintln!("randomized cases: {random_cases}, CAP_SYNC_MATRIX_SEED={seed}"); + + let video_cases: Vec<(u32, VideoScenario, bool)> = vec![ + (15, VideoScenario::Steady, true), + (30, VideoScenario::Steady, true), + (60, VideoScenario::Steady, true), + (120, VideoScenario::Steady, true), + (30, VideoScenario::Jitter, true), + (60, VideoScenario::Jitter, true), + (30, VideoScenario::Drops, true), + (60, VideoScenario::Drops, true), + (30, VideoScenario::Gap, true), + (60, VideoScenario::Gap, true), + (30, VideoScenario::Steady, false), + (30, VideoScenario::Gap, false), + ]; + + for (fps, scenario, fragmented) in video_cases { + let name = format!( + "video/{}fps/{}/{}", + fps, + scenario.name(), + if fragmented { "fragmented" } else { "mp4" } + ); + let outcome = run_video_case(VideoCase::curated(fps, scenario, fragmented)).await; + record(&mut results, name, outcome); + } + + record( + &mut results, + "video/30fps/pause-resume/mp4".to_string(), + run_video_pause_case().await, + ); + + let 
audio_cases: Vec<(u32, u16)> = vec![ + (8_000, 2), + (16_000, 2), + (22_050, 2), + (44_100, 2), + (48_000, 2), + (96_000, 2), + (48_000, 1), + (44_100, 1), + (48_000, 6), + ]; + + for (rate, channels) in audio_cases { + let name = format!("audio/{rate}hz/{channels}ch"); + let outcome = run_audio_case(AudioCase::curated(rate, channels)).await; + record(&mut results, name, outcome); + } + + // Non-predetermined coverage: random device shapes and delivery + // pathologies, combined audio+video like a real studio recording. + let mut rng = Rng(seed); + for i in 0..random_cases { + let video = random_video_case(&mut rng); + let audio = random_audio_case(&mut rng); + let name = format!( + "random/{i}/video-{}fps-{}x{}-{:?}-{}ts/audio-{}hz-{}ch-{:.0}ms-drift{:+.2}%", + video.fps, + video.width, + video.height, + video.content, + video.sent.len(), + audio.rate, + audio.channels, + audio.chunk_ms, + (audio.drift - 1.0) * 100.0, + ); + // Run both legs concurrently, as a real recording does. + let (video_outcome, audio_outcome) = + tokio::join!(run_video_case(video), run_audio_case(audio)); + let outcome = match (video_outcome, audio_outcome) { + (Ok(v), Ok(a)) => Ok(format!("video: {v}; audio: {a}")), + (Err(e), _) => Err(format!("video leg: {e}")), + (_, Err(e)) => Err(format!("audio leg: {e}")), + }; + record(&mut results, name, outcome); + } + + if let Ok(report_path) = std::env::var("CAP_SYNC_MATRIX_REPORT") { + #[derive(Serialize)] + struct Report<'a> { + seed: u64, + cases: &'a [CaseResult], + } + let json = serde_json::to_string_pretty(&Report { + seed, + cases: &results, + }) + .expect("serialize report"); + std::fs::write(&report_path, json).expect("write report"); + eprintln!("report written to {report_path}"); + } + + let failures: Vec<&CaseResult> = results.iter().filter(|r| !r.pass).collect(); + assert!( + failures.is_empty(), + "{} of {} matrix cases failed:\n{}", + failures.len(), + results.len(), + failures + .iter() + .map(|r| format!(" {} — {}", r.name, 
r.detail)) + .collect::>() + .join("\n") + ); +} diff --git a/crates/rendering/src/decoder/avassetreader.rs b/crates/rendering/src/decoder/avassetreader.rs index 26fd3f08c1..c450b31e7c 100644 --- a/crates/rendering/src/decoder/avassetreader.rs +++ b/crates/rendering/src/decoder/avassetreader.rs @@ -17,7 +17,10 @@ use crate::{DecodedFrame, PixelFormat}; use super::frame_converter::{copy_bgra_to_rgba, copy_rgba_plane}; use super::multi_position::{DecoderPoolManager, MultiPositionDecoderConfig, ScrubDetector}; -use super::{DecoderInitResult, DecoderType, FRAME_CACHE_SIZE, VideoDecoderMessage, pts_to_frame}; +use super::{ + DecoderInitResult, DecoderType, FRAME_CACHE_SIZE, VideoDecoderMessage, pts_to_frame, + record_pts_hole, +}; const MAX_RELAXED_FALLBACK_DISTANCE: u32 = 8; const SCRUB_REUSE_THRESHOLD_SECS: f32 = 0.5; @@ -31,7 +34,7 @@ struct FrameData { #[derive(Clone)] struct ProcessedFrame { - _number: u32, + number: u32, width: u32, height: u32, format: PixelFormat, @@ -218,7 +221,7 @@ impl CachedFrame { let mut img = image_buf; let (data, fmt, y_str, uv_str) = processor.extract_raw(&mut img); Self(ProcessedFrame { - _number: number, + number, width, height, format: fmt, @@ -232,7 +235,7 @@ impl CachedFrame { _ => { let black_frame = vec![0u8; (width * height * 4) as usize]; Self(ProcessedFrame { - _number: number, + number, width, height, format: PixelFormat::Rgba, @@ -341,6 +344,10 @@ struct DecoderInstance { path: PathBuf, tokio_handle: TokioHandle, keyframe_index: Option>, + /// Previous frame number this instance vended, for pts-hole discovery. + /// Lives on the instance because the reader vends in pts order between + /// resets, but request batches may be served by different pool decoders. 
+ prev_vended: Option, } impl DecoderInstance { @@ -363,6 +370,7 @@ impl DecoderInstance { path, tokio_handle, keyframe_index, + prev_vended: None, }) } @@ -372,6 +380,7 @@ impl DecoderInstance { self.is_done = false; self.frames_iter_valid = true; self.health.reset_counters(); + self.prev_vended = None; } Err(e) => { tracing::error!( @@ -403,6 +412,7 @@ impl DecoderInstance { self.is_done = false; self.frames_iter_valid = true; self.health = DecoderHealth::new(); + self.prev_vended = None; Ok(()) } @@ -571,6 +581,16 @@ impl AVAssetReaderDecoder { let mut last_active_frame = None::; let last_sent_frame = Rc::new(RefCell::new(None::)); let first_ever_frame = Rc::new(RefCell::new(None::)); + // pts holes (start frame -> first frame after the hole) discovered + // from decode-order jumps. These are facts about the file — decoders + // vend samples in pts order, so a jump between consecutive vends can + // only mean no samples exist in between — and therefore survive + // resets and cache eviction. + let mut pts_holes = BTreeMap::::new(); + // Content of the most recently served VFR hold, kept across request + // batches so a hole keeps rendering its true frame even after the + // pre-hole frame leaves the cache. + let mut gap_hold: Option = None; let processor = ImageBufProcessor::new(); @@ -639,6 +659,40 @@ impl AVAssetReaderDecoder { } pending_requests = unfulfilled; + // Requests inside a KNOWN pts hole are answered with the hole's + // start frame — the true VFR hold (the frame simply stayed on + // screen) — without touching the decoder. This keeps the + // post-hole frames cached and the reader parked, however long the + // hole runs; decoding ahead here would evict the very frames the + // requests are marching towards. Only recorded holes qualify: a + // bare "some cached frame lies beyond the request" test would + // also match disjoint cache islands left by seeks, and serving + // stale content there would freeze playback on old frames. 
+ let mut still_unfulfilled = Vec::with_capacity(pending_requests.len()); + for req in pending_requests.drain(..) { + let hole_start = pts_holes + .range(..=req.frame) + .next_back() + .filter(|&(_, &end)| req.frame < end) + .map(|(&start, _)| start); + let Some(hole_start) = hole_start else { + still_unfulfilled.push(req); + continue; + }; + let data = cache + .get(&hole_start) + .map(|c| c.data().clone()) + .or_else(|| gap_hold.clone().filter(|h| h.number == hole_start)); + if let Some(data) = data { + gap_hold = Some(data.clone()); + *last_sent_frame.borrow_mut() = Some(data.clone()); + let _ = req.sender.send(data.to_decoded_frame()); + } else { + still_unfulfilled.push(req); + } + } + pending_requests = still_unfulfilled; + if pending_requests.is_empty() { continue; } @@ -676,6 +730,10 @@ impl AVAssetReaderDecoder { let mut exit = false; let mut frames_iterated = 0u32; let mut last_decoded_position: Option = None; + // Newest vended frame below the fallback floor: after a seek the + // reader re-vends from the keyframe at-or-before the request, and + // that frame is the true VFR hold for requests inside a pts hole. + let mut hold_candidate: Option<(u32, R)> = None; { let decoder = &mut this.decoders[decoder_idx]; @@ -691,6 +749,10 @@ impl AVAssetReaderDecoder { error = %e, "Failed to read frame, skipping" ); + // A skipped frame breaks the vend continuity that + // hole discovery relies on; a jump across it is + // not evidence of a hole. 
+ decoder.prev_vended = None; continue; } }; @@ -699,11 +761,24 @@ impl AVAssetReaderDecoder { let current_frame = pts_to_frame(frame.pts().value, Rational::new(1, frame.pts().scale), fps); + if let Some(prev) = decoder.prev_vended + && current_frame > prev + 1 + { + record_pts_hole(&mut pts_holes, prev, current_frame); + } + decoder.prev_vended = Some(current_frame); + let position_secs = current_frame as f32 / fps as f32; last_decoded_position = Some(position_secs); decoder.is_done = false; if current_frame < minimum_fallback_frame { + // Keep a handle to it instead of discarding it: if the + // requests land inside a pts hole this is the only + // at-or-before content the reader will ever vend. + if let Some(buf) = frame.image_buf() { + hold_candidate = Some((current_frame, buf.retained())); + } continue; } @@ -756,17 +831,55 @@ impl AVAssetReaderDecoder { *last_sent_frame.borrow_mut() = Some(data.clone()); let _ = req.sender.send(data.to_decoded_frame()); } else { - let nearest = cache + // Always answer. Prefer the newest frame at-or-before + // the request — from the cache, the hold candidate the + // seek re-vended, or the persistent gap hold — as the + // true VFR hold content (a pts gap means the frame + // stayed on screen). A later frame is the last resort; + // leaving the request unanswered would wedge the render + // loop. 
+ let cached_before = cache .range(..=req.frame) .next_back() - .or_else(|| cache.range(req.frame..).next()); - - if let Some((&frame_num, cached)) = nearest { - let distance = req.frame.abs_diff(frame_num); - if distance <= req.max_fallback_distance { - let _ = - req.sender.send(cached.data().to_decoded_frame()); - } + .map(|(_, c)| c.data().clone()); + let hold_before = gap_hold.clone().filter(|h| { + pts_holes.get(&h.number).is_some_and(|&end| { + h.number <= req.frame && req.frame < end + }) + }); + let candidate_before = hold_candidate + .as_ref() + .filter(|(n, _)| *n <= req.frame) + .map(|(n, buf)| { + CachedFrame::new(&processor, buf.retained(), *n) + .data() + .clone() + }); + let best_before = + [cached_before, hold_before, candidate_before] + .into_iter() + .flatten() + .max_by_key(|d| d.number); + + if let Some(data) = best_before { + gap_hold = Some(data.clone()); + *last_sent_frame.borrow_mut() = Some(data.clone()); + let _ = req.sender.send(data.to_decoded_frame()); + } else if let Some((&frame_num, cached)) = + cache.range(req.frame..).next() + { + tracing::debug!( + req_frame = req.frame, + nearest_frame = frame_num, + "serving forward frame across pts gap" + ); + let _ = req.sender.send(cached.data().to_decoded_frame()); + } else { + tracing::warn!( + req_frame = req.frame, + current_frame, + "dropping overshot request: cache empty" + ); } } } else { @@ -895,26 +1008,46 @@ impl AVAssetReaderDecoder { req.max_fallback_distance }; - let nearest = cache + // Always answer with the newest frame at-or-before the request — + // from the cache, the hold candidate a seek re-vended, or the + // persistent gap hold — as the true VFR hold content. A later frame + // is the best remaining answer; dropping the request instead + // starves the render loop and wedges gap playback/export. 
+ let cached_before = cache .range(..=req.frame) .next_back() - .or_else(|| cache.range(req.frame..).next()); - - if let Some((&frame_num, cached)) = nearest { - let distance = req.frame.abs_diff(frame_num); - if distance <= fallback_distance { - let _ = req.sender.send(cached.data().to_decoded_frame()); - } else if allow_relaxed_fallback - && let Some(ref last) = *last_sent_frame.borrow() - { - let _ = req.sender.send(last.to_decoded_frame()); - } else if allow_relaxed_fallback - && let Some(ref first) = *first_ever_frame.borrow() - { - let _ = req.sender.send(first.to_decoded_frame()); - } else { - unfulfilled_count += 1; + .map(|(_, c)| c.data().clone()); + let hold_before = gap_hold.clone().filter(|h| { + pts_holes + .get(&h.number) + .is_some_and(|&end| h.number <= req.frame && req.frame < end) + }); + let candidate_before = hold_candidate + .as_ref() + .filter(|(n, _)| *n <= req.frame) + .map(|(n, buf)| { + CachedFrame::new(&processor, buf.retained(), *n) + .data() + .clone() + }); + let best_before = [cached_before, hold_before, candidate_before] + .into_iter() + .flatten() + .max_by_key(|d| d.number); + + if let Some(data) = best_before { + gap_hold = Some(data.clone()); + *last_sent_frame.borrow_mut() = Some(data.clone()); + let _ = req.sender.send(data.to_decoded_frame()); + } else if let Some((&frame_num, cached)) = cache.range(req.frame..).next() { + if req.frame.abs_diff(frame_num) > fallback_distance { + tracing::debug!( + req_frame = req.frame, + nearest_frame = frame_num, + "serving forward frame across pts gap" + ); } + let _ = req.sender.send(cached.data().to_decoded_frame()); } else if allow_relaxed_fallback && let Some(ref last) = *last_sent_frame.borrow() { diff --git a/crates/rendering/src/decoder/ffmpeg.rs b/crates/rendering/src/decoder/ffmpeg.rs index 1e26eeb25d..b703a76d94 100644 --- a/crates/rendering/src/decoder/ffmpeg.rs +++ b/crates/rendering/src/decoder/ffmpeg.rs @@ -19,7 +19,7 @@ use cap_video_decode::FrameTextures; use super::{ 
DecoderInitResult, DecoderType, FRAME_CACHE_SIZE, VideoDecoderMessage, - frame_converter::FrameConverter, pts_to_frame, + frame_converter::FrameConverter, pts_to_frame, record_pts_hole, }; #[derive(Clone)] @@ -70,7 +70,6 @@ struct PendingRequest { } const MAX_FRAME_LOOKBACK_TOLERANCE: u32 = 2; -const MAX_FRAME_FALLBACK_DISTANCE: u32 = 90; fn extract_yuv_planes(frame: &frame::Video) -> Option<(Vec, PixelFormat, u32, u32)> { let height = frame.height(); @@ -250,6 +249,17 @@ impl FfmpegDecoder { let mut cache = BTreeMap::::new(); let mut last_active_frame = None::; + // pts holes (start frame -> first frame after the hole) discovered + // from decode-order jumps; facts about the file, so they survive + // resets and cache eviction. + let mut pts_holes = BTreeMap::::new(); + // Content of the most recently served VFR hold, kept across + // requests so a hole keeps rendering its true frame even after + // the pre-hole frame leaves the cache. + let mut gap_hold: Option = None; + // Previous vended frame number since the last reset, for pts-hole + // discovery (the reader vends in pts order between resets). + let mut prev_vended: Option = None; let last_sent_frame = Rc::new(RefCell::new(None::)); let first_ever_frame = Rc::new(RefCell::new(None::)); @@ -273,6 +283,14 @@ impl FfmpegDecoder { let mut sw_cache = BTreeMap::::new(); let mut sw_last_active_frame = None::; + // pts holes (start frame -> first frame after the hole) from + // decode-order jumps; facts about the file that survive resets + // and cache eviction. + let mut sw_pts_holes = BTreeMap::::new(); + // Content of the most recently served VFR hold. + let mut sw_gap_hold: Option = None; + // Previous vended frame number since the last reset. 
+ let mut sw_prev_vended: Option = None; let sw_last_sent_frame = Rc::new(RefCell::new(None::)); let sw_first_ever_frame = Rc::new(RefCell::new(None::)); let mut sw_frames = sw_this.frames(); @@ -289,6 +307,9 @@ impl FfmpegDecoder { sw_cache.insert(current_frame, cache_frame); *sw_first_ever_frame.borrow_mut() = Some(output.clone()); *sw_last_sent_frame.borrow_mut() = Some(output); + // The pre-decoded frame is a real vend: without seeding + // this, an opening static hold would never be recorded. + sw_prev_vended = Some(current_frame); } let sw_decoder_type = DecoderType::FFmpegSoftware; @@ -391,6 +412,29 @@ impl FfmpegDecoder { sw_frames = sw_this.frames(); *sw_last_sent_frame.borrow_mut() = None; sw_cache.clear(); + sw_prev_vended = None; + } + + // Requests inside a KNOWN pts hole are answered with the + // hole's start frame — the true VFR hold — without + // touching the decoder; see the hardware path above. + if !is_backward_seek + && let Some(hole_start) = sw_pts_holes + .range(..=requested_frame) + .next_back() + .filter(|&(_, &end)| requested_frame < end) + .map(|(&start, _)| start) + { + let data = sw_cache + .get_mut(&hole_start) + .map(|c| c.produce(&mut sw_converter)) + .or_else(|| sw_gap_hold.clone().filter(|h| h.number == hole_start)); + if let Some(data) = data { + sw_gap_hold = Some(data.clone()); + *sw_last_sent_frame.borrow_mut() = Some(data.clone()); + let _ = reply.send(data.frame); + continue; + } } if reply.is_closed() { @@ -443,26 +487,47 @@ impl FfmpegDecoder { sw_frames = sw_this.frames(); *sw_last_sent_frame.borrow_mut() = None; sw_cache.clear(); + sw_prev_vended = None; } let mut exit = false; + // Newest vended frame below the cache window: after a reset + // the reader re-vends from the keyframe before the request, + // and that frame is the true VFR hold for a request inside + // a pts hole. 
+ let mut hold_candidate: Option<(u32, CachedFrame)> = None; for frame in &mut sw_frames { if reply_cell.borrow().as_ref().is_none_or(|r| r.is_closed()) { respond.take(); + // The frame just pulled is discarded: it breaks + // the vend continuity that hole discovery + // relies on. + sw_prev_vended = None; break; } let Ok(frame) = frame.map_err(|e| format!("read frame / {e}")) else { + // A skipped frame breaks the vend continuity + // that hole discovery relies on. + sw_prev_vended = None; continue; }; let Some(pts) = frame.pts() else { + sw_prev_vended = None; continue; }; let current_frame = pts_to_frame(pts - sw_start_time, sw_time_base, fps); + if let Some(prev) = sw_prev_vended + && current_frame > prev + 1 + { + record_pts_hole(&mut sw_pts_holes, prev, current_frame); + } + sw_prev_vended = Some(current_frame); + let mut cache_frame = CachedFrame::Raw { frame, number: current_frame, @@ -476,16 +541,19 @@ impl FfmpegDecoder { let exceeds_cache_bounds = current_frame > cache_max; let too_small_for_cache_bounds = current_frame < cache_min; - let cache_frame = if !too_small_for_cache_bounds { - cache_frame.produce(&mut sw_converter); - - if current_frame == requested_frame - && let Some(respond) = respond.take() - { - let output = cache_frame.produce(&mut sw_converter); - (respond)(output); - break; + if too_small_for_cache_bounds { + // Keep the newest pre-request frame as the VFR hold + // candidate instead of discarding it; a frame below + // the cache window can never exceed the request, so + // nothing else in this iteration applies to it. 
+ if current_frame <= requested_frame { + hold_candidate = Some((current_frame, cache_frame)); } + continue; + } + + { + cache_frame.produce(&mut sw_converter); if sw_cache.len() >= FRAME_CACHE_SIZE { if let Some(last_active_frame) = &sw_last_active_frame { @@ -505,28 +573,90 @@ impl FfmpegDecoder { } sw_cache.insert(current_frame, cache_frame); - sw_cache.get_mut(¤t_frame).unwrap() - } else { - &mut cache_frame - }; - if current_frame > requested_frame && respond.is_some() { - let last_sent_frame_clone = sw_last_sent_frame.borrow().clone(); + // Serve exact matches from the cache so + // sequentially played frames stay available as + // at-or-before holds for later gap requests. + if current_frame == requested_frame + && let Some(respond) = respond.take() + { + let output = sw_cache + .get_mut(¤t_frame) + .unwrap() + .produce(&mut sw_converter); + (respond)(output); + break; + } + } - if let Some((respond, last_frame)) = last_sent_frame_clone - .filter(|l| { - l.number <= requested_frame - && requested_frame.saturating_sub(l.number) - <= MAX_FRAME_FALLBACK_DISTANCE + if current_frame > requested_frame && respond.is_some() { + // A frame at-or-before the request is the true content for + // that time in a VFR recording: a pts gap means the frame + // stayed on screen, however long the gap is. Prefer the + // newest such frame among the cache, the hold candidate a + // reset re-vended, and the persistent gap hold. 
+ let cached_before = sw_cache + .range(..=requested_frame) + .next_back() + .map(|(&n, _)| n); + let candidate_number = hold_candidate + .as_ref() + .map(|(n, _)| *n) + .filter(|&n| n <= requested_frame); + let hold_before = sw_gap_hold.clone().filter(|h| { + sw_pts_holes.get(&h.number).is_some_and(|&end| { + h.number <= requested_frame && requested_frame < end }) - .and_then(|l| Some((respond.take()?, l))) + }); + let last_before = sw_last_sent_frame + .borrow() + .as_ref() + .filter(|l| l.number <= requested_frame) + .cloned(); + let best_number = [ + cached_before, + candidate_number, + hold_before.as_ref().map(|h| h.number), + last_before.as_ref().map(|l| l.number), + ] + .into_iter() + .flatten() + .max(); + + if let Some(best_number) = best_number + && let Some(respond) = respond.take() { - (respond)(last_frame); + let output = if cached_before == Some(best_number) { + sw_cache + .get_mut(&best_number) + .unwrap() + .produce(&mut sw_converter) + } else if candidate_number == Some(best_number) { + let (_, mut candidate) = hold_candidate.take().unwrap(); + candidate.produce(&mut sw_converter) + } else if hold_before.as_ref().map(|h| h.number) + == Some(best_number) + { + hold_before.unwrap() + } else { + last_before.unwrap() + }; + sw_gap_hold = Some(output.clone()); + (respond)(output); } else if let Some(respond) = respond.take() { - let output = cache_frame.produce(&mut sw_converter); - *sw_last_sent_frame.borrow_mut() = Some(output.clone()); + let output = sw_cache + .get_mut(¤t_frame) + .unwrap() + .produce(&mut sw_converter); (respond)(output); } + + // The request is answered; stop here so the + // next sample stays in the decoder for the + // next request. Pulling it now would discard + // it at the reply guard and poison the vend + // continuity that hole discovery relies on. 
+ break; } exit = exit || exceeds_cache_bounds; @@ -539,18 +669,55 @@ impl FfmpegDecoder { sw_last_active_frame = Some(requested_frame); if let Some(respond) = respond.take() { - let best_cached = sw_cache + // The newest frame at-or-before the request is always a + // valid VFR hold, regardless of how far back it is — + // whether it is cached, was re-vended below the cache + // window by a reset, or is the persistent gap hold. + let cached_before = sw_cache .range(..=requested_frame) .next_back() - .filter(|(k, _)| { - requested_frame.saturating_sub(**k) - <= MAX_FRAME_FALLBACK_DISTANCE + .map(|(&n, _)| n); + let candidate_number = hold_candidate + .as_ref() + .map(|(n, _)| *n) + .filter(|&n| n <= requested_frame); + let hold_before = sw_gap_hold.clone().filter(|h| { + sw_pts_holes.get(&h.number).is_some_and(|&end| { + h.number <= requested_frame && requested_frame < end }) - .map(|(_, v)| v); - - if let Some(cached) = best_cached { - let output = cached.clone().produce(&mut sw_converter); - *sw_last_sent_frame.borrow_mut() = Some(output.clone()); + }); + let last_before = sw_last_sent_frame + .borrow() + .as_ref() + .filter(|l| l.number <= requested_frame) + .cloned(); + let best_number = [ + cached_before, + candidate_number, + hold_before.as_ref().map(|h| h.number), + last_before.as_ref().map(|l| l.number), + ] + .into_iter() + .flatten() + .max(); + + if let Some(best_number) = best_number { + let output = if cached_before == Some(best_number) { + sw_cache + .get_mut(&best_number) + .unwrap() + .produce(&mut sw_converter) + } else if candidate_number == Some(best_number) { + let (_, mut candidate) = hold_candidate.take().unwrap(); + candidate.produce(&mut sw_converter) + } else if hold_before.as_ref().map(|h| h.number) + == Some(best_number) + { + hold_before.unwrap() + } else { + last_before.unwrap() + }; + sw_gap_hold = Some(output.clone()); (respond)(output); } else { let last_frame_clone = sw_last_sent_frame.borrow().clone(); @@ -592,6 +759,9 @@ impl 
FfmpegDecoder { cache.insert(current_frame, cache_frame); *first_ever_frame.borrow_mut() = Some(output.clone()); *last_sent_frame.borrow_mut() = Some(output); + // The pre-decoded frame is a real vend: without seeding this, + // an opening static hold would never be recorded. + prev_vended = Some(current_frame); info!( "FFmpeg decoder '{}': pre-decoded first frame {} ({}x{})", name, current_frame, video_width, video_height @@ -701,6 +871,34 @@ impl FfmpegDecoder { frames = this.frames(); *last_sent_frame.borrow_mut() = None; cache.clear(); + prev_vended = None; + } + + // Requests inside a KNOWN pts hole are answered with the + // hole's start frame — the true VFR hold (the frame simply + // stayed on screen) — without touching the decoder. This + // keeps the post-hole frames cached and the reader parked, + // however long the hole runs. Only recorded holes qualify: a + // bare "some cached frame lies beyond the request" test would + // also match disjoint cache islands left by seeks, and + // serving stale content there would freeze playback. + if !is_backward_seek + && let Some(hole_start) = pts_holes + .range(..=requested_frame) + .next_back() + .filter(|&(_, &end)| requested_frame < end) + .map(|(&start, _)| start) + { + let data = cache + .get_mut(&hole_start) + .map(|c| c.produce(&mut converter)) + .or_else(|| gap_hold.clone().filter(|h| h.number == hole_start)); + if let Some(data) = data { + gap_hold = Some(data.clone()); + *last_sent_frame.borrow_mut() = Some(data.clone()); + let _ = reply.send(data.frame); + continue; + } } if reply.is_closed() { @@ -752,25 +950,45 @@ impl FfmpegDecoder { frames = this.frames(); *last_sent_frame.borrow_mut() = None; cache.clear(); + prev_vended = None; } let mut exit = false; + // Newest vended frame below the cache window: after a reset + // the reader re-vends from the keyframe before the request, + // and that frame is the true VFR hold for a request inside + // a pts hole. 
+ let mut hold_candidate: Option<(u32, CachedFrame)> = None; for frame in &mut frames { if reply_cell.borrow().as_ref().is_none_or(|r| r.is_closed()) { respond.take(); + // The frame just pulled is discarded: it breaks the + // vend continuity that hole discovery relies on. + prev_vended = None; break; } let Ok(frame) = frame.map_err(|e| format!("read frame / {e}")) else { + // A skipped frame breaks the vend continuity that + // hole discovery relies on. + prev_vended = None; continue; }; let Some(pts) = frame.pts() else { + prev_vended = None; continue; }; let current_frame = pts_to_frame(pts - start_time, time_base, fps); + if let Some(prev) = prev_vended + && current_frame > prev + 1 + { + record_pts_hole(&mut pts_holes, prev, current_frame); + } + prev_vended = Some(current_frame); + let mut cache_frame = CachedFrame::Raw { frame, number: current_frame, @@ -784,17 +1002,19 @@ impl FfmpegDecoder { let exceeds_cache_bounds = current_frame > cache_max; let too_small_for_cache_bounds = current_frame < cache_min; - let cache_frame = if !too_small_for_cache_bounds { - cache_frame.produce(&mut converter); - - if current_frame == requested_frame - && let Some(respond) = respond.take() - { - let output = cache_frame.produce(&mut converter); - (respond)(output); - - break; + if too_small_for_cache_bounds { + // Keep the newest pre-request frame as the VFR hold + // candidate instead of discarding it; a frame below + // the cache window can never exceed the request, so + // nothing else in this iteration applies to it. 
+ if current_frame <= requested_frame { + hold_candidate = Some((current_frame, cache_frame)); } + continue; + } + + { + cache_frame.produce(&mut converter); if cache.len() >= FRAME_CACHE_SIZE { if let Some(last_active_frame) = &last_active_frame { @@ -816,28 +1036,85 @@ impl FfmpegDecoder { } cache.insert(current_frame, cache_frame); - cache.get_mut(¤t_frame).unwrap() - } else { - &mut cache_frame - }; - if current_frame > requested_frame && respond.is_some() { - let last_sent_frame_clone = last_sent_frame.borrow().clone(); + // Serve exact matches from the cache so + // sequentially played frames stay available as + // at-or-before holds for later gap requests. + if current_frame == requested_frame + && let Some(respond) = respond.take() + { + let output = cache + .get_mut(¤t_frame) + .unwrap() + .produce(&mut converter); + (respond)(output); + break; + } + } - if let Some((respond, last_frame)) = last_sent_frame_clone - .filter(|l| { - l.number <= requested_frame - && requested_frame.saturating_sub(l.number) - <= MAX_FRAME_FALLBACK_DISTANCE + if current_frame > requested_frame && respond.is_some() { + // A frame at-or-before the request is the true content for + // that time in a VFR recording: a pts gap means the frame + // stayed on screen, however long the gap is. Prefer the + // newest such frame among the cache, the hold candidate a + // reset re-vended, and the persistent gap hold. 
+ let cached_before = + cache.range(..=requested_frame).next_back().map(|(&n, _)| n); + let candidate_number = hold_candidate + .as_ref() + .map(|(n, _)| *n) + .filter(|&n| n <= requested_frame); + let hold_before = gap_hold.clone().filter(|h| { + pts_holes.get(&h.number).is_some_and(|&end| { + h.number <= requested_frame && requested_frame < end }) - .and_then(|l| Some((respond.take()?, l))) + }); + let last_before = last_sent_frame + .borrow() + .as_ref() + .filter(|l| l.number <= requested_frame) + .cloned(); + let best_number = [ + cached_before, + candidate_number, + hold_before.as_ref().map(|h| h.number), + last_before.as_ref().map(|l| l.number), + ] + .into_iter() + .flatten() + .max(); + + if let Some(best_number) = best_number + && let Some(respond) = respond.take() { - (respond)(last_frame); + let output = if cached_before == Some(best_number) { + cache.get_mut(&best_number).unwrap().produce(&mut converter) + } else if candidate_number == Some(best_number) { + let (_, mut candidate) = hold_candidate.take().unwrap(); + candidate.produce(&mut converter) + } else if hold_before.as_ref().map(|h| h.number) + == Some(best_number) + { + hold_before.unwrap() + } else { + last_before.unwrap() + }; + gap_hold = Some(output.clone()); + (respond)(output); } else if let Some(respond) = respond.take() { - let output = cache_frame.produce(&mut converter); - *last_sent_frame.borrow_mut() = Some(output.clone()); + let output = cache + .get_mut(¤t_frame) + .unwrap() + .produce(&mut converter); (respond)(output); } + + // The request is answered; stop here so the next + // sample stays in the decoder for the next + // request. Pulling it now would discard it at the + // reply guard and poison the vend continuity that + // hole discovery relies on. 
+ break; } exit = exit || exceeds_cache_bounds; @@ -850,17 +1127,48 @@ impl FfmpegDecoder { last_active_frame = Some(requested_frame); if let Some(respond) = respond.take() { - let best_cached = cache - .range(..=requested_frame) - .next_back() - .filter(|(k, _)| { - requested_frame.saturating_sub(**k) <= MAX_FRAME_FALLBACK_DISTANCE + // The newest frame at-or-before the request is always a valid + // VFR hold, regardless of how far back it is — whether it is + // cached, was re-vended below the cache window by a reset, or + // is the persistent gap hold. + let cached_before = + cache.range(..=requested_frame).next_back().map(|(&n, _)| n); + let candidate_number = hold_candidate + .as_ref() + .map(|(n, _)| *n) + .filter(|&n| n <= requested_frame); + let hold_before = gap_hold.clone().filter(|h| { + pts_holes.get(&h.number).is_some_and(|&end| { + h.number <= requested_frame && requested_frame < end }) - .map(|(_, v)| v); - - if let Some(cached) = best_cached { - let output = cached.clone().produce(&mut converter); - *last_sent_frame.borrow_mut() = Some(output.clone()); + }); + let last_before = last_sent_frame + .borrow() + .as_ref() + .filter(|l| l.number <= requested_frame) + .cloned(); + let best_number = [ + cached_before, + candidate_number, + hold_before.as_ref().map(|h| h.number), + last_before.as_ref().map(|l| l.number), + ] + .into_iter() + .flatten() + .max(); + + if let Some(best_number) = best_number { + let output = if cached_before == Some(best_number) { + cache.get_mut(&best_number).unwrap().produce(&mut converter) + } else if candidate_number == Some(best_number) { + let (_, mut candidate) = hold_candidate.take().unwrap(); + candidate.produce(&mut converter) + } else if hold_before.as_ref().map(|h| h.number) == Some(best_number) { + hold_before.unwrap() + } else { + last_before.unwrap() + }; + gap_hold = Some(output.clone()); (respond)(output); } else { let last_frame_clone = last_sent_frame.borrow().clone(); diff --git 
a/crates/rendering/src/decoder/mod.rs b/crates/rendering/src/decoder/mod.rs index 44456cea07..8cf323adc8 100644 --- a/crates/rendering/src/decoder/mod.rs +++ b/crates/rendering/src/decoder/mod.rs @@ -449,6 +449,27 @@ pub fn pts_to_frame(pts: i64, time_base: Rational, fps: u32) -> u32 { pub const FRAME_CACHE_SIZE: usize = 90; const DEFAULT_MAX_FALLBACK_DISTANCE: u32 = 90; +/// Records a pts hole discovered from a decode-order vend jump (frames vend +/// in pts order, so a jump means no samples exist in between). The map stays +/// bounded by dropping the narrowest hole — wide static-screen holds matter +/// most. +pub(super) fn record_pts_hole( + holes: &mut std::collections::BTreeMap, + start: u32, + end: u32, +) { + const MAX_TRACKED_HOLES: usize = 64; + holes.insert(start, end); + if holes.len() > MAX_TRACKED_HOLES + && let Some(narrowest) = holes + .iter() + .min_by_key(|&(&s, &e)| e.saturating_sub(s)) + .map(|(&s, _)| s) + { + holes.remove(&narrowest); + } +} + #[derive(Clone)] pub struct AsyncVideoDecoderHandle { sender: mpsc::Sender, @@ -505,6 +526,10 @@ impl AsyncVideoDecoderHandle { )) .is_err() { + tracing::warn!( + time = adjusted_time, + "decoder thread is gone; frame request dropped" + ); return None; } diff --git a/crates/rendering/src/lib.rs b/crates/rendering/src/lib.rs index e46b0f68ef..f5b3edb9c5 100644 --- a/crates/rendering/src/lib.rs +++ b/crates/rendering/src/lib.rs @@ -393,6 +393,10 @@ impl RecordingSegmentDecoders { let camera_frame = camera.flatten(); + if screen.is_none() { + tracing::warn!(segment_time, "screen decoder returned no frame"); + } + Some(DecodedSegmentFrames { screen_frame: Some(screen?), camera_frame,