From d0657fb4858cdeab64ba0635b03a2d375fe7927d Mon Sep 17 00:00:00 2001 From: Charles Vien Date: Sat, 6 Jun 2026 16:37:28 -0700 Subject: [PATCH 1/4] enrich renderer crash events for error tracking --- apps/code/src/main/index.ts | 42 +++++++++++----- .../src/main/services/posthog-analytics.ts | 26 ++++++++++ apps/code/src/main/trpc/routers/analytics.ts | 10 ++++ .../src/main/utils/crash-diagnostics.test.ts | 48 ++++++++++++++++++ apps/code/src/main/utils/crash-diagnostics.ts | 49 +++++++++++++++++++ apps/code/src/main/utils/uuidv7.test.ts | 29 +++++++++++ apps/code/src/main/utils/uuidv7.ts | 29 +++++++++++ apps/code/src/main/window.ts | 5 ++ apps/code/src/renderer/App.tsx | 26 +++++++--- apps/code/src/renderer/utils/analytics.ts | 13 ++++- 10 files changed, 258 insertions(+), 19 deletions(-) create mode 100644 apps/code/src/main/utils/crash-diagnostics.test.ts create mode 100644 apps/code/src/main/utils/crash-diagnostics.ts create mode 100644 apps/code/src/main/utils/uuidv7.test.ts create mode 100644 apps/code/src/main/utils/uuidv7.ts diff --git a/apps/code/src/main/index.ts b/apps/code/src/main/index.ts index 6a005d365e..be56180350 100644 --- a/apps/code/src/main/index.ts +++ b/apps/code/src/main/index.ts @@ -30,6 +30,7 @@ import type { SuspensionService } from "./services/suspension/service"; import type { TaskLinkService } from "./services/task-link/service"; import type { UpdatesService } from "./services/updates/service"; import type { WorkspaceService } from "./services/workspace/service"; +import { collectMemorySnapshot } from "./utils/crash-diagnostics"; import { ensureClaudeConfigDir } from "./utils/env"; import { getChromiumLogFilePath, @@ -72,6 +73,8 @@ function isCrashLoop(): boolean { } app.on("render-process-gone", (_event, webContents, details) => { + const memory = collectMemorySnapshot(() => app.getAppMetrics()); + const chromiumLogTail = readChromiumLogTail(); const props = { source: "main", type: "render-process-gone", @@ -80,15 +83,20 @@ app.on("render-process-gone", (_event, webContents, details) => { url: webContents.getURL(), title: webContents.getTitle(), webContentsId: String(webContents.id), + appUptimeSeconds: Math.round(process.uptime()), + memoryTotalWorkingSetKb: memory?.totalWorkingSetKb, + memoryPeakWorkingSetKb: memory?.peakWorkingSetKb, + memoryProcessCount: memory?.processCount, + memoryByType: memory ? JSON.stringify(memory.byType) : undefined, }; - log.error("Renderer process gone", { + log.error("Renderer process gone", { ...props, chromiumLogTail }); + captureException(new Error(`Renderer process gone: ${details.reason}`), { ...props, - chromiumLogTail: readChromiumLogTail(), + chromiumLogTail, + // Stack is always this handler, so default grouping collapses every + // renderer death into one issue. Split by reason instead. + $exception_fingerprint: ["render-process-gone", details.reason], }); - captureException( - new Error(`Renderer process gone: ${details.reason}`), - props, - ); getPostHogClient() ?.flush() .catch(() => {}); @@ -121,6 +129,8 @@ app.on("render-process-gone", (_event, webContents, details) => { }); app.on("child-process-gone", (_event, details) => { + const memory = collectMemorySnapshot(() => app.getAppMetrics()); + const chromiumLogTail = readChromiumLogTail(); const props = { source: "main", type: "child-process-gone", @@ -129,14 +139,24 @@ app.on("child-process-gone", (_event, details) => { exitCode: String(details.exitCode), serviceName: details.serviceName ?? "", name: details.name ?? "", + appUptimeSeconds: Math.round(process.uptime()), + memoryTotalWorkingSetKb: memory?.totalWorkingSetKb, + memoryPeakWorkingSetKb: memory?.peakWorkingSetKb, + memoryProcessCount: memory?.processCount, + memoryByType: memory ? JSON.stringify(memory.byType) : undefined, }; - log.error("Child process gone", { - ...props, - chromiumLogTail: readChromiumLogTail(), - }); + log.error("Child process gone", { ...props, chromiumLogTail }); captureException( new Error(`Child process gone (${details.type}): ${details.reason}`), - props, + { + ...props, + chromiumLogTail, + $exception_fingerprint: [ + "child-process-gone", + details.type, + details.reason, + ], + }, ); getPostHogClient() ?.flush() diff --git a/apps/code/src/main/services/posthog-analytics.ts b/apps/code/src/main/services/posthog-analytics.ts index 6eb43841e3..9a235093f1 100644 --- a/apps/code/src/main/services/posthog-analytics.ts +++ b/apps/code/src/main/services/posthog-analytics.ts @@ -1,8 +1,10 @@ import { PostHog } from "posthog-node"; import { getAppVersion } from "../utils/env"; +import { uuidv7 } from "../utils/uuidv7"; let posthogClient: PostHog | null = null; let currentUserId: string | null = null; +let sessionId: string | null = null; export function initializePostHog() { if (posthogClient) { @@ -32,6 +34,29 @@ export function getCurrentUserId() { return currentUserId; } +/** + * The PostHog session id is OWNED BY MAIN. Main mints one UUIDv7 and every + * renderer window bootstraps posthog-js with it (`bootstrap.sessionID`). + * Because main outlives the renderer, the id stays stable across a renderer + * crash + reload, so the replay is one continuous session spanning the crash + * and main-captured crash events (the renderer can't report its own OOM) + * always carry the right `$session_id` with no race or hand-off. + * + * Minted lazily on first request (a window asks at boot, before posthog-js + * init) so its UUIDv7 timestamp precedes the session's first event, as + * posthog-js requires. + */ +export function getOrCreateSessionId(): string { + if (!sessionId) { + sessionId = uuidv7(); + } + return sessionId; +} + +export function getSessionId(): string | null { + return sessionId; +} + export function trackAppEvent( eventName: string, properties?: Record, @@ -96,6 +121,7 @@ export function captureException( const distinctId = currentUserId || "anonymous-app-event"; posthogClient.captureException(error, distinctId, { team: "posthog-code", + ...(sessionId ? { $session_id: sessionId } : {}), ...additionalProperties, app_version: getAppVersion(), }); diff --git a/apps/code/src/main/trpc/routers/analytics.ts b/apps/code/src/main/trpc/routers/analytics.ts index c34744f64b..74abc478d8 100644 --- a/apps/code/src/main/trpc/routers/analytics.ts +++ b/apps/code/src/main/trpc/routers/analytics.ts @@ -1,5 +1,6 @@ import { z } from "zod"; import { + getOrCreateSessionId, identifyUser, resetUser, setCurrentUserId, @@ -29,6 +30,15 @@ export const analyticsRouter = router({ } }), + /** + * Return the main-owned session id for a window to bootstrap posthog-js with + * (`bootstrap.sessionID`). Main owns it so crash events captured from main + * link to the right replay, and the id survives renderer crash+reload. + */ + getSessionId: publicProcedure + .output(z.object({ sessionId: z.string() })) + .query(() => ({ sessionId: getOrCreateSessionId() })), + /** * Reset the current user (on logout) */ diff --git a/apps/code/src/main/utils/crash-diagnostics.test.ts b/apps/code/src/main/utils/crash-diagnostics.test.ts new file mode 100644 index 0000000000..1f7c5441d2 --- /dev/null +++ b/apps/code/src/main/utils/crash-diagnostics.test.ts @@ -0,0 +1,48 @@ +import { describe, expect, it } from "vitest"; +import { collectMemorySnapshot } from "./crash-diagnostics"; + +function metric( + type: string, + workingSetSize: number, + peakWorkingSetSize: number, +): Electron.ProcessMetric { + return { + type, + memory: { workingSetSize, peakWorkingSetSize, privateBytes: 0 }, + } as unknown as Electron.ProcessMetric; +} + +describe("collectMemorySnapshot", () => { + it("sums working set, tracks peak, and groups by process type", () => { + const snapshot = collectMemorySnapshot(() => [ + metric("Browser", 100, 150), + metric("Tab", 200, 500), + metric("Tab", 50, 60), + metric("GPU", 80, 90), + ]); + + expect(snapshot).toEqual({ + totalWorkingSetKb: 430, + peakWorkingSetKb: 500, + processCount: 4, + byType: { Browser: 100, Tab: 250, GPU: 80 }, + }); + }); + + it("returns a zeroed snapshot for no processes", () => { + expect(collectMemorySnapshot(() => [])).toEqual({ + totalWorkingSetKb: 0, + peakWorkingSetKb: 0, + processCount: 0, + byType: {}, + }); + }); + + it("returns undefined instead of throwing (crash handler must not fail)", () => { + expect( + collectMemorySnapshot(() => { + throw new Error("getAppMetrics unavailable"); + }), + ).toBeUndefined(); + }); +}); diff --git a/apps/code/src/main/utils/crash-diagnostics.ts b/apps/code/src/main/utils/crash-diagnostics.ts new file mode 100644 index 0000000000..5b915a0c43 --- /dev/null +++ b/apps/code/src/main/utils/crash-diagnostics.ts @@ -0,0 +1,49 @@ +export interface MemorySnapshot { + totalWorkingSetKb: number; + peakWorkingSetKb: number; + processCount: number; + byType: Record; +} + +/** + * Summarize per-process memory (from `app.getAppMetrics()`, passed in by the + * caller so this stays free of a direct `electron` import) for crash + * diagnostics. Working-set sizes are in KB. Attached to renderer/child crash + * events so PostHog Error Tracking can show whether the app was under memory + * pressure: a hard OOM kills the renderer before it can log anything, so the + * chromium log usually goes silent and this is the only reliable signal. + * + * Defensive on purpose: a throw here would run before the crash handler's + * auto-recovery reload, so failures return `undefined` instead. + * + * Caveat: at `render-process-gone` time the dead renderer is already gone from + * the metrics, so the `Tab` total understates the renderer's real peak. The + * `unresponsive` sample (renderer still alive) is the more telling one. + */ +export function collectMemorySnapshot( + getMetrics: () => Electron.ProcessMetric[], +): MemorySnapshot | undefined { + try { + const metrics = getMetrics(); + let totalWorkingSetKb = 0; + let peakWorkingSetKb = 0; + const byType: Record = {}; + for (const metric of metrics) { + const workingSet = metric.memory.workingSetSize; + totalWorkingSetKb += workingSet; + peakWorkingSetKb = Math.max( + peakWorkingSetKb, + metric.memory.peakWorkingSetSize, + ); + byType[metric.type] = (byType[metric.type] ?? 0) + workingSet; + } + return { + totalWorkingSetKb, + peakWorkingSetKb, + processCount: metrics.length, + byType, + }; + } catch { + return undefined; + } +} diff --git a/apps/code/src/main/utils/uuidv7.test.ts b/apps/code/src/main/utils/uuidv7.test.ts new file mode 100644 index 0000000000..2df896d199 --- /dev/null +++ b/apps/code/src/main/utils/uuidv7.test.ts @@ -0,0 +1,29 @@ +import { describe, expect, it } from "vitest"; +import { uuidv7 } from "./uuidv7"; + +const UUID_V7 = + /^[0-9a-f]{8}-[0-9a-f]{4}-7[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/; + +describe("uuidv7", () => { + it("produces a valid v7 string (version nibble 7, variant 10)", () => { + for (let i = 0; i < 100; i++) { + expect(uuidv7()).toMatch(UUID_V7); + } + }); + + it("encodes the current time so ids sort in creation order", () => { + const before = Date.now(); + const id = uuidv7(); + const after = Date.now(); + + // First 48 bits (first 12 hex chars, minus the dash) are the unix-ms stamp. + const stampMs = Number.parseInt(id.slice(0, 8) + id.slice(9, 13), 16); + expect(stampMs).toBeGreaterThanOrEqual(before); + expect(stampMs).toBeLessThanOrEqual(after); + }); + + it("is unique across rapid calls", () => { + const ids = new Set(Array.from({ length: 1000 }, () => uuidv7())); + expect(ids.size).toBe(1000); + }); +}); diff --git a/apps/code/src/main/utils/uuidv7.ts b/apps/code/src/main/utils/uuidv7.ts new file mode 100644 index 0000000000..4983ca679f --- /dev/null +++ b/apps/code/src/main/utils/uuidv7.ts @@ -0,0 +1,29 @@ +import { randomBytes } from "node:crypto"; + +/** + * Generate a UUIDv7 (time-ordered, RFC 9562). posthog-js requires this exact + * format for `bootstrap.sessionID`: a valid v7 whose 48-bit timestamp precedes + * the session's first event. Main mints it before any window starts posthog-js, + * so the ordering holds. + * + * Layout: 48-bit big-endian unix-ms timestamp, 4-bit version (7), 2-bit variant + * (10), 74 random bits. Hand-rolled to avoid a phantom dependency on `uuid` + * (transitive only, and several major versions resolve in the tree). + */ +export function uuidv7(): string { + const bytes = randomBytes(16); + const timestamp = Date.now(); + + bytes[0] = Math.floor(timestamp / 2 ** 40) & 0xff; + bytes[1] = Math.floor(timestamp / 2 ** 32) & 0xff; + bytes[2] = Math.floor(timestamp / 2 ** 24) & 0xff; + bytes[3] = Math.floor(timestamp / 2 ** 16) & 0xff; + bytes[4] = Math.floor(timestamp / 2 ** 8) & 0xff; + bytes[5] = timestamp & 0xff; + + bytes[6] = (bytes[6] & 0x0f) | 0x70; // version 7 + bytes[8] = (bytes[8] & 0x3f) | 0x80; // variant 10 + + const hex = bytes.toString("hex"); + return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20)}`; +} diff --git a/apps/code/src/main/window.ts b/apps/code/src/main/window.ts index 10aa4699d0..aa44263a46 100644 --- a/apps/code/src/main/window.ts +++ b/apps/code/src/main/window.ts @@ -14,6 +14,7 @@ import { MAIN_TOKENS } from "./di/tokens"; import { buildApplicationMenu } from "./menu"; import type { ElectronMainWindow } from "./platform-adapters/electron-main-window"; import { trpcRouter } from "./trpc/router"; +import { collectMemorySnapshot } from "./utils/crash-diagnostics"; import { isDevBuild } from "./utils/env"; import { logger, readChromiumLogTail } from "./utils/logger"; import { type WindowStateSchema, windowStateStore } from "./utils/store"; @@ -110,13 +111,17 @@ function setupCrashLogging(window: BrowserWindow): void { reason: details.reason, exitCode: details.exitCode, url: window.webContents.getURL(), + memory: collectMemorySnapshot(() => app.getAppMetrics()), chromiumLogTail: readChromiumLogTail(), }); }); + // Unresponsive often precedes an OOM kill, and here the renderer is still + // alive, so this memory sample reflects its real (bloated) footprint. window.on("unresponsive", () => { log.warn("Window unresponsive", { url: window.webContents.getURL(), + memory: collectMemorySnapshot(() => app.getAppMetrics()), chromiumLogTail: readChromiumLogTail(), }); }); diff --git a/apps/code/src/renderer/App.tsx b/apps/code/src/renderer/App.tsx index 3335551a17..16473c38d7 100644 --- a/apps/code/src/renderer/App.tsx +++ b/apps/code/src/renderer/App.tsx @@ -53,13 +53,25 @@ function App() { // Initialize PostHog analytics and register the app version super property. useEffect(() => { - initializePostHog(); - trpcClient.os.getAppVersion - .query() - .then(registerAppVersion) - .catch((error) => { - log.warn("Failed to register app version super property", { error }); - }); + // Fetch the main-owned session id BEFORE initializing posthog-js so the + // recording shares the id main stamps on crash events. Init is gated on it + // so the id is set before the first event (posthog-js requires the + // bootstrap id's timestamp to precede the session's first event). + void (async () => { + let sessionId: string | undefined; + try { + ({ sessionId } = await trpcClient.analytics.getSessionId.query()); + } catch (error) { + log.warn("Failed to fetch session id from main", { error }); + } + initializePostHog(sessionId); + trpcClient.os.getAppVersion + .query() + .then(registerAppVersion) + .catch((error) => { + log.warn("Failed to register app version super property", { error }); + }); + })(); }, []); // Initialize connectivity monitoring diff --git a/apps/code/src/renderer/utils/analytics.ts b/apps/code/src/renderer/utils/analytics.ts index d17665203f..8e70e70ef5 100644 --- a/apps/code/src/renderer/utils/analytics.ts +++ b/apps/code/src/renderer/utils/analytics.ts @@ -36,7 +36,12 @@ type PendingFlagListener = { // Subscribers added before initializePostHog runs. const pendingFlagListeners = new Set(); -export function initializePostHog() { +/** + * @param sessionId Main-owned session id (UUIDv7) to bootstrap posthog-js with, + * so the recording shares the id main stamps on crash events. Main owns it so + * it survives a renderer crash+reload as one continuous session. + */ +export function initializePostHog(sessionId?: string) { const apiKey = import.meta.env.VITE_POSTHOG_API_KEY; const apiHost = import.meta.env.VITE_POSTHOG_API_HOST || "https://internal-c.posthog.com"; @@ -51,6 +56,12 @@ export function initializePostHog() { api_host: apiHost, ui_host: uiHost, disable_session_recording: false, + // Hold the session open through long idle (max posthog-js allows) so its + // own rotation doesn't replace main's bootstrapped id mid-run. This app + // sits idle for hours with background tasks, which is exactly when a + // shorter timeout would silently rotate and break crash->replay linking. + session_idle_timeout_seconds: 36000, + ...(sessionId ? { bootstrap: { sessionID: sessionId } } : {}), capture_exceptions: import.meta.env.DEV ? false : { From 633586253d1db8f92292862ae8b0a54b8c966dd2 Mon Sep 17 00:00:00 2001 From: Charles Vien Date: Sat, 6 Jun 2026 17:11:51 -0700 Subject: [PATCH 2/4] protect session id and mint before first window --- apps/code/src/main/services/posthog-analytics.test.ts | 9 +++++++++ apps/code/src/main/services/posthog-analytics.ts | 9 ++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/apps/code/src/main/services/posthog-analytics.test.ts b/apps/code/src/main/services/posthog-analytics.test.ts index 64d746d40f..ef9501ff31 100644 --- a/apps/code/src/main/services/posthog-analytics.test.ts +++ b/apps/code/src/main/services/posthog-analytics.test.ts @@ -97,4 +97,13 @@ describe("posthog-analytics", () => { }), ); }); + + it("stamps the main-owned session id and ignores a caller override", () => { + captureException(new Error("boom"), { $session_id: "spoofed" }); + + const props = mockCaptureException.mock.calls.at(-1)?.[2]; + expect(props.$session_id).toMatch( + /^[0-9a-f]{8}-[0-9a-f]{4}-7[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/, + ); + }); }); diff --git a/apps/code/src/main/services/posthog-analytics.ts b/apps/code/src/main/services/posthog-analytics.ts index 9a235093f1..c6ca48072a 100644 --- a/apps/code/src/main/services/posthog-analytics.ts +++ b/apps/code/src/main/services/posthog-analytics.ts @@ -23,6 +23,11 @@ export function initializePostHog() { enableExceptionAutocapture: true, }); + // Mint the main-owned session id now, before the first window, so crash + // handlers can stamp $session_id even when the renderer crashes during + // startup (before it fetches the id to bootstrap posthog-js). + getOrCreateSessionId(); + return posthogClient; } @@ -121,8 +126,10 @@ export function captureException( const distinctId = currentUserId || "anonymous-app-event"; posthogClient.captureException(error, distinctId, { team: "posthog-code", - ...(sessionId ? { $session_id: sessionId } : {}), ...additionalProperties, + // System-owned fields last so callers can't overwrite them: main owns the + // session id used for crash->replay linking. + ...(sessionId ? { $session_id: sessionId } : {}), app_version: getAppVersion(), }); } From ed76b37646a8577f6ba4eef5f2b95f4c94426ac9 Mon Sep 17 00:00:00 2001 From: Charles Vien Date: Sun, 7 Jun 2026 17:52:43 -0700 Subject: [PATCH 3/4] dedupe crash diagnostics and add tests --- apps/code/src/main/index.ts | 38 +++++++++---------- .../main/services/posthog-analytics.test.ts | 12 +++++- .../src/main/services/posthog-analytics.ts | 4 -- .../src/main/utils/crash-diagnostics.test.ts | 27 ++++++++++++- apps/code/src/main/utils/crash-diagnostics.ts | 19 ++++++++++ apps/code/src/main/utils/uuidv7.test.ts | 14 ++++++- .../code/src/renderer/utils/analytics.test.ts | 25 ++++++++++++ apps/code/src/renderer/utils/analytics.ts | 13 ++++--- 8 files changed, 120 insertions(+), 32 deletions(-) diff --git a/apps/code/src/main/index.ts b/apps/code/src/main/index.ts index be56180350..038796addd 100644 --- a/apps/code/src/main/index.ts +++ b/apps/code/src/main/index.ts @@ -30,7 +30,10 @@ import type { SuspensionService } from "./services/suspension/service"; import type { TaskLinkService } from "./services/task-link/service"; import type { UpdatesService } from "./services/updates/service"; import type { WorkspaceService } from "./services/workspace/service"; -import { collectMemorySnapshot } from "./utils/crash-diagnostics"; +import { + collectMemorySnapshot, + flattenMemorySnapshot, +} from "./utils/crash-diagnostics"; import { ensureClaudeConfigDir } from "./utils/env"; import { getChromiumLogFilePath, @@ -72,9 +75,18 @@ function isCrashLoop(): boolean { return recentCrashTimestamps.length >= CRASH_LOOP_THRESHOLD; } +// Shared diagnostics attached to every crash event: uptime, the native chromium +// log tail, and flattened memory. A hard OOM kills the renderer before it can +// log, so the memory snapshot is often the only signal of what happened. +function crashDiagnostics() { + return { + appUptimeSeconds: Math.round(process.uptime()), + chromiumLogTail: readChromiumLogTail(), + ...flattenMemorySnapshot(collectMemorySnapshot(() => app.getAppMetrics())), + }; +} + app.on("render-process-gone", (_event, webContents, details) => { - const memory = collectMemorySnapshot(() => app.getAppMetrics()); - const chromiumLogTail = readChromiumLogTail(); const props = { source: "main", type: "render-process-gone", @@ -83,16 +95,11 @@ app.on("render-process-gone", (_event, webContents, details) => { url: webContents.getURL(), title: webContents.getTitle(), webContentsId: String(webContents.id), - appUptimeSeconds: Math.round(process.uptime()), - memoryTotalWorkingSetKb: memory?.totalWorkingSetKb, - memoryPeakWorkingSetKb: memory?.peakWorkingSetKb, - memoryProcessCount: memory?.processCount, - memoryByType: memory ? JSON.stringify(memory.byType) : undefined, + ...crashDiagnostics(), }; - log.error("Renderer process gone", { ...props, chromiumLogTail }); + log.error("Renderer process gone", props); captureException(new Error(`Renderer process gone: ${details.reason}`), { ...props, - chromiumLogTail, // Stack is always this handler, so default grouping collapses every // renderer death into one issue. Split by reason instead. $exception_fingerprint: ["render-process-gone", details.reason], @@ -129,8 +136,6 @@ app.on("render-process-gone", (_event, webContents, details) => { }); app.on("child-process-gone", (_event, details) => { - const memory = collectMemorySnapshot(() => app.getAppMetrics()); - const chromiumLogTail = readChromiumLogTail(); const props = { source: "main", type: "child-process-gone", @@ -139,18 +144,13 @@ app.on("child-process-gone", (_event, details) => { exitCode: String(details.exitCode), serviceName: details.serviceName ?? "", name: details.name ?? "", - appUptimeSeconds: Math.round(process.uptime()), - memoryTotalWorkingSetKb: memory?.totalWorkingSetKb, - memoryPeakWorkingSetKb: memory?.peakWorkingSetKb, - memoryProcessCount: memory?.processCount, - memoryByType: memory ? JSON.stringify(memory.byType) : undefined, + ...crashDiagnostics(), }; - log.error("Child process gone", { ...props, chromiumLogTail }); + log.error("Child process gone", props); captureException( new Error(`Child process gone (${details.type}): ${details.reason}`), { ...props, - chromiumLogTail, $exception_fingerprint: [ "child-process-gone", details.type, diff --git a/apps/code/src/main/services/posthog-analytics.test.ts b/apps/code/src/main/services/posthog-analytics.test.ts index ef9501ff31..1b27c241fb 100644 --- a/apps/code/src/main/services/posthog-analytics.test.ts +++ b/apps/code/src/main/services/posthog-analytics.test.ts @@ -10,6 +10,7 @@ vi.mock("posthog-node", () => ({ PostHog: MockPostHog })); import { captureException, + getOrCreateSessionId, initializePostHog, resetUser, shutdownPostHog, @@ -101,8 +102,15 @@ describe("posthog-analytics", () => { it("stamps the main-owned session id and ignores a caller override", () => { captureException(new Error("boom"), { $session_id: "spoofed" }); - const props = mockCaptureException.mock.calls.at(-1)?.[2]; - expect(props.$session_id).toMatch( + const [, , props] = mockCaptureException.mock.calls.at(-1) ?? []; + expect(props.$session_id).toBe(getOrCreateSessionId()); + }); + + it("mints a stable valid uuidv7 session id", () => { + const first = getOrCreateSessionId(); + + expect(getOrCreateSessionId()).toBe(first); + expect(first).toMatch( /^[0-9a-f]{8}-[0-9a-f]{4}-7[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/, ); }); diff --git a/apps/code/src/main/services/posthog-analytics.ts b/apps/code/src/main/services/posthog-analytics.ts index c6ca48072a..43d5d8ce17 100644 --- a/apps/code/src/main/services/posthog-analytics.ts +++ b/apps/code/src/main/services/posthog-analytics.ts @@ -58,10 +58,6 @@ export function getOrCreateSessionId(): string { return sessionId; } -export function getSessionId(): string | null { - return sessionId; -} - export function trackAppEvent( eventName: string, properties?: Record, diff --git a/apps/code/src/main/utils/crash-diagnostics.test.ts b/apps/code/src/main/utils/crash-diagnostics.test.ts index 1f7c5441d2..7a896108a1 100644 --- a/apps/code/src/main/utils/crash-diagnostics.test.ts +++ b/apps/code/src/main/utils/crash-diagnostics.test.ts @@ -1,5 +1,8 @@ import { describe, expect, it } from "vitest"; -import { collectMemorySnapshot } from "./crash-diagnostics"; +import { + collectMemorySnapshot, + flattenMemorySnapshot, +} from "./crash-diagnostics"; function metric( type: string, @@ -46,3 +49,25 @@ describe("collectMemorySnapshot", () => { ).toBeUndefined(); }); }); + +describe("flattenMemorySnapshot", () => { + it("flattens scalars and serializes byType for PostHog", () => { + expect( + flattenMemorySnapshot({ + totalWorkingSetKb: 430, + peakWorkingSetKb: 500, + processCount: 4, + byType: { Browser: 100, Tab: 250, GPU: 80 }, + }), + ).toEqual({ + memoryTotalWorkingSetKb: 430, + memoryPeakWorkingSetKb: 500, + memoryProcessCount: 4, + memoryByType: '{"Browser":100,"Tab":250,"GPU":80}', + }); + }); + + it("returns an empty object when no snapshot was collected", () => { + expect(flattenMemorySnapshot(undefined)).toEqual({}); + }); +}); diff --git a/apps/code/src/main/utils/crash-diagnostics.ts b/apps/code/src/main/utils/crash-diagnostics.ts index 5b915a0c43..ef13b7034a 100644 --- a/apps/code/src/main/utils/crash-diagnostics.ts +++ b/apps/code/src/main/utils/crash-diagnostics.ts @@ -47,3 +47,22 @@ export function collectMemorySnapshot( return undefined; } } + +/** + * Flatten a snapshot into scalar event properties for PostHog (which doesn't + * accept nested objects, so `byType` is serialized). Returns `{}` when no + * snapshot was collected, so it spreads cleanly into a crash event's props. + */ +export function flattenMemorySnapshot( + memory: MemorySnapshot | undefined, +): Record { + if (!memory) { + return {}; + } + return { + memoryTotalWorkingSetKb: memory.totalWorkingSetKb, + memoryPeakWorkingSetKb: memory.peakWorkingSetKb, + memoryProcessCount: memory.processCount, + memoryByType: JSON.stringify(memory.byType), + }; +} diff --git a/apps/code/src/main/utils/uuidv7.test.ts b/apps/code/src/main/utils/uuidv7.test.ts index 2df896d199..5dadfa8417 100644 --- a/apps/code/src/main/utils/uuidv7.test.ts +++ b/apps/code/src/main/utils/uuidv7.test.ts @@ -1,4 +1,4 @@ -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; import { uuidv7 } from "./uuidv7"; const UUID_V7 = @@ -26,4 +26,16 @@ describe("uuidv7", () => { const ids = new Set(Array.from({ length: 1000 }, () => uuidv7())); expect(ids.size).toBe(1000); }); + + it("writes the 48-bit millisecond timestamp big-endian into the first 6 bytes", () => { + vi.spyOn(Date, "now").mockReturnValue(0x0123456789ab); + try { + const id = uuidv7(); + // bytes 0-5 of 0x0123456789ab -> "01234567" then "89ab" + expect(id.slice(0, 8)).toBe("01234567"); + expect(id.slice(9, 13)).toBe("89ab"); + } finally { + vi.restoreAllMocks(); + } + }); }); diff --git a/apps/code/src/renderer/utils/analytics.test.ts b/apps/code/src/renderer/utils/analytics.test.ts index c0a329f038..1c29295fa1 100644 --- a/apps/code/src/renderer/utils/analytics.test.ts +++ b/apps/code/src/renderer/utils/analytics.test.ts @@ -148,4 +148,29 @@ describe("initializePostHog", () => { expect(mockPosthog.init).not.toHaveBeenCalled(); expect(mockPosthog.onFeatureFlags).not.toHaveBeenCalled(); }); + + it("bootstraps posthog with the main-owned session id", async () => { + const { initializePostHog } = await loadAnalytics(); + + initializePostHog("0190abcd-1234-7890-8abc-def012345678"); + + expect(mockPosthog.init).toHaveBeenCalledWith( + "test-key", + expect.objectContaining({ + bootstrap: { sessionID: "0190abcd-1234-7890-8abc-def012345678" }, + session_idle_timeout_seconds: 36_000, + }), + ); + }); + + it("omits bootstrap when no session id is provided", async () => { + const { initializePostHog } = await loadAnalytics(); + + initializePostHog(); + + expect(mockPosthog.init).toHaveBeenCalledWith( + "test-key", + expect.not.objectContaining({ bootstrap: expect.anything() }), + ); + }); }); diff --git a/apps/code/src/renderer/utils/analytics.ts b/apps/code/src/renderer/utils/analytics.ts index 8e70e70ef5..af03405ba0 100644 --- a/apps/code/src/renderer/utils/analytics.ts +++ b/apps/code/src/renderer/utils/analytics.ts @@ -36,6 +36,11 @@ type PendingFlagListener = { // Subscribers added before initializePostHog runs. const pendingFlagListeners = new Set(); +// 10 h, the posthog-js maximum. The app sits idle for hours during background +// tasks; a shorter timeout would rotate the session id and break the +// main-owned crash->replay link. +const SESSION_IDLE_TIMEOUT_SECONDS = 36_000; + /** * @param sessionId Main-owned session id (UUIDv7) to bootstrap posthog-js with, * so the recording shares the id main stamps on crash events. Main owns it so @@ -56,11 +61,9 @@ export function initializePostHog(sessionId?: string) { api_host: apiHost, ui_host: uiHost, disable_session_recording: false, - // Hold the session open through long idle (max posthog-js allows) so its - // own rotation doesn't replace main's bootstrapped id mid-run. This app - // sits idle for hours with background tasks, which is exactly when a - // shorter timeout would silently rotate and break crash->replay linking. - session_idle_timeout_seconds: 36000, + // Hold the session open through long idle so posthog-js's own rotation + // doesn't replace main's bootstrapped id mid-run. + session_idle_timeout_seconds: SESSION_IDLE_TIMEOUT_SECONDS, ...(sessionId ? { bootstrap: { sessionID: sessionId } } : {}), capture_exceptions: import.meta.env.DEV ? false From 4597b7772d4e8b3651cce15cd531eee7503c653e Mon Sep 17 00:00:00 2001 From: Charles Vien Date: Mon, 8 Jun 2026 08:13:19 -0700 Subject: [PATCH 4/4] remove verbose telemetry comments --- apps/code/src/main/index.ts | 5 ----- .../src/main/services/posthog-analytics.ts | 17 ---------------- apps/code/src/main/trpc/routers/analytics.ts | 5 ----- apps/code/src/main/utils/crash-diagnostics.ts | 20 ------------------- apps/code/src/main/utils/uuidv7.test.ts | 2 -- apps/code/src/main/utils/uuidv7.ts | 10 ---------- apps/code/src/main/window.ts | 2 -- apps/code/src/renderer/App.tsx | 4 ---- apps/code/src/renderer/utils/analytics.ts | 10 ---------- 9 files changed, 75 deletions(-) diff --git a/apps/code/src/main/index.ts b/apps/code/src/main/index.ts index 038796addd..ccc657de28 100644 --- a/apps/code/src/main/index.ts +++ b/apps/code/src/main/index.ts @@ -75,9 +75,6 @@ function isCrashLoop(): boolean { return recentCrashTimestamps.length >= CRASH_LOOP_THRESHOLD; } -// Shared diagnostics attached to every crash event: uptime, the native chromium -// log tail, and flattened memory. A hard OOM kills the renderer before it can -// log, so the memory snapshot is often the only signal of what happened. function crashDiagnostics() { return { appUptimeSeconds: Math.round(process.uptime()), @@ -100,8 +97,6 @@ app.on("render-process-gone", (_event, webContents, details) => { log.error("Renderer process gone", props); captureException(new Error(`Renderer process gone: ${details.reason}`), { ...props, - // Stack is always this handler, so default grouping collapses every - // renderer death into one issue. Split by reason instead. $exception_fingerprint: ["render-process-gone", details.reason], }); getPostHogClient() diff --git a/apps/code/src/main/services/posthog-analytics.ts b/apps/code/src/main/services/posthog-analytics.ts index 43d5d8ce17..04868ac94d 100644 --- a/apps/code/src/main/services/posthog-analytics.ts +++ b/apps/code/src/main/services/posthog-analytics.ts @@ -23,9 +23,6 @@ export function initializePostHog() { enableExceptionAutocapture: true, }); - // Mint the main-owned session id now, before the first window, so crash - // handlers can stamp $session_id even when the renderer crashes during - // startup (before it fetches the id to bootstrap posthog-js). getOrCreateSessionId(); return posthogClient; @@ -39,18 +36,6 @@ export function getCurrentUserId() { return currentUserId; } -/** - * The PostHog session id is OWNED BY MAIN. Main mints one UUIDv7 and every - * renderer window bootstraps posthog-js with it (`bootstrap.sessionID`). - * Because main outlives the renderer, the id stays stable across a renderer - * crash + reload, so the replay is one continuous session spanning the crash - * and main-captured crash events (the renderer can't report its own OOM) - * always carry the right `$session_id` with no race or hand-off. - * - * Minted lazily on first request (a window asks at boot, before posthog-js - * init) so its UUIDv7 timestamp precedes the session's first event, as - * posthog-js requires. - */ export function getOrCreateSessionId(): string { if (!sessionId) { sessionId = uuidv7(); @@ -123,8 +108,6 @@ export function captureException( posthogClient.captureException(error, distinctId, { team: "posthog-code", ...additionalProperties, - // System-owned fields last so callers can't overwrite them: main owns the - // session id used for crash->replay linking. ...(sessionId ? { $session_id: sessionId } : {}), app_version: getAppVersion(), }); diff --git a/apps/code/src/main/trpc/routers/analytics.ts b/apps/code/src/main/trpc/routers/analytics.ts index 74abc478d8..7b880777c1 100644 --- a/apps/code/src/main/trpc/routers/analytics.ts +++ b/apps/code/src/main/trpc/routers/analytics.ts @@ -30,11 +30,6 @@ export const analyticsRouter = router({ } }), - /** - * Return the main-owned session id for a window to bootstrap posthog-js with - * (`bootstrap.sessionID`). Main owns it so crash events captured from main - * link to the right replay, and the id survives renderer crash+reload. - */ getSessionId: publicProcedure .output(z.object({ sessionId: z.string() })) .query(() => ({ sessionId: getOrCreateSessionId() })), diff --git a/apps/code/src/main/utils/crash-diagnostics.ts b/apps/code/src/main/utils/crash-diagnostics.ts index ef13b7034a..7abf6c6495 100644 --- a/apps/code/src/main/utils/crash-diagnostics.ts +++ b/apps/code/src/main/utils/crash-diagnostics.ts @@ -5,21 +5,6 @@ export interface MemorySnapshot { byType: Record; } -/** - * Summarize per-process memory (from `app.getAppMetrics()`, passed in by the - * caller so this stays free of a direct `electron` import) for crash - * diagnostics. Working-set sizes are in KB. Attached to renderer/child crash - * events so PostHog Error Tracking can show whether the app was under memory - * pressure: a hard OOM kills the renderer before it can log anything, so the - * chromium log usually goes silent and this is the only reliable signal. - * - * Defensive on purpose: a throw here would run before the crash handler's - * auto-recovery reload, so failures return `undefined` instead. - * - * Caveat: at `render-process-gone` time the dead renderer is already gone from - * the metrics, so the `Tab` total understates the renderer's real peak. The - * `unresponsive` sample (renderer still alive) is the more telling one. - */ export function collectMemorySnapshot( getMetrics: () => Electron.ProcessMetric[], ): MemorySnapshot | undefined { @@ -48,11 +33,6 @@ export function collectMemorySnapshot( } } -/** - * Flatten a snapshot into scalar event properties for PostHog (which doesn't - * accept nested objects, so `byType` is serialized). Returns `{}` when no - * snapshot was collected, so it spreads cleanly into a crash event's props. - */ export function flattenMemorySnapshot( memory: MemorySnapshot | undefined, ): Record { diff --git a/apps/code/src/main/utils/uuidv7.test.ts b/apps/code/src/main/utils/uuidv7.test.ts index 5dadfa8417..a2516d6866 100644 --- a/apps/code/src/main/utils/uuidv7.test.ts +++ b/apps/code/src/main/utils/uuidv7.test.ts @@ -16,7 +16,6 @@ describe("uuidv7", () => { const id = uuidv7(); const after = Date.now(); - // First 48 bits (first 12 hex chars, minus the dash) are the unix-ms stamp. const stampMs = Number.parseInt(id.slice(0, 8) + id.slice(9, 13), 16); expect(stampMs).toBeGreaterThanOrEqual(before); expect(stampMs).toBeLessThanOrEqual(after); @@ -31,7 +30,6 @@ describe("uuidv7", () => { vi.spyOn(Date, "now").mockReturnValue(0x0123456789ab); try { const id = uuidv7(); - // bytes 0-5 of 0x0123456789ab -> "01234567" then "89ab" expect(id.slice(0, 8)).toBe("01234567"); expect(id.slice(9, 13)).toBe("89ab"); } finally { diff --git a/apps/code/src/main/utils/uuidv7.ts b/apps/code/src/main/utils/uuidv7.ts index 4983ca679f..de256136f4 100644 --- a/apps/code/src/main/utils/uuidv7.ts +++ b/apps/code/src/main/utils/uuidv7.ts @@ -1,15 +1,5 @@ import { randomBytes } from "node:crypto"; -/** - * Generate a UUIDv7 (time-ordered, RFC 9562). posthog-js requires this exact - * format for `bootstrap.sessionID`: a valid v7 whose 48-bit timestamp precedes - * the session's first event. Main mints it before any window starts posthog-js, - * so the ordering holds. - * - * Layout: 48-bit big-endian unix-ms timestamp, 4-bit version (7), 2-bit variant - * (10), 74 random bits. Hand-rolled to avoid a phantom dependency on `uuid` - * (transitive only, and several major versions resolve in the tree). - */ export function uuidv7(): string { const bytes = randomBytes(16); const timestamp = Date.now(); diff --git a/apps/code/src/main/window.ts b/apps/code/src/main/window.ts index aa44263a46..cdc01f2f70 100644 --- a/apps/code/src/main/window.ts +++ b/apps/code/src/main/window.ts @@ -116,8 +116,6 @@ function setupCrashLogging(window: BrowserWindow): void { }); }); - // Unresponsive often precedes an OOM kill, and here the renderer is still - // alive, so this memory sample reflects its real (bloated) footprint. window.on("unresponsive", () => { log.warn("Window unresponsive", { url: window.webContents.getURL(), diff --git a/apps/code/src/renderer/App.tsx b/apps/code/src/renderer/App.tsx index 16473c38d7..ede0031e0f 100644 --- a/apps/code/src/renderer/App.tsx +++ b/apps/code/src/renderer/App.tsx @@ -53,10 +53,6 @@ function App() { // Initialize PostHog analytics and register the app version super property. useEffect(() => { - // Fetch the main-owned session id BEFORE initializing posthog-js so the - // recording shares the id main stamps on crash events. Init is gated on it - // so the id is set before the first event (posthog-js requires the - // bootstrap id's timestamp to precede the session's first event). void (async () => { let sessionId: string | undefined; try { diff --git a/apps/code/src/renderer/utils/analytics.ts b/apps/code/src/renderer/utils/analytics.ts index af03405ba0..251c9e0071 100644 --- a/apps/code/src/renderer/utils/analytics.ts +++ b/apps/code/src/renderer/utils/analytics.ts @@ -36,16 +36,8 @@ type PendingFlagListener = { // Subscribers added before initializePostHog runs. const pendingFlagListeners = new Set(); -// 10 h, the posthog-js maximum. The app sits idle for hours during background -// tasks; a shorter timeout would rotate the session id and break the -// main-owned crash->replay link. const SESSION_IDLE_TIMEOUT_SECONDS = 36_000; -/** - * @param sessionId Main-owned session id (UUIDv7) to bootstrap posthog-js with, - * so the recording shares the id main stamps on crash events. Main owns it so - * it survives a renderer crash+reload as one continuous session. - */ export function initializePostHog(sessionId?: string) { const apiKey = import.meta.env.VITE_POSTHOG_API_KEY; const apiHost = @@ -61,8 +53,6 @@ export function initializePostHog(sessionId?: string) { api_host: apiHost, ui_host: uiHost, disable_session_recording: false, - // Hold the session open through long idle so posthog-js's own rotation - // doesn't replace main's bootstrapped id mid-run. session_idle_timeout_seconds: SESSION_IDLE_TIMEOUT_SECONDS, ...(sessionId ? { bootstrap: { sessionID: sessionId } } : {}), capture_exceptions: import.meta.env.DEV