Skip to content

Commit a54470e

Browse files
committed
update otel
1 parent e5d0a49 commit a54470e

File tree

3 files changed

+230
-19
lines changed

3 files changed

+230
-19
lines changed

apps/sim/instrumentation-node.ts

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,21 @@
77
* OTel `service.name = "mothership"` so every request shows up as one
88
* service in the OTLP backend. To keep the two halves distinguishable:
99
*
10-
* - Every span emitted by this process is prefixed with `sim: ` on
11-
* start, and gets a `mothership.origin = "sim"` attribute.
12-
* - The Go side does the same with `go: ` / `mothership.origin = "go"`.
10+
* - Every span emitted by the mothership lifecycle on this process is
11+
* prefixed with `sim-mothership: ` on start, and gets a
12+
* `mothership.origin = "sim-mothership"` attribute.
13+
* - The Go side does the same with `go-mothership: ` /
14+
* `mothership.origin = "go-mothership"`.
1315
*
14-
* So in Jaeger/Tempo, filtering by `mothership.origin` (exact) or by
15-
* operation name prefix (`sim:` / `go:`) cleanly splits the two halves.
16+
* The `-mothership` suffix on the origin is deliberate: this Sim process
17+
* hosts plenty of non-mothership code (workflow executor, block runtime,
18+
* indexer clients) that may emit its own traces in the future. Making
19+
* the origin value explicit means a later "sim" origin can't collide
20+
* with the mothership side.
21+
*
22+
* So in any OTLP backend, filter by `mothership.origin` (exact) or by
23+
* operation name prefix (`sim-mothership:` / `go-mothership:`) to
24+
* cleanly split the two halves.
1625
*/
1726

1827
import type { Attributes, Context, Link, SpanKind } from '@opentelemetry/api'
@@ -31,9 +40,18 @@ diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.ERROR)
3140

3241
const logger = createLogger('OTelInstrumentation')
3342

34-
const MOTHERSHIP_ORIGIN = 'sim' as const
43+
// Origin value lives on every mothership span as `mothership.origin`.
44+
// Longer form intentionally used (vs. plain "sim") so non-mothership
45+
// code running in this same Sim process can't collide if it later
46+
// starts emitting its own traces.
47+
const MOTHERSHIP_ORIGIN = 'sim-mothership' as const
3548
const SPAN_NAME_PREFIX = `${MOTHERSHIP_ORIGIN}: `
3649

50+
// Short slug used only for `service.instance.id`. Kept as plain "sim"
51+
// so the instance id reads as `mothership-sim` — concise, already
52+
// scoped by `service.name = "mothership"` as the container.
53+
const SERVICE_INSTANCE_SLUG = 'sim' as const
54+
3755
const DEFAULT_TELEMETRY_CONFIG = {
3856
endpoint: env.TELEMETRY_ENDPOINT || 'https://telemetry.simstudio.ai/v1/traces',
3957
// Joint Sim+Go service surface in Jaeger/Tempo. See header comment.
@@ -147,18 +165,29 @@ function resolveSamplingRatio(isLocalEndpoint: boolean): number {
147165
}
148166

149167
/**
150-
* MothershipOriginSpanProcessor tags every span this process creates with
151-
* `mothership.origin` and prepends a `sim: ` prefix to the span name on
152-
* start, before any downstream processor (BatchSpanProcessor) reads it.
168+
* MothershipOriginSpanProcessor tags mothership-lifecycle spans with
169+
* `mothership.origin` and prepends the origin prefix to the span name
170+
* on start, before any downstream processor (BatchSpanProcessor)
171+
* reads it.
153172
*
154-
* Implemented as its own processor rather than a resource attribute so
155-
* the backend span/operation list (which keys on span name) is visually
156-
* split between sim and go even when both share service.name.
173+
* Gated on `isBusinessSpan(name)` so only spans that already match
174+
* the mothership allowlist get the label. The sampler drops
175+
* non-mothership roots anyway, but keeping the tagger conditional
176+
* means that if the sampler is ever relaxed (or a different
177+
* instrumentation stream is added alongside mothership), unrelated
178+
* spans won't accidentally inherit the mothership origin.
179+
*
180+
* Implemented as its own processor rather than a resource attribute
181+
* so the backend span/operation list (which keys on span name) is
182+
* visually split between sim and go even when both share service.name.
157183
*/
158184
class MothershipOriginSpanProcessor implements SpanProcessor {
159185
onStart(span: Span): void {
160-
span.setAttribute('mothership.origin', MOTHERSHIP_ORIGIN)
161186
const name = span.name
187+
if (!isBusinessSpan(name)) {
188+
return
189+
}
190+
span.setAttribute('mothership.origin', MOTHERSHIP_ORIGIN)
162191
if (!name.startsWith(SPAN_NAME_PREFIX)) {
163192
span.updateName(`${SPAN_NAME_PREFIX}${name}`)
164193
}
@@ -326,10 +355,12 @@ async function initializeOpenTelemetry() {
326355
// multi-second cross-machine clock drift within one group, and its
327356
// adjuster emits spurious "parent is not in the trace; skipping
328357
// clock skew adjustment" warnings on every cross-process child.
329-
// Stable per-origin instance ID (`mothership-sim` / `mothership-go`)
330-
// is enough to split the groups cleanly; Jaeger still shows both
331-
// under the single `mothership` service in its service picker.
332-
const serviceInstanceId = `${telemetryConfig.serviceName}-${MOTHERSHIP_ORIGIN}`
358+
// Using the short slug (`sim` / `go`) keeps the instance id as
359+
// `mothership-sim` / `mothership-go` — already scoped by
360+
// `service.name = "mothership"` as the container. The longer
361+
// `mothership.origin = "sim-mothership"` value does the
362+
// disambiguation at the attribute level.
363+
const serviceInstanceId = `${telemetryConfig.serviceName}-${SERVICE_INSTANCE_SLUG}`
333364
const resource = defaultResource().merge(
334365
resourceFromAttributes({
335366
[ATTR_SERVICE_NAME]: telemetryConfig.serviceName,

apps/sim/lib/copilot/chat/post.ts

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -374,10 +374,34 @@ function buildOnComplete(params: {
374374
requestId: string
375375
workspaceId?: string
376376
notifyWorkspaceStatus: boolean
377+
/**
378+
* Root agent span for this request. When present, the final
379+
* assistant message + invoked tool calls are recorded as
380+
* `gen_ai.output.messages` on it before persistence runs. Keeps
381+
* the Honeycomb Gen AI view complete across both the Sim root
382+
* span and the Go-side `llm.stream` spans.
383+
*/
384+
otelRoot?: {
385+
setOutputMessages: (output: {
386+
assistantText?: string
387+
toolCalls?: Array<{ id: string; name: string; arguments?: Record<string, unknown> }>
388+
}) => void
389+
}
377390
}) {
378-
const { chatId, userMessageId, requestId, workspaceId, notifyWorkspaceStatus } = params
391+
const { chatId, userMessageId, requestId, workspaceId, notifyWorkspaceStatus, otelRoot } = params
379392

380393
return async (result: OrchestratorResult) => {
394+
if (otelRoot && result.success) {
395+
otelRoot.setOutputMessages({
396+
assistantText: result.content,
397+
toolCalls: result.toolCalls?.map((tc) => ({
398+
id: tc.id,
399+
name: tc.name,
400+
arguments: tc.params,
401+
})),
402+
})
403+
}
404+
381405
if (!chatId) return
382406

383407
try {
@@ -601,6 +625,11 @@ export async function handleUnifiedChatPost(req: NextRequest) {
601625
runId,
602626
transport: 'stream',
603627
})
628+
// Emit `gen_ai.input.messages` on the root agent span for OTel
629+
// GenAI spec compliance (Honeycomb's Gen AI view keys off this).
630+
// Gated on OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT
631+
// internally — safe to always call.
632+
otelRoot.setInputMessages({ userMessage: body.message })
604633

605634
// Wrap the rest of the handler so every nested withCopilotSpan /
606635
// withDbSpan (persistUserMessage, createRunSegment, resolveBranch DB
@@ -799,6 +828,7 @@ export async function handleUnifiedChatPost(req: NextRequest) {
799828
requestId: tracker.requestId,
800829
workspaceId,
801830
notifyWorkspaceStatus: branch.notifyWorkspaceStatus,
831+
otelRoot,
802832
}),
803833
onError: buildOnError({
804834
chatId: actualChatId,

apps/sim/lib/copilot/request/otel.ts

Lines changed: 151 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,136 @@ import type { RequestTraceV1Outcome } from '@/lib/copilot/generated/request-trac
1414
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
1515
import { contextFromRequestHeaders } from '@/lib/copilot/request/go/propagation'
1616

17+
/**
18+
* OTel GenAI experimental semantic conventions env var. When set to a
19+
* truthy value, each `gen_ai.*` span carries the full input and
20+
* output conversation content as attributes. Mirrors the Go-side
21+
* gate in `copilot/internal/providers/telemetry.go` so operators
22+
* control both halves with one variable.
23+
*
24+
* Spec: https://opentelemetry.io/docs/specs/semconv/gen-ai/
25+
*/
26+
const GENAI_CAPTURE_ENV = 'OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT'
27+
28+
/**
29+
* Attribute-size cap for `gen_ai.{input,output}.messages`. Most OTLP
30+
* backends reject attributes larger than ~64 KiB, so we truncate
31+
* proactively to keep the rest of the span alive if a conversation
32+
* runs long. Matches the Go-side cap to keep truncation behavior
33+
* symmetrical between the two halves.
34+
*/
35+
const GENAI_MESSAGE_ATTR_MAX_BYTES = 60 * 1024
36+
37+
function isGenAIMessageCaptureEnabled(): boolean {
38+
const raw = (process.env[GENAI_CAPTURE_ENV] || '').toLowerCase().trim()
39+
return raw === 'true' || raw === '1' || raw === 'yes'
40+
}
41+
42+
/**
43+
* Canonical OTel GenAI message shape used for both input and output
44+
* attributes. Kept minimal — only the three part types we actually
45+
* emit: `text`, `tool_call`, and `tool_call_response`. Adding more
46+
* part types is cheap, but every additional shape here has to be
47+
* mirrored in the Go serializer.
48+
*/
49+
interface GenAIAgentPart {
50+
type: 'text' | 'tool_call' | 'tool_call_response'
51+
content?: string
52+
id?: string
53+
name?: string
54+
arguments?: Record<string, unknown>
55+
response?: string
56+
}
57+
58+
interface GenAIAgentMessage {
59+
role: 'system' | 'user' | 'assistant' | 'tool'
60+
parts: GenAIAgentPart[]
61+
}
62+
63+
function marshalAgentMessages(messages: GenAIAgentMessage[]): string | undefined {
64+
if (messages.length === 0) return undefined
65+
const json = JSON.stringify(messages)
66+
if (json.length <= GENAI_MESSAGE_ATTR_MAX_BYTES) return json
67+
// Simple tail-preserving truncation: drop from the front until we
68+
// fit. Matches the Go side's behavior. The last message is
69+
// usually the most diagnostic for span-level outcome.
70+
let remaining = messages.slice()
71+
while (remaining.length > 1) {
72+
remaining = remaining.slice(1)
73+
const candidate = JSON.stringify(remaining)
74+
if (candidate.length <= GENAI_MESSAGE_ATTR_MAX_BYTES) return candidate
75+
}
76+
// Single message still over cap — truncate the text part in place
77+
// with a marker so the partial content is still readable.
78+
const only = remaining[0]
79+
for (const part of only.parts) {
80+
if (part.type === 'text' && part.content) {
81+
const headroom = GENAI_MESSAGE_ATTR_MAX_BYTES - 1024
82+
if (part.content.length > headroom) {
83+
part.content = `${part.content.slice(0, headroom)}\n\n[truncated: capture cap ${GENAI_MESSAGE_ATTR_MAX_BYTES} bytes]`
84+
}
85+
}
86+
}
87+
const final = JSON.stringify([only])
88+
return final.length <= GENAI_MESSAGE_ATTR_MAX_BYTES ? final : undefined
89+
}
90+
91+
export interface CopilotAgentInputMessages {
92+
userMessage?: string
93+
systemPrompt?: string
94+
}
95+
96+
export interface CopilotAgentOutputMessages {
97+
assistantText?: string
98+
toolCalls?: Array<{
99+
id: string
100+
name: string
101+
arguments?: Record<string, unknown>
102+
}>
103+
}
104+
105+
function setAgentInputMessages(span: Span, input: CopilotAgentInputMessages): void {
106+
if (!isGenAIMessageCaptureEnabled()) return
107+
const messages: GenAIAgentMessage[] = []
108+
if (input.systemPrompt) {
109+
messages.push({
110+
role: 'system',
111+
parts: [{ type: 'text', content: input.systemPrompt }],
112+
})
113+
}
114+
if (input.userMessage) {
115+
messages.push({
116+
role: 'user',
117+
parts: [{ type: 'text', content: input.userMessage }],
118+
})
119+
}
120+
const serialized = marshalAgentMessages(messages)
121+
if (serialized) {
122+
span.setAttribute('gen_ai.input.messages', serialized)
123+
}
124+
}
125+
126+
function setAgentOutputMessages(span: Span, output: CopilotAgentOutputMessages): void {
127+
if (!isGenAIMessageCaptureEnabled()) return
128+
const parts: GenAIAgentPart[] = []
129+
if (output.assistantText) {
130+
parts.push({ type: 'text', content: output.assistantText })
131+
}
132+
for (const tc of output.toolCalls ?? []) {
133+
parts.push({
134+
type: 'tool_call',
135+
id: tc.id,
136+
name: tc.name,
137+
...(tc.arguments ? { arguments: tc.arguments } : {}),
138+
})
139+
}
140+
if (parts.length === 0) return
141+
const serialized = marshalAgentMessages([{ role: 'assistant', parts }])
142+
if (serialized) {
143+
span.setAttribute('gen_ai.output.messages', serialized)
144+
}
145+
}
146+
17147
/**
18148
* Reuse the generated RequestTraceV1Outcome string values for every
19149
* lifecycle outcome field. This keeps our OTel attributes, internal
@@ -262,6 +392,20 @@ export interface CopilotOtelRoot {
262392
span: Span
263393
context: Context
264394
finish: (outcome?: CopilotLifecycleOutcome, error?: unknown) => void
395+
/**
396+
* Record `gen_ai.input.messages` on the root agent span. Gated on
397+
* `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` — no-op when
398+
* capture is disabled. Safe to call multiple times; the latest
399+
* call wins.
400+
*/
401+
setInputMessages: (input: CopilotAgentInputMessages) => void
402+
/**
403+
* Record `gen_ai.output.messages` on the root agent span. Gated on
404+
* the same env var as `setInputMessages`. Typically called from the
405+
* stream finalize callback once the assistant's final content and
406+
* invoked tool calls are known.
407+
*/
408+
setOutputMessages: (output: CopilotAgentOutputMessages) => void
265409
}
266410

267411
export function startCopilotOtelRoot(scope: CopilotOtelScope): CopilotOtelRoot {
@@ -300,7 +444,13 @@ export function startCopilotOtelRoot(scope: CopilotOtelScope): CopilotOtelRoot {
300444
span.end()
301445
}
302446

303-
return { span, context: rootContext, finish }
447+
return {
448+
span,
449+
context: rootContext,
450+
finish,
451+
setInputMessages: (input) => setAgentInputMessages(span, input),
452+
setOutputMessages: (output) => setAgentOutputMessages(span, output),
453+
}
304454
}
305455

306456
export async function withCopilotOtelContext<T>(

0 commit comments

Comments (0)