diff --git a/lib/DBSQLClient.ts b/lib/DBSQLClient.ts index ee53e790..d0430284 100644 --- a/lib/DBSQLClient.ts +++ b/lib/DBSQLClient.ts @@ -1,5 +1,6 @@ import thrift from 'thrift'; import Int64 from 'node-int64'; +import os from 'os'; import { EventEmitter } from 'events'; import TCLIService from '../thrift/TCLIService'; @@ -31,6 +32,14 @@ import IDBSQLLogger, { LogLevel } from './contracts/IDBSQLLogger'; import DBSQLLogger from './DBSQLLogger'; import CloseableCollection from './utils/CloseableCollection'; import IConnectionProvider from './connection/contracts/IConnectionProvider'; +import FeatureFlagCache from './telemetry/FeatureFlagCache'; +import TelemetryClientProvider from './telemetry/TelemetryClientProvider'; +import TelemetryEventEmitter from './telemetry/TelemetryEventEmitter'; +import MetricsAggregator from './telemetry/MetricsAggregator'; +import DatabricksTelemetryExporter from './telemetry/DatabricksTelemetryExporter'; +import { CircuitBreakerRegistry } from './telemetry/CircuitBreaker'; +import { DriverConfiguration, DRIVER_NAME } from './telemetry/types'; +import driverVersion from './version'; function prependSlash(str: string): string { if (str.length > 0 && str.charAt(0) !== '/') { @@ -75,6 +84,23 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I private readonly sessions = new CloseableCollection(); + // Telemetry components (instance-based, NOT singletons) + private host?: string; + + private httpPath?: string; + + private authType?: string; + + private featureFlagCache?: FeatureFlagCache; + + private telemetryClientProvider?: TelemetryClientProvider; + + private telemetryEmitter?: TelemetryEventEmitter; + + private telemetryAggregator?: MetricsAggregator; + + private circuitBreakerRegistry?: CircuitBreakerRegistry; + private static getDefaultLogger(): IDBSQLLogger { if (!this.defaultLogger) { this.defaultLogger = new DBSQLLogger(); @@ -101,6 +127,15 @@ export default class DBSQLClient extends EventEmitter 
implements IDBSQLClient, I cloudFetchSpeedThresholdMBps: 0.1, useLZ4Compression: true, + + // Telemetry defaults + telemetryEnabled: true, // Enabled by default, gated by feature flag + telemetryBatchSize: 100, + telemetryFlushIntervalMs: 5000, + telemetryMaxRetries: 3, + telemetryAuthenticatedExport: true, + telemetryCircuitBreakerThreshold: 5, + telemetryCircuitBreakerTimeout: 60000, // 1 minute }; } @@ -212,6 +247,214 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I return new HttpConnection(this.getConnectionOptions(options), this); } + /** + * Extract workspace ID from hostname. + * @param host - The host string (e.g., "workspace-id.cloud.databricks.com") + * @returns Workspace ID or host if extraction fails + */ + private extractWorkspaceId(host: string): string { + // Extract workspace ID from hostname (first segment before first dot) + const parts = host.split('.'); + return parts.length > 0 ? parts[0] : host; + } + + /** + * Build driver configuration for telemetry reporting. + * @returns DriverConfiguration object with current driver settings + */ + private buildDriverConfiguration(): DriverConfiguration { + return { + driverVersion, + driverName: DRIVER_NAME, + nodeVersion: process.version, + platform: process.platform, + osVersion: os.release(), + osArch: os.arch(), + runtimeVendor: 'Node.js Foundation', + localeName: this.getLocaleName(), + charSetEncoding: 'UTF-8', + processName: this.getProcessName(), + authType: this.authType || 'pat', + + // Feature flags + cloudFetchEnabled: this.config.useCloudFetch ?? false, + lz4Enabled: this.config.useLZ4Compression ?? false, + arrowEnabled: this.config.arrowEnabled ?? false, + directResultsEnabled: true, // Direct results always enabled + + // Configuration values + socketTimeout: this.config.socketTimeout ?? 0, + retryMaxAttempts: this.config.retryMaxAttempts ?? 0, + cloudFetchConcurrentDownloads: this.config.cloudFetchConcurrentDownloads ?? 
0, + + // Connection parameters + httpPath: this.httpPath, + enableMetricViewMetadata: this.config.enableMetricViewMetadata, + }; + } + + /** + * Map Node.js auth type to telemetry auth enum string. + * Distinguishes between U2M and M2M OAuth flows. + */ + private mapAuthType(options: ConnectionOptions): string { + if (options.authType === 'databricks-oauth') { + // Check if M2M (has client secret) or U2M (no client secret) + return options.oauthClientSecret === undefined + ? 'external-browser' // U2M OAuth (User-to-Machine) + : 'oauth-m2m'; // M2M OAuth (Machine-to-Machine) + } + + if (options.authType === 'custom') { + return 'custom'; // Custom auth provider + } + + // 'access-token' or undefined + return 'pat'; // Personal Access Token + } + + /** + * Get locale name in format language_country (e.g., en_US). + * Matches JDBC format: user.language + '_' + user.country + */ + private getLocaleName(): string { + try { + // Try to get from environment variables + const lang = process.env.LANG || process.env.LC_ALL || process.env.LC_MESSAGES || ''; + if (lang) { + // LANG format is typically "en_US.UTF-8", extract "en_US" + const match = lang.match(/^([a-z]{2}_[A-Z]{2})/); + if (match) { + return match[1]; + } + } + // Fallback to en_US + return 'en_US'; + } catch { + return 'en_US'; + } + } + + /** + * Get process name, similar to JDBC's ProcessNameUtil. + * Returns the script name or process title. 
+ */ + private getProcessName(): string { + try { + // Try process.title first (can be set by application) + if (process.title && process.title !== 'node') { + return process.title; + } + // Try to get the main script name from argv[1] + if (process.argv && process.argv.length > 1) { + const scriptPath = process.argv[1]; + // Extract filename without path + const filename = scriptPath.split('/').pop()?.split('\\').pop() || ''; + // Remove extension + const nameWithoutExt = filename.replace(/\.[^.]*$/, ''); + if (nameWithoutExt) { + return nameWithoutExt; + } + } + return 'node'; + } catch { + return 'node'; + } + } + + /** + * Initialize telemetry components if enabled. + * CRITICAL: All errors swallowed and logged at LogLevel.debug ONLY. + * Driver NEVER throws exceptions due to telemetry. + */ + private async initializeTelemetry(): Promise { + if (!this.host) { + return; + } + + try { + // Create circuit breaker registry (shared by feature flags and telemetry) + this.circuitBreakerRegistry = new CircuitBreakerRegistry(this); + + // Create feature flag cache instance with circuit breaker protection + this.featureFlagCache = new FeatureFlagCache(this, this.circuitBreakerRegistry); + this.featureFlagCache.getOrCreateContext(this.host); + + // Check if telemetry enabled via feature flag + const enabled = await this.featureFlagCache.isTelemetryEnabled(this.host); + + if (!enabled) { + this.logger.log(LogLevel.debug, 'Telemetry: disabled'); + return; + } + + // Create telemetry components (all instance-based) + this.telemetryClientProvider = new TelemetryClientProvider(this); + this.telemetryEmitter = new TelemetryEventEmitter(this); + + // Get or create telemetry client for this host (increments refCount) + this.telemetryClientProvider.getOrCreateClient(this.host); + + // Create telemetry exporter with shared circuit breaker registry + const exporter = new DatabricksTelemetryExporter(this, this.host, this.circuitBreakerRegistry); + this.telemetryAggregator = new 
MetricsAggregator(this, exporter); + + // Wire up event listeners + this.telemetryEmitter.on('connection.open', (event) => { + try { + this.telemetryAggregator?.processEvent(event); + } catch (error: any) { + this.logger.log(LogLevel.debug, `Error processing connection.open event: ${error.message}`); + } + }); + + this.telemetryEmitter.on('connection.close', (event) => { + try { + this.telemetryAggregator?.processEvent(event); + } catch (error: any) { + this.logger.log(LogLevel.debug, `Error processing connection.close event: ${error.message}`); + } + }); + + this.telemetryEmitter.on('statement.start', (event) => { + try { + this.telemetryAggregator?.processEvent(event); + } catch (error: any) { + this.logger.log(LogLevel.debug, `Error processing statement.start event: ${error.message}`); + } + }); + + this.telemetryEmitter.on('statement.complete', (event) => { + try { + this.telemetryAggregator?.processEvent(event); + } catch (error: any) { + this.logger.log(LogLevel.debug, `Error processing statement.complete event: ${error.message}`); + } + }); + + this.telemetryEmitter.on('cloudfetch.chunk', (event) => { + try { + this.telemetryAggregator?.processEvent(event); + } catch (error: any) { + this.logger.log(LogLevel.debug, `Error processing cloudfetch.chunk event: ${error.message}`); + } + }); + + this.telemetryEmitter.on('error', (event) => { + try { + this.telemetryAggregator?.processEvent(event); + } catch (error: any) { + this.logger.log(LogLevel.debug, `Error processing error event: ${error.message}`); + } + }); + + this.logger.log(LogLevel.debug, 'Telemetry: enabled'); + } catch (error: any) { + // Swallow all telemetry initialization errors + this.logger.log(LogLevel.debug, `Telemetry initialization error: ${error.message}`); + } + } + /** * Connects DBSQLClient to endpoint * @public @@ -233,11 +476,27 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I } } + // Store connection params for telemetry + this.host = options.host; 
+ this.httpPath = options.path; + this.authType = this.mapAuthType(options); + // Store enableMetricViewMetadata configuration if (options.enableMetricViewMetadata !== undefined) { this.config.enableMetricViewMetadata = options.enableMetricViewMetadata; } + // Override telemetry config if provided in options + if (options.telemetryEnabled !== undefined) { + this.config.telemetryEnabled = options.telemetryEnabled; + } + if (options.telemetryBatchSize !== undefined) { + this.config.telemetryBatchSize = options.telemetryBatchSize; + } + if (options.telemetryAuthenticatedExport !== undefined) { + this.config.telemetryAuthenticatedExport = options.telemetryAuthenticatedExport; + } + // Persist userAgentEntry so telemetry and feature-flag call sites reuse // the same value as the primary Thrift connection's User-Agent. if (options.userAgentEntry !== undefined) { @@ -277,6 +536,11 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I this.emit('timeout'); }); + // Initialize telemetry if enabled + if (this.config.telemetryEnabled) { + await this.initializeTelemetry(); + } + return this; } @@ -290,6 +554,9 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I * const session = await client.openSession(); */ public async openSession(request: OpenSessionRequest = {}): Promise { + // Track connection open latency + const startTime = Date.now(); + // Prepare session configuration const configuration = request.configuration ? 
{ ...request.configuration } : {}; @@ -312,12 +579,54 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I serverProtocolVersion: response.serverProtocolVersion, }); this.sessions.add(session); + + // Emit connection.open telemetry event + if (this.telemetryEmitter && this.host) { + try { + const latencyMs = Date.now() - startTime; + const workspaceId = this.extractWorkspaceId(this.host); + const driverConfig = this.buildDriverConfiguration(); + this.telemetryEmitter.emitConnectionOpen({ + sessionId: session.id, + workspaceId, + driverConfig, + latencyMs, + }); + } catch (error: any) { + // CRITICAL: All telemetry exceptions swallowed + this.logger.log(LogLevel.debug, `Error emitting connection.open event: ${error.message}`); + } + } + return session; } public async close(): Promise { await this.sessions.closeAll(); + // Cleanup telemetry + if (this.host) { + try { + // Step 1: Close aggregator (stops timer, completes statements, final flush) + if (this.telemetryAggregator) { + this.telemetryAggregator.close(); + } + + // Step 2: Release telemetry client (decrements ref count, closes if last) + if (this.telemetryClientProvider) { + await this.telemetryClientProvider.releaseClient(this.host); + } + + // Step 3: Release feature flag context (decrements ref count) + if (this.featureFlagCache) { + this.featureFlagCache.releaseContext(this.host); + } + } catch (error: any) { + // Swallow all telemetry cleanup errors + this.logger.log(LogLevel.debug, `Telemetry cleanup error: ${error.message}`); + } + } + this.client = undefined; this.connectionProvider = undefined; this.authProvider = undefined; diff --git a/lib/DBSQLOperation.ts b/lib/DBSQLOperation.ts index fe22995d..7b72770c 100644 --- a/lib/DBSQLOperation.ts +++ b/lib/DBSQLOperation.ts @@ -34,11 +34,14 @@ import { definedOrError } from './utils'; import { OperationChunksIterator, OperationRowsIterator } from './utils/OperationIterator'; import HiveDriverError from 
'./errors/HiveDriverError'; import IClientContext from './contracts/IClientContext'; +import ExceptionClassifier from './telemetry/ExceptionClassifier'; +import { mapOperationTypeToTelemetryType, mapResultFormatToTelemetryType } from './telemetry/telemetryTypeMappers'; interface DBSQLOperationConstructorOptions { handle: TOperationHandle; directResults?: TSparkDirectResults; context: IClientContext; + sessionId?: string; } async function delay(ms?: number): Promise { @@ -76,9 +79,17 @@ export default class DBSQLOperation implements IOperation { private resultHandler?: ResultSlicer; - constructor({ handle, directResults, context }: DBSQLOperationConstructorOptions) { + // Telemetry tracking fields + private startTime: number = Date.now(); + + private pollCount: number = 0; + + private sessionId?: string; + + constructor({ handle, directResults, context, sessionId }: DBSQLOperationConstructorOptions) { this.operationHandle = handle; this.context = context; + this.sessionId = sessionId; const useOnlyPrefetchedResults = Boolean(directResults?.closeOperation); @@ -95,6 +106,9 @@ export default class DBSQLOperation implements IOperation { ); this.closeOperation = directResults?.closeOperation; this.context.getLogger().log(LogLevel.debug, `Operation created with id: ${this.id}`); + + // Emit statement.start telemetry event + this.emitStatementStart(); } public iterateChunks(options?: IteratorOptions): IOperationChunksIterator { @@ -225,6 +239,9 @@ export default class DBSQLOperation implements IOperation { return this.operationStatus; } + // Track poll count for telemetry + this.pollCount += 1; + const driver = await this.context.getDriver(); const response = await driver.getOperationStatus({ operationHandle: this.operationHandle, @@ -279,6 +296,9 @@ export default class DBSQLOperation implements IOperation { this.closed = true; const result = new Status(response.status); + // Emit statement.complete telemetry event + await this.emitStatementComplete(); + 
this.onClose?.(); return result; } @@ -441,7 +461,7 @@ export default class DBSQLOperation implements IOperation { case TSparkRowSetType.URL_BASED_SET: resultSource = new ArrowResultConverter( this.context, - new CloudFetchResultHandler(this.context, this._data, metadata), + new CloudFetchResultHandler(this.context, this._data, metadata, this.id), metadata, ); break; @@ -481,4 +501,92 @@ export default class DBSQLOperation implements IOperation { return response; } + + /** + * Emit statement.start telemetry event. + * CRITICAL: All exceptions swallowed and logged at LogLevel.debug ONLY. + */ + private emitStatementStart(): void { + try { + const { telemetryEmitter } = this.context as any; + if (!telemetryEmitter) { + return; + } + + telemetryEmitter.emitStatementStart({ + statementId: this.id, + sessionId: this.sessionId || '', + operationType: mapOperationTypeToTelemetryType(this.operationHandle.operationType), + }); + } catch (error: any) { + this.context.getLogger().log(LogLevel.debug, `Error emitting statement.start event: ${error.message}`); + } + } + + /** + * Emit statement.complete telemetry event and complete aggregation. + * CRITICAL: All exceptions swallowed and logged at LogLevel.debug ONLY. 
+ */ + private async emitStatementComplete(): Promise { + try { + const { telemetryEmitter } = this.context as any; + const { telemetryAggregator } = this.context as any; + if (!telemetryEmitter || !telemetryAggregator) { + return; + } + + // Fetch metadata if not already fetched to get result format + let resultFormat: string | undefined; + try { + if (!this.metadata && !this.cancelled) { + await this.getMetadata(); + } + resultFormat = mapResultFormatToTelemetryType(this.metadata?.resultFormat); + } catch (error) { + // If metadata fetch fails, continue without it + resultFormat = undefined; + } + + const latencyMs = Date.now() - this.startTime; + + telemetryEmitter.emitStatementComplete({ + statementId: this.id, + sessionId: this.sessionId || '', + latencyMs, + resultFormat, + pollCount: this.pollCount, + }); + + // Complete statement aggregation + telemetryAggregator.completeStatement(this.id); + } catch (error: any) { + this.context.getLogger().log(LogLevel.debug, `Error emitting statement.complete event: ${error.message}`); + } + } + + /** + * Emit error telemetry event with terminal classification. + * CRITICAL: All exceptions swallowed and logged at LogLevel.debug ONLY. 
+ */ + private emitErrorEvent(error: Error): void { + try { + const { telemetryEmitter } = this.context as any; + if (!telemetryEmitter) { + return; + } + + // Classify the exception + const isTerminal = ExceptionClassifier.isTerminal(error); + + telemetryEmitter.emitError({ + statementId: this.id, + sessionId: this.sessionId, + errorName: error.name || 'Error', + errorMessage: error.message || 'Unknown error', + isTerminal, + }); + } catch (emitError: any) { + this.context.getLogger().log(LogLevel.debug, `Error emitting error event: ${emitError.message}`); + } + } } diff --git a/lib/DBSQLSession.ts b/lib/DBSQLSession.ts index 9b4245c3..04ec137b 100644 --- a/lib/DBSQLSession.ts +++ b/lib/DBSQLSession.ts @@ -151,6 +151,8 @@ export default class DBSQLSession implements IDBSQLSession { private isOpen = true; + private openTime: number; + private serverProtocolVersion?: TProtocolVersion; public onClose?: () => void; @@ -169,6 +171,7 @@ export default class DBSQLSession implements IDBSQLSession { constructor({ handle, context, serverProtocolVersion }: DBSQLSessionConstructorOptions) { this.sessionHandle = handle; this.context = context; + this.openTime = Date.now(); // Get the server protocol version from the provided parameter (from TOpenSessionResp) this.serverProtocolVersion = serverProtocolVersion; this.context.getLogger().log(LogLevel.debug, `Session created with id: ${this.id}`); @@ -594,6 +597,16 @@ export default class DBSQLSession implements IDBSQLSession { this.onClose?.(); this.isOpen = false; + // Emit connection close telemetry + const closeLatency = Date.now() - this.openTime; + const { telemetryEmitter } = this.context as any; + if (telemetryEmitter) { + telemetryEmitter.emitConnectionClose({ + sessionId: this.id, + latencyMs: closeLatency, + }); + } + this.context.getLogger().log(LogLevel.debug, `Session closed with id: ${this.id}`); return new Status(response.status); } @@ -605,6 +618,7 @@ export default class DBSQLSession implements IDBSQLSession { 
handle, directResults: response.directResults, context: this.context, + sessionId: this.id, }); this.operations.add(operation); diff --git a/lib/contracts/IClientContext.ts b/lib/contracts/IClientContext.ts index c7274a1b..4ab7498b 100644 --- a/lib/contracts/IClientContext.ts +++ b/lib/contracts/IClientContext.ts @@ -2,6 +2,7 @@ import IDBSQLLogger from './IDBSQLLogger'; import IDriver from './IDriver'; import IConnectionProvider from '../connection/contracts/IConnectionProvider'; import IThriftClient from './IThriftClient'; +import IAuthentication from '../connection/contracts/IAuthentication'; export interface ClientConfig { directResultsDefaultMaxRows: number; @@ -50,4 +51,6 @@ export default interface IClientContext { getClient(): Promise; getDriver(): Promise; + + getAuthProvider(): IAuthentication | undefined; } diff --git a/lib/contracts/IDBSQLClient.ts b/lib/contracts/IDBSQLClient.ts index 4b2f39a4..991eabc9 100644 --- a/lib/contracts/IDBSQLClient.ts +++ b/lib/contracts/IDBSQLClient.ts @@ -54,6 +54,10 @@ export type ConnectionOptions = { socketTimeout?: number; proxy?: ProxyOptions; enableMetricViewMetadata?: boolean; + // Optional telemetry overrides + telemetryEnabled?: boolean; + telemetryBatchSize?: number; + telemetryAuthenticatedExport?: boolean; } & AuthOptions; export interface OpenSessionRequest { diff --git a/lib/index.ts b/lib/index.ts index 81e3aaae..38460647 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -23,10 +23,7 @@ import { LogLevel } from './contracts/IDBSQLLogger'; // Re-export types for TypeScript users export type { default as ITokenProvider } from './connection/auth/tokenProvider/ITokenProvider'; -// Re-export telemetry error classes so consumers can instanceof-check rather -// than string-matching error messages. 
export { CircuitBreakerOpenError, CIRCUIT_BREAKER_OPEN_CODE } from './telemetry/CircuitBreaker'; -export { TelemetryTerminalError } from './telemetry/DatabricksTelemetryExporter'; export const auth = { PlainHttpAuthentication, diff --git a/lib/result/CloudFetchResultHandler.ts b/lib/result/CloudFetchResultHandler.ts index 91878813..6d28b317 100644 --- a/lib/result/CloudFetchResultHandler.ts +++ b/lib/result/CloudFetchResultHandler.ts @@ -14,18 +14,24 @@ export default class CloudFetchResultHandler implements IResultsProvider = []; private downloadTasks: Array> = []; + private chunkIndex: number = 0; + constructor( context: IClientContext, source: IResultsProvider, - { lz4Compressed }: TGetResultSetMetadataResp, + metadata: TGetResultSetMetadataResp, + statementId?: string, ) { this.context = context; this.source = source; - this.isLZ4Compressed = lz4Compressed ?? false; + this.isLZ4Compressed = metadata.lz4Compressed ?? false; + this.statementId = statementId; if (this.isLZ4Compressed && !LZ4()) { throw new HiveDriverError('Cannot handle LZ4 compressed result: module `lz4` not installed'); @@ -106,6 +112,10 @@ export default class CloudFetchResultHandler implements IResultsProvider { if (!metrics || metrics.length === 0) { return; @@ -142,76 +141,64 @@ export default class DatabricksTelemetryExporter { const logger = this.context.getLogger(); try { - await this.circuitBreaker.execute(() => this.exportWithRetry(metrics)); + await this.circuitBreaker.execute(async () => { + await this.exportWithRetry(metrics); + }); } catch (error: any) { - if (error instanceof CircuitBreakerOpenError) { + // CRITICAL: All exceptions swallowed and logged at debug level ONLY + if (error.message === 'Circuit breaker OPEN') { logger.log(LogLevel.debug, 'Circuit breaker OPEN - dropping telemetry'); - } else if (error instanceof AuthenticationError) { - logger.log(LogLevel.debug, `Telemetry export auth failure: ${error.message}`); - } else if (error instanceof TelemetryTerminalError) { - 
logger.log(LogLevel.debug, `Telemetry export refused: ${error.message}`); } else { - logger.log(LogLevel.debug, `Telemetry export error: ${error?.message ?? error}`); + logger.log(LogLevel.debug, `Telemetry export error: ${error.message}`); } } } /** - * Retry wrapper shaped after HttpRetryPolicy: retries only on errors - * classified as retryable by ExceptionClassifier, stops on terminal ones, - * surfaces the last error to the circuit breaker. - * - * `maxRetries` is the number of retries *after* the first attempt (i.e. - * attempts = maxRetries + 1), matching HttpRetryPolicy's semantics. + * Export metrics with retry logic for retryable errors. + * Implements exponential backoff with jitter. */ private async exportWithRetry(metrics: TelemetryMetric[]): Promise { const config = this.context.getConfig(); const logger = this.context.getLogger(); - - const rawMaxRetries = config.telemetryMaxRetries ?? DEFAULT_TELEMETRY_CONFIG.maxRetries; - const maxRetries = - Number.isFinite(rawMaxRetries) && rawMaxRetries >= 0 ? rawMaxRetries : DEFAULT_TELEMETRY_CONFIG.maxRetries; - const baseMs = config.telemetryBackoffBaseMs ?? DEFAULT_TELEMETRY_CONFIG.backoffBaseMs; - const maxMs = config.telemetryBackoffMaxMs ?? DEFAULT_TELEMETRY_CONFIG.backoffMaxMs; - const jitterMs = config.telemetryBackoffJitterMs ?? DEFAULT_TELEMETRY_CONFIG.backoffJitterMs; - - const totalAttempts = maxRetries + 1; + const maxRetries = config.telemetryMaxRetries ?? 
DEFAULT_TELEMETRY_CONFIG.maxRetries; let lastError: Error | null = null; /* eslint-disable no-await-in-loop */ - for (let attempt = 0; attempt < totalAttempts; attempt += 1) { + for (let attempt = 0; attempt <= maxRetries; attempt += 1) { try { await this.exportInternal(metrics); - return; + return; // Success } catch (error: any) { lastError = error; - if ( - error instanceof AuthenticationError || - error instanceof TelemetryTerminalError || - ExceptionClassifier.isTerminal(error) - ) { - throw error; + // Check if error is terminal (don't retry) + if (ExceptionClassifier.isTerminal(error)) { + logger.log(LogLevel.debug, `Terminal error - no retry: ${error.message}`); + throw error; // Terminal error, propagate to circuit breaker } + + // Check if error is retryable if (!ExceptionClassifier.isRetryable(error)) { - throw error; + logger.log(LogLevel.debug, `Non-retryable error: ${error.message}`); + throw error; // Not retryable, propagate to circuit breaker } - if (attempt >= totalAttempts - 1) { - throw error; + + // Last attempt reached + if (attempt >= maxRetries) { + logger.log(LogLevel.debug, `Max retries reached (${maxRetries}): ${error.message}`); + throw error; // Max retries exhausted, propagate to circuit breaker } - const base = Math.min(baseMs * 2 ** attempt, maxMs); - const jitter = Math.random() * jitterMs; - const delay = Math.min(base + jitter, maxMs); + // Calculate backoff with exponential + jitter (100ms - 1000ms) + const baseDelay = Math.min(100 * 2 ** attempt, 1000); + const jitter = Math.random() * 100; + const delay = baseDelay + jitter; - // Include the failing error so ops can see what's being retried, - // not just the cadence. logger.log( LogLevel.debug, - `Retrying telemetry export (attempt ${attempt + 1}/${totalAttempts}) after ${Math.round(delay)}ms: ${ - error?.statusCode ?? '' - } ${redactSensitive(error?.message ?? 
'')}`, + `Retrying telemetry export (attempt ${attempt + 1}/${maxRetries}) after ${Math.round(delay)}ms`, ); await this.sleep(delay); @@ -219,130 +206,122 @@ export default class DatabricksTelemetryExporter { } /* eslint-enable no-await-in-loop */ + // Should not reach here, but just in case if (lastError) { throw lastError; } } + /** + * Internal export implementation that makes the HTTP call. + */ private async exportInternal(metrics: TelemetryMetric[]): Promise { const config = this.context.getConfig(); - const logger = this.context.getLogger(); + // Determine endpoint based on authentication mode const authenticatedExport = config.telemetryAuthenticatedExport ?? DEFAULT_TELEMETRY_CONFIG.authenticatedExport; - const endpoint = buildTelemetryUrl(this.host, authenticatedExport ? '/telemetry-ext' : '/telemetry-unauth'); - if (!endpoint) { - // Malformed / deny-listed host — drop the batch rather than letting - // it target an attacker-controlled destination. - throw new TelemetryTerminalError('Refusing telemetry export: host failed validation'); - } + const endpoint = authenticatedExport + ? buildUrl(this.host, '/telemetry-ext') + : buildUrl(this.host, '/telemetry-unauth'); - const userAgent = authenticatedExport ? 
this.authenticatedUserAgent : this.unauthenticatedUserAgent; - let headers: Record = { - 'Content-Type': 'application/json', - 'User-Agent': userAgent, - }; + // Format payload - each log is JSON-stringified to match JDBC format + const telemetryLogs = metrics.map((m) => this.toTelemetryLog(m)); + const protoLogs = telemetryLogs.map((log) => JSON.stringify(log)); - if (authenticatedExport) { - headers = { ...headers, ...(await this.getAuthHeaders()) }; - if (!hasAuthorization(headers)) { - if (!this.authMissingWarned) { - this.authMissingWarned = true; - logger.log(LogLevel.warn, 'Telemetry: Authorization header missing — metrics will be dropped'); - } - throw new AuthenticationError('Telemetry export: missing Authorization header'); - } - } - - const protoLogs = metrics.map((m) => this.toTelemetryLog(m, authenticatedExport, userAgent)); - const body = JSON.stringify({ + const payload: DatabricksTelemetryPayload = { uploadTime: Date.now(), - items: [], - protoLogs: protoLogs.map((log) => JSON.stringify(log)), - }); + items: [], // Required but unused + protoLogs, + }; - logger.log( - LogLevel.debug, - `Exporting ${metrics.length} telemetry metrics to ${ - authenticatedExport ? 'authenticated' : 'unauthenticated' - } endpoint`, - ); + const authHeaders = authenticatedExport ? ((await this.context.getAuthProvider()?.authenticate()) ?? 
{}) : {}; - const response = await this.sendRequest(endpoint, { + // Get agent with proxy settings (same pattern as CloudFetchResultHandler and DBSQLSession) + const connectionProvider = await this.context.getConnectionProvider(); + const agent = await connectionProvider.getAgent(); + + // Make HTTP POST request with authentication and proxy support + const response: Response = await this.fetchFn(endpoint, { method: 'POST', - headers, - body, - timeout: 10000, + headers: { + ...authHeaders, + 'Content-Type': 'application/json', + 'User-Agent': this.userAgent, + }, + body: JSON.stringify(payload), + agent, // Include agent for proxy support }); if (!response.ok) { - await response.text().catch(() => {}); const error: any = new Error(`Telemetry export failed: ${response.status} ${response.statusText}`); error.statusCode = response.status; throw error; } - - await response.text().catch(() => {}); - // Successful round-trip re-arms the "auth missing" warn so operators see - // a fresh signal the next time auth breaks. - this.authMissingWarned = false; - logger.log(LogLevel.debug, `Successfully exported ${metrics.length} telemetry metrics`); } - private async getAuthHeaders(): Promise> { - if (!this.authProvider) { - return {}; + /** + * Map Operation.Type to Statement.Type for statement_type field. + * Operation.Type (EXECUTE_STATEMENT, LIST_CATALOGS, etc.) maps to Statement.Type (QUERY, METADATA, etc.) + */ + private mapOperationToStatementType(operationType?: string): string { + if (!operationType) { + return 'TYPE_UNSPECIFIED'; } - const logger = this.context.getLogger(); - try { - return normalizeHeaders(await this.authProvider.authenticate()); - } catch (error: any) { - logger.log(LogLevel.debug, `Telemetry: auth provider threw: ${error?.message ?? 
error}`); - return {}; + + // Metadata operations map to METADATA + if ( + operationType === 'LIST_TYPE_INFO' || + operationType === 'LIST_CATALOGS' || + operationType === 'LIST_SCHEMAS' || + operationType === 'LIST_TABLES' || + operationType === 'LIST_TABLE_TYPES' || + operationType === 'LIST_COLUMNS' || + operationType === 'LIST_FUNCTIONS' || + operationType === 'LIST_PRIMARY_KEYS' || + operationType === 'LIST_IMPORTED_KEYS' || + operationType === 'LIST_EXPORTED_KEYS' || + operationType === 'LIST_CROSS_REFERENCES' + ) { + return 'METADATA'; } - } - private async sendRequest(url: string, init: RequestInit): Promise { - const connectionProvider = await this.context.getConnectionProvider(); - const agent = await connectionProvider.getAgent(); - const retryPolicy = await connectionProvider.getRetryPolicy(); - const requestConfig: RequestInit = { agent, ...init }; - const result = await retryPolicy.invokeWithRetry(() => { - const request = new Request(url, requestConfig); - return fetch(request).then((response) => ({ request, response })); - }); - return result.response; + // EXECUTE_STATEMENT maps to QUERY + if (operationType === 'EXECUTE_STATEMENT') { + return 'QUERY'; + } + + // Default to TYPE_UNSPECIFIED + return 'TYPE_UNSPECIFIED'; } - private toTelemetryLog( - metric: TelemetryMetric, - authenticatedExport: boolean, - userAgent: string, - ): DatabricksTelemetryLog { - // Unauthenticated export must not ship correlation IDs, fingerprint - // data, or raw error detail — an on-path observer could otherwise link - // sessions → workspaces → user activity without any auth. - const includeCorrelation = authenticatedExport; + /** + * Convert TelemetryMetric to Databricks telemetry log format. + */ + private toTelemetryLog(metric: TelemetryMetric): DatabricksTelemetryLog { + // Filter out NIL UUID for statement ID + const statementId = + metric.statementId && metric.statementId !== '00000000-0000-0000-0000-000000000000' + ? 
metric.statementId + : undefined; const log: DatabricksTelemetryLog = { - workspace_id: includeCorrelation ? metric.workspaceId : undefined, - frontend_log_event_id: uuidv4(), + frontend_log_event_id: this.generateUUID(), context: { client_context: { timestamp_millis: metric.timestamp, - user_agent: userAgent, + user_agent: this.userAgent, }, }, entry: { sql_driver_log: { - session_id: includeCorrelation ? metric.sessionId : undefined, - sql_statement_id: includeCorrelation ? metric.statementId : undefined, + session_id: metric.sessionId, + sql_statement_id: statementId, }, }, }; - if (metric.metricType === 'connection' && metric.driverConfig && includeCorrelation) { - // system_configuration is a high-entropy client fingerprint (OS, arch, - // locale, process, runtime). Only ship on the authenticated path. + // Include system_configuration, driver_connection_params, and auth_type for ALL metrics (if available) + if (metric.driverConfig) { log.entry.sql_driver_log.system_configuration = { driver_version: metric.driverConfig.driverVersion, driver_name: metric.driverConfig.driverName, @@ -354,14 +333,63 @@ export default class DatabricksTelemetryExporter { os_arch: metric.driverConfig.osArch, locale_name: metric.driverConfig.localeName, char_set_encoding: metric.driverConfig.charSetEncoding, - process_name: sanitizeProcessName(metric.driverConfig.processName) || undefined, + process_name: metric.driverConfig.processName, }; + + // Include driver connection params (only if we have fields to include) + if ( + metric.driverConfig.httpPath || + metric.driverConfig.socketTimeout || + metric.driverConfig.enableMetricViewMetadata !== undefined + ) { + log.entry.sql_driver_log.driver_connection_params = { + ...(metric.driverConfig.httpPath && { http_path: metric.driverConfig.httpPath }), + ...(metric.driverConfig.socketTimeout && { socket_timeout: metric.driverConfig.socketTimeout }), + ...(metric.driverConfig.arrowEnabled !== undefined && { enable_arrow: 
metric.driverConfig.arrowEnabled }), + ...(metric.driverConfig.directResultsEnabled !== undefined && { + enable_direct_results: metric.driverConfig.directResultsEnabled, + }), + ...(metric.driverConfig.enableMetricViewMetadata !== undefined && { + enable_metric_view_metadata: metric.driverConfig.enableMetricViewMetadata, + }), + }; + } + + // Include auth type at top level + log.entry.sql_driver_log.auth_type = metric.driverConfig.authType; + } + + // Add metric-specific fields based on proto definition + if (metric.metricType === 'connection') { + // Include connection latency + if (metric.latencyMs !== undefined) { + log.entry.sql_driver_log.operation_latency_ms = metric.latencyMs; + } + + // Include operation type in operation_detail (CREATE_SESSION or DELETE_SESSION) + if (metric.operationType) { + log.entry.sql_driver_log.sql_operation = { + operation_detail: { + operation_type: metric.operationType, + }, + }; + } } else if (metric.metricType === 'statement') { log.entry.sql_driver_log.operation_latency_ms = metric.latencyMs; - if (metric.resultFormat || metric.chunkCount) { + // Only create sql_operation if we have any fields to include + if (metric.operationType || metric.compressed !== undefined || metric.resultFormat || metric.chunkCount) { log.entry.sql_driver_log.sql_operation = { - execution_result: metric.resultFormat, + // Map operationType to statement_type (Statement.Type enum) + statement_type: this.mapOperationToStatementType(metric.operationType), + ...(metric.compressed !== undefined && { is_compressed: metric.compressed }), + ...(metric.resultFormat && { execution_result: metric.resultFormat }), + // Include operation_type in operation_detail + ...(metric.operationType && { + operation_detail: { + operation_type: metric.operationType, + }, + }), }; if (metric.chunkCount && metric.chunkCount > 0) { @@ -372,18 +400,41 @@ export default class DatabricksTelemetryExporter { } } } else if (metric.metricType === 'error') { - const stackOrMessage = 
metric.errorStack ?? metric.errorMessage ?? ''; log.entry.sql_driver_log.error_info = { error_name: metric.errorName || 'UnknownError', - // Redact common secret shapes and cap length. On the unauth path we - // keep only the error class — no message body. - stack_trace: includeCorrelation ? redactSensitive(stackOrMessage) : '', + stack_trace: metric.errorMessage || '', }; } return log; } + /** + * Generate a UUID v4. + */ + private generateUUID(): string { + return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (c) => { + const r = (Math.random() * 16) | 0; + const v = c === 'x' ? r : (r & 0x3) | 0x8; + return v.toString(16); + }); + } + + /** + * Get driver version from package.json. + */ + private getDriverVersion(): string { + try { + // In production, this would read from package.json + return '1.0.0'; + } catch { + return 'unknown'; + } + } + + /** + * Sleep for the specified number of milliseconds. + */ private sleep(ms: number): Promise { return new Promise((resolve) => { setTimeout(resolve, ms); diff --git a/lib/telemetry/FeatureFlagCache.ts b/lib/telemetry/FeatureFlagCache.ts index 06bc59a9..48ef1183 100644 --- a/lib/telemetry/FeatureFlagCache.ts +++ b/lib/telemetry/FeatureFlagCache.ts @@ -14,54 +14,55 @@ * limitations under the License. */ -import fetch, { RequestInit, Response, Request } from 'node-fetch'; +import fetch from 'node-fetch'; import IClientContext from '../contracts/IClientContext'; import { LogLevel } from '../contracts/IDBSQLLogger'; -import IAuthentication from '../connection/contracts/IAuthentication'; -import { buildTelemetryUrl, normalizeHeaders } from './telemetryUtils'; -import buildUserAgentString from '../utils/buildUserAgentString'; import driverVersion from '../version'; +import { buildUrl } from './urlUtils'; +import { CircuitBreakerRegistry } from './CircuitBreaker'; +/** + * Context holding feature flag state for a specific host. + * Stores all feature flags from the server for extensibility. 
+ */ export interface FeatureFlagContext { - telemetryEnabled?: boolean; + flags: Map; // All feature flags from server (extensible for future flags) lastFetched?: Date; refCount: number; - cacheDuration: number; + cacheDuration: number; // 15 minutes in ms } /** - * Per-host feature-flag cache used to gate telemetry emission. Responsibilities: - * - dedupe in-flight fetches (thundering-herd protection); - * - ref-count so context goes away when the last consumer closes; - * - clamp server-provided TTL into a safe band. - * - * Shares HTTP plumbing (agent, user agent) with DatabricksTelemetryExporter. - * Consumer wiring lands in a later PR in this stack (see PR description). + * Manages feature flag cache per host. + * Prevents rate limiting by caching feature flag responses. + * Instance-based, stored in DBSQLClient. */ export default class FeatureFlagCache { private contexts: Map; - private fetchPromises: Map> = new Map(); - - private readonly userAgent: string; - - private readonly CACHE_DURATION_MS = 15 * 60 * 1000; - - private readonly MIN_CACHE_DURATION_S = 60; - - private readonly MAX_CACHE_DURATION_S = 3600; + private readonly CACHE_DURATION_MS = 15 * 60 * 1000; // 15 minutes private readonly FEATURE_FLAG_NAME = 'databricks.partnerplatform.clientConfigsFeatureFlags.enableTelemetryForNodeJs'; - constructor(private context: IClientContext, private authProvider?: IAuthentication) { + private circuitBreakerRegistry: CircuitBreakerRegistry; + + constructor( + private context: IClientContext, + circuitBreakerRegistry?: CircuitBreakerRegistry, + ) { this.contexts = new Map(); - this.userAgent = buildUserAgentString(this.context.getConfig().userAgentEntry); + this.circuitBreakerRegistry = circuitBreakerRegistry || new CircuitBreakerRegistry(context); } + /** + * Gets or creates a feature flag context for the host. + * Increments reference count. 
+ */ getOrCreateContext(host: string): FeatureFlagContext { let ctx = this.contexts.get(host); if (!ctx) { ctx = { + flags: new Map(), refCount: 0, cacheDuration: this.CACHE_DURATION_MS, }; @@ -71,18 +72,29 @@ export default class FeatureFlagCache { return ctx; } + /** + * Decrements reference count for the host. + * Removes context when ref count reaches zero. + */ releaseContext(host: string): void { const ctx = this.contexts.get(host); if (ctx) { ctx.refCount -= 1; if (ctx.refCount <= 0) { this.contexts.delete(host); - this.fetchPromises.delete(host); } } } - async isTelemetryEnabled(host: string): Promise { + /** + * Generic method to check if a feature flag is enabled. + * Uses cached value if available and not expired. + * + * @param host The host to check + * @param flagName The feature flag name to query + * @returns true if flag is enabled (value is "true"), false otherwise + */ + async isFeatureEnabled(host: string, flagName: string): Promise { const logger = this.context.getLogger(); const ctx = this.contexts.get(host); @@ -93,123 +105,116 @@ export default class FeatureFlagCache { const isExpired = !ctx.lastFetched || Date.now() - ctx.lastFetched.getTime() > ctx.cacheDuration; if (isExpired) { - if (!this.fetchPromises.has(host)) { - const fetchPromise = this.fetchFeatureFlag(host) - .then((enabled) => { - ctx.telemetryEnabled = enabled; - ctx.lastFetched = new Date(); - return enabled; - }) - .catch((error: any) => { - logger.log(LogLevel.debug, `Error fetching feature flag: ${error.message}`); - return ctx.telemetryEnabled ?? 
false; - }) - .finally(() => { - this.fetchPromises.delete(host); - }); - this.fetchPromises.set(host, fetchPromise); + try { + // Fetch all feature flags from server with circuit breaker protection + const circuitBreaker = this.circuitBreakerRegistry.getCircuitBreaker(host); + await circuitBreaker.execute(async () => { + await this.fetchFeatureFlags(host); + }); + ctx.lastFetched = new Date(); + } catch (error: any) { + // Log at debug level only, never propagate exceptions + // Circuit breaker OPEN or fetch failed - use cached values + if (error.message === 'Circuit breaker OPEN') { + logger.log(LogLevel.debug, 'Feature flags: Circuit breaker OPEN - using cached values'); + } else { + logger.log(LogLevel.debug, `Error fetching feature flags: ${error.message}`); + } } - - await this.fetchPromises.get(host); } - return ctx.telemetryEnabled ?? false; + // Get flag value and parse as boolean + const value = ctx.flags.get(flagName); + return value?.toLowerCase() === 'true'; } /** - * Strips the `-oss` suffix the feature-flag API does not accept. The server - * keys off the SemVer triplet only, so anything appended would 404. + * Convenience method to check if telemetry is enabled for the host. + * Uses cached value if available and not expired. */ - private getDriverVersion(): string { - return driverVersion.replace(/-oss$/, ''); + async isTelemetryEnabled(host: string): Promise { + return this.isFeatureEnabled(host, this.FEATURE_FLAG_NAME); } - private async fetchFeatureFlag(host: string): Promise { + /** + * Fetches all feature flags from server using connector-service API. + * Calls GET /api/2.0/connector-service/feature-flags/NODEJS/{version} + * Stores all flags in the context for extensibility. 
+ * + * @param host The host to fetch feature flags for + */ + private async fetchFeatureFlags(host: string): Promise { const logger = this.context.getLogger(); try { - const endpoint = buildTelemetryUrl( - host, - `/api/2.0/connector-service/feature-flags/NODEJS/${this.getDriverVersion()}`, - ); - if (!endpoint) { - logger.log(LogLevel.debug, `Feature flag fetch skipped: invalid host ${host}`); - return false; - } + // Get driver version for endpoint + const version = this.getDriverVersion(); - const headers: Record = { - 'Content-Type': 'application/json', - 'User-Agent': this.userAgent, - ...(await this.getAuthHeaders()), - }; + // Build feature flags endpoint for Node.js driver + const endpoint = buildUrl(host, `/api/2.0/connector-service/feature-flags/NODEJS/${version}`); + + const authHeaders = (await this.context.getAuthProvider()?.authenticate()) ?? {}; logger.log(LogLevel.debug, `Fetching feature flags from ${endpoint}`); - const response = await this.fetchWithRetry(endpoint, { + // Get agent with proxy settings (same pattern as CloudFetchResultHandler and DBSQLSession) + const connectionProvider = await this.context.getConnectionProvider(); + const agent = await connectionProvider.getAgent(); + + // Make HTTP GET request with authentication and proxy support + const response = await fetch(endpoint, { method: 'GET', - headers, - timeout: 10000, + headers: { + ...authHeaders, + 'Content-Type': 'application/json', + 'User-Agent': `databricks-sql-nodejs/${driverVersion}`, + }, + agent, // Include agent for proxy support }); if (!response.ok) { - await response.text().catch(() => {}); - logger.log(LogLevel.debug, `Feature flag fetch failed: ${response.status} ${response.statusText}`); - return false; + logger.log(LogLevel.debug, `Feature flags fetch failed: ${response.status} ${response.statusText}`); + return; } + // Parse response JSON const data: any = await response.json(); + // Response format: { flags: [{ name: string, value: string }], ttl_seconds?: 
number } if (data && data.flags && Array.isArray(data.flags)) { const ctx = this.contexts.get(host); - if (ctx && typeof data.ttl_seconds === 'number' && data.ttl_seconds > 0) { - const clampedTtl = Math.max(this.MIN_CACHE_DURATION_S, Math.min(this.MAX_CACHE_DURATION_S, data.ttl_seconds)); - ctx.cacheDuration = clampedTtl * 1000; - logger.log(LogLevel.debug, `Updated cache duration to ${clampedTtl} seconds`); + if (!ctx) { + return; } - const flag = data.flags.find((f: any) => f.name === this.FEATURE_FLAG_NAME); - if (flag) { - const enabled = String(flag.value).toLowerCase() === 'true'; - logger.log(LogLevel.debug, `Feature flag ${this.FEATURE_FLAG_NAME}: ${enabled}`); - return enabled; + // Clear existing flags and store all flags from response + ctx.flags.clear(); + for (const flag of data.flags) { + if (flag.name && flag.value !== undefined) { + ctx.flags.set(flag.name, String(flag.value)); + } } - } - logger.log(LogLevel.debug, `Feature flag ${this.FEATURE_FLAG_NAME} not found in response`); - return false; + logger.log(LogLevel.debug, `Stored ${ctx.flags.size} feature flags from server`); + + // Update cache duration if TTL provided + if (data.ttl_seconds) { + ctx.cacheDuration = data.ttl_seconds * 1000; // Convert to milliseconds + logger.log(LogLevel.debug, `Updated cache duration to ${data.ttl_seconds} seconds`); + } + } } catch (error: any) { - logger.log(LogLevel.debug, `Error fetching feature flag from ${host}: ${error.message}`); - return false; + // Log at debug level only, never propagate exceptions + logger.log(LogLevel.debug, `Error fetching feature flags from ${host}: ${error.message}`); } } /** - * Retries transient network errors once before giving up. Without a retry - * a single hiccup would leave telemetry disabled for the full cache TTL - * (15 min). One retry gives an ephemeral DNS / connection-reset failure - * a second chance without pushing sustained load at a broken endpoint. 
+ * Gets the driver version without -oss suffix for API calls. + * Format: "1.12.0" from "1.12.0-oss" */ - private async fetchWithRetry(url: string, init: RequestInit): Promise { - const connectionProvider = await this.context.getConnectionProvider(); - const agent = await connectionProvider.getAgent(); - const retryPolicy = await connectionProvider.getRetryPolicy(); - const requestConfig: RequestInit = { agent, ...init }; - const result = await retryPolicy.invokeWithRetry(() => { - const request = new Request(url, requestConfig); - return fetch(request).then((response) => ({ request, response })); - }); - return result.response; - } - - private async getAuthHeaders(): Promise> { - if (!this.authProvider) { - return {}; - } - try { - return normalizeHeaders(await this.authProvider.authenticate()); - } catch (error: any) { - this.context.getLogger().log(LogLevel.debug, `Feature flag auth failed: ${error?.message ?? error}`); - return {}; - } + private getDriverVersion(): string { + // Remove -oss suffix if present + return driverVersion.replace(/-oss$/, ''); } } diff --git a/lib/telemetry/MetricsAggregator.ts b/lib/telemetry/MetricsAggregator.ts index d160db10..a9926471 100644 --- a/lib/telemetry/MetricsAggregator.ts +++ b/lib/telemetry/MetricsAggregator.ts @@ -16,9 +16,19 @@ import IClientContext from '../contracts/IClientContext'; import { LogLevel } from '../contracts/IDBSQLLogger'; -import { TelemetryEvent, TelemetryEventType, TelemetryMetric, DEFAULT_TELEMETRY_CONFIG } from './types'; +import { + TelemetryEvent, + TelemetryEventType, + TelemetryMetric, + DriverConfiguration, + DEFAULT_TELEMETRY_CONFIG, +} from './types'; import DatabricksTelemetryExporter from './DatabricksTelemetryExporter'; +import ExceptionClassifier from './ExceptionClassifier'; +/** + * Per-statement telemetry details for aggregation + */ interface StatementTelemetryDetails { statementId: string; sessionId: string; @@ -37,10 +47,17 @@ interface StatementTelemetryDetails { /** * Aggregates 
telemetry events by statement_id and manages batching/flushing. * - * Overflow policy — when the pending buffer hits `maxPendingMetrics`, error - * metrics are preserved preferentially over connection/statement metrics. - * The first-failure error is usually the most valuable signal in post-mortem - * debugging; dropping it FIFO would defeat the purpose of capture. + * Features: + * - Aggregates events by statement_id + * - Connection events emitted immediately (no aggregation) + * - Statement events buffered until completeStatement() called + * - Terminal exceptions flushed immediately + * - Retryable exceptions buffered until statement complete + * - Batch size and periodic timer trigger flushes + * - CRITICAL: All exceptions swallowed and logged at LogLevel.debug ONLY + * - CRITICAL: NO console logging + * + * Follows JDBC TelemetryCollector.java:29-30 pattern. */ export default class MetricsAggregator { private statementMetrics: Map = new Map(); @@ -49,127 +66,154 @@ export default class MetricsAggregator { private flushTimer: NodeJS.Timeout | null = null; - private closed = false; - - private closing = false; - private batchSize: number; private flushIntervalMs: number; - private maxPendingMetrics: number; - - private maxErrorsPerStatement: number; - - private statementTtlMs: number; + private driverConfig?: DriverConfiguration; constructor(private context: IClientContext, private exporter: DatabricksTelemetryExporter) { try { const config = context.getConfig(); this.batchSize = config.telemetryBatchSize ?? DEFAULT_TELEMETRY_CONFIG.batchSize; this.flushIntervalMs = config.telemetryFlushIntervalMs ?? DEFAULT_TELEMETRY_CONFIG.flushIntervalMs; - this.maxPendingMetrics = config.telemetryMaxPendingMetrics ?? DEFAULT_TELEMETRY_CONFIG.maxPendingMetrics; - this.maxErrorsPerStatement = - config.telemetryMaxErrorsPerStatement ?? DEFAULT_TELEMETRY_CONFIG.maxErrorsPerStatement; - this.statementTtlMs = config.telemetryStatementTtlMs ?? 
DEFAULT_TELEMETRY_CONFIG.statementTtlMs; + // Start periodic flush timer this.startFlushTimer(); } catch (error: any) { + // CRITICAL: All exceptions swallowed and logged at debug level ONLY const logger = this.context.getLogger(); logger.log(LogLevel.debug, `MetricsAggregator constructor error: ${error.message}`); + // Initialize with default values this.batchSize = DEFAULT_TELEMETRY_CONFIG.batchSize; this.flushIntervalMs = DEFAULT_TELEMETRY_CONFIG.flushIntervalMs; - this.maxPendingMetrics = DEFAULT_TELEMETRY_CONFIG.maxPendingMetrics; - this.maxErrorsPerStatement = DEFAULT_TELEMETRY_CONFIG.maxErrorsPerStatement; - this.statementTtlMs = DEFAULT_TELEMETRY_CONFIG.statementTtlMs; } } + /** + * Process a telemetry event. Never throws. + * + * @param event - The telemetry event to process + */ processEvent(event: TelemetryEvent): void { - if (this.closed) return; const logger = this.context.getLogger(); try { + // Connection events are emitted immediately (no aggregation) if (event.eventType === TelemetryEventType.CONNECTION_OPEN) { this.processConnectionEvent(event); return; } + if (event.eventType === TelemetryEventType.CONNECTION_CLOSE) { + this.processConnectionCloseEvent(event); + return; + } + + // Error events - check if terminal or retryable if (event.eventType === TelemetryEventType.ERROR) { this.processErrorEvent(event); return; } + // Statement events - buffer until complete if (event.statementId) { this.processStatementEvent(event); } } catch (error: any) { + // CRITICAL: All exceptions swallowed and logged at debug level ONLY logger.log(LogLevel.debug, `MetricsAggregator.processEvent error: ${error.message}`); } } + /** + * Process connection event (emit immediately) + */ private processConnectionEvent(event: TelemetryEvent): void { + // Cache driver config for use in all subsequent metrics + if (event.driverConfig) { + this.driverConfig = event.driverConfig; + } + const metric: TelemetryMetric = { metricType: 'connection', timestamp: event.timestamp, 
sessionId: event.sessionId, workspaceId: event.workspaceId, driverConfig: event.driverConfig, + operationType: 'CREATE_SESSION', + latencyMs: event.latencyMs, }; this.addPendingMetric(metric); } + /** + * Process connection close event (emit immediately) + */ + private processConnectionCloseEvent(event: TelemetryEvent): void { + const metric: TelemetryMetric = { + metricType: 'connection', + timestamp: event.timestamp, + sessionId: event.sessionId, + driverConfig: this.driverConfig, + operationType: 'DELETE_SESSION', + latencyMs: event.latencyMs, + }; + + this.addPendingMetric(metric); + } + + /** + * Process error event (terminal errors flushed immediately, retryable buffered) + */ private processErrorEvent(event: TelemetryEvent): void { const logger = this.context.getLogger(); - // `isTerminal` is carried on the event by the emitter (it knows the - // call site's taxonomy). If callers ever drop it we default to - // retryable — buffering by statement is the safer choice. - const isTerminal = event.isTerminal === true; + // Create error object for classification + const error: any = new Error(event.errorMessage || 'Unknown error'); + error.name = event.errorName || 'UnknownError'; + + // Check if terminal using isTerminal field or ExceptionClassifier + const isTerminal = event.isTerminal ?? 
ExceptionClassifier.isTerminal(error); if (isTerminal) { - logger.log(LogLevel.debug, 'Terminal error detected - flushing immediately'); + // Terminal error - flush immediately + logger.log(LogLevel.debug, `Terminal error detected - flushing immediately`); + // If associated with a statement, complete and flush it if (event.statementId && this.statementMetrics.has(event.statementId)) { const details = this.statementMetrics.get(event.statementId)!; - this.pushBoundedError(details, event); + details.errors.push(event); this.completeStatement(event.statementId); } else { + // Standalone error - emit immediately (include cached driver config for context) const metric: TelemetryMetric = { metricType: 'error', timestamp: event.timestamp, sessionId: event.sessionId, statementId: event.statementId, workspaceId: event.workspaceId, + driverConfig: this.driverConfig, errorName: event.errorName, errorMessage: event.errorMessage, - errorStack: event.errorStack, }; this.addPendingMetric(metric); } - // Fire-and-forget on the terminal-error path so customer code doesn't - // stall on telemetry HTTP. Do NOT reset the periodic flush timer: - // under burst failures that would keep the tail-drain timer from - // ever firing. - Promise.resolve(this.flush(false)).catch((err: any) => { - logger.log(LogLevel.debug, `Terminal-error flush failed: ${err?.message ?? 
err}`); - }); + // Flush immediately for terminal errors + this.flush(); } else if (event.statementId) { + // Retryable error - buffer until statement complete const details = this.getOrCreateStatementDetails(event); - this.pushBoundedError(details, event); - } - } - - private pushBoundedError(details: StatementTelemetryDetails, event: TelemetryEvent): void { - if (details.errors.length >= this.maxErrorsPerStatement) { - details.errors.shift(); + details.errors.push(event); } - details.errors.push(event); } + /** + * Process statement event (buffer until complete) + */ private processStatementEvent(event: TelemetryEvent): void { const details = this.getOrCreateStatementDetails(event); @@ -196,10 +240,14 @@ export default class MetricsAggregator { break; default: + // Unknown event type - ignore break; } } + /** + * Get or create statement details for the given event + */ private getOrCreateStatementDetails(event: TelemetryEvent): StatementTelemetryDetails { const statementId = event.statementId!; @@ -220,41 +268,11 @@ export default class MetricsAggregator { } /** - * Drop entries older than `statementTtlMs`, emitting their buffered error - * events as standalone metrics first so the first-failure signal survives - * the eviction. Called from the periodic flush timer so idle clients - * don't leak orphan entries. + * Complete a statement and prepare it for flushing. Never throws. 
+ * + * @param statementId - The statement ID to complete */ - private evictExpiredStatements(): void { - const cutoff = Date.now() - this.statementTtlMs; - let evicted = 0; - for (const [id, details] of this.statementMetrics) { - if (details.startTime < cutoff) { - for (const errorEvent of details.errors) { - this.addPendingMetric({ - metricType: 'error', - timestamp: errorEvent.timestamp, - sessionId: details.sessionId, - statementId: details.statementId, - workspaceId: details.workspaceId, - errorName: errorEvent.errorName, - errorMessage: errorEvent.errorMessage, - errorStack: errorEvent.errorStack, - }); - } - this.statementMetrics.delete(id); - evicted += 1; - } - } - if (evicted > 0) { - this.context - .getLogger() - .log(LogLevel.debug, `Evicted ${evicted} abandoned statement(s) past ${this.statementTtlMs}ms TTL`); - } - } - completeStatement(statementId: string): void { - if (this.closed) return; const logger = this.context.getLogger(); try { @@ -263,21 +281,26 @@ export default class MetricsAggregator { return; } + // Create statement metric (include cached driver config for context) const metric: TelemetryMetric = { metricType: 'statement', timestamp: details.startTime, sessionId: details.sessionId, statementId: details.statementId, workspaceId: details.workspaceId, + driverConfig: this.driverConfig, + operationType: details.operationType, latencyMs: details.executionLatencyMs, resultFormat: details.resultFormat, chunkCount: details.chunkCount, bytesDownloaded: details.bytesDownloaded, pollCount: details.pollCount, + compressed: details.compressionEnabled, }; this.addPendingMetric(metric); + // Add buffered error metrics (include cached driver config for context) for (const errorEvent of details.errors) { const errorMetric: TelemetryMetric = { metricType: 'error', @@ -285,101 +308,67 @@ export default class MetricsAggregator { sessionId: details.sessionId, statementId: details.statementId, workspaceId: details.workspaceId, + driverConfig: 
this.driverConfig, errorName: errorEvent.errorName, errorMessage: errorEvent.errorMessage, - errorStack: errorEvent.errorStack, }; this.addPendingMetric(errorMetric); } + // Remove from map this.statementMetrics.delete(statementId); } catch (error: any) { + // CRITICAL: All exceptions swallowed and logged at debug level ONLY logger.log(LogLevel.debug, `MetricsAggregator.completeStatement error: ${error.message}`); } } /** - * Append `metric` to the pending buffer, enforcing `maxPendingMetrics`. - * - * Overflow drops the oldest non-error entry (single `splice` — no new - * allocation). Under an all-error buffer it falls back to dropping the - * oldest entry at index 0. + * Add a metric to pending batch and flush if batch size reached */ private addPendingMetric(metric: TelemetryMetric): void { - if (this.closed) return; this.pendingMetrics.push(metric); - if (this.pendingMetrics.length > this.maxPendingMetrics) { - const dropIndex = this.findDropIndex(); - this.pendingMetrics.splice(dropIndex, 1); - const logger = this.context.getLogger(); - logger.log( - LogLevel.debug, - `Dropped 1 oldest non-error telemetry metric (buffer full at ${this.maxPendingMetrics})`, - ); - } - - if (this.pendingMetrics.length >= this.batchSize && !this.closing) { - // resetTimer=false so the periodic tail-drain keeps its cadence even - // under sustained batch-size bursts. - const logger = this.context.getLogger(); - Promise.resolve(this.flush(false)).catch((err: any) => { - logger.log(LogLevel.debug, `Batch-trigger flush failed: ${err?.message ?? err}`); - }); - } - } - - private findDropIndex(): number { - for (let i = 0; i < this.pendingMetrics.length; i += 1) { - if (this.pendingMetrics[i].metricType !== 'error') { - return i; - } + // Check if batch size reached + if (this.pendingMetrics.length >= this.batchSize) { + this.flush(); } - return 0; } /** - * Drain the pending buffer and return a promise that resolves when the - * exporter finishes with the drained batch. 
`close()` awaits this so - * `process.exit()` after `client.close()` doesn't truncate the POST. + * Flush all pending metrics to exporter. Never throws. + * + * @param resetTimer If true, resets the flush timer after flushing (default: true) */ - async flush(resetTimer: boolean = true): Promise { + flush(resetTimer: boolean = true): void { const logger = this.context.getLogger(); - let exportPromise: Promise | null = null; try { if (this.pendingMetrics.length === 0) { - if (resetTimer && !this.closed) { - this.startFlushTimer(); - } return; } - const metricsToExport = this.pendingMetrics; + const metricsToExport = [...this.pendingMetrics]; this.pendingMetrics = []; - logger.log(LogLevel.debug, `Flushing ${metricsToExport.length} telemetry metrics`); + // Export metrics (exporter.export never throws) + this.exporter.export(metricsToExport); - exportPromise = this.exporter.export(metricsToExport); - - if (resetTimer && !this.closed) { + // Reset timer to avoid rapid successive flushes (e.g., batch flush at 25s then timer flush at 30s) + // This ensures consistent spacing between exports and helps avoid rate limiting + if (resetTimer) { this.startFlushTimer(); } } catch (error: any) { + // CRITICAL: All exceptions swallowed and logged at debug level ONLY logger.log(LogLevel.debug, `MetricsAggregator.flush error: ${error.message}`); } - - if (exportPromise) { - try { - await exportPromise; - } catch (err: any) { - logger.log(LogLevel.debug, `Unexpected export error: ${err?.message ?? err}`); - } - } } + /** + * Start the periodic flush timer + */ private startFlushTimer(): void { - if (this.closed) return; const logger = this.context.getLogger(); try { @@ -388,46 +377,40 @@ export default class MetricsAggregator { } this.flushTimer = setInterval(() => { - // Idle eviction: run before the flush so orphan-error metrics have - // a chance to batch into this drain rather than wait for the next. 
- try { - this.evictExpiredStatements(); - } catch (err: any) { - logger.log(LogLevel.debug, `evictExpiredStatements error: ${err?.message ?? err}`); - } - Promise.resolve(this.flush(false)).catch((err: any) => { - logger.log(LogLevel.debug, `Periodic flush failed: ${err?.message ?? err}`); - }); + // Don't reset timer when flush is triggered by the timer itself + this.flush(false); }, this.flushIntervalMs); + // Prevent timer from keeping Node.js process alive this.flushTimer.unref(); } catch (error: any) { + // CRITICAL: All exceptions swallowed and logged at debug level ONLY logger.log(LogLevel.debug, `MetricsAggregator.startFlushTimer error: ${error.message}`); } } - async close(): Promise { + /** + * Close the aggregator and flush remaining metrics. Never throws. + */ + close(): void { const logger = this.context.getLogger(); try { - // Suppress batch-triggered fire-and-forget flushes from addPendingMetric - // so no promises escape past the single awaited flush below. - this.closing = true; - + // Stop flush timer if (this.flushTimer) { clearInterval(this.flushTimer); this.flushTimer = null; } - // closed is still false here so completeStatement → addPendingMetric works normally. 
- const remainingStatements = [...this.statementMetrics.keys()]; - for (const statementId of remainingStatements) { + // Complete any remaining statements + for (const statementId of this.statementMetrics.keys()) { this.completeStatement(statementId); } - this.closed = true; - await this.flush(false); + // Final flush - don't reset timer since we're closing + this.flush(false); } catch (error: any) { + // CRITICAL: All exceptions swallowed and logged at debug level ONLY logger.log(LogLevel.debug, `MetricsAggregator.close error: ${error.message}`); } } diff --git a/lib/telemetry/TelemetryEventEmitter.ts b/lib/telemetry/TelemetryEventEmitter.ts index bbb0b757..8b9e1ef8 100644 --- a/lib/telemetry/TelemetryEventEmitter.ts +++ b/lib/telemetry/TelemetryEventEmitter.ts @@ -45,7 +45,12 @@ export default class TelemetryEventEmitter extends EventEmitter { * * @param data Connection event data including sessionId, workspaceId, and driverConfig */ - emitConnectionOpen(data: { sessionId: string; workspaceId: string; driverConfig: DriverConfiguration }): void { + emitConnectionOpen(data: { + sessionId: string; + workspaceId: string; + driverConfig: DriverConfiguration; + latencyMs: number; + }): void { if (!this.enabled) return; const logger = this.context.getLogger(); @@ -56,6 +61,7 @@ export default class TelemetryEventEmitter extends EventEmitter { sessionId: data.sessionId, workspaceId: data.workspaceId, driverConfig: data.driverConfig, + latencyMs: data.latencyMs, }; this.emit(TelemetryEventType.CONNECTION_OPEN, event); } catch (error: any) { @@ -64,6 +70,29 @@ export default class TelemetryEventEmitter extends EventEmitter { } } + /** + * Emit a connection close event. 
+ * + * @param data Connection close event data including sessionId and latencyMs + */ + emitConnectionClose(data: { sessionId: string; latencyMs: number }): void { + if (!this.enabled) return; + + const logger = this.context.getLogger(); + try { + const event: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_CLOSE, + timestamp: Date.now(), + sessionId: data.sessionId, + latencyMs: data.latencyMs, + }; + this.emit(TelemetryEventType.CONNECTION_CLOSE, event); + } catch (error: any) { + // Swallow all exceptions - log at debug level only + logger.log(LogLevel.debug, `Error emitting connection close event: ${error.message}`); + } + } + /** * Emit a statement start event. * diff --git a/lib/telemetry/telemetryTypeMappers.ts b/lib/telemetry/telemetryTypeMappers.ts new file mode 100644 index 00000000..d022739d --- /dev/null +++ b/lib/telemetry/telemetryTypeMappers.ts @@ -0,0 +1,71 @@ +/** + * Copyright (c) 2025 Databricks Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { TOperationType, TSparkRowSetType } from '../../thrift/TCLIService_types'; + +/** + * Map Thrift TOperationType to telemetry Operation.Type enum string. + * Returns 'TYPE_UNSPECIFIED' if operationType is undefined or unknown. 
+ */ +export function mapOperationTypeToTelemetryType(operationType?: TOperationType): string { + if (operationType === undefined) { + return 'TYPE_UNSPECIFIED'; + } + + switch (operationType) { + case TOperationType.EXECUTE_STATEMENT: + return 'EXECUTE_STATEMENT'; + case TOperationType.GET_TYPE_INFO: + return 'LIST_TYPE_INFO'; + case TOperationType.GET_CATALOGS: + return 'LIST_CATALOGS'; + case TOperationType.GET_SCHEMAS: + return 'LIST_SCHEMAS'; + case TOperationType.GET_TABLES: + return 'LIST_TABLES'; + case TOperationType.GET_TABLE_TYPES: + return 'LIST_TABLE_TYPES'; + case TOperationType.GET_COLUMNS: + return 'LIST_COLUMNS'; + case TOperationType.GET_FUNCTIONS: + return 'LIST_FUNCTIONS'; + case TOperationType.UNKNOWN: + default: + return 'TYPE_UNSPECIFIED'; + } +} + +/** + * Map Thrift TSparkRowSetType to telemetry ExecutionResult.Format enum string. + */ +export function mapResultFormatToTelemetryType(resultFormat?: TSparkRowSetType): string | undefined { + if (resultFormat === undefined) { + return undefined; + } + + switch (resultFormat) { + case TSparkRowSetType.ARROW_BASED_SET: + return 'INLINE_ARROW'; + case TSparkRowSetType.COLUMN_BASED_SET: + return 'COLUMNAR_INLINE'; + case TSparkRowSetType.ROW_BASED_SET: + return 'INLINE_JSON'; + case TSparkRowSetType.URL_BASED_SET: + return 'EXTERNAL_LINKS'; + default: + return 'FORMAT_UNSPECIFIED'; + } +} diff --git a/lib/telemetry/types.ts b/lib/telemetry/types.ts index 6a4a25a9..8b05ddd1 100644 --- a/lib/telemetry/types.ts +++ b/lib/telemetry/types.ts @@ -24,6 +24,7 @@ export const DRIVER_NAME = 'nodejs-sql-driver'; */ export enum TelemetryEventType { CONNECTION_OPEN = 'connection.open', + CONNECTION_CLOSE = 'connection.close', STATEMENT_START = 'statement.start', STATEMENT_COMPLETE = 'statement.complete', CLOUDFETCH_CHUNK = 'cloudfetch.chunk', @@ -78,7 +79,7 @@ export interface TelemetryConfiguration { * Default telemetry configuration values */ export const DEFAULT_TELEMETRY_CONFIG: Readonly> = Object.freeze({ 
- enabled: false, + enabled: true, // Enabled by default, gated by feature flag batchSize: 100, flushIntervalMs: 5000, maxRetries: 3, @@ -178,12 +179,15 @@ export interface TelemetryMetric { /** Workspace ID */ workspaceId?: string; - /** Driver configuration (for connection metrics) */ + /** Driver configuration (included in all metrics for context) */ driverConfig?: DriverConfiguration; /** Execution latency in milliseconds */ latencyMs?: number; + /** Type of operation (SELECT, INSERT, etc.) */ + operationType?: string; + /** Result format (inline, cloudfetch, arrow) */ resultFormat?: string; @@ -196,6 +200,9 @@ export interface TelemetryMetric { /** Number of poll operations */ pollCount?: number; + /** Whether compression was used */ + compressed?: boolean; + /** Error name/type */ errorName?: string; @@ -243,6 +250,9 @@ export interface DriverConfiguration { */ processName: string; + /** Authentication type (pat, external-browser, oauth-m2m, custom) */ + authType: string; + // Feature flags /** Whether CloudFetch is enabled */ cloudFetchEnabled: boolean; @@ -265,6 +275,13 @@ export interface DriverConfiguration { /** Number of concurrent CloudFetch downloads */ cloudFetchConcurrentDownloads: number; + + // Connection parameters for telemetry + /** HTTP path for API calls */ + httpPath?: string; + + /** Whether metric view metadata is enabled */ + enableMetricViewMetadata?: boolean; } /** diff --git a/tests/e2e/telemetry/telemetry-integration.test.ts b/tests/e2e/telemetry/telemetry-integration.test.ts new file mode 100644 index 00000000..c41ebc76 --- /dev/null +++ b/tests/e2e/telemetry/telemetry-integration.test.ts @@ -0,0 +1,374 @@ +/** + * Copyright (c) 2025 Databricks Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { expect } from 'chai'; +import sinon from 'sinon'; +import { DBSQLClient } from '../../../lib'; +import config from '../utils/config'; +import FeatureFlagCache from '../../../lib/telemetry/FeatureFlagCache'; +import TelemetryClientProvider from '../../../lib/telemetry/TelemetryClientProvider'; +import TelemetryEventEmitter from '../../../lib/telemetry/TelemetryEventEmitter'; +import MetricsAggregator from '../../../lib/telemetry/MetricsAggregator'; + +describe('Telemetry Integration', () => { + describe('Initialization', () => { + it('should initialize telemetry when telemetryEnabled is true', async function () { + this.timeout(30000); + + const client = new DBSQLClient(); + + // Spy on initialization components + const featureFlagCacheSpy = sinon.spy(FeatureFlagCache.prototype, 'getOrCreateContext'); + const telemetryProviderSpy = sinon.spy(TelemetryClientProvider.prototype, 'getOrCreateClient'); + + try { + await client.connect({ + host: config.host, + path: config.path, + token: config.token, + telemetryEnabled: true, + }); + + // Verify telemetry components were initialized + expect(featureFlagCacheSpy.called).to.be.true; + + await client.close(); + } finally { + featureFlagCacheSpy.restore(); + telemetryProviderSpy.restore(); + } + }); + + it('should not initialize telemetry when telemetryEnabled is false', async function () { + this.timeout(30000); + + const client = new DBSQLClient(); + + const featureFlagCacheSpy = sinon.spy(FeatureFlagCache.prototype, 'getOrCreateContext'); + + try { + await client.connect({ + host: 
config.host, + path: config.path, + token: config.token, + telemetryEnabled: false, + }); + + // Verify telemetry was not initialized + expect(featureFlagCacheSpy.called).to.be.false; + + await client.close(); + } finally { + featureFlagCacheSpy.restore(); + } + }); + + it('should respect feature flag when telemetry is enabled', async function () { + this.timeout(30000); + + const client = new DBSQLClient(); + + // Stub feature flag to return false + const featureFlagStub = sinon.stub(FeatureFlagCache.prototype, 'isTelemetryEnabled').resolves(false); + + try { + await client.connect({ + host: config.host, + path: config.path, + token: config.token, + telemetryEnabled: true, + }); + + // Verify feature flag was checked + expect(featureFlagStub.called).to.be.true; + + await client.close(); + } finally { + featureFlagStub.restore(); + } + }); + }); + + describe('Reference Counting', () => { + it('should share telemetry client across multiple connections to same host', async function () { + this.timeout(60000); + + const client1 = new DBSQLClient(); + const client2 = new DBSQLClient(); + + const getOrCreateClientSpy = sinon.spy(TelemetryClientProvider.prototype, 'getOrCreateClient'); + const releaseClientSpy = sinon.spy(TelemetryClientProvider.prototype, 'releaseClient'); + + try { + // Enable telemetry for both clients + await client1.connect({ + host: config.host, + path: config.path, + token: config.token, + telemetryEnabled: true, + }); + + await client2.connect({ + host: config.host, + path: config.path, + token: config.token, + telemetryEnabled: true, + }); + + // Both clients should get the same telemetry client for the host + expect(getOrCreateClientSpy.callCount).to.be.at.least(2); + + // Close first client + await client1.close(); + expect(releaseClientSpy.callCount).to.be.at.least(1); + + // Close second client + await client2.close(); + expect(releaseClientSpy.callCount).to.be.at.least(2); + } finally { + getOrCreateClientSpy.restore(); + 
releaseClientSpy.restore(); + } + }); + + it('should cleanup telemetry on close', async function () { + this.timeout(30000); + + const client = new DBSQLClient(); + + const releaseClientSpy = sinon.spy(TelemetryClientProvider.prototype, 'releaseClient'); + const releaseContextSpy = sinon.spy(FeatureFlagCache.prototype, 'releaseContext'); + const flushSpy = sinon.spy(MetricsAggregator.prototype, 'flush'); + + try { + await client.connect({ + host: config.host, + path: config.path, + token: config.token, + telemetryEnabled: true, + }); + + await client.close(); + + // Verify cleanup was called + expect(releaseClientSpy.called || flushSpy.called || releaseContextSpy.called).to.be.true; + } finally { + releaseClientSpy.restore(); + releaseContextSpy.restore(); + flushSpy.restore(); + } + }); + }); + + describe('Error Handling', () => { + it('should continue driver operation when telemetry initialization fails', async function () { + this.timeout(30000); + + const client = new DBSQLClient(); + + // Stub feature flag to throw an error + const featureFlagStub = sinon + .stub(FeatureFlagCache.prototype, 'isTelemetryEnabled') + .rejects(new Error('Feature flag fetch failed')); + + try { + // Connection should succeed even if telemetry fails + await client.connect({ + host: config.host, + path: config.path, + token: config.token, + telemetryEnabled: true, + }); + + // Should be able to open a session + const session = await client.openSession({ + initialCatalog: config.catalog, + initialSchema: config.schema, + }); + + // Should be able to execute a query + const operation = await session.executeStatement('SELECT 1 AS test'); + const result = await operation.fetchAll(); + + expect(result).to.have.lengthOf(1); + expect(result[0]).to.deep.equal({ test: 1 }); + + await session.close(); + await client.close(); + } finally { + featureFlagStub.restore(); + } + }); + + it('should continue driver operation when feature flag fetch fails', async function () { + this.timeout(30000); + 
+ const client = new DBSQLClient(); + + // Stub getOrCreateContext to throw + const contextStub = sinon + .stub(FeatureFlagCache.prototype, 'getOrCreateContext') + .throws(new Error('Context creation failed')); + + try { + // Connection should succeed even if telemetry fails + await client.connect({ + host: config.host, + path: config.path, + token: config.token, + telemetryEnabled: true, + }); + + // Should be able to open a session + const session = await client.openSession({ + initialCatalog: config.catalog, + initialSchema: config.schema, + }); + + await session.close(); + await client.close(); + } finally { + contextStub.restore(); + } + }); + + it('should not throw exceptions due to telemetry errors', async function () { + this.timeout(30000); + + const client = new DBSQLClient(); + + // Stub multiple telemetry methods to throw + const emitterStub = sinon + .stub(TelemetryEventEmitter.prototype, 'emitConnectionOpen') + .throws(new Error('Emitter failed')); + const aggregatorStub = sinon + .stub(MetricsAggregator.prototype, 'processEvent') + .throws(new Error('Aggregator failed')); + + try { + // Connection should not throw + await client.connect({ + host: config.host, + path: config.path, + token: config.token, + telemetryEnabled: true, + }); + + // Driver operations should work normally + const session = await client.openSession({ + initialCatalog: config.catalog, + initialSchema: config.schema, + }); + + await session.close(); + await client.close(); + } finally { + emitterStub.restore(); + aggregatorStub.restore(); + } + }); + }); + + describe('Configuration', () => { + it('should read telemetry config from ClientConfig', async function () { + this.timeout(30000); + + const client = new DBSQLClient(); + const clientConfig = client.getConfig(); + + // Verify default telemetry config exists + expect(clientConfig).to.have.property('telemetryEnabled'); + expect(clientConfig).to.have.property('telemetryBatchSize'); + 
expect(clientConfig).to.have.property('telemetryFlushIntervalMs'); + expect(clientConfig).to.have.property('telemetryMaxRetries'); + expect(clientConfig).to.have.property('telemetryAuthenticatedExport'); + expect(clientConfig).to.have.property('telemetryCircuitBreakerThreshold'); + expect(clientConfig).to.have.property('telemetryCircuitBreakerTimeout'); + + // Verify default values + expect(clientConfig.telemetryEnabled).to.equal(true); // Enabled by default, gated by feature flag + expect(clientConfig.telemetryBatchSize).to.equal(100); + expect(clientConfig.telemetryFlushIntervalMs).to.equal(5000); + expect(clientConfig.telemetryMaxRetries).to.equal(3); + expect(clientConfig.telemetryAuthenticatedExport).to.equal(true); + expect(clientConfig.telemetryCircuitBreakerThreshold).to.equal(5); + expect(clientConfig.telemetryCircuitBreakerTimeout).to.equal(60000); + }); + + it('should allow override via ConnectionOptions', async function () { + this.timeout(30000); + + const client = new DBSQLClient(); + + // Default should be true + expect(client.getConfig().telemetryEnabled).to.equal(true); + + try { + // Override to false + await client.connect({ + host: config.host, + path: config.path, + token: config.token, + telemetryEnabled: false, + }); + + // Config should be updated + expect(client.getConfig().telemetryEnabled).to.equal(false); + + await client.close(); + } catch (error) { + // Clean up even if test fails + await client.close(); + throw error; + } + }); + }); + + describe('End-to-End Telemetry Flow', () => { + it('should emit events during driver operations when telemetry is enabled', async function () { + this.timeout(30000); + + const client = new DBSQLClient(); + + const emitSpy = sinon.spy(TelemetryEventEmitter.prototype, 'emit'); + + try { + await client.connect({ + host: config.host, + path: config.path, + token: config.token, + telemetryEnabled: true, + }); + + const session = await client.openSession({ + initialCatalog: config.catalog, + initialSchema: config.schema, + }); + 
+ const operation = await session.executeStatement('SELECT 1 AS test'); + await operation.fetchAll(); + + // Events may or may not be emitted depending on feature flag + // But the driver should work regardless + + await session.close(); + await client.close(); + } finally { + emitSpy.restore(); + } + }); + }); +}); diff --git a/tests/unit/.stubs/ClientContextStub.ts b/tests/unit/.stubs/ClientContextStub.ts index 519316ff..f2559231 100644 --- a/tests/unit/.stubs/ClientContextStub.ts +++ b/tests/unit/.stubs/ClientContextStub.ts @@ -3,6 +3,7 @@ import IConnectionProvider from '../../../lib/connection/contracts/IConnectionPr import IDriver from '../../../lib/contracts/IDriver'; import IThriftClient from '../../../lib/contracts/IThriftClient'; import IDBSQLLogger from '../../../lib/contracts/IDBSQLLogger'; +import IAuthentication from '../../../lib/connection/contracts/IAuthentication'; import DBSQLClient from '../../../lib/DBSQLClient'; import LoggerStub from './LoggerStub'; @@ -48,4 +49,8 @@ export default class ClientContextStub implements IClientContext { public async getDriver(): Promise { return this.driver; } + + public getAuthProvider(): IAuthentication | undefined { + return undefined; + } } diff --git a/tests/unit/.stubs/TelemetryExporterStub.ts b/tests/unit/.stubs/TelemetryExporterStub.ts new file mode 100644 index 00000000..50571916 --- /dev/null +++ b/tests/unit/.stubs/TelemetryExporterStub.ts @@ -0,0 +1,65 @@ +/** + * Copyright (c) 2025 Databricks Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { TelemetryMetric } from '../../../lib/telemetry/types'; + +/** + * Stub implementation of DatabricksTelemetryExporter for testing. + * Records exported metrics for verification in tests. + */ +export default class TelemetryExporterStub { + public exportedMetrics: TelemetryMetric[][] = []; + public exportCount = 0; + public shouldThrow = false; + public throwError: Error | null = null; + + /** + * Stub export method that records metrics. + */ + async export(metrics: TelemetryMetric[]): Promise { + this.exportCount++; + this.exportedMetrics.push([...metrics]); + + if (this.shouldThrow && this.throwError) { + throw this.throwError; + } + } + + /** + * Reset the stub state. + */ + reset(): void { + this.exportedMetrics = []; + this.exportCount = 0; + this.shouldThrow = false; + this.throwError = null; + } + + /** + * Get all exported metrics flattened. + */ + getAllExportedMetrics(): TelemetryMetric[] { + return this.exportedMetrics.flat(); + } + + /** + * Configure stub to throw an error on export. 
+ */ + throwOnExport(error: Error): void { + this.shouldThrow = true; + this.throwError = error; + } +} diff --git a/tests/unit/telemetry/DatabricksTelemetryExporter.test.ts b/tests/unit/telemetry/DatabricksTelemetryExporter.test.ts index fb347bf6..435112c4 100644 --- a/tests/unit/telemetry/DatabricksTelemetryExporter.test.ts +++ b/tests/unit/telemetry/DatabricksTelemetryExporter.test.ts @@ -17,482 +17,621 @@ import { expect } from 'chai'; import sinon from 'sinon'; import DatabricksTelemetryExporter from '../../../lib/telemetry/DatabricksTelemetryExporter'; -import { CircuitBreakerRegistry } from '../../../lib/telemetry/CircuitBreaker'; +import { CircuitBreakerRegistry, CircuitBreakerState } from '../../../lib/telemetry/CircuitBreaker'; import { TelemetryMetric } from '../../../lib/telemetry/types'; import ClientContextStub from '../.stubs/ClientContextStub'; import { LogLevel } from '../../../lib/contracts/IDBSQLLogger'; -import IAuthentication from '../../../lib/connection/contracts/IAuthentication'; -const fakeAuthProvider: IAuthentication = { - authenticate: async () => ({ Authorization: 'Bearer test-token' }), -}; - -function makeMetric(overrides: Partial = {}): TelemetryMetric { - return { - metricType: 'connection', - timestamp: Date.now(), - sessionId: 'session-1', - ...overrides, - }; -} +describe('DatabricksTelemetryExporter', () => { + let context: ClientContextStub; + let circuitBreakerRegistry: CircuitBreakerRegistry; + let exporter: DatabricksTelemetryExporter; + let fetchStub: sinon.SinonStub; + let logSpy: sinon.SinonSpy; -function makeOkResponse() { - return Promise.resolve({ ok: true, status: 200, statusText: 'OK', text: () => Promise.resolve('') }); -} + beforeEach(() => { + context = new ClientContextStub({ + telemetryAuthenticatedExport: true, + telemetryMaxRetries: 3, + }); + circuitBreakerRegistry = new CircuitBreakerRegistry(context); -function makeErrorResponse(status: number, statusText: string) { - return Promise.resolve({ ok: false, 
status, statusText, text: () => Promise.resolve('') }); -} + // Create fetch stub + fetchStub = sinon.stub(); -describe('DatabricksTelemetryExporter', () => { - let clock: sinon.SinonFakeTimers; + // Create exporter with injected fetch function + exporter = new DatabricksTelemetryExporter( + context, + 'test.databricks.com', + circuitBreakerRegistry, + fetchStub as any, + ); - beforeEach(() => { - clock = sinon.useFakeTimers(); + // Spy on logger + logSpy = sinon.spy(context.logger, 'log'); }); afterEach(() => { - clock.restore(); sinon.restore(); }); - describe('export() - basic', () => { - it('should return immediately for empty metrics array', async () => { - const context = new ClientContextStub(); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); - const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); - - await exporter.export([]); - - expect(sendRequestStub.called).to.be.false; + describe('Constructor', () => { + it('should create exporter with IClientContext', () => { + expect(exporter).to.be.instanceOf(DatabricksTelemetryExporter); }); - it('should call sendRequest with correct endpoint for authenticated export', async () => { - const context = new ClientContextStub({ telemetryAuthenticatedExport: true } as any); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); - const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); - - await exporter.export([makeMetric()]); + it('should create circuit breaker for host', () => { + const breaker = circuitBreakerRegistry.getCircuitBreaker('test.databricks.com'); + expect(breaker.getState()).to.equal(CircuitBreakerState.CLOSED); + }); + }); - expect(sendRequestStub.calledOnce).to.be.true; - const url = 
sendRequestStub.firstCall.args[0] as string; - expect(url).to.include('telemetry-ext'); - expect(url).to.include('https://'); + describe('export() - endpoint selection', () => { + it('should export to authenticated endpoint when enabled', async () => { + const metrics: TelemetryMetric[] = [ + { + metricType: 'connection', + timestamp: Date.now(), + sessionId: 'session-1', + workspaceId: 'ws-1', + }, + ]; + + fetchStub.resolves({ + ok: true, + status: 200, + statusText: 'OK', + }); + + await exporter.export(metrics); + + expect(fetchStub.calledOnce).to.be.true; + const call = fetchStub.getCall(0); + expect(call.args[0]).to.equal('https://test.databricks.com/telemetry-ext'); }); - it('should call sendRequest with unauthenticated endpoint when configured', async () => { - const context = new ClientContextStub({ telemetryAuthenticatedExport: false } as any); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); - const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); + it('should export to unauthenticated endpoint when disabled', async () => { + context = new ClientContextStub({ + telemetryAuthenticatedExport: false, + telemetryMaxRetries: 3, + }); + + // Create new exporter with updated context and inject fetchStub + exporter = new DatabricksTelemetryExporter( + context, + 'test.databricks.com', + circuitBreakerRegistry, + fetchStub as any, + ); + + const metrics: TelemetryMetric[] = [ + { + metricType: 'connection', + timestamp: Date.now(), + sessionId: 'session-1', + workspaceId: 'ws-1', + }, + ]; + + fetchStub.resolves({ + ok: true, + status: 200, + statusText: 'OK', + }); + + await exporter.export(metrics); + + expect(fetchStub.calledOnce).to.be.true; + const call = fetchStub.getCall(0); + expect(call.args[0]).to.equal('https://test.databricks.com/telemetry-unauth'); + }); + }); - await 
exporter.export([makeMetric()]); + describe('export() - payload format', () => { + it('should format connection metric correctly', async () => { + const metrics: TelemetryMetric[] = [ + { + metricType: 'connection', + timestamp: 1234567890, + sessionId: 'session-1', + workspaceId: 'ws-1', + driverConfig: { + driverVersion: '1.0.0', + driverName: 'databricks-sql-nodejs', + nodeVersion: 'v16.0.0', + platform: 'linux', + osVersion: 'Ubuntu 20.04', + osArch: 'x64', + runtimeVendor: 'Node.js Foundation', + localeName: 'en_US', + charSetEncoding: 'UTF-8', + processName: 'node', + authType: 'pat', + cloudFetchEnabled: true, + lz4Enabled: true, + arrowEnabled: false, + directResultsEnabled: true, + socketTimeout: 3000, + retryMaxAttempts: 3, + cloudFetchConcurrentDownloads: 10, + }, + }, + ]; - const url = sendRequestStub.firstCall.args[0] as string; - expect(url).to.include('telemetry-unauth'); - }); + fetchStub.resolves({ + ok: true, + status: 200, + statusText: 'OK', + }); - it('should preserve host protocol if already set', async () => { - const context = new ClientContextStub({ telemetryAuthenticatedExport: true } as any); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, 'https://host.example.com', registry, fakeAuthProvider); - const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); + await exporter.export(metrics); - await exporter.export([makeMetric()]); + expect(fetchStub.calledOnce).to.be.true; + const call = fetchStub.getCall(0); + const body = JSON.parse(call.args[1].body); - const url = sendRequestStub.firstCall.args[0] as string; - expect(url).to.equal('https://host.example.com/telemetry-ext'); + expect(body.protoLogs).to.have.lengthOf(1); + const log = JSON.parse(body.protoLogs[0]); + expect(log.entry.sql_driver_log.session_id).to.equal('session-1'); + expect(log.entry.sql_driver_log.system_configuration).to.deep.equal( + metrics[0].driverConfig + ? 
{ + driver_version: metrics[0].driverConfig.driverVersion, + driver_name: metrics[0].driverConfig.driverName, + runtime_name: 'Node.js', + runtime_version: metrics[0].driverConfig.nodeVersion, + runtime_vendor: metrics[0].driverConfig.runtimeVendor, + os_name: metrics[0].driverConfig.platform, + os_version: metrics[0].driverConfig.osVersion, + os_arch: metrics[0].driverConfig.osArch, + locale_name: metrics[0].driverConfig.localeName, + char_set_encoding: metrics[0].driverConfig.charSetEncoding, + process_name: metrics[0].driverConfig.processName, + } + : undefined, + ); }); - it('should never throw even when sendRequest fails', async () => { - const context = new ClientContextStub(); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); - sinon.stub(exporter as any, 'sendRequest').rejects(new Error('network error')); - - let threw = false; - try { - await exporter.export([makeMetric()]); - } catch { - threw = true; - } - expect(threw).to.be.false; + it('should format statement metric correctly', async () => { + const metrics: TelemetryMetric[] = [ + { + metricType: 'statement', + timestamp: 1234567890, + sessionId: 'session-1', + statementId: 'stmt-1', + workspaceId: 'ws-1', + latencyMs: 1500, + resultFormat: 'cloudfetch', + chunkCount: 5, + bytesDownloaded: 1024000, + }, + ]; + + fetchStub.resolves({ + ok: true, + status: 200, + statusText: 'OK', + }); + + await exporter.export(metrics); + + expect(fetchStub.calledOnce).to.be.true; + const call = fetchStub.getCall(0); + const body = JSON.parse(call.args[1].body); + + expect(body.protoLogs).to.have.lengthOf(1); + const log = JSON.parse(body.protoLogs[0]); + expect(log.entry.sql_driver_log.session_id).to.equal('session-1'); + expect(log.entry.sql_driver_log.sql_statement_id).to.equal('stmt-1'); + expect(log.entry.sql_driver_log.operation_latency_ms).to.equal(1500); + 
expect(log.entry.sql_driver_log.sql_operation.execution_result).to.equal('cloudfetch'); + expect(log.entry.sql_driver_log.sql_operation.chunk_details.total_chunks_present).to.equal(5); + expect(log.entry.sql_driver_log.sql_operation.chunk_details.total_chunks_iterated).to.equal(5); }); - }); - describe('export() - retry logic', () => { - it('should retry on retryable HTTP errors (503)', async () => { - const context = new ClientContextStub({ telemetryMaxRetries: 2 } as any); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); - // Fail twice with 503, then succeed - const sendRequestStub = sinon - .stub(exporter as any, 'sendRequest') - .onFirstCall() - .returns(makeErrorResponse(503, 'Service Unavailable')) - .onSecondCall() - .returns(makeErrorResponse(503, 'Service Unavailable')) - .onThirdCall() - .returns(makeOkResponse()); - - // Advance fake timers automatically for sleep calls - const exportPromise = exporter.export([makeMetric()]); - await clock.runAllAsync(); - await exportPromise; - - expect(sendRequestStub.callCount).to.equal(3); + it('should format error metric correctly', async () => { + const metrics: TelemetryMetric[] = [ + { + metricType: 'error', + timestamp: 1234567890, + sessionId: 'session-1', + statementId: 'stmt-1', + workspaceId: 'ws-1', + errorName: 'AuthenticationError', + errorMessage: 'Invalid credentials', + }, + ]; + + fetchStub.resolves({ + ok: true, + status: 200, + statusText: 'OK', + }); + + await exporter.export(metrics); + + expect(fetchStub.calledOnce).to.be.true; + const call = fetchStub.getCall(0); + const body = JSON.parse(call.args[1].body); + + expect(body.protoLogs).to.have.lengthOf(1); + const log = JSON.parse(body.protoLogs[0]); + expect(log.entry.sql_driver_log.error_info.error_name).to.equal('AuthenticationError'); + expect(log.entry.sql_driver_log.error_info.stack_trace).to.equal('Invalid credentials'); }); 
- it('should not retry on terminal HTTP errors (400)', async () => { - const context = new ClientContextStub({ telemetryMaxRetries: 3 } as any); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); - const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeErrorResponse(400, 'Bad Request')); - - await exporter.export([makeMetric()]); + it('should include workspace_id, session_id, and sql_statement_id', async () => { + const metrics: TelemetryMetric[] = [ + { + metricType: 'statement', + timestamp: Date.now(), + sessionId: 'session-123', + statementId: 'stmt-456', + workspaceId: 'ws-789', + latencyMs: 100, + }, + ]; + + fetchStub.resolves({ + ok: true, + status: 200, + statusText: 'OK', + }); + + await exporter.export(metrics); + + const call = fetchStub.getCall(0); + const body = JSON.parse(call.args[1].body); + const log = JSON.parse(body.protoLogs[0]); - // Only one call — no retry on terminal error - expect(sendRequestStub.callCount).to.equal(1); + expect(log.entry.sql_driver_log.session_id).to.equal('session-123'); + expect(log.entry.sql_driver_log.sql_statement_id).to.equal('stmt-456'); }); + }); - it('should not retry on terminal HTTP errors (401)', async () => { - const context = new ClientContextStub({ telemetryMaxRetries: 3 } as any); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); - const sendRequestStub = sinon - .stub(exporter as any, 'sendRequest') - .returns(makeErrorResponse(401, 'Unauthorized')); - - await exporter.export([makeMetric()]); + describe('export() - retry logic', () => { + it('should retry on retryable error (429)', async () => { + const metrics: TelemetryMetric[] = [ + { + metricType: 'connection', + timestamp: Date.now(), + sessionId: 'session-1', + }, + ]; + + // First call fails with 
429, second succeeds + fetchStub.onFirstCall().resolves({ + ok: false, + status: 429, + statusText: 'Too Many Requests', + }); + fetchStub.onSecondCall().resolves({ + ok: true, + status: 200, + statusText: 'OK', + }); + + await exporter.export(metrics); + + expect(fetchStub.callCount).to.equal(2); + }); - expect(sendRequestStub.callCount).to.equal(1); + it('should retry on retryable error (500)', async () => { + const metrics: TelemetryMetric[] = [ + { + metricType: 'connection', + timestamp: Date.now(), + sessionId: 'session-1', + }, + ]; + + fetchStub.onFirstCall().resolves({ + ok: false, + status: 500, + statusText: 'Internal Server Error', + }); + fetchStub.onSecondCall().resolves({ + ok: true, + status: 200, + statusText: 'OK', + }); + + await exporter.export(metrics); + + expect(fetchStub.callCount).to.equal(2); }); - it('should give up after maxRetries are exhausted', async () => { - const context = new ClientContextStub({ telemetryMaxRetries: 2 } as any); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); - const sendRequestStub = sinon - .stub(exporter as any, 'sendRequest') - .returns(makeErrorResponse(503, 'Service Unavailable')); + it('should not retry on terminal error (400)', async () => { + const metrics: TelemetryMetric[] = [ + { + metricType: 'connection', + timestamp: Date.now(), + sessionId: 'session-1', + }, + ]; - const exportPromise = exporter.export([makeMetric()]); - await clock.runAllAsync(); - await exportPromise; + fetchStub.resolves({ + ok: false, + status: 400, + statusText: 'Bad Request', + }); - // 1 initial + 2 retries = 3 total calls - expect(sendRequestStub.callCount).to.equal(3); - }); - }); + await exporter.export(metrics); - describe('export() - circuit breaker integration', () => { - it('should drop telemetry when circuit breaker is OPEN', async () => { - // maxRetries: 0 avoids sleep delays; failureThreshold: 1 
trips the breaker on first failure - const context = new ClientContextStub({ telemetryMaxRetries: 0 } as any); - const registry = new CircuitBreakerRegistry(context); - registry.getCircuitBreaker('host.example.com', { failureThreshold: 1 }); - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); - const sendRequestStub = sinon - .stub(exporter as any, 'sendRequest') - .returns(makeErrorResponse(503, 'Service Unavailable')); - - // Trip the circuit breaker (1 non-retryable-pathway failure is enough) - await exporter.export([makeMetric()]); - sendRequestStub.reset(); - - // Now circuit is OPEN, export should be dropped without calling sendRequest - await exporter.export([makeMetric()]); - - expect(sendRequestStub.called).to.be.false; + // Should only be called once (no retry) + expect(fetchStub.callCount).to.equal(1); }); - it('should log at debug level when circuit is OPEN', async () => { - const context = new ClientContextStub({ telemetryMaxRetries: 0 } as any); - const logSpy = sinon.spy((context as any).logger, 'log'); - const registry = new CircuitBreakerRegistry(context); - registry.getCircuitBreaker('host.example.com', { failureThreshold: 1 }); - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); - sinon.stub(exporter as any, 'sendRequest').returns(makeErrorResponse(503, 'Service Unavailable')); + it('should not retry on terminal error (401)', async () => { + const metrics: TelemetryMetric[] = [ + { + metricType: 'connection', + timestamp: Date.now(), + sessionId: 'session-1', + }, + ]; - await exporter.export([makeMetric()]); - logSpy.resetHistory(); + fetchStub.resolves({ + ok: false, + status: 401, + statusText: 'Unauthorized', + }); - await exporter.export([makeMetric()]); + await exporter.export(metrics); - expect(logSpy.calledWith(LogLevel.debug, sinon.match(/Circuit breaker OPEN/))).to.be.true; + expect(fetchStub.callCount).to.equal(1); }); - 
}); - describe('export() - payload format', () => { - it('should send POST request with JSON content-type', async () => { - const context = new ClientContextStub(); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); - const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); - - await exporter.export([makeMetric()]); - - const options = sendRequestStub.firstCall.args[1] as any; - expect(options.method).to.equal('POST'); - expect(options.headers['Content-Type']).to.equal('application/json'); + it('should respect max retry limit', async () => { + const metrics: TelemetryMetric[] = [ + { + metricType: 'connection', + timestamp: Date.now(), + sessionId: 'session-1', + }, + ]; + + // Always fail with retryable error + fetchStub.resolves({ + ok: false, + status: 503, + statusText: 'Service Unavailable', + }); + + await exporter.export(metrics); + + // Should try initial + 3 retries = 4 total + expect(fetchStub.callCount).to.equal(4); }); - it('should include protoLogs in payload body', async () => { - const context = new ClientContextStub(); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); - const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); - - await exporter.export([makeMetric(), makeMetric()]); - - const body = JSON.parse((sendRequestStub.firstCall.args[1] as any).body); - expect(body.protoLogs).to.be.an('array').with.length(2); - expect(body.items).to.be.an('array').that.is.empty; - expect(body.uploadTime).to.be.a('number'); + it('should use exponential backoff with jitter', async () => { + const metrics: TelemetryMetric[] = [ + { + metricType: 'connection', + timestamp: Date.now(), + sessionId: 'session-1', + }, + ]; + + // Mock all failures to test retry 
behavior + fetchStub.callsFake(() => { + return Promise.resolve({ + ok: false, + status: 503, + statusText: 'Service Unavailable', + }); + }); + + await exporter.export(metrics); + + // Should have multiple attempts (initial + retries) + expect(fetchStub.callCount).to.be.greaterThan(1); }); }); - describe('logging level compliance', () => { - it('should only log at debug level', async () => { - const context = new ClientContextStub(); - const logSpy = sinon.spy((context as any).logger, 'log'); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); - sinon.stub(exporter as any, 'sendRequest').rejects(new Error('something went wrong')); + describe('export() - circuit breaker integration', () => { + it('should use circuit breaker for endpoint protection', async () => { + const metrics: TelemetryMetric[] = [ + { + metricType: 'connection', + timestamp: Date.now(), + sessionId: 'session-1', + }, + ]; + + fetchStub.resolves({ + ok: true, + status: 200, + statusText: 'OK', + }); - const exportPromise = exporter.export([makeMetric()]); - await clock.runAllAsync(); - await exportPromise; + await exporter.export(metrics); - expect(logSpy.neverCalledWith(LogLevel.error, sinon.match.any)).to.be.true; - // Note: circuit breaker logs at warn level when transitioning to OPEN, which is expected + const breaker = circuitBreakerRegistry.getCircuitBreaker('test.databricks.com'); + expect(breaker.getState()).to.equal(CircuitBreakerState.CLOSED); }); - }); - describe('Authorization header flow', () => { - it('sends Authorization header returned by the auth provider on authenticated export', async () => { - const context = new ClientContextStub({ telemetryAuthenticatedExport: true } as any); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); - const sendRequestStub = 
sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); + it('should handle circuit breaker OPEN state gracefully', async () => { + const metrics: TelemetryMetric[] = [ + { + metricType: 'connection', + timestamp: Date.now(), + sessionId: 'session-1', + }, + ]; + + // Trigger circuit breaker to open + const breaker = circuitBreakerRegistry.getCircuitBreaker('test.databricks.com'); + fetchStub.rejects(new Error('Network failure')); + + for (let i = 0; i < 5; i++) { + try { + await breaker.execute(async () => { + throw new Error('Network failure'); + }); + } catch { + // Expected + } + } + + expect(breaker.getState()).to.equal(CircuitBreakerState.OPEN); - await exporter.export([makeMetric()]); + // Now export should be dropped without error + await exporter.export(metrics); - const init = sendRequestStub.firstCall.args[1] as any; - expect(init.headers.Authorization).to.equal('Bearer test-token'); + // Should log circuit breaker OPEN + expect(logSpy.calledWith(LogLevel.debug, 'Circuit breaker OPEN - dropping telemetry')).to.be.true; }); + }); - it('drops the batch when authenticated export is requested but auth returns no header', async () => { - const context = new ClientContextStub({ telemetryAuthenticatedExport: true, telemetryMaxRetries: 0 } as any); - const registry = new CircuitBreakerRegistry(context); - const emptyAuth = { authenticate: async () => ({}) }; - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, emptyAuth as any); - const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); + describe('export() - exception handling', () => { + it('CRITICAL: should never throw on network failure', async () => { + const metrics: TelemetryMetric[] = [ + { + metricType: 'connection', + timestamp: Date.now(), + sessionId: 'session-1', + }, + ]; - await exporter.export([makeMetric()]); + fetchStub.rejects(new Error('Network failure')); - expect(sendRequestStub.called).to.be.false; - 
}); + // Should not throw + await exporter.export(metrics); - it('warns exactly once across consecutive auth-missing drops', async () => { - const context = new ClientContextStub({ telemetryAuthenticatedExport: true, telemetryMaxRetries: 0 } as any); - const logSpy = sinon.spy((context as any).logger, 'log'); - const registry = new CircuitBreakerRegistry(context); - const emptyAuth = { authenticate: async () => ({}) }; - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, emptyAuth as any); - sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); - - await exporter.export([makeMetric()]); - await exporter.export([makeMetric()]); - await exporter.export([makeMetric()]); - - const warnCalls = logSpy - .getCalls() - .filter((c) => c.args[0] === LogLevel.warn && /Authorization/.test(String(c.args[1]))); - expect(warnCalls.length).to.equal(1); + // Should log at debug level only + expect(logSpy.args.every((args) => args[0] === LogLevel.debug)).to.be.true; }); - it('re-arms the auth-missing warn after a successful export', async () => { - const context = new ClientContextStub({ telemetryAuthenticatedExport: true, telemetryMaxRetries: 0 } as any); - const logSpy = sinon.spy((context as any).logger, 'log'); - const registry = new CircuitBreakerRegistry(context); - let headers: Record = {}; - const toggleAuth = { authenticate: async () => headers }; - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, toggleAuth as any); - sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); - - await exporter.export([makeMetric()]); // warns once - headers = { Authorization: 'Bearer recovered' }; - await exporter.export([makeMetric()]); // success → re-arms - headers = {}; - await exporter.export([makeMetric()]); // warns again - - const warnCalls = logSpy - .getCalls() - .filter((c) => c.args[0] === LogLevel.warn && /Authorization/.test(String(c.args[1]))); - 
expect(warnCalls.length).to.equal(2); + it('CRITICAL: should never throw on invalid response', async () => { + const metrics: TelemetryMetric[] = [ + { + metricType: 'connection', + timestamp: Date.now(), + sessionId: 'session-1', + }, + ]; + + fetchStub.resolves({ + ok: false, + status: 500, + statusText: 'Internal Server Error', + }); + + // Should not throw + await exporter.export(metrics); + + // Should log at debug level only + expect(logSpy.args.every((args) => args[0] === LogLevel.debug)).to.be.true; }); - }); - describe('unauthenticated endpoint privacy', () => { - it('omits workspace_id, session_id, statement_id from unauth payload', async () => { - const context = new ClientContextStub({ telemetryAuthenticatedExport: false } as any); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); - const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); - - await exporter.export([ - makeMetric({ + it('CRITICAL: should swallow all exceptions and log at debug level', async () => { + const metrics: TelemetryMetric[] = [ + { metricType: 'connection', - sessionId: 'session-xyz', - statementId: 'stmt-abc', - workspaceId: 'ws-123', - } as any), - ]); + timestamp: Date.now(), + sessionId: 'session-1', + }, + ]; - const body = JSON.parse((sendRequestStub.firstCall.args[1] as any).body); - const log = JSON.parse(body.protoLogs[0]); - expect(log.workspace_id).to.be.undefined; - expect(log.entry.sql_driver_log.session_id).to.be.undefined; - expect(log.entry.sql_driver_log.sql_statement_id).to.be.undefined; - }); + fetchStub.rejects(new Error('Unexpected error')); - it('omits system_configuration from unauth payload', async () => { - const context = new ClientContextStub({ telemetryAuthenticatedExport: false } as any); - const registry = new CircuitBreakerRegistry(context); - const exporter = new 
DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); - const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); + await exporter.export(metrics); - await exporter.export([ - makeMetric({ - metricType: 'connection', - driverConfig: { - driverVersion: '1.x', - driverName: 'nodejs-sql-driver', - nodeVersion: '20.0', - platform: 'linux', - osVersion: '5.0', - osArch: 'x64', - runtimeVendor: 'v8', - localeName: 'en_US', - charSetEncoding: 'UTF-8', - processName: '/home/alice/worker.js', - }, - } as any), - ]); - - const body = JSON.parse((sendRequestStub.firstCall.args[1] as any).body); - const log = JSON.parse(body.protoLogs[0]); - expect(log.entry.sql_driver_log.system_configuration).to.be.undefined; + // Verify all logging is at debug level + logSpy.getCalls().forEach((call) => { + expect(call.args[0]).to.equal(LogLevel.debug); + }); }); - it('strips userAgentEntry from User-Agent on unauth path', async () => { - const context = new ClientContextStub({ - telemetryAuthenticatedExport: false, - userAgentEntry: 'MyTenantApp/1.2.3', - } as any); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); - const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); - - await exporter.export([makeMetric()]); + it('CRITICAL: should handle empty metrics array gracefully', async () => { + await exporter.export([]); - const ua = (sendRequestStub.firstCall.args[1] as any).headers['User-Agent']; - expect(ua).to.not.include('MyTenantApp'); + // Should not call fetch + expect(fetchStub.called).to.be.false; }); - it('blanks stack_trace on unauth error metrics', async () => { - const context = new ClientContextStub({ telemetryAuthenticatedExport: false } as any); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, 
'host.example.com', registry, fakeAuthProvider); - const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); + it('CRITICAL: should handle null/undefined metrics gracefully', async () => { + await exporter.export(null as any); + await exporter.export(undefined as any); - await exporter.export([ - makeMetric({ - metricType: 'error', - errorName: 'SomeError', - errorMessage: 'Bearer leaked-token in the message', - errorStack: 'Error: leak\n at fn (dapi0123456789abcdef)', - } as any), - ]); - - const body = JSON.parse((sendRequestStub.firstCall.args[1] as any).body); - const log = JSON.parse(body.protoLogs[0]); - expect(log.entry.sql_driver_log.error_info.stack_trace).to.equal(''); - expect(log.entry.sql_driver_log.error_info.error_name).to.equal('SomeError'); + // Should not call fetch + expect(fetchStub.called).to.be.false; }); }); - describe('errorStack flow (authenticated)', () => { - it('redacts Bearer tokens in stack_trace before export', async () => { - const context = new ClientContextStub({ telemetryAuthenticatedExport: true } as any); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); - const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); - - await exporter.export([ - makeMetric({ - metricType: 'error', - errorName: 'AuthError', - errorMessage: 'ignored because errorStack is preferred', - errorStack: 'Error: boom\n at Bearer leaked-bearer-token', - } as any), - ]); - - const body = JSON.parse((sendRequestStub.firstCall.args[1] as any).body); - const log = JSON.parse(body.protoLogs[0]); - const stack = log.entry.sql_driver_log.stack_trace ?? 
log.entry.sql_driver_log.error_info?.stack_trace; - expect(stack).to.include(''); - expect(stack).to.not.include('leaked-bearer-token'); + describe('export() - logging', () => { + it('CRITICAL: should log only at debug level', async () => { + const metrics: TelemetryMetric[] = [ + { + metricType: 'connection', + timestamp: Date.now(), + sessionId: 'session-1', + }, + ]; + + fetchStub.resolves({ + ok: true, + status: 200, + statusText: 'OK', + }); + + await exporter.export(metrics); + + // All log calls should be at debug level + logSpy.getCalls().forEach((call) => { + expect(call.args[0]).to.equal(LogLevel.debug); + }); }); - }); - describe('host validation', () => { - it('drops the batch when host fails validation (malformed)', async () => { - const context = new ClientContextStub(); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, '//attacker.com', registry, fakeAuthProvider); - const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); + it('CRITICAL: should not use console logging', async () => { + const consoleLogSpy = sinon.spy(console, 'log'); + const consoleErrorSpy = sinon.spy(console, 'error'); + const consoleWarnSpy = sinon.spy(console, 'warn'); - await exporter.export([makeMetric()]); + const metrics: TelemetryMetric[] = [ + { + metricType: 'connection', + timestamp: Date.now(), + sessionId: 'session-1', + }, + ]; - expect(sendRequestStub.called).to.be.false; - }); + fetchStub.rejects(new Error('Test error')); - it('drops the batch when host is loopback', async () => { - const context = new ClientContextStub(); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, '127.0.0.1', registry, fakeAuthProvider); - const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); + await exporter.export(metrics); - await exporter.export([makeMetric()]); + 
expect(consoleLogSpy.called).to.be.false; + expect(consoleErrorSpy.called).to.be.false; + expect(consoleWarnSpy.called).to.be.false; - expect(sendRequestStub.called).to.be.false; + consoleLogSpy.restore(); + consoleErrorSpy.restore(); + consoleWarnSpy.restore(); }); }); - describe('dispose()', () => { - it('removes the per-host circuit breaker from the registry', () => { - const context = new ClientContextStub(); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); - - expect(registry.getAllBreakers().has('host.example.com')).to.be.true; + describe('export() - connection provider integration', () => { + it('should use connection provider from context', async () => { + const metrics: TelemetryMetric[] = [ + { + metricType: 'connection', + timestamp: Date.now(), + sessionId: 'session-1', + }, + ]; - exporter.dispose(); + const getConnectionProviderSpy = sinon.spy(context, 'getConnectionProvider'); - expect(registry.getAllBreakers().has('host.example.com')).to.be.false; - }); + fetchStub.resolves({ + ok: true, + status: 200, + statusText: 'OK', + }); - it('is idempotent', () => { - const context = new ClientContextStub(); - const registry = new CircuitBreakerRegistry(context); - const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); + await exporter.export(metrics); - exporter.dispose(); - expect(() => exporter.dispose()).to.not.throw(); + expect(getConnectionProviderSpy.called).to.be.true; }); }); }); diff --git a/tests/unit/telemetry/FeatureFlagCache.test.ts b/tests/unit/telemetry/FeatureFlagCache.test.ts index ed7bc79c..0dd80b10 100644 --- a/tests/unit/telemetry/FeatureFlagCache.test.ts +++ b/tests/unit/telemetry/FeatureFlagCache.test.ts @@ -42,7 +42,7 @@ describe('FeatureFlagCache', () => { expect(ctx).to.not.be.undefined; expect(ctx.refCount).to.equal(1); expect(ctx.cacheDuration).to.equal(15 * 60 * 
1000); // 15 minutes - expect(ctx.telemetryEnabled).to.be.undefined; + expect(ctx.flags.size).to.equal(0); // Empty flags map initially expect(ctx.lastFetched).to.be.undefined; }); @@ -137,8 +137,16 @@ describe('FeatureFlagCache', () => { const cache = new FeatureFlagCache(context); const host = 'test-host.databricks.com'; - // Stub the private fetchFeatureFlag method - const fetchStub = sinon.stub(cache as any, 'fetchFeatureFlag').resolves(true); + // Stub the private fetchFeatureFlags method to populate flags Map + const fetchStub = sinon.stub(cache as any, 'fetchFeatureFlags').callsFake(async () => { + const ctx = (cache as any).contexts.get(host); + if (ctx) { + ctx.flags.set( + 'databricks.partnerplatform.clientConfigsFeatureFlags.enableTelemetryForNodeJs', + 'true', + ); + } + }); cache.getOrCreateContext(host); const enabled = await cache.isTelemetryEnabled(host); @@ -155,7 +163,15 @@ describe('FeatureFlagCache', () => { const cache = new FeatureFlagCache(context); const host = 'test-host.databricks.com'; - const fetchStub = sinon.stub(cache as any, 'fetchFeatureFlag').resolves(true); + const fetchStub = sinon.stub(cache as any, 'fetchFeatureFlags').callsFake(async () => { + const ctx = (cache as any).contexts.get(host); + if (ctx) { + ctx.flags.set( + 'databricks.partnerplatform.clientConfigsFeatureFlags.enableTelemetryForNodeJs', + 'true', + ); + } + }); cache.getOrCreateContext(host); @@ -179,9 +195,18 @@ describe('FeatureFlagCache', () => { const cache = new FeatureFlagCache(context); const host = 'test-host.databricks.com'; - const fetchStub = sinon.stub(cache as any, 'fetchFeatureFlag'); - fetchStub.onFirstCall().resolves(true); - fetchStub.onSecondCall().resolves(false); + let callCount = 0; + const fetchStub = sinon.stub(cache as any, 'fetchFeatureFlags').callsFake(async () => { + const ctx = (cache as any).contexts.get(host); + if (ctx) { + callCount++; + // First call returns true, second returns false + ctx.flags.set( + 
'databricks.partnerplatform.clientConfigsFeatureFlags.enableTelemetryForNodeJs', + callCount === 1 ? 'true' : 'false', + ); + } + }); cache.getOrCreateContext(host); @@ -207,24 +232,24 @@ describe('FeatureFlagCache', () => { const cache = new FeatureFlagCache(context); const host = 'test-host.databricks.com'; - const fetchStub = sinon.stub(cache as any, 'fetchFeatureFlag').rejects(new Error('Network error')); + const fetchStub = sinon.stub(cache as any, 'fetchFeatureFlags').rejects(new Error('Network error')); cache.getOrCreateContext(host); const enabled = await cache.isTelemetryEnabled(host); expect(enabled).to.be.false; - expect(logSpy.calledWith(LogLevel.debug, 'Error fetching feature flag: Network error')).to.be.true; + expect(logSpy.calledWith(LogLevel.debug, 'Error fetching feature flags: Network error')).to.be.true; fetchStub.restore(); logSpy.restore(); }); - it('should not propagate exceptions from fetchFeatureFlag', async () => { + it('should not propagate exceptions from fetchFeatureFlags', async () => { const context = new ClientContextStub(); const cache = new FeatureFlagCache(context); const host = 'test-host.databricks.com'; - const fetchStub = sinon.stub(cache as any, 'fetchFeatureFlag').rejects(new Error('Network error')); + const fetchStub = sinon.stub(cache as any, 'fetchFeatureFlags').rejects(new Error('Network error')); cache.getOrCreateContext(host); @@ -235,12 +260,13 @@ describe('FeatureFlagCache', () => { fetchStub.restore(); }); - it('should return false when telemetryEnabled is undefined', async () => { + it('should return false when flag is not set in the map', async () => { const context = new ClientContextStub(); const cache = new FeatureFlagCache(context); const host = 'test-host.databricks.com'; - const fetchStub = sinon.stub(cache as any, 'fetchFeatureFlag').resolves(undefined); + // Stub fetchFeatureFlags to do nothing (leaves flags map empty) + const fetchStub = sinon.stub(cache as any, 'fetchFeatureFlags').resolves(); 
cache.getOrCreateContext(host); const enabled = await cache.isTelemetryEnabled(host); @@ -251,15 +277,19 @@ describe('FeatureFlagCache', () => { }); }); - describe('fetchFeatureFlag', () => { - it('should return false as placeholder implementation', async () => { + describe('fetchFeatureFlags', () => { + it('should handle fetch errors gracefully', async () => { const context = new ClientContextStub(); const cache = new FeatureFlagCache(context); const host = 'test-host.databricks.com'; - // Access private method through any cast - const result = await (cache as any).fetchFeatureFlag(host); - expect(result).to.be.false; + cache.getOrCreateContext(host); + + // Access private method through any cast - should not throw even if fetch fails + await (cache as any).fetchFeatureFlags(host); + + // Should log at debug level but not throw + // Exact behavior depends on network conditions, this just verifies no exception }); }); @@ -269,7 +299,15 @@ describe('FeatureFlagCache', () => { const cache = new FeatureFlagCache(context); const host = 'test-host.databricks.com'; - const fetchStub = sinon.stub(cache as any, 'fetchFeatureFlag').resolves(true); + const fetchStub = sinon.stub(cache as any, 'fetchFeatureFlags').callsFake(async () => { + const ctx = (cache as any).contexts.get(host); + if (ctx) { + ctx.flags.set( + 'databricks.partnerplatform.clientConfigsFeatureFlags.enableTelemetryForNodeJs', + 'true', + ); + } + }); // Simulate 3 connections to same host cache.getOrCreateContext(host); @@ -301,9 +339,16 @@ describe('FeatureFlagCache', () => { const host1 = 'host1.databricks.com'; const host2 = 'host2.databricks.com'; - const fetchStub = sinon.stub(cache as any, 'fetchFeatureFlag'); - fetchStub.withArgs(host1).resolves(true); - fetchStub.withArgs(host2).resolves(false); + const fetchStub = sinon.stub(cache as any, 'fetchFeatureFlags').callsFake(async (host: string) => { + const ctx = (cache as any).contexts.get(host); + if (ctx) { + // host1 returns true, host2 returns 
false + ctx.flags.set( + 'databricks.partnerplatform.clientConfigsFeatureFlags.enableTelemetryForNodeJs', + host === host1 ? 'true' : 'false', + ); + } + }); cache.getOrCreateContext(host1); cache.getOrCreateContext(host2); diff --git a/tests/unit/telemetry/MetricsAggregator.test.ts b/tests/unit/telemetry/MetricsAggregator.test.ts index e3d1bb01..de1b44e8 100644 --- a/tests/unit/telemetry/MetricsAggregator.test.ts +++ b/tests/unit/telemetry/MetricsAggregator.test.ts @@ -17,316 +17,883 @@ import { expect } from 'chai'; import sinon from 'sinon'; import MetricsAggregator from '../../../lib/telemetry/MetricsAggregator'; -import DatabricksTelemetryExporter from '../../../lib/telemetry/DatabricksTelemetryExporter'; -import { TelemetryEvent, TelemetryEventType } from '../../../lib/telemetry/types'; -import ClientContextStub from '../.stubs/ClientContextStub'; -import { LogLevel } from '../../../lib/contracts/IDBSQLLogger'; - -function makeExporterStub(): sinon.SinonStubbedInstance { - return sinon.createStubInstance(DatabricksTelemetryExporter); -} - -function connectionEvent(overrides: Partial = {}): TelemetryEvent { - return { - eventType: TelemetryEventType.CONNECTION_OPEN, - timestamp: Date.now(), - sessionId: 'session-1', - workspaceId: 'workspace-1', - driverConfig: {} as any, - ...overrides, - }; -} - -function statementEvent(type: TelemetryEventType, overrides: Partial = {}): TelemetryEvent { - return { - eventType: type, - timestamp: Date.now(), - sessionId: 'session-1', - statementId: 'stmt-1', - ...overrides, - }; -} +import { TelemetryEvent, TelemetryEventType, DEFAULT_TELEMETRY_CONFIG } from '../../../lib/telemetry/types'; +import IClientContext from '../../../lib/contracts/IClientContext'; +import IDBSQLLogger, { LogLevel } from '../../../lib/contracts/IDBSQLLogger'; +import TelemetryExporterStub from '../.stubs/TelemetryExporterStub'; describe('MetricsAggregator', () => { + let context: IClientContext; + let logger: IDBSQLLogger; + let exporter: 
TelemetryExporterStub; + let aggregator: MetricsAggregator; let clock: sinon.SinonFakeTimers; beforeEach(() => { clock = sinon.useFakeTimers(); + + logger = { + log: sinon.stub(), + }; + + exporter = new TelemetryExporterStub(); + + context = { + getLogger: () => logger, + getConfig: () => ({ + telemetryBatchSize: 10, + telemetryFlushIntervalMs: 5000, + directResultsDefaultMaxRows: 10000, + fetchChunkDefaultMaxRows: 100000, + socketTimeout: 900000, + retryMaxAttempts: 30, + retriesTimeout: 900000, + retryDelayMin: 1000, + retryDelayMax: 30000, + useCloudFetch: true, + cloudFetchConcurrentDownloads: 10, + cloudFetchSpeedThresholdMBps: 0, + useLZ4Compression: true, + }), + } as any; + + aggregator = new MetricsAggregator(context, exporter as any); }); afterEach(() => { + if (aggregator) { + aggregator.close(); + } clock.restore(); + sinon.restore(); + }); + + describe('constructor', () => { + it('should create instance with default config values', () => { + const defaultContext = { + getLogger: () => logger, + getConfig: () => ({ + directResultsDefaultMaxRows: 10000, + fetchChunkDefaultMaxRows: 100000, + socketTimeout: 900000, + retryMaxAttempts: 30, + retriesTimeout: 900000, + retryDelayMin: 1000, + retryDelayMax: 30000, + useCloudFetch: true, + cloudFetchConcurrentDownloads: 10, + cloudFetchSpeedThresholdMBps: 0, + useLZ4Compression: true, + }), + } as any; + + const defaultAggregator = new MetricsAggregator(defaultContext, exporter as any); + expect(defaultAggregator).to.be.instanceOf(MetricsAggregator); + defaultAggregator.close(); + }); + + it('should use batch size from config', () => { + const customContext = { + getLogger: () => logger, + getConfig: () => ({ + telemetryBatchSize: 5, + telemetryFlushIntervalMs: 5000, + directResultsDefaultMaxRows: 10000, + fetchChunkDefaultMaxRows: 100000, + socketTimeout: 900000, + retryMaxAttempts: 30, + retriesTimeout: 900000, + retryDelayMin: 1000, + retryDelayMax: 30000, + useCloudFetch: true, + 
cloudFetchConcurrentDownloads: 10, + cloudFetchSpeedThresholdMBps: 0, + useLZ4Compression: true, + }), + } as any; + + const customAggregator = new MetricsAggregator(customContext, exporter as any); + + // Process 4 connection events (below batch size of 5) + for (let i = 0; i < 4; i++) { + const event: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: `session-${i}`, + workspaceId: 'workspace-1', + }; + customAggregator.processEvent(event); + } + + // Should not flush yet (batch size is 5) + expect(exporter.exportCount).to.equal(0); + + // Process 5th event + const event: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: 'session-5', + workspaceId: 'workspace-1', + }; + customAggregator.processEvent(event); + + // Should flush now (batch size reached) + expect(exporter.exportCount).to.equal(1); + customAggregator.close(); + }); }); - describe('processEvent() - connection events', () => { - it('should emit connection events immediately without waiting for completeStatement', () => { - const context = new ClientContextStub(); - const exporter = makeExporterStub(); - const aggregator = new MetricsAggregator(context, exporter as any); + describe('processEvent - connection events', () => { + it('should emit connection events immediately', () => { + const event: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: 'session-123', + workspaceId: 'workspace-456', + driverConfig: { + driverVersion: '1.0.0', + driverName: 'databricks-sql-nodejs', + nodeVersion: process.version, + platform: process.platform, + osVersion: 'test-os', + osArch: 'x64', + runtimeVendor: 'Node.js Foundation', + localeName: 'en_US', + charSetEncoding: 'UTF-8', + processName: 'node', + authType: 'pat', + cloudFetchEnabled: true, + lz4Enabled: true, + arrowEnabled: false, + directResultsEnabled: true, + socketTimeout: 900000, + 
retryMaxAttempts: 30, + cloudFetchConcurrentDownloads: 10, + }, + }; + + aggregator.processEvent(event); - aggregator.processEvent(connectionEvent()); + // Should not flush yet (batch size is 10) + expect(exporter.exportCount).to.equal(0); - // flush happens when batchSize reached or timer fires — force it + // Complete to trigger flush aggregator.flush(); - expect(exporter.export.calledOnce).to.be.true; - const metrics = exporter.export.firstCall.args[0]; + expect(exporter.exportCount).to.equal(1); + const metrics = exporter.getAllExportedMetrics(); + expect(metrics).to.have.lengthOf(1); expect(metrics[0].metricType).to.equal('connection'); + expect(metrics[0].sessionId).to.equal('session-123'); + expect(metrics[0].workspaceId).to.equal('workspace-456'); + expect(metrics[0].driverConfig).to.deep.equal(event.driverConfig); + }); + + it('should handle multiple connection events', () => { + const event1: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: 'session-1', + workspaceId: 'workspace-1', + }; + + const event2: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: 'session-2', + workspaceId: 'workspace-2', + }; + + aggregator.processEvent(event1); + aggregator.processEvent(event2); + aggregator.flush(); + + const metrics = exporter.getAllExportedMetrics(); + expect(metrics).to.have.lengthOf(2); + expect(metrics[0].sessionId).to.equal('session-1'); + expect(metrics[1].sessionId).to.equal('session-2'); }); }); - describe('processEvent() - statement events', () => { - it('should buffer statement events until completeStatement is called', () => { - const context = new ClientContextStub(); - const exporter = makeExporterStub(); - const aggregator = new MetricsAggregator(context, exporter as any); + describe('processEvent - statement events', () => { + it('should aggregate statement events by statement_id', () => { + const startEvent: TelemetryEvent = { + 
eventType: TelemetryEventType.STATEMENT_START, + timestamp: 1000, + statementId: 'stmt-123', + sessionId: 'session-123', + operationType: 'SELECT', + }; + + const completeEvent: TelemetryEvent = { + eventType: TelemetryEventType.STATEMENT_COMPLETE, + timestamp: 2500, + statementId: 'stmt-123', + sessionId: 'session-123', + latencyMs: 1500, + resultFormat: 'cloudfetch', + chunkCount: 5, + bytesDownloaded: 1024000, + pollCount: 3, + }; + + aggregator.processEvent(startEvent); + aggregator.processEvent(completeEvent); + + // Should not flush until completeStatement() called + expect(exporter.exportCount).to.equal(0); - aggregator.processEvent(statementEvent(TelemetryEventType.STATEMENT_START)); - aggregator.processEvent(statementEvent(TelemetryEventType.STATEMENT_COMPLETE, { latencyMs: 100 })); + aggregator.completeStatement('stmt-123'); - // Not yet flushed - expect(exporter.export.called).to.be.false; + // Should not flush yet (batch size is 10) + expect(exporter.exportCount).to.equal(0); - aggregator.completeStatement('stmt-1'); aggregator.flush(); - expect(exporter.export.calledOnce).to.be.true; - const metrics = exporter.export.firstCall.args[0]; + expect(exporter.exportCount).to.equal(1); + const metrics = exporter.getAllExportedMetrics(); + expect(metrics).to.have.lengthOf(1); expect(metrics[0].metricType).to.equal('statement'); - expect(metrics[0].statementId).to.equal('stmt-1'); + expect(metrics[0].statementId).to.equal('stmt-123'); + expect(metrics[0].sessionId).to.equal('session-123'); + expect(metrics[0].latencyMs).to.equal(1500); + expect(metrics[0].resultFormat).to.equal('cloudfetch'); + expect(metrics[0].chunkCount).to.equal(5); + expect(metrics[0].bytesDownloaded).to.equal(1024000); + expect(metrics[0].pollCount).to.equal(3); }); - it('should track chunk events and accumulate totals', () => { - const context = new ClientContextStub(); - const exporter = makeExporterStub(); - const aggregator = new MetricsAggregator(context, exporter as any); + 
it('should buffer statement events until complete', () => { + const startEvent: TelemetryEvent = { + eventType: TelemetryEventType.STATEMENT_START, + timestamp: Date.now(), + statementId: 'stmt-123', + sessionId: 'session-123', + operationType: 'INSERT', + }; + + aggregator.processEvent(startEvent); + aggregator.flush(); - aggregator.processEvent(statementEvent(TelemetryEventType.STATEMENT_START)); - aggregator.processEvent(statementEvent(TelemetryEventType.CLOUDFETCH_CHUNK, { bytes: 100, compressed: true })); - aggregator.processEvent(statementEvent(TelemetryEventType.CLOUDFETCH_CHUNK, { bytes: 200, compressed: true })); + // Should not export statement until complete + expect(exporter.getAllExportedMetrics()).to.have.lengthOf(0); - aggregator.completeStatement('stmt-1'); + // Complete statement + aggregator.completeStatement('stmt-123'); aggregator.flush(); - const metrics = exporter.export.firstCall.args[0]; - const stmtMetric = metrics[0]; - expect(stmtMetric.chunkCount).to.equal(2); - expect(stmtMetric.bytesDownloaded).to.equal(300); + // Should export now + const metrics = exporter.getAllExportedMetrics(); + expect(metrics).to.have.lengthOf(1); + expect(metrics[0].metricType).to.equal('statement'); }); - it('should do nothing for completeStatement with unknown statementId', () => { - const context = new ClientContextStub(); - const exporter = makeExporterStub(); - const aggregator = new MetricsAggregator(context, exporter as any); + it('should include both session_id and statement_id in metrics', () => { + const event: TelemetryEvent = { + eventType: TelemetryEventType.STATEMENT_START, + timestamp: Date.now(), + statementId: 'stmt-789', + sessionId: 'session-456', + }; + + aggregator.processEvent(event); + aggregator.completeStatement('stmt-789'); + aggregator.flush(); - expect(() => aggregator.completeStatement('unknown-stmt')).to.not.throw(); + const metrics = exporter.getAllExportedMetrics(); + expect(metrics[0].sessionId).to.equal('session-456'); + 
expect(metrics[0].statementId).to.equal('stmt-789'); }); }); - describe('processEvent() - error events', () => { - it('should flush immediately for terminal errors', () => { - const context = new ClientContextStub(); - const exporter = makeExporterStub(); - const aggregator = new MetricsAggregator(context, exporter as any); + describe('processEvent - cloudfetch events', () => { + it('should aggregate cloudfetch chunk events', () => { + const startEvent: TelemetryEvent = { + eventType: TelemetryEventType.STATEMENT_START, + timestamp: Date.now(), + statementId: 'stmt-123', + sessionId: 'session-123', + }; - const errEvent: TelemetryEvent = { + const chunk1: TelemetryEvent = { + eventType: TelemetryEventType.CLOUDFETCH_CHUNK, + timestamp: Date.now(), + statementId: 'stmt-123', + chunkIndex: 0, + bytes: 100000, + compressed: true, + }; + + const chunk2: TelemetryEvent = { + eventType: TelemetryEventType.CLOUDFETCH_CHUNK, + timestamp: Date.now(), + statementId: 'stmt-123', + chunkIndex: 1, + bytes: 150000, + compressed: true, + }; + + aggregator.processEvent(startEvent); + aggregator.processEvent(chunk1); + aggregator.processEvent(chunk2); + aggregator.completeStatement('stmt-123'); + aggregator.flush(); + + const metrics = exporter.getAllExportedMetrics(); + expect(metrics).to.have.lengthOf(1); + expect(metrics[0].chunkCount).to.equal(2); + expect(metrics[0].bytesDownloaded).to.equal(250000); + }); + }); + + describe('processEvent - error events', () => { + it('should flush terminal exceptions immediately', () => { + const terminalError: TelemetryEvent = { eventType: TelemetryEventType.ERROR, timestamp: Date.now(), - sessionId: 'session-1', - errorName: 'AuthError', - errorMessage: 'auth failed', + sessionId: 'session-123', + statementId: 'stmt-123', + errorName: 'AuthenticationError', + errorMessage: 'Invalid credentials', isTerminal: true, }; - aggregator.processEvent(errEvent); + aggregator.processEvent(terminalError); - // Terminal error should trigger an immediate 
flush - expect(exporter.export.called).to.be.true; - const metrics = exporter.export.firstCall.args[0]; + // Should flush immediately for terminal errors + expect(exporter.exportCount).to.equal(1); + const metrics = exporter.getAllExportedMetrics(); + expect(metrics).to.have.lengthOf(1); expect(metrics[0].metricType).to.equal('error'); + expect(metrics[0].errorName).to.equal('AuthenticationError'); }); - it('should buffer retryable errors until statement completes', () => { - const context = new ClientContextStub(); - const exporter = makeExporterStub(); - const aggregator = new MetricsAggregator(context, exporter as any); + it('should buffer retryable exceptions until statement complete', () => { + const startEvent: TelemetryEvent = { + eventType: TelemetryEventType.STATEMENT_START, + timestamp: Date.now(), + statementId: 'stmt-123', + sessionId: 'session-123', + }; - const retryableErr: TelemetryEvent = { + const retryableError: TelemetryEvent = { eventType: TelemetryEventType.ERROR, timestamp: Date.now(), - sessionId: 'session-1', - statementId: 'stmt-1', - errorName: 'NetworkError', - errorMessage: 'timeout', + sessionId: 'session-123', + statementId: 'stmt-123', + errorName: 'TimeoutError', + errorMessage: 'Request timed out', isTerminal: false, }; - aggregator.processEvent(retryableErr); - // Not flushed yet - expect(exporter.export.called).to.be.false; + aggregator.processEvent(startEvent); + aggregator.processEvent(retryableError); + + // Should not flush retryable error yet + expect(exporter.exportCount).to.equal(0); - aggregator.completeStatement('stmt-1'); + aggregator.completeStatement('stmt-123'); aggregator.flush(); - expect(exporter.export.called).to.be.true; + // Should export statement and error now + const metrics = exporter.getAllExportedMetrics(); + expect(metrics).to.have.lengthOf(2); + expect(metrics[0].metricType).to.equal('statement'); + expect(metrics[1].metricType).to.equal('error'); + expect(metrics[1].errorName).to.equal('TimeoutError'); 
+ }); + + it('should flush terminal error for statement and complete it', () => { + const startEvent: TelemetryEvent = { + eventType: TelemetryEventType.STATEMENT_START, + timestamp: Date.now(), + statementId: 'stmt-123', + sessionId: 'session-123', + }; + + const terminalError: TelemetryEvent = { + eventType: TelemetryEventType.ERROR, + timestamp: Date.now(), + sessionId: 'session-123', + statementId: 'stmt-123', + errorName: 'AuthenticationError', + errorMessage: 'Invalid credentials', + isTerminal: true, + }; + + aggregator.processEvent(startEvent); + aggregator.processEvent(terminalError); + + // Should flush immediately for terminal error + expect(exporter.exportCount).to.equal(1); + const metrics = exporter.getAllExportedMetrics(); + expect(metrics).to.have.lengthOf(2); + expect(metrics[0].metricType).to.equal('statement'); + expect(metrics[1].metricType).to.equal('error'); }); }); - describe('flush() - batch size trigger', () => { - it('should flush when batchSize is reached', () => { - const context = new ClientContextStub({ telemetryBatchSize: 3 } as any); - const exporter = makeExporterStub(); - const aggregator = new MetricsAggregator(context, exporter as any); + describe('batch size flushing', () => { + it('should flush when batch size reached', () => { + // Process 10 connection events (batch size is 10) + for (let i = 0; i < 10; i++) { + const event: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: `session-${i}`, + workspaceId: 'workspace-1', + }; + aggregator.processEvent(event); + } + + // Should flush automatically + expect(exporter.exportCount).to.equal(1); + expect(exporter.getAllExportedMetrics()).to.have.lengthOf(10); + }); - // Add 2 connection events — no flush yet - aggregator.processEvent(connectionEvent({ sessionId: 's1' })); - aggregator.processEvent(connectionEvent({ sessionId: 's2' })); - expect(exporter.export.called).to.be.false; + it('should not flush before batch size 
reached', () => { + // Process 9 connection events (below batch size of 10) + for (let i = 0; i < 9; i++) { + const event: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: `session-${i}`, + workspaceId: 'workspace-1', + }; + aggregator.processEvent(event); + } - // 3rd event reaches batchSize - aggregator.processEvent(connectionEvent({ sessionId: 's3' })); - expect(exporter.export.calledOnce).to.be.true; + // Should not flush yet + expect(exporter.exportCount).to.equal(0); }); }); - describe('flush() - periodic timer', () => { - it('should flush periodically based on flushIntervalMs', () => { - const context = new ClientContextStub({ telemetryFlushIntervalMs: 5000 } as any); - const exporter = makeExporterStub(); - const aggregator = new MetricsAggregator(context, exporter as any); + describe('periodic timer flushing', () => { + it('should flush on periodic timer', () => { + const event: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: 'session-123', + workspaceId: 'workspace-1', + }; + + aggregator.processEvent(event); - aggregator.processEvent(connectionEvent()); - expect(exporter.export.called).to.be.false; + // Should not flush immediately + expect(exporter.exportCount).to.equal(0); + // Advance timer by flush interval (5000ms) clock.tick(5000); - expect(exporter.export.calledOnce).to.be.true; + // Should flush now + expect(exporter.exportCount).to.equal(1); + expect(exporter.getAllExportedMetrics()).to.have.lengthOf(1); }); - it('should not flush if there are no pending metrics', () => { - const context = new ClientContextStub({ telemetryFlushIntervalMs: 5000 } as any); - const exporter = makeExporterStub(); - new MetricsAggregator(context, exporter as any); + it('should flush multiple times on timer', () => { + const event: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: 'session-123', + 
workspaceId: 'workspace-1', + }; + aggregator.processEvent(event); clock.tick(5000); + expect(exporter.exportCount).to.equal(1); - expect(exporter.export.called).to.be.false; + aggregator.processEvent(event); + clock.tick(5000); + expect(exporter.exportCount).to.equal(2); }); }); - describe('maxPendingMetrics bound', () => { - it('should drop oldest metrics when buffer exceeds maxPendingMetrics', () => { - const context = new ClientContextStub({ telemetryMaxPendingMetrics: 3, telemetryBatchSize: 1000 } as any); - const logSpy = sinon.spy((context as any).logger, 'log'); - const exporter = makeExporterStub(); - const aggregator = new MetricsAggregator(context, exporter as any); + describe('completeStatement', () => { + it('should complete statement and prepare for flushing', () => { + const event: TelemetryEvent = { + eventType: TelemetryEventType.STATEMENT_START, + timestamp: Date.now(), + statementId: 'stmt-123', + sessionId: 'session-123', + }; - // Add 5 events — should be capped at 3 - for (let i = 0; i < 5; i++) { - aggregator.processEvent(connectionEvent({ sessionId: `s${i}` })); - } + aggregator.processEvent(event); + aggregator.completeStatement('stmt-123'); + aggregator.flush(); + + const metrics = exporter.getAllExportedMetrics(); + expect(metrics).to.have.lengthOf(1); + expect(metrics[0].statementId).to.equal('stmt-123'); + }); + it('should do nothing for unknown statement_id', () => { + aggregator.completeStatement('unknown-stmt'); aggregator.flush(); - const metrics = exporter.export.firstCall.args[0]; - expect(metrics.length).to.equal(3); - expect(logSpy.calledWith(LogLevel.debug, sinon.match(/Dropped/))).to.be.true; + expect(exporter.getAllExportedMetrics()).to.have.lengthOf(0); + }); + + it('should include buffered errors when completing statement', () => { + const startEvent: TelemetryEvent = { + eventType: TelemetryEventType.STATEMENT_START, + timestamp: Date.now(), + statementId: 'stmt-123', + sessionId: 'session-123', + }; + + const error1: 
TelemetryEvent = { + eventType: TelemetryEventType.ERROR, + timestamp: Date.now(), + sessionId: 'session-123', + statementId: 'stmt-123', + errorName: 'Error1', + errorMessage: 'First error', + isTerminal: false, + }; + + const error2: TelemetryEvent = { + eventType: TelemetryEventType.ERROR, + timestamp: Date.now(), + sessionId: 'session-123', + statementId: 'stmt-123', + errorName: 'Error2', + errorMessage: 'Second error', + isTerminal: false, + }; - logSpy.restore(); + aggregator.processEvent(startEvent); + aggregator.processEvent(error1); + aggregator.processEvent(error2); + aggregator.completeStatement('stmt-123'); + aggregator.flush(); + + const metrics = exporter.getAllExportedMetrics(); + expect(metrics).to.have.lengthOf(3); + expect(metrics[0].metricType).to.equal('statement'); + expect(metrics[1].metricType).to.equal('error'); + expect(metrics[2].metricType).to.equal('error'); }); }); - describe('close()', () => { + describe('close', () => { it('should flush remaining metrics on close', () => { - const context = new ClientContextStub(); - const exporter = makeExporterStub(); - const aggregator = new MetricsAggregator(context, exporter as any); + const event: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: 'session-123', + workspaceId: 'workspace-1', + }; - aggregator.processEvent(connectionEvent()); + aggregator.processEvent(event); aggregator.close(); - expect(exporter.export.called).to.be.true; + expect(exporter.exportCount).to.equal(1); + expect(exporter.getAllExportedMetrics()).to.have.lengthOf(1); }); - it('should stop the flush timer on close', () => { - const context = new ClientContextStub({ telemetryFlushIntervalMs: 5000 } as any); - const exporter = makeExporterStub(); - const aggregator = new MetricsAggregator(context, exporter as any); + it('should complete pending statements on close', () => { + const event: TelemetryEvent = { + eventType: TelemetryEventType.STATEMENT_START, + 
timestamp: Date.now(), + statementId: 'stmt-123', + sessionId: 'session-123', + }; + aggregator.processEvent(event); aggregator.close(); - exporter.export.reset(); - // Advance time — timer should no longer fire - clock.tick(10000); - aggregator.processEvent(connectionEvent()); - // Timer stopped, so no auto-flush - expect(exporter.export.called).to.be.false; + const metrics = exporter.getAllExportedMetrics(); + expect(metrics).to.have.lengthOf(1); + expect(metrics[0].statementId).to.equal('stmt-123'); }); - it('should complete any in-progress statements on close', () => { - const context = new ClientContextStub(); - const exporter = makeExporterStub(); - const aggregator = new MetricsAggregator(context, exporter as any); - - aggregator.processEvent(statementEvent(TelemetryEventType.STATEMENT_START)); + it('should stop flush timer on close', () => { aggregator.close(); - expect(exporter.export.called).to.be.true; - const metrics = exporter.export.firstCall.args[0]; - expect(metrics[0].metricType).to.equal('statement'); + const event: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: 'session-123', + workspaceId: 'workspace-1', + }; + + exporter.reset(); + aggregator.processEvent(event); + + // Advance timer - should not flush after close + clock.tick(5000); + expect(exporter.exportCount).to.equal(0); }); + }); - it('awaits in-flight export before resolving — prevents process.exit truncation', async () => { - clock.restore(); - const context = new ClientContextStub(); - let resolveExport!: () => void; - const pendingExport = new Promise((r) => { - resolveExport = r; - }); - const exporter: any = { export: sinon.stub().returns(pendingExport) }; - const aggregator = new MetricsAggregator(context, exporter); + describe('exception swallowing', () => { + it('should swallow exception in processEvent and log at debug level', () => { + // Create a context that throws in getConfig + const throwingContext = { + 
getLogger: () => logger, + getConfig: () => { + throw new Error('Config error'); + }, + } as any; + + const throwingAggregator = new MetricsAggregator(throwingContext, exporter as any); + + const event: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: 'session-123', + workspaceId: 'workspace-1', + }; + + // Should not throw + expect(() => throwingAggregator.processEvent(event)).to.not.throw(); + + throwingAggregator.close(); + }); - aggregator.processEvent(connectionEvent()); + it('should swallow exception in flush and log at debug level', () => { + // Make exporter throw + exporter.throwOnExport(new Error('Export failed')); - const done = aggregator.close(); - expect(done).to.be.an.instanceof(Promise); + const event: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: 'session-123', + workspaceId: 'workspace-1', + }; - let resolved = false; - done.then(() => { - resolved = true; - }); - await Promise.resolve(); - await Promise.resolve(); - expect(resolved, 'close() should wait for exporter promise before resolving').to.be.false; + aggregator.processEvent(event); - resolveExport(); - await done; - expect(resolved).to.be.true; + // Should not throw + expect(() => aggregator.flush()).to.not.throw(); }); - it('does not resurrect the flush timer after close', async () => { - clock.restore(); - const context = new ClientContextStub({ telemetryBatchSize: 1 } as any); - const exporter = makeExporterStub(); - const aggregator = new MetricsAggregator(context, exporter as any); + it('should swallow exception in completeStatement and log at debug level', () => { + // Process invalid event to create bad state + const event: TelemetryEvent = { + eventType: TelemetryEventType.STATEMENT_START, + timestamp: Date.now(), + statementId: 'stmt-123', + sessionId: 'session-123', + }; - aggregator.processEvent(statementEvent(TelemetryEventType.STATEMENT_START)); - await 
aggregator.close(); + aggregator.processEvent(event); - expect((aggregator as any).flushTimer, 'flushTimer should be null after close').to.equal(null); - expect((aggregator as any).closed).to.be.true; + // Create a scenario that might cause an exception + // Even if internals throw, should not propagate + expect(() => aggregator.completeStatement('stmt-123')).to.not.throw(); + }); + + it('should swallow exception in close and log at debug level', () => { + // Make exporter throw + exporter.throwOnExport(new Error('Export failed')); + + const event: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: 'session-123', + workspaceId: 'workspace-1', + }; + + aggregator.processEvent(event); + + // Should not throw + expect(() => aggregator.close()).to.not.throw(); + }); + + it('should log all errors at debug level only', () => { + exporter.throwOnExport(new Error('Export failed')); + + const event: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: 'session-123', + workspaceId: 'workspace-1', + }; + + aggregator.processEvent(event); + aggregator.flush(); + + const logStub = logger.log as sinon.SinonStub; + for (let i = 0; i < logStub.callCount; i++) { + const level = logStub.args[i][0]; + expect(level).to.equal(LogLevel.debug); + } }); }); - describe('exception swallowing', () => { - it('should never throw from processEvent', () => { - const context = new ClientContextStub(); - const exporter = makeExporterStub(); - const aggregator = new MetricsAggregator(context, exporter as any); - - expect(() => - aggregator.processEvent({ eventType: 'unknown.event' as any, timestamp: Date.now() }), - ).to.not.throw(); + describe('no console logging', () => { + it('should not use console.log', () => { + const consoleSpy = sinon.spy(console, 'log'); + + const event: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: 
'session-123', + workspaceId: 'workspace-1', + }; + + aggregator.processEvent(event); + aggregator.flush(); + aggregator.close(); + + expect(consoleSpy.called).to.be.false; + consoleSpy.restore(); + }); + + it('should not use console.debug', () => { + const consoleSpy = sinon.spy(console, 'debug'); + + const event: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: 'session-123', + workspaceId: 'workspace-1', + }; + + aggregator.processEvent(event); + aggregator.flush(); + aggregator.close(); + + expect(consoleSpy.called).to.be.false; + consoleSpy.restore(); + }); + + it('should not use console.error', () => { + const consoleSpy = sinon.spy(console, 'error'); + + exporter.throwOnExport(new Error('Export failed')); + + const event: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: 'session-123', + workspaceId: 'workspace-1', + }; + + aggregator.processEvent(event); + aggregator.flush(); + + expect(consoleSpy.called).to.be.false; + consoleSpy.restore(); + }); + }); + + describe('config reading', () => { + it('should read batch size from context config', () => { + const customContext = { + getLogger: () => logger, + getConfig: () => ({ + telemetryBatchSize: 3, + telemetryFlushIntervalMs: 5000, + directResultsDefaultMaxRows: 10000, + fetchChunkDefaultMaxRows: 100000, + socketTimeout: 900000, + retryMaxAttempts: 30, + retriesTimeout: 900000, + retryDelayMin: 1000, + retryDelayMax: 30000, + useCloudFetch: true, + cloudFetchConcurrentDownloads: 10, + cloudFetchSpeedThresholdMBps: 0, + useLZ4Compression: true, + }), + } as any; + + const customAggregator = new MetricsAggregator(customContext, exporter as any); + + // Process 3 events (custom batch size) + for (let i = 0; i < 3; i++) { + const event: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: `session-${i}`, + workspaceId: 'workspace-1', + }; + 
customAggregator.processEvent(event); + } + + // Should flush at batch size 3 + expect(exporter.exportCount).to.equal(1); + customAggregator.close(); + }); + + it('should read flush interval from context config', () => { + const customContext = { + getLogger: () => logger, + getConfig: () => ({ + telemetryBatchSize: 10, + telemetryFlushIntervalMs: 3000, + directResultsDefaultMaxRows: 10000, + fetchChunkDefaultMaxRows: 100000, + socketTimeout: 900000, + retryMaxAttempts: 30, + retriesTimeout: 900000, + retryDelayMin: 1000, + retryDelayMax: 30000, + useCloudFetch: true, + cloudFetchConcurrentDownloads: 10, + cloudFetchSpeedThresholdMBps: 0, + useLZ4Compression: true, + }), + } as any; + + const customAggregator = new MetricsAggregator(customContext, exporter as any); + + const event: TelemetryEvent = { + eventType: TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: 'session-123', + workspaceId: 'workspace-1', + }; + + customAggregator.processEvent(event); + + // Should not flush yet + expect(exporter.exportCount).to.equal(0); + + // Advance timer by custom flush interval (3000ms) + clock.tick(3000); + + // Should flush now + expect(exporter.exportCount).to.equal(1); + customAggregator.close(); + }); + + it('should use default values when config values are undefined', () => { + const defaultContext = { + getLogger: () => logger, + getConfig: () => ({ + directResultsDefaultMaxRows: 10000, + fetchChunkDefaultMaxRows: 100000, + socketTimeout: 900000, + retryMaxAttempts: 30, + retriesTimeout: 900000, + retryDelayMin: 1000, + retryDelayMax: 30000, + useCloudFetch: true, + cloudFetchConcurrentDownloads: 10, + cloudFetchSpeedThresholdMBps: 0, + useLZ4Compression: true, + }), + } as any; + + const defaultAggregator = new MetricsAggregator(defaultContext, exporter as any); + + // Process events up to default batch size (100) + for (let i = 0; i < DEFAULT_TELEMETRY_CONFIG.batchSize; i++) { + const event: TelemetryEvent = { + eventType: 
TelemetryEventType.CONNECTION_OPEN, + timestamp: Date.now(), + sessionId: `session-${i}`, + workspaceId: 'workspace-1', + }; + defaultAggregator.processEvent(event); + } + + // Should flush at default batch size + expect(exporter.exportCount).to.equal(1); + defaultAggregator.close(); }); }); }); diff --git a/tests/unit/telemetry/TelemetryEventEmitter.test.ts b/tests/unit/telemetry/TelemetryEventEmitter.test.ts index 6fc29107..c1f86802 100644 --- a/tests/unit/telemetry/TelemetryEventEmitter.test.ts +++ b/tests/unit/telemetry/TelemetryEventEmitter.test.ts @@ -17,187 +17,723 @@ import { expect } from 'chai'; import sinon from 'sinon'; import TelemetryEventEmitter from '../../../lib/telemetry/TelemetryEventEmitter'; -import { TelemetryEventType } from '../../../lib/telemetry/types'; -import ClientContextStub from '../.stubs/ClientContextStub'; -import { LogLevel } from '../../../lib/contracts/IDBSQLLogger'; - -function makeEmitter(enabled: boolean): TelemetryEventEmitter { - const context = new ClientContextStub({ telemetryEnabled: enabled } as any); - return new TelemetryEventEmitter(context); -} +import { TelemetryEventType, TelemetryEvent, DriverConfiguration } from '../../../lib/telemetry/types'; +import IClientContext from '../../../lib/contracts/IClientContext'; +import IDBSQLLogger, { LogLevel } from '../../../lib/contracts/IDBSQLLogger'; describe('TelemetryEventEmitter', () => { - describe('when telemetry is disabled', () => { - it('should not emit any events', () => { - const emitter = makeEmitter(false); - const listener = sinon.stub(); - emitter.on(TelemetryEventType.CONNECTION_OPEN, listener); + let context: IClientContext; + let logger: IDBSQLLogger; + let emitter: TelemetryEventEmitter; + + beforeEach(() => { + logger = { + log: sinon.stub(), + }; + + context = { + getLogger: () => logger, + getConfig: () => ({ + telemetryEnabled: true, + directResultsDefaultMaxRows: 10000, + fetchChunkDefaultMaxRows: 100000, + socketTimeout: 900000, + retryMaxAttempts: 
30, + retriesTimeout: 900000, + retryDelayMin: 1000, + retryDelayMax: 30000, + useCloudFetch: true, + cloudFetchConcurrentDownloads: 10, + cloudFetchSpeedThresholdMBps: 0, + useLZ4Compression: true, + }), + } as any; + + emitter = new TelemetryEventEmitter(context); + }); - emitter.emitConnectionOpen({ sessionId: 's1', workspaceId: 'w1', driverConfig: {} as any }); + afterEach(() => { + sinon.restore(); + }); + + describe('constructor', () => { + it('should create instance with telemetry enabled', () => { + expect(emitter).to.be.instanceOf(TelemetryEventEmitter); + }); - expect(listener.called).to.be.false; + it('should create instance with telemetry disabled', () => { + const disabledContext = { + getLogger: () => logger, + getConfig: () => ({ + telemetryEnabled: false, + directResultsDefaultMaxRows: 10000, + fetchChunkDefaultMaxRows: 100000, + socketTimeout: 900000, + retryMaxAttempts: 30, + retriesTimeout: 900000, + retryDelayMin: 1000, + retryDelayMax: 30000, + useCloudFetch: true, + cloudFetchConcurrentDownloads: 10, + cloudFetchSpeedThresholdMBps: 0, + useLZ4Compression: true, + }), + } as any; + + const disabledEmitter = new TelemetryEventEmitter(disabledContext); + expect(disabledEmitter).to.be.instanceOf(TelemetryEventEmitter); }); - it('should not emit statement start', () => { - const emitter = makeEmitter(false); - const listener = sinon.stub(); - emitter.on(TelemetryEventType.STATEMENT_START, listener); + it('should default to disabled when telemetryEnabled is undefined', () => { + const defaultContext = { + getLogger: () => logger, + getConfig: () => ({ + directResultsDefaultMaxRows: 10000, + fetchChunkDefaultMaxRows: 100000, + socketTimeout: 900000, + retryMaxAttempts: 30, + retriesTimeout: 900000, + retryDelayMin: 1000, + retryDelayMax: 30000, + useCloudFetch: true, + cloudFetchConcurrentDownloads: 10, + cloudFetchSpeedThresholdMBps: 0, + useLZ4Compression: true, + }), + } as any; + + const defaultEmitter = new TelemetryEventEmitter(defaultContext); 
+ expect(defaultEmitter).to.be.instanceOf(TelemetryEventEmitter); + }); + }); - emitter.emitStatementStart({ statementId: 'st1', sessionId: 's1' }); + describe('emitConnectionOpen', () => { + it('should emit connection.open event with correct data', (done) => { + const driverConfig: DriverConfiguration = { + driverVersion: '1.0.0', + driverName: 'databricks-sql-nodejs', + nodeVersion: process.version, + platform: process.platform, + osVersion: 'test-os', + osArch: 'x64', + runtimeVendor: 'Node.js Foundation', + localeName: 'en_US', + charSetEncoding: 'UTF-8', + processName: 'node', + authType: 'pat', + cloudFetchEnabled: true, + lz4Enabled: true, + arrowEnabled: false, + directResultsEnabled: true, + socketTimeout: 900000, + retryMaxAttempts: 30, + cloudFetchConcurrentDownloads: 10, + }; + + emitter.on(TelemetryEventType.CONNECTION_OPEN, (event: TelemetryEvent) => { + expect(event.eventType).to.equal(TelemetryEventType.CONNECTION_OPEN); + expect(event.sessionId).to.equal('session-123'); + expect(event.workspaceId).to.equal('workspace-456'); + expect(event.driverConfig).to.deep.equal(driverConfig); + expect(event.timestamp).to.be.a('number'); + done(); + }); - expect(listener.called).to.be.false; + emitter.emitConnectionOpen({ + sessionId: 'session-123', + workspaceId: 'workspace-456', + driverConfig, + latencyMs: 100, + }); }); - it('should not emit error events', () => { - const emitter = makeEmitter(false); - const listener = sinon.stub(); - emitter.on(TelemetryEventType.ERROR, listener); + it('should not emit when telemetry is disabled', () => { + const disabledContext = { + getLogger: () => logger, + getConfig: () => ({ + telemetryEnabled: false, + directResultsDefaultMaxRows: 10000, + fetchChunkDefaultMaxRows: 100000, + socketTimeout: 900000, + retryMaxAttempts: 30, + retriesTimeout: 900000, + retryDelayMin: 1000, + retryDelayMax: 30000, + useCloudFetch: true, + cloudFetchConcurrentDownloads: 10, + cloudFetchSpeedThresholdMBps: 0, + useLZ4Compression: true, + 
}), + } as any; + + const disabledEmitter = new TelemetryEventEmitter(disabledContext); + let eventEmitted = false; + + disabledEmitter.on(TelemetryEventType.CONNECTION_OPEN, () => { + eventEmitted = true; + }); - emitter.emitError({ errorName: 'SomeError', errorMessage: 'msg', isTerminal: false }); + disabledEmitter.emitConnectionOpen({ + sessionId: 'session-123', + workspaceId: 'workspace-456', + driverConfig: {} as DriverConfiguration, + latencyMs: 100, + }); - expect(listener.called).to.be.false; + expect(eventEmitted).to.be.false; }); - }); - describe('emitConnectionOpen()', () => { - it('should emit a CONNECTION_OPEN event with correct fields', () => { - const emitter = makeEmitter(true); - const listener = sinon.stub(); - emitter.on(TelemetryEventType.CONNECTION_OPEN, listener); + it('should swallow exceptions and log at debug level', () => { + // Force an exception by emitting before adding any listeners + // Then make emit throw by adding a throwing listener + emitter.on(TelemetryEventType.CONNECTION_OPEN, () => { + throw new Error('Test error'); + }); - emitter.emitConnectionOpen({ sessionId: 's1', workspaceId: 'w1', driverConfig: {} as any }); + emitter.emitConnectionOpen({ + sessionId: 'session-123', + workspaceId: 'workspace-456', + driverConfig: {} as DriverConfiguration, + latencyMs: 100, + }); - expect(listener.calledOnce).to.be.true; - const event = listener.firstCall.args[0]; - expect(event.eventType).to.equal(TelemetryEventType.CONNECTION_OPEN); - expect(event.sessionId).to.equal('s1'); - expect(event.workspaceId).to.equal('w1'); - expect(event.timestamp).to.be.a('number'); + expect((logger.log as sinon.SinonStub).calledWith(LogLevel.debug)).to.be.true; + expect((logger.log as sinon.SinonStub).args[0][1]).to.include('Error emitting connection event'); }); - it('should swallow and log exceptions from listeners', () => { - const context = new ClientContextStub({ telemetryEnabled: true } as any); - const logSpy = sinon.spy(context.logger, 'log'); - 
const emitter = new TelemetryEventEmitter(context); - + it('should not log at warn or error level', () => { emitter.on(TelemetryEventType.CONNECTION_OPEN, () => { - throw new Error('listener boom'); + throw new Error('Test error'); }); - expect(() => - emitter.emitConnectionOpen({ sessionId: 's1', workspaceId: 'w1', driverConfig: {} as any }), - ).to.not.throw(); - expect(logSpy.calledWith(LogLevel.debug, sinon.match(/listener boom/))).to.be.true; + emitter.emitConnectionOpen({ + sessionId: 'session-123', + workspaceId: 'workspace-456', + driverConfig: {} as DriverConfiguration, + latencyMs: 100, + }); - logSpy.restore(); + const logStub = logger.log as sinon.SinonStub; + for (let i = 0; i < logStub.callCount; i++) { + const level = logStub.args[i][0]; + expect(level).to.not.equal(LogLevel.warn); + expect(level).to.not.equal(LogLevel.error); + } }); }); - describe('emitStatementStart()', () => { - it('should emit a STATEMENT_START event with correct fields', () => { - const emitter = makeEmitter(true); - const listener = sinon.stub(); - emitter.on(TelemetryEventType.STATEMENT_START, listener); + describe('emitStatementStart', () => { + it('should emit statement.start event with correct data', (done) => { + emitter.on(TelemetryEventType.STATEMENT_START, (event: TelemetryEvent) => { + expect(event.eventType).to.equal(TelemetryEventType.STATEMENT_START); + expect(event.statementId).to.equal('stmt-789'); + expect(event.sessionId).to.equal('session-123'); + expect(event.operationType).to.equal('SELECT'); + expect(event.timestamp).to.be.a('number'); + done(); + }); + + emitter.emitStatementStart({ + statementId: 'stmt-789', + sessionId: 'session-123', + operationType: 'SELECT', + }); + }); + + it('should emit without operationType', (done) => { + emitter.on(TelemetryEventType.STATEMENT_START, (event: TelemetryEvent) => { + expect(event.eventType).to.equal(TelemetryEventType.STATEMENT_START); + expect(event.statementId).to.equal('stmt-789'); + 
expect(event.sessionId).to.equal('session-123'); + expect(event.operationType).to.be.undefined; + done(); + }); + + emitter.emitStatementStart({ + statementId: 'stmt-789', + sessionId: 'session-123', + }); + }); + + it('should not emit when telemetry is disabled', () => { + const disabledContext = { + getLogger: () => logger, + getConfig: () => ({ telemetryEnabled: false }), + } as any; + + const disabledEmitter = new TelemetryEventEmitter(disabledContext); + let eventEmitted = false; + + disabledEmitter.on(TelemetryEventType.STATEMENT_START, () => { + eventEmitted = true; + }); + + disabledEmitter.emitStatementStart({ + statementId: 'stmt-789', + sessionId: 'session-123', + }); + + expect(eventEmitted).to.be.false; + }); + + it('should swallow exceptions and log at debug level', () => { + emitter.on(TelemetryEventType.STATEMENT_START, () => { + throw new Error('Test error'); + }); - emitter.emitStatementStart({ statementId: 'st1', sessionId: 's1', operationType: 'SELECT' }); + emitter.emitStatementStart({ + statementId: 'stmt-789', + sessionId: 'session-123', + }); - expect(listener.calledOnce).to.be.true; - const event = listener.firstCall.args[0]; - expect(event.eventType).to.equal(TelemetryEventType.STATEMENT_START); - expect(event.statementId).to.equal('st1'); - expect(event.operationType).to.equal('SELECT'); + expect((logger.log as sinon.SinonStub).calledWith(LogLevel.debug)).to.be.true; + expect((logger.log as sinon.SinonStub).args[0][1]).to.include('Error emitting statement start'); }); }); - describe('emitStatementComplete()', () => { - it('should emit a STATEMENT_COMPLETE event with correct fields', () => { - const emitter = makeEmitter(true); - const listener = sinon.stub(); - emitter.on(TelemetryEventType.STATEMENT_COMPLETE, listener); + describe('emitStatementComplete', () => { + it('should emit statement.complete event with all data fields', (done) => { + emitter.on(TelemetryEventType.STATEMENT_COMPLETE, (event: TelemetryEvent) => { + 
expect(event.eventType).to.equal(TelemetryEventType.STATEMENT_COMPLETE); + expect(event.statementId).to.equal('stmt-789'); + expect(event.sessionId).to.equal('session-123'); + expect(event.latencyMs).to.equal(1500); + expect(event.resultFormat).to.equal('cloudfetch'); + expect(event.chunkCount).to.equal(5); + expect(event.bytesDownloaded).to.equal(1024000); + expect(event.pollCount).to.equal(3); + expect(event.timestamp).to.be.a('number'); + done(); + }); emitter.emitStatementComplete({ - statementId: 'st1', - sessionId: 's1', - latencyMs: 123, - resultFormat: 'arrow', - chunkCount: 2, - bytesDownloaded: 1024, + statementId: 'stmt-789', + sessionId: 'session-123', + latencyMs: 1500, + resultFormat: 'cloudfetch', + chunkCount: 5, + bytesDownloaded: 1024000, pollCount: 3, }); + }); + + it('should emit with minimal data', (done) => { + emitter.on(TelemetryEventType.STATEMENT_COMPLETE, (event: TelemetryEvent) => { + expect(event.eventType).to.equal(TelemetryEventType.STATEMENT_COMPLETE); + expect(event.statementId).to.equal('stmt-789'); + expect(event.sessionId).to.equal('session-123'); + expect(event.latencyMs).to.be.undefined; + expect(event.resultFormat).to.be.undefined; + done(); + }); + + emitter.emitStatementComplete({ + statementId: 'stmt-789', + sessionId: 'session-123', + }); + }); + + it('should not emit when telemetry is disabled', () => { + const disabledContext = { + getLogger: () => logger, + getConfig: () => ({ telemetryEnabled: false }), + } as any; + + const disabledEmitter = new TelemetryEventEmitter(disabledContext); + let eventEmitted = false; + + disabledEmitter.on(TelemetryEventType.STATEMENT_COMPLETE, () => { + eventEmitted = true; + }); + + disabledEmitter.emitStatementComplete({ + statementId: 'stmt-789', + sessionId: 'session-123', + }); + + expect(eventEmitted).to.be.false; + }); + + it('should swallow exceptions and log at debug level', () => { + emitter.on(TelemetryEventType.STATEMENT_COMPLETE, () => { + throw new Error('Test error'); + }); 
- expect(listener.calledOnce).to.be.true; - const event = listener.firstCall.args[0]; - expect(event.eventType).to.equal(TelemetryEventType.STATEMENT_COMPLETE); - expect(event.latencyMs).to.equal(123); - expect(event.chunkCount).to.equal(2); + emitter.emitStatementComplete({ + statementId: 'stmt-789', + sessionId: 'session-123', + }); + + expect((logger.log as sinon.SinonStub).calledWith(LogLevel.debug)).to.be.true; + expect((logger.log as sinon.SinonStub).args[0][1]).to.include('Error emitting statement complete'); }); }); - describe('emitCloudFetchChunk()', () => { - it('should emit a CLOUDFETCH_CHUNK event with correct fields', () => { - const emitter = makeEmitter(true); - const listener = sinon.stub(); - emitter.on(TelemetryEventType.CLOUDFETCH_CHUNK, listener); + describe('emitCloudFetchChunk', () => { + it('should emit cloudfetch.chunk event with correct data', (done) => { + emitter.on(TelemetryEventType.CLOUDFETCH_CHUNK, (event: TelemetryEvent) => { + expect(event.eventType).to.equal(TelemetryEventType.CLOUDFETCH_CHUNK); + expect(event.statementId).to.equal('stmt-789'); + expect(event.chunkIndex).to.equal(2); + expect(event.latencyMs).to.equal(250); + expect(event.bytes).to.equal(204800); + expect(event.compressed).to.be.true; + expect(event.timestamp).to.be.a('number'); + done(); + }); + + emitter.emitCloudFetchChunk({ + statementId: 'stmt-789', + chunkIndex: 2, + latencyMs: 250, + bytes: 204800, + compressed: true, + }); + }); + + it('should emit without optional fields', (done) => { + emitter.on(TelemetryEventType.CLOUDFETCH_CHUNK, (event: TelemetryEvent) => { + expect(event.eventType).to.equal(TelemetryEventType.CLOUDFETCH_CHUNK); + expect(event.statementId).to.equal('stmt-789'); + expect(event.chunkIndex).to.equal(0); + expect(event.bytes).to.equal(100000); + expect(event.latencyMs).to.be.undefined; + expect(event.compressed).to.be.undefined; + done(); + }); + + emitter.emitCloudFetchChunk({ + statementId: 'stmt-789', + chunkIndex: 0, + bytes: 100000, 
+ }); + }); + + it('should not emit when telemetry is disabled', () => { + const disabledContext = { + getLogger: () => logger, + getConfig: () => ({ telemetryEnabled: false }), + } as any; + + const disabledEmitter = new TelemetryEventEmitter(disabledContext); + let eventEmitted = false; + + disabledEmitter.on(TelemetryEventType.CLOUDFETCH_CHUNK, () => { + eventEmitted = true; + }); + + disabledEmitter.emitCloudFetchChunk({ + statementId: 'stmt-789', + chunkIndex: 0, + bytes: 100000, + }); + + expect(eventEmitted).to.be.false; + }); - emitter.emitCloudFetchChunk({ statementId: 'st1', chunkIndex: 0, bytes: 512, compressed: true }); + it('should swallow exceptions and log at debug level', () => { + emitter.on(TelemetryEventType.CLOUDFETCH_CHUNK, () => { + throw new Error('Test error'); + }); + + emitter.emitCloudFetchChunk({ + statementId: 'stmt-789', + chunkIndex: 0, + bytes: 100000, + }); - expect(listener.calledOnce).to.be.true; - const event = listener.firstCall.args[0]; - expect(event.eventType).to.equal(TelemetryEventType.CLOUDFETCH_CHUNK); - expect(event.bytes).to.equal(512); - expect(event.compressed).to.be.true; + expect((logger.log as sinon.SinonStub).calledWith(LogLevel.debug)).to.be.true; + expect((logger.log as sinon.SinonStub).args[0][1]).to.include('Error emitting cloudfetch chunk'); }); }); - describe('emitError()', () => { - it('should emit an ERROR event with correct fields', () => { - const emitter = makeEmitter(true); - const listener = sinon.stub(); - emitter.on(TelemetryEventType.ERROR, listener); + describe('emitError', () => { + it('should emit error event with all fields', (done) => { + emitter.on(TelemetryEventType.ERROR, (event: TelemetryEvent) => { + expect(event.eventType).to.equal(TelemetryEventType.ERROR); + expect(event.statementId).to.equal('stmt-789'); + expect(event.sessionId).to.equal('session-123'); + expect(event.errorName).to.equal('AuthenticationError'); + expect(event.errorMessage).to.equal('Invalid credentials'); + 
expect(event.isTerminal).to.be.true; + expect(event.timestamp).to.be.a('number'); + done(); + }); + + emitter.emitError({ + statementId: 'stmt-789', + sessionId: 'session-123', + errorName: 'AuthenticationError', + errorMessage: 'Invalid credentials', + isTerminal: true, + }); + }); + + it('should emit error event with minimal fields', (done) => { + emitter.on(TelemetryEventType.ERROR, (event: TelemetryEvent) => { + expect(event.eventType).to.equal(TelemetryEventType.ERROR); + expect(event.errorName).to.equal('TimeoutError'); + expect(event.errorMessage).to.equal('Request timed out'); + expect(event.isTerminal).to.be.false; + expect(event.statementId).to.be.undefined; + expect(event.sessionId).to.be.undefined; + done(); + }); + + emitter.emitError({ + errorName: 'TimeoutError', + errorMessage: 'Request timed out', + isTerminal: false, + }); + }); + + it('should not emit when telemetry is disabled', () => { + const disabledContext = { + getLogger: () => logger, + getConfig: () => ({ telemetryEnabled: false }), + } as any; + + const disabledEmitter = new TelemetryEventEmitter(disabledContext); + let eventEmitted = false; + + disabledEmitter.on(TelemetryEventType.ERROR, () => { + eventEmitted = true; + }); + + disabledEmitter.emitError({ + errorName: 'Error', + errorMessage: 'Test', + isTerminal: false, + }); + + expect(eventEmitted).to.be.false; + }); + + it('should swallow exceptions and log at debug level', () => { + emitter.on(TelemetryEventType.ERROR, () => { + throw new Error('Test error'); + }); emitter.emitError({ - statementId: 'st1', - sessionId: 's1', - errorName: 'NetworkError', - errorMessage: 'timeout', + errorName: 'Error', + errorMessage: 'Test', isTerminal: false, }); - expect(listener.calledOnce).to.be.true; - const event = listener.firstCall.args[0]; - expect(event.eventType).to.equal(TelemetryEventType.ERROR); - expect(event.errorName).to.equal('NetworkError'); - expect(event.isTerminal).to.be.false; + expect((logger.log as 
sinon.SinonStub).calledWith(LogLevel.debug)).to.be.true; + expect((logger.log as sinon.SinonStub).args[0][1]).to.include('Error emitting error event'); + }); + }); + + describe('exception swallowing', () => { + it('should never propagate exceptions to caller', () => { + emitter.on(TelemetryEventType.CONNECTION_OPEN, () => { + throw new Error('Critical error'); + }); + + expect(() => { + emitter.emitConnectionOpen({ + sessionId: 'session-123', + workspaceId: 'workspace-456', + driverConfig: {} as DriverConfiguration, + latencyMs: 100, + }); + }).to.not.throw(); + }); + + it('should swallow multiple listener exceptions', () => { + emitter.on(TelemetryEventType.STATEMENT_START, () => { + throw new Error('First listener error'); + }); + emitter.on(TelemetryEventType.STATEMENT_START, () => { + throw new Error('Second listener error'); + }); + + expect(() => { + emitter.emitStatementStart({ + statementId: 'stmt-789', + sessionId: 'session-123', + }); + }).to.not.throw(); }); - it('should emit a terminal ERROR event', () => { - const emitter = makeEmitter(true); - const listener = sinon.stub(); - emitter.on(TelemetryEventType.ERROR, listener); + it('should log only at debug level, never at warn or error', () => { + emitter.on(TelemetryEventType.STATEMENT_COMPLETE, () => { + throw new Error('Test error'); + }); + emitter.on(TelemetryEventType.CLOUDFETCH_CHUNK, () => { + throw new Error('Test error'); + }); + emitter.on(TelemetryEventType.ERROR, () => { + throw new Error('Test error'); + }); - emitter.emitError({ errorName: 'AuthenticationError', errorMessage: 'auth failed', isTerminal: true }); + emitter.emitStatementComplete({ + statementId: 'stmt-1', + sessionId: 'session-1', + }); + emitter.emitCloudFetchChunk({ + statementId: 'stmt-1', + chunkIndex: 0, + bytes: 1000, + }); + emitter.emitError({ + errorName: 'Error', + errorMessage: 'Test', + isTerminal: false, + }); - const event = listener.firstCall.args[0]; - expect(event.isTerminal).to.be.true; + const logStub = 
logger.log as sinon.SinonStub; + for (let i = 0; i < logStub.callCount; i++) { + const level = logStub.args[i][0]; + expect(level).to.equal(LogLevel.debug); + } }); }); - describe('logging level compliance', () => { - it('should never log at warn or error level', () => { - const context = new ClientContextStub({ telemetryEnabled: true } as any); - const logSpy = sinon.spy(context.logger, 'log'); - const emitter = new TelemetryEventEmitter(context); + describe('no console logging', () => { + it('should not use console.log', () => { + const consoleSpy = sinon.spy(console, 'log'); emitter.on(TelemetryEventType.CONNECTION_OPEN, () => { - throw new Error('boom'); + throw new Error('Test error'); + }); + + emitter.emitConnectionOpen({ + sessionId: 'session-123', + workspaceId: 'workspace-456', + driverConfig: {} as DriverConfiguration, + latencyMs: 100, + }); + + expect(consoleSpy.called).to.be.false; + consoleSpy.restore(); + }); + + it('should not use console.debug', () => { + const consoleSpy = sinon.spy(console, 'debug'); + + emitter.on(TelemetryEventType.STATEMENT_START, () => { + throw new Error('Test error'); + }); + + emitter.emitStatementStart({ + statementId: 'stmt-789', + sessionId: 'session-123', + }); + + expect(consoleSpy.called).to.be.false; + consoleSpy.restore(); + }); + + it('should not use console.error', () => { + const consoleSpy = sinon.spy(console, 'error'); + + emitter.on(TelemetryEventType.ERROR, () => { + throw new Error('Test error'); + }); + + emitter.emitError({ + errorName: 'Error', + errorMessage: 'Test', + isTerminal: true, + }); + + expect(consoleSpy.called).to.be.false; + consoleSpy.restore(); + }); + }); + + describe('respects telemetryEnabled flag', () => { + it('should respect flag from context.getConfig()', () => { + const customContext = { + getLogger: () => logger, + getConfig: () => ({ + telemetryEnabled: true, + directResultsDefaultMaxRows: 10000, + fetchChunkDefaultMaxRows: 100000, + socketTimeout: 900000, + retryMaxAttempts: 
30, + retriesTimeout: 900000, + retryDelayMin: 1000, + retryDelayMax: 30000, + useCloudFetch: true, + cloudFetchConcurrentDownloads: 10, + cloudFetchSpeedThresholdMBps: 0, + useLZ4Compression: true, + }), + } as any; + + const customEmitter = new TelemetryEventEmitter(customContext); + let eventCount = 0; + + customEmitter.on(TelemetryEventType.CONNECTION_OPEN, () => { + eventCount++; + }); + + customEmitter.emitConnectionOpen({ + sessionId: 'session-123', + workspaceId: 'workspace-456', + driverConfig: {} as DriverConfiguration, + latencyMs: 100, }); - emitter.emitConnectionOpen({ sessionId: 's1', workspaceId: 'w1', driverConfig: {} as any }); + expect(eventCount).to.equal(1); + }); + + it('should not emit when explicitly disabled', () => { + const disabledContext = { + getLogger: () => logger, + getConfig: () => ({ + telemetryEnabled: false, + }), + } as any; + + const disabledEmitter = new TelemetryEventEmitter(disabledContext); + let eventCount = 0; + + disabledEmitter.on(TelemetryEventType.CONNECTION_OPEN, () => { + eventCount++; + }); + disabledEmitter.on(TelemetryEventType.STATEMENT_START, () => { + eventCount++; + }); + disabledEmitter.on(TelemetryEventType.STATEMENT_COMPLETE, () => { + eventCount++; + }); + disabledEmitter.on(TelemetryEventType.CLOUDFETCH_CHUNK, () => { + eventCount++; + }); + disabledEmitter.on(TelemetryEventType.ERROR, () => { + eventCount++; + }); - expect(logSpy.neverCalledWith(LogLevel.error, sinon.match.any)).to.be.true; - expect(logSpy.neverCalledWith(LogLevel.warn, sinon.match.any)).to.be.true; + disabledEmitter.emitConnectionOpen({ + sessionId: 'session-123', + workspaceId: 'workspace-456', + driverConfig: {} as DriverConfiguration, + latencyMs: 100, + }); + disabledEmitter.emitStatementStart({ + statementId: 'stmt-789', + sessionId: 'session-123', + }); + disabledEmitter.emitStatementComplete({ + statementId: 'stmt-789', + sessionId: 'session-123', + }); + disabledEmitter.emitCloudFetchChunk({ + statementId: 'stmt-789', + 
chunkIndex: 0, + bytes: 1000, + }); + disabledEmitter.emitError({ + errorName: 'Error', + errorMessage: 'Test', + isTerminal: false, + }); - logSpy.restore(); + expect(eventCount).to.equal(0); }); }); });