/**
 * Test fixture factory: produces `count` export rows.
 *
 * Only `userId`, `email` and `name` vary with the row index; every other
 * field is constant, so all rows fall into the same quasi-identifier group
 * unless `overrides` says otherwise.
 *
 * @param count     Number of rows to generate.
 * @param overrides Fields spread over every generated row (last write wins).
 * @returns The generated rows, in index order.
 */
function makeRows(count: number, overrides: Record<string, unknown> = {}) {
  const buildRow = (index: number) => ({
    userId: `user-${index}`,
    email: `user${index}@example.com`,
    name: `User ${index}`,
    ipAddress: '192.168.1.100',
    createdAt: '2025-01-15',
    country: 'US',
    planId: 'plan-basic',
    amount: 9.99,
    ...overrides,
  });

  const rows: ReturnType<typeof buildRow>[] = [];
  for (let index = 0; index < count; index += 1) {
    rows.push(buildRow(index));
  }
  return rows;
}
string', () => { + const result = strategy.apply('test@example.com', 'salt'); + expect(result).toHaveLength(64); + expect(result).toMatch(/^[0-9a-f]+$/); + }); + + test('different salt → different hash', () => { + const a = strategy.apply('alice', 'salt1'); + const b = strategy.apply('alice', 'salt2'); + expect(a).not.toBe(b); + }); + + test('same value + salt → same hash (deterministic)', () => { + expect(strategy.apply('alice', 'fixed')).toBe(strategy.apply('alice', 'fixed')); + }); + + test('hashed value does not contain original', () => { + const result = strategy.apply('plaintext@example.com', 'anysalt'); + expect(result).not.toContain('plaintext'); + expect(result).not.toContain('@'); + }); +}); + +describe('TruncateStrategy', () => { + const strategy = new TruncateStrategy(); + + test('truncates IPv4: last octet replaced with *', () => { + expect(strategy.apply('192.168.1.100')).toBe('192.168.1.*'); + }); + + test('truncates another IPv4', () => { + expect(strategy.apply('10.0.0.1')).toBe('10.0.0.*'); + }); + + test('returns empty string unchanged', () => { + expect(strategy.apply('')).toBe(''); + }); + + test('truncated IP no longer contains the last octet', () => { + expect(strategy.apply('203.0.113.42')).not.toContain('42'); + }); +}); + +describe('PerturbStrategy', () => { + const strategy = new PerturbStrategy(3); + + test('returns a valid date string', () => { + const result = strategy.apply('2025-06-15', 'salt'); + expect(result).toMatch(/^\d{4}-\d{2}-\d{2}$/); + }); + + test('perturbed date is within ±3 days of original', () => { + const original = new Date('2025-06-15'); + const result = strategy.apply('2025-06-15', 'somesalt'); + const perturbed = new Date(result); + const diffDays = Math.abs((perturbed.getTime() - original.getTime()) / 86_400_000); + expect(diffDays).toBeLessThanOrEqual(3); + }); + + test('deterministic: same input + salt → same output', () => { + expect(strategy.apply('2025-06-15', 'fixed')).toBe(strategy.apply('2025-06-15', 
'fixed')); + }); + + test('returns invalid date values unchanged', () => { + expect(strategy.apply('not-a-date', 'salt')).toBe('not-a-date'); + }); +}); + +// --------------------------------------------------------------------------- +// AnonymizationPipeline: export levels +// --------------------------------------------------------------------------- + +describe('AnonymizationPipeline – full level', () => { + const pipeline = new AnonymizationPipeline(); + + test('full export passes rows through unchanged', () => { + const rows = makeRows(10); + const { rows: out } = pipeline.run(rows, 'full'); + expect(out[0].email).toBe('user0@example.com'); + expect(out[0].name).toBe('User 0'); + }); + + test('full export has no transformed fields', () => { + const { transformedFields } = pipeline.run(makeRows(10), 'full'); + expect(transformedFields).toHaveLength(0); + }); +}); + +describe('AnonymizationPipeline – pseudonymized level', () => { + const pipeline = new AnonymizationPipeline(); + + test('email is masked', () => { + const rows = makeRows(10); + const { rows: out } = pipeline.run(rows, 'pseudonymized'); + expect(out[0].email).not.toBe('user0@example.com'); + expect(String(out[0].email)).toContain('@example.com'); + expect(String(out[0].email)).toMatch(/^\w\*+@example\.com$/); + }); + + test('name is hashed', () => { + const rows = makeRows(10); + const { rows: out } = pipeline.run(rows, 'pseudonymized'); + expect(String(out[0].name)).toHaveLength(64); + }); + + test('non-PII field (amount) passes through', () => { + const rows = makeRows(10); + const { rows: out } = pipeline.run(rows, 'pseudonymized'); + expect(out[0].amount).toBe(9.99); + }); + + test('quasi-identifiers pass through in pseudonymized mode', () => { + const rows = makeRows(10); + const { rows: out } = pipeline.run(rows, 'pseudonymized'); + // country and planId are quasi, not direct PII → unchanged + expect(out[0].country).toBe('US'); + expect(out[0].planId).toBe('plan-basic'); + }); +}); + 
+describe('AnonymizationPipeline – anonymized level', () => { + const pipeline = new AnonymizationPipeline(); + + test('email is masked', () => { + const rows = makeRows(10); + const { rows: out } = pipeline.run(rows, 'anonymized'); + expect(String(out[0].email)).toMatch(/^\w\*+@example\.com$/); + }); + + test('name is hashed', () => { + const rows = makeRows(10); + const { rows: out } = pipeline.run(rows, 'anonymized'); + expect(String(out[0].name)).toHaveLength(64); + }); + + test('IP is truncated', () => { + const rows = makeRows(10); + const { rows: out } = pipeline.run(rows, 'anonymized'); + expect(out[0].ipAddress).toBe('192.168.1.*'); + }); + + test('date is perturbed (within ±3 days)', () => { + const rows = makeRows(10); + const { rows: out } = pipeline.run(rows, 'anonymized'); + const original = new Date('2025-01-15'); + const perturbed = new Date(String(out[0].createdAt)); + const diffDays = Math.abs((perturbed.getTime() - original.getTime()) / 86_400_000); + expect(diffDays).toBeLessThanOrEqual(3); + }); + + test('export salt is discarded for anonymized exports', () => { + const { exportSalt } = pipeline.run(makeRows(10), 'anonymized'); + expect(exportSalt).toBe('[discarded]'); + }); + + test('export salt is retained for pseudonymized exports', () => { + const { exportSalt } = pipeline.run(makeRows(10), 'pseudonymized'); + expect(exportSalt).not.toBe('[discarded]'); + expect(exportSalt).toHaveLength(32); + }); +}); + +// --------------------------------------------------------------------------- +// k-anonymity checks +// --------------------------------------------------------------------------- + +describe('k-anonymity validation', () => { + const pipeline = new AnonymizationPipeline(); + + test('no warning when every quasi-id group has ≥ 5 members', () => { + // 10 rows, all identical quasi-identifiers → one group of 10 → k=10 ≥ 5 + const rows = makeRows(10); + const { warnings } = pipeline.run(rows, 'full'); + const kWarn = warnings.filter((w) => 
w.includes('k-anonymity')); + expect(kWarn).toHaveLength(0); + }); + + test('warning when a group has fewer than 5 members', () => { + // Each row has a unique country → each group size = 1 < 5 + const rows = makeRows(10, {}).map((r, i) => ({ ...r, country: `country-${i}` })); + const { warnings } = pipeline.run(rows, 'full'); + const kWarn = warnings.find((w) => w.includes('k-anonymity')); + expect(kWarn).toBeDefined(); + }); + + test('k-anonymity warning contains violation count', () => { + const rows = makeRows(3, { country: 'unique-group' }); + const { warnings } = pipeline.run(rows, 'anonymized'); + const kWarn = warnings.find((w) => w.includes('k-anonymity')); + expect(kWarn).toMatch(/fewer than 5/); + }); +}); + +// --------------------------------------------------------------------------- +// Re-identification risk: small dataset +// --------------------------------------------------------------------------- + +describe('Re-identification risk – small dataset', () => { + const pipeline = new AnonymizationPipeline(); + + test('warning issued for datasets with < 20 records', () => { + const rows = makeRows(5); + const { warnings } = pipeline.run(rows, 'anonymized'); + const w = warnings.find((msg) => msg.includes('re-identification')); + expect(w).toBeDefined(); + }); + + test('no small-dataset warning for ≥ 20 records', () => { + const rows = makeRows(25); + const { warnings } = pipeline.run(rows, 'anonymized'); + const w = warnings.find((msg) => msg.includes('re-identification')); + expect(w).toBeUndefined(); + }); + + test('warning includes record count', () => { + const { warnings } = pipeline.run(makeRows(7), 'anonymized'); + const w = warnings.find((msg) => msg.includes('re-identification')); + expect(w).toContain('7 records'); + }); + + test('empty dataset produces no small-dataset warning', () => { + const { warnings } = pipeline.run([], 'anonymized'); + const w = warnings.find((msg) => msg.includes('re-identification')); + expect(w).toBeUndefined(); 
+ }); +}); + +// --------------------------------------------------------------------------- +// Preview +// --------------------------------------------------------------------------- + +describe('AnonymizationPipeline – preview', () => { + const pipeline = new AnonymizationPipeline(); + + test('preview returns at most 5 rows', () => { + const result = pipeline.preview(makeRows(100), 'anonymized'); + expect(result.length).toBe(5); + }); + + test('preview applies anonymization', () => { + const result = pipeline.preview(makeRows(10), 'anonymized'); + expect(String(result[0].email)).not.toBe('user0@example.com'); + }); +}); + +// --------------------------------------------------------------------------- +// Controller: authorization +// --------------------------------------------------------------------------- + +describe('AnonymizationController – authorization', () => { + const controller = new AnonymizationController(); + const rows = makeRows(25); + + test('admin can request full export', () => { + expect(() => controller.export(rows, { level: 'full', requestedBy: 'admin-1' }, 'admin')).not.toThrow(); + }); + + test('analytics role cannot request full export', () => { + expect(() => controller.export(rows, { level: 'full', requestedBy: 'analyst-1' }, 'analytics')).toThrow(/not permitted/); + }); + + test('third-party role can only request anonymized export', () => { + expect(() => controller.export(rows, { level: 'pseudonymized', requestedBy: 'ext-1' }, 'third-party')).toThrow(/not permitted/); + expect(() => controller.export(rows, { level: 'anonymized', requestedBy: 'ext-1' }, 'third-party')).not.toThrow(); + }); + + test('analytics role can request pseudonymized or anonymized', () => { + expect(() => controller.export(rows, { level: 'pseudonymized', requestedBy: 'a' }, 'analytics')).not.toThrow(); + expect(() => controller.export(rows, { level: 'anonymized', requestedBy: 'a' }, 'analytics')).not.toThrow(); + }); +}); + +describe('AnonymizationController – 
audit log', () => { + const controller = new AnonymizationController(); + + test('audit log is populated after export', () => { + controller.export(makeRows(25), { level: 'anonymized', requestedBy: 'tester', label: 'test run' }, 'admin'); + const log = controller.getAuditLog(); + const entry = log.find((e) => e.label === 'test run'); + expect(entry).toBeDefined(); + expect(entry!.level).toBe('anonymized'); + expect(entry!.requestedBy).toBe('tester'); + expect(entry!.rowCount).toBe(25); + expect(entry!.timestamp).toMatch(/^\d{4}-\d{2}-\d{2}T/); + }); + + test('audit entry includes transformed fields', () => { + controller.export(makeRows(25), { level: 'anonymized', requestedBy: 'tester2' }, 'admin'); + const log = controller.getAuditLog(); + const entry = log.find((e) => e.requestedBy === 'tester2'); + expect(entry!.transformedFields).toContain('email'); + expect(entry!.transformedFields).toContain('name'); + }); +}); + +describe('AnonymizationController – getPiiFieldRegistry', () => { + const controller = new AnonymizationController(); + + test('returns all PII field definitions', () => { + const fields = controller.getPiiFieldRegistry(); + expect(fields.length).toBeGreaterThan(0); + const emailDef = fields.find((f) => f.field === 'email'); + expect(emailDef).toBeDefined(); + expect(emailDef!.strategy).toBe('mask'); + }); +}); diff --git a/backend/analytics/controller/anonymizationController.ts b/backend/analytics/controller/anonymizationController.ts new file mode 100644 index 00000000..778659bc --- /dev/null +++ b/backend/analytics/controller/anonymizationController.ts @@ -0,0 +1,157 @@ +/** + * AnonymizationController + * + * Provides the configuration API for analytics export anonymization and records + * a structured audit log entry for every export request. 
/**
 * Configuration API for analytics exports.
 *
 * Every export is authorized against the caller's role, run through the
 * shared anonymization pipeline, and recorded both in the in-memory audit log
 * and in the shared PII audit trail.
 */
export class AnonymizationController {
  /**
   * Run an anonymized analytics export.
   *
   * @param rows   Raw data rows to be exported
   * @param config Export configuration (level, requester)
   * @param role   Caller's role – used to enforce level permissions
   * @returns The export id, the pipeline result and the audit entry.
   * @throws Error when `role` is not permitted to request `config.level`.
   */
  export(
    rows: Record<string, unknown>[],
    config: ExportConfig,
    role: string
  ): ExportResponse {
    this.authorize(config.level, role, config.requestedBy);

    const result = anonymizationPipeline.run(rows, config.level);
    const audit = this.recordAudit(result, config);

    return { exportId: audit.exportId, result, audit };
  }

  /**
   * Preview up to 5 anonymized sample rows without committing an audit entry.
   *
   * @throws Error when `role` is not permitted to request `level`.
   */
  previewExport(
    rows: Record<string, unknown>[],
    level: ExportLevel,
    role: string,
    requestedBy: string
  ): Record<string, unknown>[] {
    this.authorize(level, role, requestedBy);
    return anonymizationPipeline.preview(rows, level);
  }

  /**
   * Returns the full export audit log.
   *
   * Entries are returned as copies so callers cannot alter recorded history.
   */
  getAuditLog(): ExportAuditEntry[] {
    return _auditLog.map((entry) => this.snapshot(entry));
  }

  /** Returns the PII field registry for UI configuration screens. */
  getPiiFieldRegistry() {
    return getPiiFields();
  }

  // ---------------------------------------------------------------------------

  /**
   * Throws unless `role` may request `level`.
   *
   * Roles without an entry in LEVEL_PERMISSIONS fall back to anonymized-only
   * access. The lookup is restricted to own properties so that role strings
   * such as 'constructor' or '__proto__' cannot resolve to members inherited
   * from Object.prototype (which previously surfaced as a TypeError).
   */
  private authorize(level: ExportLevel, role: string, requestedBy: string): void {
    const configured = Object.prototype.hasOwnProperty.call(LEVEL_PERMISSIONS, role)
      ? LEVEL_PERMISSIONS[role]
      : undefined;
    const allowed: readonly ExportLevel[] = configured ?? ['anonymized'];

    if (!allowed.includes(level)) {
      throw new Error(
        `Role '${role}' (${requestedBy}) is not permitted to request '${level}' exports. ` +
          `Allowed: ${allowed.join(', ')}`
      );
    }
  }

  /**
   * Builds the audit entry for a finished export, stores a private copy in
   * the in-memory log and mirrors the event to the shared PII audit trail.
   */
  private recordAudit(
    result: AnonymizationResult,
    config: ExportConfig
  ): ExportAuditEntry {
    const exportId = generateExportId();
    const entry: ExportAuditEntry = {
      exportId,
      requestedBy: config.requestedBy,
      level: config.level,
      label: config.label,
      timestamp: new Date().toISOString(),
      // Copy the arrays: the pipeline result is handed to the caller, and the
      // audit record must not change if the caller mutates it afterwards.
      transformedFields: [...result.transformedFields],
      warnings: [...result.warnings],
      rowCount: result.rows.length,
    };

    // The log keeps its own snapshot; `entry` itself is returned to the caller.
    _auditLog.push(this.snapshot(entry));

    // Also write to the shared PII audit trail
    piiAuditService.logPiiAccess(
      'pii.exported',
      config.requestedBy,
      exportId,
      'analytics_export',
      [...result.transformedFields],
      {
        level: config.level,
        label: config.label,
        rowCount: result.rows.length,
        warnings: [...result.warnings],
        // Only record whether a salt exists, never the salt itself.
        exportSalt: result.exportSalt === '[discarded]' ? '[discarded]' : '[retained]',
      }
    );

    return entry;
  }

  /** Returns an independent copy of an audit entry (including its arrays). */
  private snapshot(entry: ExportAuditEntry): ExportAuditEntry {
    return {
      ...entry,
      transformedFields: [...entry.transformedFields],
      warnings: [...entry.warnings],
    };
  }
}
+const STRATEGY_MAP: Record = { + mask: new MaskStrategy(), + hash: new HashStrategy(), + truncate: new TruncateStrategy(), + perturb: new PerturbStrategy(), + none: { apply: (v) => v }, +}; + +export class AnonymizationPipeline { + /** + * Processes rows according to the requested export level. + * + * - `full` – no transformation (admin only) + * - `pseudonymized` – direct PII is hashed with a per-export salt (reversible + * only if the salt is retained; default: salt is kept) + * - `anonymized` – all PII strategies applied; salt is discarded after use + */ + run( + rows: Record[], + level: ExportLevel + ): AnonymizationResult { + const warnings: string[] = []; + const exportSalt = randomBytes(16).toString('hex'); + + // Small dataset warning + if (rows.length > 0 && rows.length < SMALL_DATASET_THRESHOLD) { + warnings.push( + `Small dataset (${rows.length} records): re-identification risk is elevated. ` + + 'Consider aggregating before sharing.' + ); + } + + if (level === 'full') { + this.checkKAnonymity(rows, warnings); + return { + rows, + transformedFields: [], + exportSalt, + warnings, + }; + } + + const transformedFields = new Set(); + const anonymizedRows = rows.map((row) => + this.transformRow(row, level, exportSalt, transformedFields) + ); + + this.checkKAnonymity(anonymizedRows, warnings); + + return { + rows: anonymizedRows, + transformedFields: Array.from(transformedFields), + exportSalt: level === 'anonymized' ? '[discarded]' : exportSalt, + warnings, + }; + } + + /** Returns a sample (up to 5 rows) of anonymized data for preview. 
*/ + preview( + rows: Record[], + level: ExportLevel + ): Record[] { + const sample = rows.slice(0, 5); + return this.run(sample, level).rows; + } + + // --------------------------------------------------------------------------- + // Private helpers + // --------------------------------------------------------------------------- + + private transformRow( + row: Record, + level: ExportLevel, + salt: string, + transformedFields: Set + ): Record { + const result: Record = {}; + + for (const [key, value] of Object.entries(row)) { + const def = PII_REGISTRY[key]; + if (!def || def.strategy === 'none') { + result[key] = value; + continue; + } + + // For pseudonymized, only transform direct PII; quasi-identifiers pass through + if (level === 'pseudonymized' && def.sensitivity !== 'direct') { + result[key] = value; + continue; + } + + const strategy = STRATEGY_MAP[def.strategy]; + result[key] = strategy.apply(String(value ?? ''), salt); + transformedFields.add(key); + } + + return result; + } + + private checkKAnonymity(rows: Record[], warnings: string[]): void { + if (rows.length === 0) return; + + const quasiIds = getQuasiIdentifiers(); + const groups = new Map(); + + for (const row of rows) { + const key = quasiIds + .map((q) => `${q}=${String(row[q] ?? '')}`) + .join('|'); + groups.set(key, (groups.get(key) ?? 0) + 1); + } + + const violations = Array.from(groups.values()).filter( + (count) => count < K_ANONYMITY_THRESHOLD + ); + + if (violations.length > 0) { + warnings.push( + `k-anonymity violation: ${violations.length} quasi-identifier group(s) have fewer than ` + + `${K_ANONYMITY_THRESHOLD} records. 
/** Contract implemented by every field-level anonymization strategy. */
export interface AnonymizationStrategy {
  /**
   * @param value Stringified field value.
   * @param salt  Optional per-export salt; ignored by strategies that do not hash.
   * @returns The anonymized representation of `value`.
   */
  apply(value: string, salt?: string): string;
}

/**
 * MaskStrategy: replaces characters after the first with `*`.
 * Email: j***@example.com | Other: first char + `***`
 *
 * At least three `*` are emitted; longer inputs get one `*` per masked
 * character, so the output still reflects the length of long values.
 * Characters are counted as Unicode code points so that a leading astral
 * character (e.g. `𠮷`) is kept whole instead of being cut into a lone
 * surrogate.
 */
export class MaskStrategy implements AnonymizationStrategy {
  apply(value: string): string {
    if (!value) return value;
    const atIndex = value.indexOf('@');
    if (atIndex > 0) {
      // Email-like: mask the local part, keep '@' and the domain verbatim.
      return MaskStrategy.maskKeepingFirst(value.slice(0, atIndex)) + value.slice(atIndex);
    }
    return MaskStrategy.maskKeepingFirst(value);
  }

  /** Keeps the first code point of `text` and masks the remainder. */
  private static maskKeepingFirst(text: string): string {
    const [first = '', ...rest] = Array.from(text);
    return first + '*'.repeat(Math.max(rest.length, 3));
  }
}

/**
 * HashStrategy: SHA-256 of (value + salt), returns hex digest.
 * Irreversible when salt is per-export and discarded.
 */
export class HashStrategy implements AnonymizationStrategy {
  /** @returns 64 lowercase hex characters. */
  apply(value: string, salt: string = ''): string {
    return createHash('sha256')
      .update(value + salt)
      .digest('hex');
  }
}

/**
 * TruncateStrategy: removes the last octet(s) from IP addresses.
 * 192.168.1.100 → 192.168.1.*
 *
 * Matching order: dotted-quad IPv4, then colon-separated hex (last group
 * replaced by `0`), then any value containing a dot (last segment replaced
 * by `*`), and finally a generic fallback that keeps the first half of the
 * value followed by `***`.
 */
export class TruncateStrategy implements AnonymizationStrategy {
  private static readonly IPV4 = /^(\d{1,3}\.\d{1,3}\.\d{1,3})\.\d{1,3}$/;
  private static readonly IPV6 = /^((?:[0-9a-fA-F:]+:))[0-9a-fA-F]+$/;

  apply(value: string): string {
    if (!value) return value;
    // IPv4
    const ipv4 = TruncateStrategy.IPV4.exec(value);
    if (ipv4) return ipv4[1] + '.*';
    // IPv6: zero last group
    const ipv6 = TruncateStrategy.IPV6.exec(value);
    if (ipv6) return ipv6[1] + '0';
    // Generic: drop last segment after the last delimiter
    const lastDot = value.lastIndexOf('.');
    if (lastDot > 0) return value.slice(0, lastDot) + '.*';
    return value.slice(0, Math.ceil(value.length / 2)) + '***';
  }
}

/**
 * PerturbStrategy: shifts date values by an offset in [-maxDays, +maxDays] days.
 * The offset is deterministically seeded from (value + salt) so the same
 * input always produces the same perturbed output within a single export.
 *
 * The result is always a calendar date (`YYYY-MM-DD`, UTC); any time-of-day
 * component of the input is dropped. Values that cannot be parsed as a date
 * are returned unchanged.
 */
export class PerturbStrategy implements AnonymizationStrategy {
  private readonly maxDays: number;

  constructor(maxDays = 3) {
    this.maxDays = maxDays;
  }

  apply(value: string, salt: string = ''): string {
    if (!value) return value;
    const date = new Date(value);
    if (isNaN(date.getTime())) return value;

    // Deterministic offset: hash → 0..1 → scale to [-maxDays, +maxDays]
    const hashHex = createHash('sha256').update(value + salt).digest('hex');
    const fraction = parseInt(hashHex.slice(0, 8), 16) / 0xffffffff;
    const offsetDays = Math.round((fraction * 2 - 1) * this.maxDays);

    // Shift in UTC: the value is formatted in UTC below, and local-time
    // arithmetic (setDate) moves the instant by 23 or 25 hours when the shift
    // crosses a DST change, which could push the result one extra day away.
    date.setUTCDate(date.getUTCDate() + offsetDays);
    return date.toISOString().split('T')[0]; // YYYY-MM-DD
  }
}
/** Name of the transformation applied to a field when it is anonymized. */
export type AnonymizationStrategyType = 'mask' | 'hash' | 'truncate' | 'perturb' | 'none';
/** How strongly a field identifies a person. */
export type SensitivityLevel = 'direct' | 'quasi' | 'non-sensitive';

export interface PiiFieldDefinition {
  field: string;
  sensitivity: SensitivityLevel;
  strategy: AnonymizationStrategyType;
  /** Whether the field is a quasi-identifier used in k-anonymity checks */
  quasiIdentifier: boolean;
}

/**
 * Central registry of all PII fields and their anonymization requirements.
 *
 * Keys are field names. Look entries up through `getFieldDefinition` /
 * `isPiiField` rather than by plain indexing: this is an ordinary object, so
 * names such as `constructor` would otherwise resolve to inherited members.
 */
export const PII_REGISTRY: Record<string, PiiFieldDefinition> = {
  email: {
    field: 'email',
    sensitivity: 'direct',
    strategy: 'mask',
    quasiIdentifier: false,
  },
  name: {
    field: 'name',
    sensitivity: 'direct',
    strategy: 'hash',
    quasiIdentifier: false,
  },
  ipAddress: {
    field: 'ipAddress',
    sensitivity: 'direct',
    strategy: 'truncate',
    quasiIdentifier: true,
  },
  createdAt: {
    field: 'createdAt',
    sensitivity: 'quasi',
    strategy: 'perturb',
    quasiIdentifier: true,
  },
  subscriptionStartDate: {
    field: 'subscriptionStartDate',
    sensitivity: 'quasi',
    strategy: 'perturb',
    quasiIdentifier: true,
  },
  country: {
    field: 'country',
    sensitivity: 'quasi',
    strategy: 'none',
    quasiIdentifier: true,
  },
  planId: {
    field: 'planId',
    sensitivity: 'quasi',
    strategy: 'none',
    quasiIdentifier: true,
  },
  amount: {
    field: 'amount',
    sensitivity: 'quasi',
    strategy: 'none',
    quasiIdentifier: true,
  },
  userId: {
    field: 'userId',
    sensitivity: 'direct',
    strategy: 'hash',
    quasiIdentifier: false,
  },
  phoneNumber: {
    field: 'phoneNumber',
    sensitivity: 'direct',
    strategy: 'mask',
    quasiIdentifier: false,
  },
  address: {
    field: 'address',
    sensitivity: 'direct',
    strategy: 'mask',
    quasiIdentifier: false,
  },
};

/** Export level controls which fields are anonymized */
export type ExportLevel = 'full' | 'pseudonymized' | 'anonymized';

/**
 * Strategy types whose fields are passed through unchanged per export level.
 *
 * NOTE(review): the pipeline visible alongside this file does not read this
 * table (it decides from each field's `sensitivity` and `strategy`), so
 * confirm any consumer before relying on it.
 */
export const EXPORT_LEVEL_PASSTHROUGH: Record<ExportLevel, Set<AnonymizationStrategyType>> = {
  // Every strategy type is listed: nothing is transformed.
  full: new Set<AnonymizationStrategyType>(['mask', 'hash', 'truncate', 'perturb', 'none']),
  // Only fields whose strategy is 'none' pass through.
  pseudonymized: new Set<AnonymizationStrategyType>(['none']),
  // Only fields whose strategy is 'none' pass through.
  anonymized: new Set<AnonymizationStrategyType>(['none']),
};

/** Returns every registry entry, in declaration order. */
export function getPiiFields(): PiiFieldDefinition[] {
  return Object.values(PII_REGISTRY);
}

/** Returns the names of all fields flagged as quasi-identifiers, in declaration order. */
export function getQuasiIdentifiers(): string[] {
  return getPiiFields()
    .filter((f) => f.quasiIdentifier)
    .map((f) => f.field);
}

/**
 * Whether `field` has an entry in the registry.
 *
 * Uses an own-property check: the `in` operator also reports inherited names
 * such as `constructor` or `toString`, which are not PII fields.
 */
export function isPiiField(field: string): boolean {
  return Object.prototype.hasOwnProperty.call(PII_REGISTRY, field);
}

/**
 * Returns the registry entry for `field`, or `undefined` when the field is
 * not registered (including names that only exist on Object.prototype).
 */
export function getFieldDefinition(field: string): PiiFieldDefinition | undefined {
  return isPiiField(field) ? PII_REGISTRY[field] : undefined;
}