|
| 1 | +################################################################### |
| 2 | +# Copyright (c) 2016 by TAOS Technologies, Inc. |
| 3 | +# All rights reserved. |
| 4 | +# |
| 5 | +# This file is proprietary and confidential to TAOS Technologies. |
| 6 | +# No part of this file may be reproduced, stored, transmitted, |
| 7 | +# disclosed or used in any form or by any means other than as |
| 8 | +# expressly provided by the written permission from Jianhui Tao |
| 9 | +# |
| 10 | +################################################################### |
| 11 | + |
| 12 | +# -*- coding: utf-8 -*- |
| 13 | +from new_test_framework.utils import tdLog, tdSql, epath, sc, etool, AutoGen, tdDnodes |
| 14 | +import os |
| 15 | +import subprocess |
| 16 | + |
| 17 | + |
| 18 | +class TestBasic: |
| 19 | + """Test encryption with multi-level storage |
| 20 | +
|
| 21 | + This test verifies that: |
| 22 | + 1. taosk correctly identifies primary disk in multi-level storage configuration |
| 23 | + 2. Encryption keys are stored only on primary disk (level=0, primary=1) |
| 24 | + 3. Encrypted databases work correctly with multi-level storage |
| 25 | + 4. Data is properly distributed across storage tiers |
| 26 | +
|
| 27 | + Usage: |
| 28 | + # Single-level storage (default) |
| 29 | + pytest cases/31-Security/07-EncryptionMultiLevel/test_encryption_mlevel.py --clean |
| 30 | +
|
| 31 | + # Multi-level storage (3 levels, 2 disks per level) |
| 32 | + pytest cases/31-Security/07-EncryptionMultiLevel/test_encryption_mlevel.py --clean -L 3 -D 2 |
| 33 | + """ |
| 34 | + |
| 35 | + # Configure debug flags |
| 36 | + updatecfgDict = { |
| 37 | + 'dDebugFlag': 131, |
| 38 | + 'vDebugFlag': 131, |
| 39 | + 'fsDebugFlag': 131 |
| 40 | + } |
| 41 | + |
| 42 | + # Pre-generate encryption keys with all options |
| 43 | + encryptConfig = { |
| 44 | + "svrKey": "mlevelsvr123", |
| 45 | + "dbKey": "mleveldb456", |
| 46 | + "dataKey": "mleveldat789", |
| 47 | + "generateConfig": True, |
| 48 | + "generateMeta": True, |
| 49 | + "generateData": True |
| 50 | + } |
| 51 | + |
| 52 | + def setup_class(cls): |
| 53 | + cls.init(cls, replicaVar=1, checkColName="c1") |
| 54 | + cls.valgrind = 0 |
| 55 | + cls.db = "mlevel_encrypt_db" |
| 56 | + |
| 57 | + def test_encryption_mlevel(self): |
| 58 | + """ Test encryption with multi-level storage |
| 59 | +
|
| 60 | + Tests: |
| 61 | + 1. Verify taosk identifies primary disk correctly |
| 62 | + 2. Verify encryption keys are stored on primary disk only |
| 63 | + 3. Create encrypted database with multi-level storage |
| 64 | + 4. Insert data and verify distribution across tiers |
| 65 | + 5. Verify data integrity after flush and compact |
| 66 | +
|
| 67 | + Since: v3.4.0.0 |
| 68 | +
|
| 69 | + Labels: common,ci |
| 70 | +
|
| 71 | + Jira: TS-7230 |
| 72 | +
|
| 73 | + History: |
| 74 | + - 2026-04-14 Created for multi-level storage encryption support |
| 75 | +
|
| 76 | + """ |
| 77 | + # Get data directories (should be multiple for multi-level storage) |
| 78 | + data_dirs = tdDnodes.dnodes[0].dataDir |
| 79 | + if not isinstance(data_dirs, list): |
| 80 | + data_dirs = [data_dirs] |
| 81 | + |
| 82 | + cfg_path = tdDnodes.dnodes[0].cfgDir |
| 83 | + |
| 84 | + tdLog.info("=" * 80) |
| 85 | + tdLog.info("Test Environment Setup") |
| 86 | + tdLog.info("=" * 80) |
| 87 | + tdLog.info(f"Config directory: {cfg_path}") |
| 88 | + tdLog.info(f"Data directories: {data_dirs}") |
| 89 | + tdLog.info(f"Number of storage tiers: {len(data_dirs)}") |
| 90 | + |
| 91 | + # Test 1: Verify taosk identifies primary disk |
| 92 | + tdLog.info("=" * 80) |
| 93 | + tdLog.info("Test 1: Verify taosk identifies primary disk") |
| 94 | + tdLog.info("=" * 80) |
| 95 | + |
| 96 | + taosk_bin = etool.taoskFile() |
| 97 | + tdLog.info(f"taosk binary: {taosk_bin}") |
| 98 | + |
| 99 | + # Primary disk should be the first one (level=0, primary=1) |
| 100 | + # In multi-level storage, dataDir format is: "path level primary" |
| 101 | + primary_disk_entry = data_dirs[0] |
| 102 | + if isinstance(primary_disk_entry, str) and ' ' in primary_disk_entry: |
| 103 | + # Multi-level storage: extract path from "path level primary" |
| 104 | + primary_disk = primary_disk_entry.split()[0] |
| 105 | + else: |
| 106 | + # Single-level storage |
| 107 | + primary_disk = primary_disk_entry |
| 108 | + |
| 109 | + key_dir = os.path.join(primary_disk, "dnode", "config") |
| 110 | + master_file = os.path.join(key_dir, "master.bin") |
| 111 | + derived_file = os.path.join(key_dir, "derived.bin") |
| 112 | + |
| 113 | + tdLog.info(f"Expected primary disk: {primary_disk}") |
| 114 | + tdLog.info(f"Expected key directory: {key_dir}") |
| 115 | + |
| 116 | + # Verify keys exist on primary disk |
| 117 | + if os.path.exists(master_file): |
| 118 | + tdLog.info(f"✓ master.bin found on primary disk: {os.path.getsize(master_file)} bytes") |
| 119 | + else: |
| 120 | + tdLog.exit(f"✗ master.bin NOT found on primary disk: {master_file}") |
| 121 | + |
| 122 | + if os.path.exists(derived_file): |
| 123 | + tdLog.info(f"✓ derived.bin found on primary disk: {os.path.getsize(derived_file)} bytes") |
| 124 | + else: |
| 125 | + tdLog.info(f"Note: derived.bin not found (may not be generated yet)") |
| 126 | + |
| 127 | + # Test 2: Verify keys are NOT on other disks |
| 128 | + tdLog.info("=" * 80) |
| 129 | + tdLog.info("Test 2: Verify keys are stored ONLY on primary disk") |
| 130 | + tdLog.info("=" * 80) |
| 131 | + |
| 132 | + for i, disk_entry in enumerate(data_dirs[1:], start=1): |
| 133 | + # Extract disk path |
| 134 | + if isinstance(disk_entry, str) and ' ' in disk_entry: |
| 135 | + disk_path = disk_entry.split()[0] |
| 136 | + else: |
| 137 | + disk_path = disk_entry |
| 138 | + |
| 139 | + other_key_dir = os.path.join(disk_path, "dnode", "config") |
| 140 | + other_master = os.path.join(other_key_dir, "master.bin") |
| 141 | + |
| 142 | + if os.path.exists(other_master): |
| 143 | + tdLog.exit(f"✗ ERROR: master.bin found on non-primary disk {i}: {other_master}") |
| 144 | + else: |
| 145 | + tdLog.info(f"✓ Disk {i} ({disk_path}): No encryption keys (correct)") |
| 146 | + |
| 147 | + # Test 3: Create encrypted database with multi-level storage |
| 148 | + tdLog.info("=" * 80) |
| 149 | + tdLog.info("Test 3: Create encrypted database with multi-level storage") |
| 150 | + tdLog.info("=" * 80) |
| 151 | + |
| 152 | + # Create database with SM4 encryption and multi-level keep |
| 153 | + tdSql.execute(f"DROP DATABASE IF EXISTS {self.db}") |
| 154 | + tdSql.execute(f"CREATE DATABASE {self.db} ENCRYPT_ALGORITHM 'SM4-CBC' KEEP 40d,70d,120d VGROUPS 2") |
| 155 | + tdLog.info(f"Database {self.db} created with SM4 encryption and multi-level keep") |
| 156 | + |
| 157 | + # Verify database configuration |
| 158 | + tdSql.query(f"SELECT name, `encrypt_algorithm`, `keep` FROM information_schema.ins_databases WHERE name='{self.db}'") |
| 159 | + if tdSql.queryRows > 0: |
| 160 | + tdLog.info(f"Database info: {tdSql.queryResult}") |
| 161 | + assert 'SM4' in str(tdSql.queryResult[0]), "Database should use SM4 encryption" |
| 162 | + |
| 163 | + # Use database |
| 164 | + tdSql.execute(f"USE {self.db}") |
| 165 | + |
| 166 | + # Test 4: Insert data and verify distribution |
| 167 | + tdLog.info("=" * 80) |
| 168 | + tdLog.info("Test 4: Insert data and verify distribution across tiers") |
| 169 | + tdLog.info("=" * 80) |
| 170 | + |
| 171 | + # Create super table |
| 172 | + tdSql.execute(""" |
| 173 | + CREATE STABLE IF NOT EXISTS meters ( |
| 174 | + ts TIMESTAMP, |
| 175 | + voltage INT, |
| 176 | + current FLOAT, |
| 177 | + phase FLOAT, |
| 178 | + temperature FLOAT |
| 179 | + ) TAGS (location BINARY(32), groupid INT) |
| 180 | + """) |
| 181 | + tdLog.info("Super table created") |
| 182 | + |
| 183 | + # Create child tables and insert data |
| 184 | + num_tables = 10 |
| 185 | + rows_per_table = 1000 # Reduced for faster testing |
| 186 | + |
| 187 | + import time |
| 188 | + current_time_ms = int(time.time() * 1000) |
| 189 | + |
| 190 | + for i in range(num_tables): |
| 191 | + table_name = f"d{i}" |
| 192 | + location = f"Location_{i}" |
| 193 | + |
| 194 | + tdSql.execute(f"CREATE TABLE {table_name} USING meters TAGS ('{location}', {i})") |
| 195 | + |
| 196 | + # Insert historical data in batches (will be distributed across storage tiers based on keep) |
| 197 | + batch_size = 100 |
| 198 | + for batch_start in range(0, rows_per_table, batch_size): |
| 199 | + values = [] |
| 200 | + for j in range(batch_start, min(batch_start + batch_size, rows_per_table)): |
| 201 | + # Insert data spanning multiple days to trigger multi-level storage |
| 202 | + day_offset = j // 100 # Change day every 100 records |
| 203 | + sec_offset = j % 100 # Seconds within the day |
| 204 | + # Calculate timestamp: current time - days - seconds |
| 205 | + ts = current_time_ms - (day_offset * 86400000) - (sec_offset * 1000) |
| 206 | + values.append(f"({ts}, {220 + i}, {10.0 + i * 0.1}, {0.95 + i * 0.01}, {25.0 + i})") |
| 207 | + |
| 208 | + tdSql.execute(f"INSERT INTO {table_name} VALUES {' '.join(values)}") |
| 209 | + |
| 210 | + tdLog.info(f"Inserted {num_tables * rows_per_table} rows across {num_tables} tables") |
| 211 | + |
| 212 | + # Verify data count |
| 213 | + tdSql.query("SELECT COUNT(*) FROM meters") |
| 214 | + total_rows = tdSql.queryResult[0][0] |
| 215 | + expected_rows = num_tables * rows_per_table |
| 216 | + assert total_rows == expected_rows, f"Expected {expected_rows} rows, got {total_rows}" |
| 217 | + tdLog.info(f"✓ Data verification passed: {total_rows} rows") |
| 218 | + |
| 219 | + # Test 5: Flush and verify data integrity |
| 220 | + tdLog.info("=" * 80) |
| 221 | + tdLog.info("Test 5: Flush database and verify data integrity") |
| 222 | + tdLog.info("=" * 80) |
| 223 | + |
| 224 | + # Flush database to ensure data is written to disk |
| 225 | + tdSql.execute(f"FLUSH DATABASE {self.db}") |
| 226 | + tdLog.info("Database flushed") |
| 227 | + |
| 228 | + # Wait for flush to complete |
| 229 | + import time |
| 230 | + time.sleep(2) |
| 231 | + |
| 232 | + # Verify data after flush |
| 233 | + tdSql.query("SELECT COUNT(*) FROM meters") |
| 234 | + rows_after_flush = tdSql.queryResult[0][0] |
| 235 | + assert rows_after_flush == expected_rows, f"Data lost after flush: {rows_after_flush} != {expected_rows}" |
| 236 | + tdLog.info(f"✓ Data integrity verified after flush: {rows_after_flush} rows") |
| 237 | + |
| 238 | + # Verify aggregate queries work correctly |
| 239 | + tdSql.query("SELECT AVG(voltage), MAX(current), MIN(phase) FROM meters") |
| 240 | + tdLog.info(f"Aggregate query result: {tdSql.queryResult}") |
| 241 | + |
| 242 | + # Test 6: Verify data files across storage tiers |
| 243 | + tdLog.info("=" * 80) |
| 244 | + tdLog.info("Test 6: Verify data files across storage tiers") |
| 245 | + tdLog.info("=" * 80) |
| 246 | + |
| 247 | + for i, disk in enumerate(data_dirs): |
| 248 | + vnode_dir = os.path.join(disk, "vnode") |
| 249 | + if os.path.exists(vnode_dir): |
| 250 | + vnode_list = os.listdir(vnode_dir) |
| 251 | + tdLog.info(f"Tier {i} ({disk}): {len(vnode_list)} vnode(s)") |
| 252 | + |
| 253 | + # Check for encrypted data files |
| 254 | + for vnode in vnode_list[:2]: # Check first 2 vnodes |
| 255 | + vnode_path = os.path.join(vnode_dir, vnode) |
| 256 | + if os.path.isdir(vnode_path): |
| 257 | + # Check TSDB directory |
| 258 | + tsdb_dir = os.path.join(vnode_path, "tsdb") |
| 259 | + if os.path.exists(tsdb_dir): |
| 260 | + tsdb_files = [f for f in os.listdir(tsdb_dir) if os.path.isfile(os.path.join(tsdb_dir, f))] |
| 261 | + tdLog.info(f" Vnode {vnode} TSDB: {len(tsdb_files)} file(s)") |
| 262 | + |
| 263 | + # Check WAL directory |
| 264 | + wal_dir = os.path.join(vnode_path, "wal") |
| 265 | + if os.path.exists(wal_dir): |
| 266 | + wal_files = [f for f in os.listdir(wal_dir) if os.path.isfile(os.path.join(wal_dir, f))] |
| 267 | + tdLog.info(f" Vnode {vnode} WAL: {len(wal_files)} file(s)") |
| 268 | + else: |
| 269 | + tdLog.info(f"Tier {i} ({disk}): No vnode directory yet") |
| 270 | + |
| 271 | + # Test 7: Compact database |
| 272 | + tdLog.info("=" * 80) |
| 273 | + tdLog.info("Test 7: Compact database and verify data integrity") |
| 274 | + tdLog.info("=" * 80) |
| 275 | + |
| 276 | + tdSql.execute(f"COMPACT DATABASE {self.db}") |
| 277 | + tdLog.info("Database compacted") |
| 278 | + |
| 279 | + # Wait for compact to complete |
| 280 | + time.sleep(2) |
| 281 | + |
| 282 | + # Verify data after compact |
| 283 | + tdSql.query("SELECT COUNT(*) FROM meters") |
| 284 | + rows_after_compact = tdSql.queryResult[0][0] |
| 285 | + assert rows_after_compact == expected_rows, f"Data lost after compact: {rows_after_compact} != {expected_rows}" |
| 286 | + tdLog.info(f"✓ Data integrity verified after compact: {rows_after_compact} rows") |
| 287 | + |
| 288 | + # Test 8: Query data across time ranges (different storage tiers) |
| 289 | + tdLog.info("=" * 80) |
| 290 | + tdLog.info("Test 8: Query data across time ranges (different storage tiers)") |
| 291 | + tdLog.info("=" * 80) |
| 292 | + |
| 293 | + # Query recent data (should be on hot tier) |
| 294 | + tdSql.query("SELECT COUNT(*) FROM meters WHERE ts > now - 1d") |
| 295 | + recent_count = tdSql.queryResult[0][0] |
| 296 | + tdLog.info(f"Recent data (< 1 day): {recent_count} rows") |
| 297 | + |
| 298 | + # Query older data (should be on warm/cold tiers) |
| 299 | + tdSql.query("SELECT COUNT(*) FROM meters WHERE ts < now - 1d") |
| 300 | + old_count = tdSql.queryResult[0][0] |
| 301 | + tdLog.info(f"Older data (> 1 day): {old_count} rows") |
| 302 | + |
| 303 | + # Verify total |
| 304 | + assert recent_count + old_count == expected_rows, "Data count mismatch across tiers" |
| 305 | + tdLog.info(f"✓ Data distribution verified: {recent_count} + {old_count} = {expected_rows}") |
| 306 | + |
| 307 | + tdLog.success(f"{__file__} successfully executed") |
0 commit comments