Skip to content

Commit d149028

Browse files
mccullssaravadeodevflow.devflow-routing-intake
authored
🪞 10671 - Fix platform-dependent String.getBytes() calls to use explicit UTF-8 charset (#11149)
Fix platform-dependent String.getBytes() calls to use explicit UTF-8 charset Specify StandardCharsets.UTF_8 in String.getBytes() calls used with MessageDigest and other encoding-sensitive APIs. Without an explicit charset, getBytes() uses the platform's default charset, which can vary across systems and produce inconsistent results. Files changed: - AppSecEventTracker: user ID anonymization hash now uses UTF-8, ensuring consistent hashing across all platforms. Also resolved the TODO about MessageDigest caching with a clarifying comment referencing micro-benchmark data showing negligible overhead. - Fingerprinter: exception fingerprint hashes now use UTF-8. - JsonStreamParser: JSON byte conversion now uses UTF-8 (JSON spec). - LLMObsSpanMapper: writeUTF8() now receives actual UTF-8 bytes. Found a few more places using 'String.getBytes()' - all are working with UTF-8 strings Add String.getBytes() to list of forbidden APIs: this uses the platform's default charset, which may not be UTF-8, and can lead to inconsistent results across systems Co-authored-by: saravadeo <saravadeo@yahoo.com> Co-authored-by: devflow.devflow-routing-intake <devflow.devflow-routing-intake@kubernetes.us1.ddbuild.io>
1 parent d625c66 commit d149028

File tree

12 files changed

+32
-17
lines changed

12 files changed

+32
-17
lines changed

‎dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/buildid/ElfBuildIdExtractor.java‎

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import java.io.RandomAccessFile;
55
import java.nio.ByteBuffer;
66
import java.nio.ByteOrder;
7+
import java.nio.charset.StandardCharsets;
78
import java.nio.file.Path;
89
import java.util.Arrays;
910
import org.slf4j.Logger;
@@ -31,7 +32,7 @@ public class ElfBuildIdExtractor implements BuildIdExtractor {
3132

3233
// Note header constants
3334
private static final int NT_GNU_BUILD_ID = 3;
34-
private static final byte[] GNU_NOTE_NAME = "GNU\0".getBytes();
35+
private static final byte[] GNU_NOTE_NAME = "GNU\0".getBytes(StandardCharsets.UTF_8);
3536

3637
@Override
3738
public String extractBuildId(Path file) {

‎dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/exception/Fingerprinter.java‎

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import static com.datadog.debugger.util.ExceptionHelper.getInnerMostThrowable;
44

55
import datadog.trace.bootstrap.debugger.DebuggerContext.ClassNameFilter;
6+
import java.nio.charset.StandardCharsets;
67
import java.security.MessageDigest;
78
import java.security.NoSuchAlgorithmException;
89
import org.slf4j.Logger;
@@ -30,15 +31,15 @@ public static String fingerprint(Throwable t, ClassNameFilter classNameFiltering
3031
return null;
3132
}
3233
String typeName = clazz.getTypeName();
33-
digest.update(typeName.getBytes());
34+
digest.update(typeName.getBytes(StandardCharsets.UTF_8));
3435
StackTraceElement[] stackTrace = t.getStackTrace();
3536
if (stackTrace != null) {
3637
for (StackTraceElement stackTraceElement : stackTrace) {
3738
String className = stackTraceElement.getClassName();
3839
if (classNameFiltering.isExcluded(className)) {
3940
continue;
4041
}
41-
digest.update(stackTraceElement.toString().getBytes());
42+
digest.update(stackTraceElement.toString().getBytes(StandardCharsets.UTF_8));
4243
}
4344
}
4445
return bytesToHex(digest.digest());
@@ -47,7 +48,7 @@ public static String fingerprint(Throwable t, ClassNameFilter classNameFiltering
4748
public static String fingerprint(StackTraceElement element) {
4849
try {
4950
MessageDigest digest = MessageDigest.getInstance("SHA-256");
50-
digest.update(element.toString().getBytes());
51+
digest.update(element.toString().getBytes(StandardCharsets.UTF_8));
5152
return bytesToHex(digest.digest());
5253
} catch (NoSuchAlgorithmException e) {
5354
LOGGER.debug("Unable to find digest algorithm SHA-256", e);

‎dd-java-agent/agent-iast/src/main/java/com/datadog/iast/model/json/VulnerabilityEncoding.java‎

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import com.squareup.moshi.Moshi;
66
import datadog.trace.api.iast.telemetry.IastMetric;
77
import datadog.trace.api.iast.telemetry.IastMetricCollector;
8+
import java.nio.charset.StandardCharsets;
89
import org.slf4j.Logger;
910
import org.slf4j.LoggerFactory;
1011

@@ -25,7 +26,7 @@ public class VulnerabilityEncoding {
2526
public static String toJson(final VulnerabilityBatch value) {
2627
try {
2728
String json = BATCH_ADAPTER.toJson(value);
28-
return json.getBytes().length > MAX_SPAN_TAG_SIZE
29+
return json.getBytes(StandardCharsets.UTF_8).length > MAX_SPAN_TAG_SIZE
2930
? getExceededTagSizeJson(new TruncatedVulnerabilities(value.getVulnerabilities()))
3031
: json;
3132
} catch (Exception ex) {

‎dd-java-agent/src/main/java/datadog/trace/bootstrap/BootstrapInitializationTelemetry.java‎

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import de.thetaphi.forbiddenapis.SuppressForbidden;
99
import java.io.Closeable;
1010
import java.io.OutputStream;
11+
import java.nio.charset.StandardCharsets;
1112
import java.util.ArrayList;
1213
import java.util.LinkedHashMap;
1314
import java.util.List;
@@ -319,7 +320,7 @@ public void run() {
319320

320321
// Run forwarder and mute tracing for subprocesses executed in by dd-java-agent.
321322
try (final Closeable ignored = muteTracing()) {
322-
byte[] payload = telemetry.toString().getBytes();
323+
byte[] payload = telemetry.toString().getBytes(StandardCharsets.UTF_8);
323324

324325
Process process = builder.start();
325326
try (OutputStream out = process.getOutputStream()) {

‎dd-java-agent/src/main/java6/datadog/trace/bootstrap/AgentPreCheck.java‎

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import java.io.PrintStream;
99
import java.lang.instrument.Instrumentation;
1010
import java.lang.reflect.Method;
11+
import java.nio.charset.StandardCharsets;
1112

1213
/** Special lightweight pre-main class that skips installation on incompatible JVMs. */
1314
public class AgentPreCheck {
@@ -189,7 +190,7 @@ public void run() {
189190
OutputStream out = null;
190191
try {
191192
out = process.getOutputStream();
192-
out.write(payload.getBytes());
193+
out.write(payload.getBytes(StandardCharsets.UTF_8));
193194
} finally {
194195
if (out != null) {
195196
out.close();

‎dd-trace-core/src/main/java/datadog/trace/civisibility/writer/ddintake/CiTestCovMapperV2.java‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ private static class PayloadV2 extends Payload {
189189

190190
// backend requires _some_ JSON to be present
191191
private static final RequestBody DUMMY_JSON_BODY =
192-
jsonRequestBodyOf("{\"dummy\":true}".getBytes());
192+
jsonRequestBodyOf("{\"dummy\":true}".getBytes(StandardCharsets.UTF_8));
193193

194194
private final boolean compressionEnabled;
195195

‎dd-trace-core/src/main/java/datadog/trace/core/util/JsonStreamParser.java‎

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import java.io.ByteArrayInputStream;
66
import java.io.IOException;
77
import java.io.InputStream;
8+
import java.nio.charset.StandardCharsets;
89
import okio.BufferedSource;
910
import okio.Okio;
1011

@@ -60,7 +61,7 @@ public interface Visitor {
6061
*/
6162
public static boolean tryToParse(String raw, Visitor visitor, PathCursor pathCursor) {
6263
if (raw.startsWith("{") && raw.endsWith("}") || raw.startsWith("[") && raw.endsWith("]")) {
63-
try (InputStream is = new ByteArrayInputStream(raw.getBytes())) {
64+
try (InputStream is = new ByteArrayInputStream(raw.getBytes(StandardCharsets.UTF_8))) {
6465
return tryToParse(is, visitor, pathCursor.copy());
6566
} catch (Exception e) {
6667
visitor.expandValueFailed(pathCursor, e);

‎gradle/forbiddenApiFilters/main.txt‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ java.lang.String#split(java.lang.String,int)
77
java.lang.String#replaceAll(java.lang.String,java.lang.String)
88
java.lang.String#replaceFirst(java.lang.String,java.lang.String)
99

10+
# uses the platform's default charset, which may not be UTF-8
11+
java.lang.String#getBytes()
12+
1013
# can initialize java.util.logging when ACCP is installed, prefer RandomUtils instead
1114
java.util.UUID#randomUUID()
1215

‎internal-api/src/main/java/datadog/trace/api/appsec/AppSecEventTracker.java‎

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import datadog.trace.bootstrap.instrumentation.api.AgentSpan;
3737
import datadog.trace.bootstrap.instrumentation.api.AgentTracer;
3838
import datadog.trace.bootstrap.instrumentation.api.Tags;
39+
import java.nio.charset.StandardCharsets;
3940
import java.security.MessageDigest;
4041
import java.security.NoSuchAlgorithmException;
4142
import java.util.HashMap;
@@ -374,12 +375,13 @@ protected static String anonymize(final UserIdCollectionMode mode, final String
374375
}
375376
MessageDigest digest;
376377
try {
377-
// TODO avoid lookup a new instance every time
378+
// A new instance is needed each time for thread safety.
379+
// Per micro-benchmarks, the overhead of getInstance() is negligible.
378380
digest = MessageDigest.getInstance("SHA-256");
379381
} catch (NoSuchAlgorithmException e) {
380382
return null;
381383
}
382-
digest.update(userId.getBytes());
384+
digest.update(userId.getBytes(StandardCharsets.UTF_8));
383385
byte[] hash = digest.digest();
384386
if (hash.length > HASH_SIZE_BYTES) {
385387
byte[] temp = new byte[HASH_SIZE_BYTES];

‎internal-api/src/main/java/datadog/trace/api/datastreams/TransactionInfo.java‎

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import java.nio.ByteBuffer;
44
import java.nio.ByteOrder;
5+
import java.nio.charset.StandardCharsets;
56
import java.util.Map;
67
import java.util.concurrent.ConcurrentHashMap;
78
import java.util.concurrent.atomic.AtomicInteger;
@@ -38,7 +39,7 @@ private int generateCheckpointId(String checkpoint) {
3839
int id = ID_COUNTER.getAndIncrement();
3940

4041
// update cache bytes
41-
byte[] checkpointBytes = checkpoint.getBytes();
42+
byte[] checkpointBytes = checkpoint.getBytes(StandardCharsets.UTF_8);
4243
byte[] bytesToAdd = new byte[checkpointBytes.length + 2];
4344
bytesToAdd[0] = (byte) id;
4445
bytesToAdd[1] = (byte) checkpointBytes.length;
@@ -56,7 +57,7 @@ private static synchronized void appendCacheBytes(byte[] bytes) {
5657
}
5758

5859
public byte[] getBytes() {
59-
byte[] idBytes = id.getBytes();
60+
byte[] idBytes = id.getBytes(StandardCharsets.UTF_8);
6061

6162
// long ids will be truncated
6263
int idLen = Math.min(idBytes.length, MAX_ID_SIZE);

0 commit comments

Comments
 (0)