Skip to content

Commit f838ac1

Browse files
committed
Another attempt at improving performance
1 parent 5493419 commit f838ac1

File tree

2 files changed

53 additions and 57 deletions

2 files changed

53 additions and 57 deletions

CodenameOne/src/com/codename1/util/Base64.java

Lines changed: 34 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -39,8 +39,6 @@ public abstract class Base64 {
3939

4040
private static final byte[] decodeMap = new byte[256];
4141
private static final int[] decodeMapInt = new int[256];
42-
private static final int SIMD_SCRATCH_INTS = 192;
43-
4442
static {
4543
for (int i = 0; i < decodeMap.length; i++) {
4644
decodeMap[i] = (byte) DECODE_INVALID;
@@ -451,18 +449,14 @@ public static int encodeNoNewline(byte[] in, byte[] out) {
451449
@DisableDebugInfo
452450
@DisableNullChecksAndArrayBoundsChecks
453451
public static int encodeNoNewlineSimd(byte[] in, int inOffset, int inLength, byte[] out, int outOffset, int[] scratch) {
454-
if (inOffset == 0 && outOffset == 0 && inLength == in.length) {
455-
requireScratch(scratch);
456-
return encodeNoNewline(in, out);
457-
}
458452
int outputLength = ((inLength + 2) / 3) * 4;
459453
if (out.length - outOffset < outputLength) {
460454
throw new IllegalArgumentException("Output buffer too small for encoded data");
461455
}
462456
if (inLength == 0) {
463457
return 0;
464458
}
465-
requireScratch(scratch);
459+
byte[] mapLocal = map;
466460

467461
int end = inOffset + inLength - (inLength % 3);
468462
int inIndex = inOffset;
@@ -482,52 +476,52 @@ public static int encodeNoNewlineSimd(byte[] in, int inOffset, int inLength, byt
482476
int b10 = in[inIndex + 10] & 0xff;
483477
int b11 = in[inIndex + 11] & 0xff;
484478

485-
out[outIndex++] = map[b0 >> 2];
486-
out[outIndex++] = map[((b0 & 0x03) << 4) | (b1 >> 4)];
487-
out[outIndex++] = map[((b1 & 0x0f) << 2) | (b2 >> 6)];
488-
out[outIndex++] = map[b2 & 0x3f];
479+
out[outIndex++] = mapLocal[b0 >> 2];
480+
out[outIndex++] = mapLocal[((b0 & 0x03) << 4) | (b1 >> 4)];
481+
out[outIndex++] = mapLocal[((b1 & 0x0f) << 2) | (b2 >> 6)];
482+
out[outIndex++] = mapLocal[b2 & 0x3f];
489483

490-
out[outIndex++] = map[b3 >> 2];
491-
out[outIndex++] = map[((b3 & 0x03) << 4) | (b4 >> 4)];
492-
out[outIndex++] = map[((b4 & 0x0f) << 2) | (b5 >> 6)];
493-
out[outIndex++] = map[b5 & 0x3f];
484+
out[outIndex++] = mapLocal[b3 >> 2];
485+
out[outIndex++] = mapLocal[((b3 & 0x03) << 4) | (b4 >> 4)];
486+
out[outIndex++] = mapLocal[((b4 & 0x0f) << 2) | (b5 >> 6)];
487+
out[outIndex++] = mapLocal[b5 & 0x3f];
494488

495-
out[outIndex++] = map[b6 >> 2];
496-
out[outIndex++] = map[((b6 & 0x03) << 4) | (b7 >> 4)];
497-
out[outIndex++] = map[((b7 & 0x0f) << 2) | (b8 >> 6)];
498-
out[outIndex++] = map[b8 & 0x3f];
489+
out[outIndex++] = mapLocal[b6 >> 2];
490+
out[outIndex++] = mapLocal[((b6 & 0x03) << 4) | (b7 >> 4)];
491+
out[outIndex++] = mapLocal[((b7 & 0x0f) << 2) | (b8 >> 6)];
492+
out[outIndex++] = mapLocal[b8 & 0x3f];
499493

500-
out[outIndex++] = map[b9 >> 2];
501-
out[outIndex++] = map[((b9 & 0x03) << 4) | (b10 >> 4)];
502-
out[outIndex++] = map[((b10 & 0x0f) << 2) | (b11 >> 6)];
503-
out[outIndex++] = map[b11 & 0x3f];
494+
out[outIndex++] = mapLocal[b9 >> 2];
495+
out[outIndex++] = mapLocal[((b9 & 0x03) << 4) | (b10 >> 4)];
496+
out[outIndex++] = mapLocal[((b10 & 0x0f) << 2) | (b11 >> 6)];
497+
out[outIndex++] = mapLocal[b11 & 0x3f];
504498
}
505499

506500
for (; inIndex < end; inIndex += 3) {
507501
int x0 = in[inIndex] & 0xff;
508502
int x1 = in[inIndex + 1] & 0xff;
509503
int x2 = in[inIndex + 2] & 0xff;
510-
out[outIndex++] = map[x0 >> 2];
511-
out[outIndex++] = map[((x0 & 0x03) << 4) | (x1 >> 4)];
512-
out[outIndex++] = map[((x1 & 0x0f) << 2) | (x2 >> 6)];
513-
out[outIndex++] = map[x2 & 0x3f];
504+
out[outIndex++] = mapLocal[x0 >> 2];
505+
out[outIndex++] = mapLocal[((x0 & 0x03) << 4) | (x1 >> 4)];
506+
out[outIndex++] = mapLocal[((x1 & 0x0f) << 2) | (x2 >> 6)];
507+
out[outIndex++] = mapLocal[x2 & 0x3f];
514508
}
515509

516510
switch (inOffset + inLength - end) {
517511
case 1: {
518512
int x0 = in[end] & 0xff;
519-
out[outIndex++] = map[x0 >> 2];
520-
out[outIndex++] = map[(x0 & 0x03) << 4];
513+
out[outIndex++] = mapLocal[x0 >> 2];
514+
out[outIndex++] = mapLocal[(x0 & 0x03) << 4];
521515
out[outIndex++] = '=';
522516
out[outIndex++] = '=';
523517
break;
524518
}
525519
case 2: {
526520
int x0 = in[end] & 0xff;
527521
int x1 = in[end + 1] & 0xff;
528-
out[outIndex++] = map[x0 >> 2];
529-
out[outIndex++] = map[((x0 & 0x03) << 4) | (x1 >> 4)];
530-
out[outIndex++] = map[(x1 & 0x0f) << 2];
522+
out[outIndex++] = mapLocal[x0 >> 2];
523+
out[outIndex++] = mapLocal[((x0 & 0x03) << 4) | (x1 >> 4)];
524+
out[outIndex++] = mapLocal[(x1 & 0x0f) << 2];
531525
out[outIndex++] = '=';
532526
break;
533527
}
@@ -554,10 +548,6 @@ public static int encodeNoNewlineSimd(byte[] in, int inOffset, int inLength, byt
554548
@DisableDebugInfo
555549
@DisableNullChecksAndArrayBoundsChecks
556550
public static int decodeNoWhitespaceSimd(byte[] in, int inOffset, int inLength, byte[] out, int outOffset, int[] scratch) {
557-
if (inOffset == 0 && outOffset == 0 && inLength == in.length) {
558-
requireScratch(scratch);
559-
return decodeNoWhitespace(in, inLength, out);
560-
}
561551
if ((inLength & 0x3) != 0) {
562552
return -1;
563553
}
@@ -579,7 +569,7 @@ public static int decodeNoWhitespaceSimd(byte[] in, int inOffset, int inLength,
579569
throw new IllegalArgumentException("Output buffer too small for decoded data");
580570
}
581571

582-
requireScratch(scratch);
572+
int[] decodeMap = decodeMapInt;
583573

584574
int fullLen = inLength - (pad > 0 ? 4 : 0);
585575
int fullEnd = inOffset + fullLen;
@@ -590,10 +580,10 @@ public static int decodeNoWhitespaceSimd(byte[] in, int inOffset, int inLength,
590580
int c1 = in[inIndex + 1] & 0xff;
591581
int c2 = in[inIndex + 2] & 0xff;
592582
int c3v = in[inIndex + 3] & 0xff;
593-
int x0 = decodeMapInt[c0];
594-
int x1 = decodeMapInt[c1];
595-
int x2 = decodeMapInt[c2];
596-
int x3 = decodeMapInt[c3v];
583+
int x0 = decodeMap[c0];
584+
int x1 = decodeMap[c1];
585+
int x2 = decodeMap[c2];
586+
int x3 = decodeMap[c3v];
597587
if ((x0 | x1 | x2 | x3) < 0) {
598588
return -1;
599589
}
@@ -610,8 +600,8 @@ public static int decodeNoWhitespaceSimd(byte[] in, int inOffset, int inLength,
610600
int i = inOffset + inLength - 4;
611601
int c0 = in[i] & 0xff;
612602
int c1 = in[i + 1] & 0xff;
613-
int x0 = decodeMapInt[c0];
614-
int x1 = decodeMapInt[c1];
603+
int x0 = decodeMap[c0];
604+
int x1 = decodeMap[c1];
615605
if ((x0 | x1) < 0) {
616606
return -1;
617607
}
@@ -622,7 +612,7 @@ public static int decodeNoWhitespaceSimd(byte[] in, int inOffset, int inLength,
622612
if (in[i + 3] != '=') {
623613
return -1;
624614
}
625-
int x2 = decodeMapInt[in[i + 2] & 0xff];
615+
int x2 = decodeMap[in[i + 2] & 0xff];
626616
if (x2 < 0) {
627617
return -1;
628618
}
@@ -642,12 +632,6 @@ public static int decodeNoWhitespaceSimd(byte[] in, int len, byte[] out, int[] s
642632
return decodeNoWhitespaceSimd(in, 0, len, out, 0, scratch);
643633
}
644634

645-
private static void requireScratch(int[] scratch) {
646-
if (scratch == null || scratch.length < SIMD_SCRATCH_INTS) {
647-
throw new IllegalArgumentException("scratch must be an int[] allocated with Simd.allocInt(192) or larger");
648-
}
649-
}
650-
651635
private static byte[] allocByteMaybeSimd(int size) {
652636
if (size <= 0) {
653637
return new byte[0];

scripts/hellocodenameone/common/src/main/java/com/codenameone/examples/hellocodenameone/tests/Base64NativePerformanceTest.java

Lines changed: 19 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -96,11 +96,11 @@ public boolean runTest() {
9696

9797
if (!isIos()) {
9898
warmup(nativeBase64, payload, payloadBytes, nativeEncoded, cn1EncodedBytes, cn1DecodedBuffer,
99-
runSimdBenchmark, simdPayloadBytes, simdEncodedBytes, simdDecodedBuffer, simdScratch);
99+
runSimdBenchmark, simdPayloadBytes, simdEncodedBytes, simdDecodedBuffer, simdScratch, encodedLen);
100100
}
101101
if (runSimdBenchmark) {
102102
warmup(nativeBase64, payload, payloadBytes, nativeEncoded, cn1EncodedBytes, cn1DecodedBuffer,
103-
true, simdPayloadBytes, simdEncodedBytes, simdDecodedBuffer, simdScratch);
103+
true, simdPayloadBytes, simdEncodedBytes, simdDecodedBuffer, simdScratch, encodedLen);
104104
}
105105

106106
long nativeEncodeMs = measureNativeEncode(nativeBase64, payload);
@@ -139,15 +139,27 @@ public boolean runTest() {
139139

140140
private static void warmup(Base64Native nativeBase64, String payload, byte[] payloadBytes, String nativeEncoded, byte[] cn1EncodedBytes,
141141
byte[] cn1DecodedBuffer, boolean includeSimd, byte[] simdPayloadBytes, byte[] simdEncodedBytes,
142-
byte[] simdDecodedBuffer, int[] simdScratch) {
142+
byte[] simdDecodedBuffer, int[] simdScratch, int encodedLen) {
143143
for (int i = 0; i < 40; i++) {
144144
nativeBase64.encodeUtf8(payload);
145-
Base64.encodeNoNewline(payloadBytes, cn1EncodedBytes);
145+
int cn1EncodedWritten = Base64.encodeNoNewline(payloadBytes, cn1EncodedBytes);
146+
if (cn1EncodedWritten != encodedLen) {
147+
throw new IllegalStateException("Warmup CN1 encode length mismatch");
148+
}
146149
nativeBase64.decodeToUtf8(nativeEncoded);
147-
Base64.decode(cn1EncodedBytes, cn1DecodedBuffer);
150+
int cn1DecodedWritten = Base64.decode(cn1EncodedBytes, cn1DecodedBuffer);
151+
if (cn1DecodedWritten != payloadBytes.length || !byteArraysEqual(payloadBytes, cn1DecodedBuffer, payloadBytes.length)) {
152+
throw new IllegalStateException("Warmup CN1 decode mismatch");
153+
}
148154
if (includeSimd) {
149-
Base64.encodeNoNewlineSimd(simdPayloadBytes, 0, simdPayloadBytes.length, simdEncodedBytes, 0, simdScratch);
150-
Base64.decodeNoWhitespaceSimd(simdEncodedBytes, 0, simdEncodedBytes.length, simdDecodedBuffer, 0, simdScratch);
155+
int simdEncodedWritten = Base64.encodeNoNewlineSimd(simdPayloadBytes, 0, simdPayloadBytes.length, simdEncodedBytes, 0, simdScratch);
156+
if (simdEncodedWritten != encodedLen || !byteArraysEqual(cn1EncodedBytes, simdEncodedBytes, encodedLen)) {
157+
throw new IllegalStateException("Warmup SIMD encode mismatch");
158+
}
159+
int simdDecodedWritten = Base64.decodeNoWhitespaceSimd(simdEncodedBytes, 0, encodedLen, simdDecodedBuffer, 0, simdScratch);
160+
if (simdDecodedWritten != payloadBytes.length || !byteArraysEqual(payloadBytes, simdDecodedBuffer, payloadBytes.length)) {
161+
throw new IllegalStateException("Warmup SIMD decode mismatch");
162+
}
151163
}
152164
}
153165
}

0 commit comments

Comments
 (0)