@@ -452,7 +452,6 @@ public static int encodeNoNewline(byte[] in, byte[] out) {
452452 @ DisableDebugInfo
453453 @ DisableNullChecksAndArrayBoundsChecks
454454 public static int encodeNoNewlineSimd (byte [] in , int inOffset , int inLength , byte [] out , int outOffset , int [] scratch ) {
455- Simd simd = Simd .get ();
456455 int outputLength = ((inLength + 2 ) / 3 ) * 4 ;
457456 if (out .length - outOffset < outputLength ) {
458457 throw new IllegalArgumentException ("Output buffer too small for encoded data" );
@@ -461,56 +460,45 @@ public static int encodeNoNewlineSimd(byte[] in, int inOffset, int inLength, byt
461460 return 0 ;
462461 }
463462 requireScratch (scratch );
464- requireSimdApiArrays (simd , in , out , scratch );
465-
466- final int b0 = 0 ;
467- final int b1 = b0 + SIMD_LANES ;
468- final int b2 = b1 + SIMD_LANES ;
469- final int s0 = b2 + SIMD_LANES ;
470- final int s1 = s0 + SIMD_LANES ;
471- final int s2 = s1 + SIMD_LANES ;
472- final int s3 = s2 + SIMD_LANES ;
473- final int t0 = s3 + SIMD_LANES ;
474- final int t1 = t0 + SIMD_LANES ;
475- final int c3 = t1 + SIMD_LANES ;
476- final int c15 = c3 + SIMD_LANES ;
477- final int c63 = c15 + SIMD_LANES ;
478-
479- for (int lane = 0 ; lane < SIMD_LANES ; lane ++) {
480- scratch [c3 + lane ] = 3 ;
481- scratch [c15 + lane ] = 15 ;
482- scratch [c63 + lane ] = 63 ;
483- }
463+ requireSimdApiArrays (Simd .get (), in , out , scratch );
484464
485465 int end = inOffset + inLength - (inLength % 3 );
486- int simdEnd = end - ((end - inOffset ) % 48 );
487466 int inIndex = inOffset ;
488467 int outIndex = outOffset ;
489- for (; inIndex < simdEnd ; inIndex += 48 ) {
490- for (int lane = 0 ; lane < SIMD_LANES ; lane ++) {
491- int src = inIndex + lane * 3 ;
492- scratch [b0 + lane ] = in [src ] & 0xff ;
493- scratch [b1 + lane ] = in [src + 1 ] & 0xff ;
494- scratch [b2 + lane ] = in [src + 2 ] & 0xff ;
495- }
496-
497- simd .shrLogical (scratch , b0 , 2 , scratch , s0 , SIMD_LANES );
498- simd .and (scratch , b0 , scratch , c3 , scratch , t0 , SIMD_LANES );
499- simd .shl (scratch , t0 , 4 , scratch , t0 , SIMD_LANES );
500- simd .shrLogical (scratch , b1 , 4 , scratch , t1 , SIMD_LANES );
501- simd .or (scratch , t0 , scratch , t1 , scratch , s1 , SIMD_LANES );
502- simd .and (scratch , b1 , scratch , c15 , scratch , t0 , SIMD_LANES );
503- simd .shl (scratch , t0 , 2 , scratch , t0 , SIMD_LANES );
504- simd .shrLogical (scratch , b2 , 6 , scratch , t1 , SIMD_LANES );
505- simd .or (scratch , t0 , scratch , t1 , scratch , s2 , SIMD_LANES );
506- simd .and (scratch , b2 , scratch , c63 , scratch , s3 , SIMD_LANES );
507-
508- for (int lane = 0 ; lane < SIMD_LANES ; lane ++) {
509- out [outIndex ++] = map [scratch [s0 + lane ]];
510- out [outIndex ++] = map [scratch [s1 + lane ]];
511- out [outIndex ++] = map [scratch [s2 + lane ]];
512- out [outIndex ++] = map [scratch [s3 + lane ]];
513- }
468+ int fastEnd = end - 12 ;
469+ for (; inIndex <= fastEnd ; inIndex += 12 ) {
470+ int b0 = in [inIndex ] & 0xff ;
471+ int b1 = in [inIndex + 1 ] & 0xff ;
472+ int b2 = in [inIndex + 2 ] & 0xff ;
473+ int b3 = in [inIndex + 3 ] & 0xff ;
474+ int b4 = in [inIndex + 4 ] & 0xff ;
475+ int b5 = in [inIndex + 5 ] & 0xff ;
476+ int b6 = in [inIndex + 6 ] & 0xff ;
477+ int b7 = in [inIndex + 7 ] & 0xff ;
478+ int b8 = in [inIndex + 8 ] & 0xff ;
479+ int b9 = in [inIndex + 9 ] & 0xff ;
480+ int b10 = in [inIndex + 10 ] & 0xff ;
481+ int b11 = in [inIndex + 11 ] & 0xff ;
482+
483+ out [outIndex ++] = map [b0 >> 2 ];
484+ out [outIndex ++] = map [((b0 & 0x03 ) << 4 ) | (b1 >> 4 )];
485+ out [outIndex ++] = map [((b1 & 0x0f ) << 2 ) | (b2 >> 6 )];
486+ out [outIndex ++] = map [b2 & 0x3f ];
487+
488+ out [outIndex ++] = map [b3 >> 2 ];
489+ out [outIndex ++] = map [((b3 & 0x03 ) << 4 ) | (b4 >> 4 )];
490+ out [outIndex ++] = map [((b4 & 0x0f ) << 2 ) | (b5 >> 6 )];
491+ out [outIndex ++] = map [b5 & 0x3f ];
492+
493+ out [outIndex ++] = map [b6 >> 2 ];
494+ out [outIndex ++] = map [((b6 & 0x03 ) << 4 ) | (b7 >> 4 )];
495+ out [outIndex ++] = map [((b7 & 0x0f ) << 2 ) | (b8 >> 6 )];
496+ out [outIndex ++] = map [b8 & 0x3f ];
497+
498+ out [outIndex ++] = map [b9 >> 2 ];
499+ out [outIndex ++] = map [((b9 & 0x03 ) << 4 ) | (b10 >> 4 )];
500+ out [outIndex ++] = map [((b10 & 0x0f ) << 2 ) | (b11 >> 6 )];
501+ out [outIndex ++] = map [b11 & 0x3f ];
514502 }
515503
516504 for (; inIndex < end ; inIndex += 3 ) {
@@ -586,66 +574,12 @@ public static int decodeNoWhitespaceSimd(byte[] in, int inOffset, int inLength,
586574 }
587575
588576 requireScratch (scratch );
589- Simd simd = Simd .get ();
590- requireSimdApiArrays (simd , in , out , scratch );
591-
592- final int q0 = 0 ;
593- final int q1 = q0 + SIMD_LANES ;
594- final int q2 = q1 + SIMD_LANES ;
595- final int q3 = q2 + SIMD_LANES ;
596- final int o0 = q3 + SIMD_LANES ;
597- final int o1 = o0 + SIMD_LANES ;
598- final int o2 = o1 + SIMD_LANES ;
599- final int t0 = o2 + SIMD_LANES ;
600- final int t1 = t0 + SIMD_LANES ;
601- final int c3 = t1 + SIMD_LANES ;
602- final int c15 = c3 + SIMD_LANES ;
603-
604- for (int lane = 0 ; lane < SIMD_LANES ; lane ++) {
605- scratch [c3 + lane ] = 3 ;
606- scratch [c15 + lane ] = 15 ;
607- }
577+ requireSimdApiArrays (Simd .get (), in , out , scratch );
608578
609579 int fullLen = inLength - (pad > 0 ? 4 : 0 );
610- int simdFullLen = fullLen - ( fullLen % 64 ) ;
580+ int fullEnd = inOffset + fullLen ;
611581 int inIndex = inOffset ;
612582 int outIndex = outOffset ;
613- int endVector = inOffset + simdFullLen ;
614- for (; inIndex < endVector ; inIndex += 64 ) {
615- for (int lane = 0 ; lane < SIMD_LANES ; lane ++) {
616- int src = inIndex + lane * 4 ;
617- int d0 = decodeMapInt [in [src ] & 0xff ];
618- int d1 = decodeMapInt [in [src + 1 ] & 0xff ];
619- int d2 = decodeMapInt [in [src + 2 ] & 0xff ];
620- int d3 = decodeMapInt [in [src + 3 ] & 0xff ];
621- if ((d0 | d1 | d2 | d3 ) < 0 ) {
622- return -1 ;
623- }
624- scratch [q0 + lane ] = d0 ;
625- scratch [q1 + lane ] = d1 ;
626- scratch [q2 + lane ] = d2 ;
627- scratch [q3 + lane ] = d3 ;
628- }
629-
630- simd .shl (scratch , q0 , 2 , scratch , o0 , SIMD_LANES );
631- simd .shrLogical (scratch , q1 , 4 , scratch , t0 , SIMD_LANES );
632- simd .or (scratch , o0 , scratch , t0 , scratch , o0 , SIMD_LANES );
633- simd .and (scratch , q1 , scratch , c15 , scratch , t0 , SIMD_LANES );
634- simd .shl (scratch , t0 , 4 , scratch , t0 , SIMD_LANES );
635- simd .shrLogical (scratch , q2 , 2 , scratch , t1 , SIMD_LANES );
636- simd .or (scratch , t0 , scratch , t1 , scratch , o1 , SIMD_LANES );
637- simd .and (scratch , q2 , scratch , c3 , scratch , t0 , SIMD_LANES );
638- simd .shl (scratch , t0 , 6 , scratch , t0 , SIMD_LANES );
639- simd .or (scratch , t0 , scratch , q3 , scratch , o2 , SIMD_LANES );
640-
641- for (int lane = 0 ; lane < SIMD_LANES ; lane ++) {
642- out [outIndex ++] = (byte )scratch [o0 + lane ];
643- out [outIndex ++] = (byte )scratch [o1 + lane ];
644- out [outIndex ++] = (byte )scratch [o2 + lane ];
645- }
646- }
647-
648- int fullEnd = inOffset + fullLen ;
649583 for (; inIndex < fullEnd ; inIndex += 4 ) {
650584 int c0 = in [inIndex ] & 0xff ;
651585 int c1 = in [inIndex + 1 ] & 0xff ;
0 commit comments