Skip to content

Commit 3d26a8b

Browse files
authored
Span-ify integer parsing (#1559)
* Span-ify integer parsing
* Improve performance
* Refactor and fix regression
1 parent b3af90d commit 3d26a8b

2 files changed

Lines changed: 102 additions & 150 deletions

File tree

Src/IronPython/Runtime/LiteralParser.cs

Lines changed: 89 additions & 147 deletions
Original file line number | Diff line number | Diff line change
@@ -654,17 +654,19 @@ private static bool ParseInt(string text, int b, out int ret) {
654654
ret = 0;
655655
long m = 1;
656656
for (int i = text.Length - 1; i >= 0; i--) {
657+
var ch = text[i];
658+
657659
// avoid the exception here. Not only is throwing it expensive,
658660
// but loading the resources for it is also expensive
659-
long lret = ret + m * CharValue(text[i], b);
660-
if (Int32.MinValue <= lret && lret <= Int32.MaxValue) {
661+
long lret = ret + m * CharValue(ch, b);
662+
if (int.MinValue <= lret && lret <= int.MaxValue) {
661663
ret = (int)lret;
662664
} else {
663665
return false;
664666
}
665667

666668
m *= b;
667-
if (Int32.MinValue > m || m > Int32.MaxValue) {
669+
if (int.MinValue > m || m > int.MaxValue) {
668670
return false;
669671
}
670672
}
@@ -697,126 +699,119 @@ public static object ParseInteger(string text, int b) {
697699
return ScriptingRuntimeHelpers.Int32ToObject(iret);
698700
}
699701

700-
public static object ParseIntegerSign(string text, int b, int start = 0) {
701-
if (TryParseIntegerSign(text, b, start, out object val))
702-
return val;
703-
704-
throw new ValueErrorException(string.Format("invalid literal for int() with base {0}: {1}", b, StringOps.__repr__(text)));
705-
}
706-
707-
internal static bool TryParseIntegerSign(string text, int b, int start, out object val) {
708-
int end = text.Length, saveb = b, savestart = start;
709-
if (start < 0 || start > end) throw new ArgumentOutOfRangeException(nameof(start));
710-
short sign = 1;
711-
702+
internal static bool TryParseIntegerSign(ReadOnlySpan<char> text, int b, out object val) {
712703
if (b < 0 || b == 1 || b > 36) {
713704
throw new ValueErrorException("int() base must be >= 2 and <= 36, or 0");
714705
}
715706

716-
ParseIntegerStart(text, ref b, ref start, end, ref sign);
707+
text = text.Trim();
717708

718-
if (start < end && char.IsWhiteSpace(text, start)) {
709+
if (TryParseIntegerStart(text, ref b, out int sign, out int consumed)) {
710+
text = text.Slice(consumed);
711+
} else {
719712
val = default;
720713
return false;
721714
}
722715

723-
int ret = 0;
724-
try {
725-
int saveStart = start;
726-
for (; ; ) {
727-
int digit;
728-
if (start >= end) {
729-
if (saveStart == start) {
716+
Debug.Assert(!text.IsEmpty);
717+
718+
long ret = 0;
719+
720+
for (int i = 0; i < text.Length; i++) {
721+
var ch = text[i];
722+
if (!HexValue(ch, out int digit) || !(digit < b)) {
723+
val = default;
724+
return false;
725+
}
726+
727+
ret = ret * b + digit;
728+
729+
if (ret > int.MaxValue) {
730+
BigInteger retBi = ret;
731+
for (i++; i < text.Length; i++) {
732+
ch = text[i];
733+
if (!HexValue(ch, out digit) || !(digit < b)) {
730734
val = default;
731735
return false;
732736
}
733-
break;
734-
}
735-
if (!HexValue(text[start], out digit)) break;
736-
if (!(digit < b)) {
737-
val = default;
738-
return false;
737+
738+
retBi = retBi * b + digit;
739739
}
740740

741-
checked {
742-
// include sign here so that System.Int32.MinValue won't overflow
743-
ret = ret * b + sign * digit;
741+
if (sign < 0) {
742+
if (retBi == (BigInteger)int.MaxValue + 1) {
743+
val = ScriptingRuntimeHelpers.Int32ToObject(int.MinValue);
744+
return true;
745+
}
746+
val = -retBi;
747+
return true;
744748
}
745-
start++;
746-
}
747-
} catch (OverflowException) {
748-
if (TryParseBigIntegerSign(text, saveb, savestart, out var bi)) {
749-
val = bi;
749+
750+
val = retBi;
750751
return true;
751752
}
752-
val = default;
753-
return false;
754-
}
755-
756-
ParseIntegerEnd(text, ref start, ref end);
757-
758-
if (start < end) {
759-
val = default;
760-
return false;
761753
}
762754

763-
val = ScriptingRuntimeHelpers.Int32ToObject(ret);
755+
int res = unchecked((int)ret);
756+
res = sign < 0 ? -res : res;
757+
val = ScriptingRuntimeHelpers.Int32ToObject(res);
764758
return true;
765759
}
766760

767-
private static void ParseIntegerStart(string text, ref int b, ref int start, int end, ref short sign) {
768-
// Skip whitespace
769-
while (start < end && Char.IsWhiteSpace(text, start)) start++;
770-
// Sign?
771-
if (start < end) {
772-
switch (text[start]) {
773-
case '-':
774-
sign = -1;
775-
goto case '+';
776-
case '+':
777-
start++;
778-
break;
779-
}
761+
private static bool TryParseIntegerStart(ReadOnlySpan<char> text, ref int b, out int sign, out int consumed) {
762+
// set defaults
763+
sign = 1;
764+
consumed = 0;
765+
766+
if (text.IsEmpty) return false;
767+
768+
var start = 0;
769+
var end = text.Length;
770+
771+
// assumes a Trim has already been performed
772+
Debug.Assert(!char.IsWhiteSpace(text[start]));
773+
774+
// sign?
775+
switch (text[start]) {
776+
case '-':
777+
sign = -1;
778+
if (++start >= end) return false;
779+
break;
780+
case '+':
781+
if (++start >= end) return false;
782+
break;
780783
}
781784

782-
// Determine base
785+
// determine base
783786
if (b == 0) {
784-
if (start < end && text[start] == '0') {
785-
// Hex, oct, or bin
786-
if (++start < end) {
787-
switch (text[start]) {
788-
case 'x':
789-
case 'X':
790-
start++;
791-
b = 16;
792-
break;
793-
case 'o':
794-
case 'O':
795-
b = 8;
796-
start++;
797-
break;
798-
case 'b':
799-
case 'B':
800-
start++;
801-
b = 2;
802-
break;
803-
}
804-
}
805-
806-
if (b == 0) {
807-
// Keep the leading zero
808-
start--;
809-
b = 8;
787+
if (start + 1 < end && text[start] == '0') {
788+
char ch = text[++start];
789+
switch (ch) {
790+
case 'x':
791+
case 'X':
792+
b = 16;
793+
break;
794+
case 'o':
795+
case 'O':
796+
b = 8;
797+
break;
798+
case 'b':
799+
case 'B':
800+
b = 2;
801+
break;
802+
default:
803+
b = 1;
804+
consumed = start - 1;
805+
return true;
810806
}
807+
if (++start >= end) return false;
811808
} else {
812809
b = 10;
813810
}
814811
}
815-
}
816812

817-
private static void ParseIntegerEnd(string text, ref int start, ref int end) {
818-
// Skip whitespace
819-
while (start < end && char.IsWhiteSpace(text, start)) start++;
813+
consumed = start;
814+
return true;
820815
}
821816

822817
internal static BigInteger ParseBigInteger(string text, int b) {
@@ -835,7 +830,8 @@ internal static BigInteger ParseBigInteger(string text, int b) {
835830
uint uval = 0;
836831

837832
for (int j = 0; j < groupMax && i >= 0; j++) {
838-
uval = (uint)(CharValue(text[i--], b) * smallMultiplier + uval);
833+
var ch = text[i--];
834+
uval = (uint)(CharValue(ch, b) * smallMultiplier + uval);
839835
smallMultiplier *= b;
840836
}
841837

@@ -847,60 +843,6 @@ internal static BigInteger ParseBigInteger(string text, int b) {
847843
return ret;
848844
}
849845

850-
internal static BigInteger ParseBigIntegerSign(string text, int b, int start = 0) {
851-
if (TryParseBigIntegerSign(text, b, start, out var val))
852-
return val;
853-
854-
throw new ValueErrorException(string.Format("invalid literal for int() with base {0}: {1}", b, StringOps.__repr__(text)));
855-
}
856-
857-
private static bool TryParseBigIntegerSign(string text, int b, int start, out BigInteger val) {
858-
int end = text.Length;
859-
if (start < 0 || start > end) throw new ArgumentOutOfRangeException(nameof(start));
860-
short sign = 1;
861-
862-
if (b < 0 || b == 1 || b > 36) {
863-
throw new ValueErrorException("int() base must be >= 2 and <= 36, or 0");
864-
}
865-
866-
ParseIntegerStart(text, ref b, ref start, end, ref sign);
867-
868-
if (start < end && char.IsWhiteSpace(text, start)) {
869-
val = default;
870-
return false;
871-
}
872-
873-
BigInteger ret = BigInteger.Zero;
874-
int saveStart = start;
875-
for (; ; ) {
876-
int digit;
877-
if (start >= end) {
878-
if (start == saveStart) {
879-
val = default;
880-
return false;
881-
}
882-
break;
883-
}
884-
if (!HexValue(text[start], out digit)) break;
885-
if (!(digit < b)) {
886-
val = default;
887-
return false;
888-
}
889-
ret = ret * b + digit;
890-
start++;
891-
}
892-
893-
ParseIntegerEnd(text, ref start, ref end);
894-
895-
if (start < end) {
896-
val = default;
897-
return false;
898-
}
899-
900-
val = sign < 0 ? -ret : ret;
901-
return true;
902-
}
903-
904846
internal static bool TryParseFloat(string text, out double res, bool replaceUnicode) {
905847
try {
906848
//

Src/IronPython/Runtime/Operations/BigIntegerOps.cs

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -75,9 +75,17 @@ private static object FastNew(CodeContext/*!*/ context, object? o, int @base = 1
7575
throw new InvalidOperationException(); // unreachable
7676
}
7777
case string s:
78-
return LiteralParser.ParseIntegerSign(s, @base, FindStart(s, @base));
78+
if (LiteralParser.TryParseIntegerSign(s.AsSpan(FindStart(s, @base)), @base, out result)) {
79+
return result;
80+
} else {
81+
throw PythonOps.ValueError($"invalid literal for int() with base {@base}: {PythonOps.Repr(context, s)}");
82+
}
7983
case Extensible<string> es:
80-
return TryInvokeInt(context, o, out result) ? result : LiteralParser.ParseIntegerSign(es.Value, @base, FindStart(es.Value, @base));
84+
if (TryInvokeInt(context, o, out result) || LiteralParser.TryParseIntegerSign(es.Value.AsSpan(FindStart(es.Value, @base)), @base, out result)) {
85+
return result;
86+
} else {
87+
throw PythonOps.ValueError($"invalid literal for int() with base {@base}: {PythonOps.Repr(context, es)}");
88+
}
8189
default:
8290
break;
8391
}
@@ -168,7 +176,7 @@ public static object __new__(CodeContext/*!*/ context, [NotNone] PythonType cls,
168176
?? throw PythonOps.TypeErrorForBadInstance("int() argument must be a string, a bytes-like object or a number, not '{0}'", x);
169177

170178
var text = buf.AsReadOnlySpan().MakeString();
171-
if (!LiteralParser.TryParseIntegerSign(text, @base, FindStart(text, @base), out value))
179+
if (!LiteralParser.TryParseIntegerSign(text.AsSpan(FindStart(text, @base)), @base, out value))
172180
throw PythonOps.ValueError($"invalid literal for int() with base {@base}: {new Bytes(x).__repr__(context)}");
173181
}
174182

@@ -203,6 +211,8 @@ private static object ReturnObject(CodeContext context, PythonType cls, object v
203211
=> cls == TypeCache.BigInteger ? value : cls.CreateInstance(context, value);
204212

205213
private static int FindStart(string s, int radix) {
214+
if (radix == 10) return 0;
215+
206216
int i = 0;
207217

208218
// skip whitespace

0 commit comments

Comments
 (0)