Skip to content

Commit a8d349e

Browse files
authored
Perf | Reduce allocations when sending large strings (#4072)
* Introduce shims for the Encoding.GetByteCount and Encoding.GetBytes methods, and make sure these are used more widely. Improves performance on netfx and netcore. * Address comments in EncodingExtensions. * Add unit tests for EncodingExtensions. This also highlighted that passing a null string should throw an exception rather than return 0/[].
1 parent 4df2aae commit a8d349e

File tree

4 files changed

+237
-35
lines changed

4 files changed

+237
-35
lines changed

src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsParser.cs

Lines changed: 3 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -8651,36 +8651,18 @@ private Task WriteEncodingChar(string s, Encoding encoding, TdsParserStateObject
86518651

86528652
private byte[] SerializeEncodingChar(string s, int numChars, int offset, Encoding encoding)
86538653
{
8654-
#if NETFRAMEWORK
8655-
char[] charData;
8656-
byte[] byteData = null;
8657-
86588654
// if hitting 7.0 server, encoding will be null in metadata for columns or return values since
86598655
// 7.0 has no support for multiple code pages in data - single code page support only
8660-
if (encoding == null)
8661-
{
8662-
encoding = _defaultEncoding;
8663-
}
8664-
8665-
charData = s.ToCharArray(offset, numChars);
8666-
8667-
byteData = new byte[encoding.GetByteCount(charData, 0, charData.Length)];
8668-
encoding.GetBytes(charData, 0, charData.Length, byteData, 0);
8656+
encoding ??= _defaultEncoding;
86698657

8670-
return byteData;
8671-
#else
86728658
return encoding.GetBytes(s, offset, numChars);
8673-
#endif
86748659
}
86758660

86768661
private Task WriteEncodingChar(string s, int numChars, int offset, Encoding encoding, TdsParserStateObject stateObj, bool canAccumulate = true)
86778662
{
86788663
// if hitting 7.0 server, encoding will be null in metadata for columns or return values since
86798664
// 7.0 has no support for multiple code pages in data - single code page support only
8680-
if (encoding == null)
8681-
{
8682-
encoding = _defaultEncoding;
8683-
}
8665+
encoding ??= _defaultEncoding;
86848666

86858667
// Optimization: if the entire string fits in the current buffer, then copy it directly
86868668
int bytesLeft = stateObj._outBuff.Length - stateObj._outBytesUsed;
@@ -8692,23 +8674,14 @@ private Task WriteEncodingChar(string s, int numChars, int offset, Encoding enco
86928674
}
86938675
else
86948676
{
8695-
#if NETFRAMEWORK
8696-
char[] charData = s.ToCharArray(offset, numChars);
8697-
byte[] byteData = encoding.GetBytes(charData, 0, numChars);
8698-
Debug.Assert(byteData != null, "no data from encoding");
8699-
return stateObj.WriteByteArray(byteData, byteData.Length, 0, canAccumulate);
8700-
#else
87018677
byte[] byteData = encoding.GetBytes(s, offset, numChars);
87028678
Debug.Assert(byteData != null, "no data from encoding");
87038679
return stateObj.WriteByteArray(byteData, byteData.Length, 0, canAccumulate);
8704-
#endif
87058680
}
87068681
}
87078682

87088683
internal int GetEncodingCharLength(string value, int numChars, int charOffset, Encoding encoding)
87098684
{
8710-
// UNDONE: (PERF) this is an expensive way to get the length. Also, aren't we
8711-
// UNDONE: (PERF) going through these steps twice when we write out a value?
87128685
if (string.IsNullOrEmpty(value))
87138686
{
87148687
return 0;
@@ -8726,9 +8699,7 @@ internal int GetEncodingCharLength(string value, int numChars, int charOffset, E
87268699
encoding = _defaultEncoding;
87278700
}
87288701

8729-
char[] charData = value.ToCharArray(charOffset, numChars);
8730-
8731-
return encoding.GetByteCount(charData, 0, numChars);
8702+
return encoding.GetByteCount(value, charOffset, numChars);
87328703
}
87338704

87348705
//

src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsValueSetter.cs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -354,8 +354,7 @@ internal void SetString(string value, int offset, int length)
354354
}
355355
else
356356
{
357-
char[] chars = value.ToCharArray(offset, length);
358-
bytes = _stateObj.Parser._defaultEncoding.GetBytes(chars);
357+
bytes = _stateObj.Parser._defaultEncoding.GetBytes(value, offset, length);
359358
}
360359
SetBytes(0, bytes, 0, bytes.Length);
361360
SetBytesLength(bytes.Length);
@@ -376,7 +375,7 @@ internal void SetString(string value, int offset, int length)
376375
}
377376
else
378377
{
379-
bytes = _stateObj.Parser._defaultEncoding.GetBytes(value.ToCharArray(offset, length));
378+
bytes = _stateObj.Parser._defaultEncoding.GetBytes(value, offset, length);
380379
}
381380
_stateObj.Parser.WriteSqlVariantHeader(9 + bytes.Length, TdsEnums.SQLBIGVARCHAR, 7, _stateObj);
382381
_stateObj.Parser.WriteUnsignedInt(collation._info, _stateObj); // propbytes: collation.Info
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
#if NETFRAMEWORK
6+
7+
using System.Diagnostics;
8+
9+
#nullable enable
10+
11+
namespace System.Text;
12+
13+
/// <summary>
/// Extension-method shims that let an <see cref="Encoding"/> measure or encode a range of
/// characters taken directly from a <see cref="string"/>, without first copying that range
/// into a temporary <c>char[]</c>.
/// </summary>
internal static class EncodingExtensions
{
    /// <summary>
    /// Calculates the number of bytes produced by encoding a range of characters from a string.
    /// </summary>
    /// <param name="encoding">The encoding used to measure the characters.</param>
    /// <param name="s">The string containing the characters to measure.</param>
    /// <param name="offset">The index of the first character to measure.</param>
    /// <param name="count">The number of characters to measure.</param>
    /// <returns>
    /// The number of bytes needed to encode the requested range, or 0 when the range is empty.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="offset"/> and <paramref name="count"/> do not describe a range inside
    /// <paramref name="s"/>.
    /// </exception>
    public static int GetByteCount(this Encoding encoding, string? s, int offset, int count)
    {
        if (s is null)
        {
            throw new ArgumentNullException(nameof(s));
        }

        // Slicing performs the range validation; an invalid offset/count pair is rejected here.
        ReadOnlySpan<char> slicedString = s.AsSpan(offset, count);

        if (slicedString.Length == 0)
        {
            return 0;
        }

        unsafe
        {
            fixed (char* str = slicedString)
            {
                return encoding.GetByteCount(str, slicedString.Length);
            }
        }
    }

    /// <summary>
    /// Encodes a range of characters from a string into a newly allocated byte array.
    /// </summary>
    /// <param name="encoding">The encoding used to convert the characters.</param>
    /// <param name="s">The string containing the characters to encode.</param>
    /// <param name="index">The index of the first character to encode.</param>
    /// <param name="count">The number of characters to encode.</param>
    /// <returns>
    /// A byte array holding the encoded range. An empty array is returned when the range is
    /// empty or when the encoding produces no bytes for it.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="index"/> and <paramref name="count"/> do not describe a range inside
    /// <paramref name="s"/>.
    /// </exception>
    public static byte[] GetBytes(this Encoding encoding, string? s, int index, int count)
    {
        if (s is null)
        {
            throw new ArgumentNullException(nameof(s));
        }

        // Slicing performs the range validation; an invalid index/count pair is rejected here.
        ReadOnlySpan<char> slicedString = s.AsSpan(index, count);

        if (slicedString.Length == 0)
        {
            return Array.Empty<byte>();
        }

        unsafe
        {
            fixed (char* str = slicedString)
            {
                int byteCount = encoding.GetByteCount(str, slicedString.Length);

                // A non-empty range can still encode to nothing (for example, when the encoder
                // fallback emits no replacement bytes). Taking the address of bytes[0] on an
                // empty array would throw, so return early instead.
                if (byteCount == 0)
                {
                    return Array.Empty<byte>();
                }

                byte[] bytes = new byte[byteCount];

                fixed (byte* destArray = &bytes[0])
                {
                    int bytesWritten = encoding.GetBytes(str, slicedString.Length, destArray, bytes.Length);

                    Debug.Assert(bytesWritten == byteCount, "GetBytes wrote a different number of bytes than GetByteCount reported");
                    return bytes;
                }
            }
        }
    }
}
70+
71+
#endif
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using Xunit;
6+
7+
namespace System.Text.UnitTests;
8+
9+
/// <summary>
10+
/// Tests that the Encoding polyfills in netfx operate correctly and handle
11+
/// invalid parameter values.
12+
/// </summary>
13+
/// <remarks>
14+
/// In the netcore cases, we're testing the built-in GetBytes and GetByteCount
15+
/// methods. The contract for our extension polyfills must match these implementations.
16+
/// </remarks>
17+
public class EncodingTest
{
    private const string ExampleStringValue = "ABCDéFG1234567abcdefg";

    /// <summary>
    /// Represents a series of invalid [offset, count] pairs into the <see cref="ExampleStringValue"/>
    /// constant.
    /// </summary>
    public static TheoryData<int, int> InvalidOffsetsAndCounts =>
        new()
        {
            // Group 1: offset starts before the string.
            // * Count extends beyond it.
            { -1, 999 },
            // * Count is valid.
            { -1, 5 },
            // Group 2: offset is valid.
            // * Count extends beyond the end of it.
            { 0, 999 },
            // * Count extends backwards to the start of it.
            { 5, -5 },
            // Group 3: offset starts beyond the end of the string.
            // * Count extends beyond the end of it.
            { 999, 999 },
            // * Count extends backwards into the string.
            { 999, -1005 }
        };

#if NET
    static EncodingTest()
    {
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
    }
#endif

    /// <summary>
    /// Verifies that GetByteCount throws an ArgumentNullException when passed a null string.
    /// </summary>
    [Fact]
    public void GetByteCount_ThrowsOnNullString()
    {
        string nullString = null!;
        Action act = () => Encoding.Unicode.GetByteCount(nullString, 0, 0);

        Assert.Throws<ArgumentNullException>(act);
    }

    /// <summary>
    /// Verifies that GetBytes throws an ArgumentNullException when passed a null string.
    /// </summary>
    [Fact]
    public void GetBytes_ThrowsOnNullString()
    {
        string nullString = null!;
        Action act = () => Encoding.Unicode.GetBytes(nullString, 0, 0);

        Assert.Throws<ArgumentNullException>(act);
    }

    /// <summary>
    /// Verifies that GetByteCount throws an ArgumentOutOfRangeException when passed an offset
    /// or count which is outside of the string.
    /// </summary>
    /// <param name="offset">offset parameter of GetByteCount.</param>
    /// <param name="count">count parameter of GetByteCount.</param>
    /// <seealso cref="InvalidOffsetsAndCounts"/>
    [Theory]
    [MemberData(nameof(InvalidOffsetsAndCounts))]
    public void GetByteCount_ThrowsOnOutOfRangeOffsetOrCount(int offset, int count)
    {
        Action act = () => Encoding.Unicode.GetByteCount(ExampleStringValue, offset, count);

        Assert.Throws<ArgumentOutOfRangeException>(act);
    }

    /// <summary>
    /// Verifies that GetBytes throws an ArgumentOutOfRangeException when passed an offset
    /// or count which is outside of the string.
    /// </summary>
    /// <param name="offset">offset parameter of GetBytes.</param>
    /// <param name="count">count parameter of GetBytes.</param>
    /// <seealso cref="InvalidOffsetsAndCounts"/>
    [Theory]
    [MemberData(nameof(InvalidOffsetsAndCounts))]
    public void GetBytes_ThrowsOnOutOfRangeOffsetOrCount(int offset, int count)
    {
        Action act = () => Encoding.Unicode.GetBytes(ExampleStringValue, offset, count);

        Assert.Throws<ArgumentOutOfRangeException>(act);
    }

    /// <summary>
    /// Verifies that a zero-length range is accepted at the start, in the middle and at the very end of the
    /// string, and that it encodes to zero bytes.
    /// </summary>
    [Fact]
    public void GetByteCountAndGetBytes_AcceptZeroCount()
    {
        foreach (int offset in new[] { 0, 4, ExampleStringValue.Length })
        {
            Assert.Equal(0, Encoding.Unicode.GetByteCount(ExampleStringValue, offset, 0));
            Assert.Empty(Encoding.Unicode.GetBytes(ExampleStringValue, offset, 0));
        }
    }

    /// <summary>
    /// Verifies that when using the new GetByteCount and GetBytes polyfills to encode the entire string, the return
    /// value is equal to passing the string as-is to GetByteCount(string) and GetBytes(string).
    /// </summary>
    [Fact]
    public void GetBytesOfFullStringByLength_MatchesGetBytesOfFullString()
    {
        byte[] fullStringBytes = Encoding.Unicode.GetBytes(ExampleStringValue);
        int fullStringByteCount = Encoding.Unicode.GetByteCount(ExampleStringValue);

        byte[] partialStringBytes = Encoding.Unicode.GetBytes(ExampleStringValue, 0, ExampleStringValue.Length);
        int partialStringByteCount = Encoding.Unicode.GetByteCount(ExampleStringValue, 0, ExampleStringValue.Length);

        Assert.Equal(fullStringByteCount, partialStringByteCount);
        Assert.Equal(fullStringByteCount, partialStringBytes.Length);
        Assert.Equal(fullStringBytes, partialStringBytes);
    }

    /// <summary>
    /// Verifies that encoding a specific substring returns a byte array which can be decoded into the same string, in
    /// various code pages.
    /// </summary>
    /// <param name="codePage">The code page identifier to use for transcoding.</param>
    [Theory]
    // Unicode
    [InlineData(1200)]
    // UTF8
    [InlineData(65001)]
    public void GetBytes_Roundtrips(int codePage)
    {
        Encoding encoding = Encoding.GetEncoding(codePage);
        byte[] partialStringBytes = encoding.GetBytes(ExampleStringValue, 4, 5);
        string expectedRoundtrippedValue = ExampleStringValue.Substring(4, 5);
        string roundtrip = encoding.GetString(partialStringBytes);

        Assert.Equal(expectedRoundtrippedValue, roundtrip);
    }

    /// <summary>
    /// Verifies that when a range contains a character that is multibyte in some encodings, GetByteCount
    /// returns the correct number of bytes for each encoding.
    /// </summary>
    [Fact]
    public void GetByteCount_ReturnsCorrectValueOnMultiCharacterRune()
    {
        // The character é is two bytes in UTF8.
        Assert.Equal(6, Encoding.UTF8.GetByteCount(ExampleStringValue, 4, 5));

        // All Unicode characters in our sample string are two bytes long.
        Assert.Equal(10, Encoding.Unicode.GetByteCount(ExampleStringValue, 4, 5));

        // Code page 1251 does not have the é character, so it treats it as the single-byte character "e".
        Assert.Equal(5, Encoding.GetEncoding(1251).GetByteCount(ExampleStringValue, 4, 5));
    }
}

0 commit comments

Comments
 (0)