Perf | Reduce allocations when sending large strings (#4072)

edwardneal · web-flow · commit a8d349e7baeb · 2026-04-16T22:56:41.000Z
* Introduce shims for Encoding

Shim GetByteCount and GetBytes methods, and make sure these are used more widely.
Improves performance on netfx and netcore.

* Address comments in EncodingExtensions

* Add unit tests for EncodingExtensions

This also highlighted that passing a null string should throw an exception rather than return 0/[].
diff --git a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsParser.cs b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsParser.cs
@@ -8651,36 +8651,18 @@ private Task WriteEncodingChar(string s, Encoding encoding, TdsParserStateObject
 
         private byte[] SerializeEncodingChar(string s, int numChars, int offset, Encoding encoding)
         {
-#if NETFRAMEWORK
-            char[] charData;
-            byte[] byteData = null;
-
             // if hitting 7.0 server, encoding will be null in metadata for columns or return values since
             // 7.0 has no support for multiple code pages in data - single code page support only
-            if (encoding == null)
-            {
-                encoding = _defaultEncoding;
-            }
-
-            charData = s.ToCharArray(offset, numChars);
-
-            byteData = new byte[encoding.GetByteCount(charData, 0, charData.Length)];
-            encoding.GetBytes(charData, 0, charData.Length, byteData, 0);
+            encoding ??= _defaultEncoding;
 
-            return byteData;
-#else
             return encoding.GetBytes(s, offset, numChars);
-#endif
         }
 
         private Task WriteEncodingChar(string s, int numChars, int offset, Encoding encoding, TdsParserStateObject stateObj, bool canAccumulate = true)
         {
             // if hitting 7.0 server, encoding will be null in metadata for columns or return values since
             // 7.0 has no support for multiple code pages in data - single code page support only
-            if (encoding == null)
-            {
-                encoding = _defaultEncoding;
-            }
+            encoding ??= _defaultEncoding;
 
             // Optimization: if the entire string fits in the current buffer, then copy it directly
             int bytesLeft = stateObj._outBuff.Length - stateObj._outBytesUsed;
@@ -8692,23 +8674,14 @@ private Task WriteEncodingChar(string s, int numChars, int offset, Encoding enco
             }
             else
             {
-#if NETFRAMEWORK
-                char[] charData = s.ToCharArray(offset, numChars);
-                byte[] byteData = encoding.GetBytes(charData, 0, numChars);
-                Debug.Assert(byteData != null, "no data from encoding");
-                return stateObj.WriteByteArray(byteData, byteData.Length, 0, canAccumulate);
-#else
                 byte[] byteData = encoding.GetBytes(s, offset, numChars);
                 Debug.Assert(byteData != null, "no data from encoding");
                 return stateObj.WriteByteArray(byteData, byteData.Length, 0, canAccumulate);
-#endif
             }
         }
 
         internal int GetEncodingCharLength(string value, int numChars, int charOffset, Encoding encoding)
         {
-            // UNDONE: (PERF) this is an expensive way to get the length.  Also, aren't we
-            // UNDONE: (PERF) going through these steps twice when we write out a value?
             if (string.IsNullOrEmpty(value))
             {
                 return 0;
@@ -8726,9 +8699,7 @@ internal int GetEncodingCharLength(string value, int numChars, int charOffset, E
                 encoding = _defaultEncoding;
             }
 
-            char[] charData = value.ToCharArray(charOffset, numChars);
-
-            return encoding.GetByteCount(charData, 0, numChars);
+            return encoding.GetByteCount(value, charOffset, numChars);
         }
 
         //
diff --git a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsValueSetter.cs b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsValueSetter.cs
@@ -354,8 +354,7 @@ internal void SetString(string value, int offset, int length)
                 }
                 else
                 {
-                    char[] chars = value.ToCharArray(offset, length);
-                    bytes = _stateObj.Parser._defaultEncoding.GetBytes(chars);
+                    bytes = _stateObj.Parser._defaultEncoding.GetBytes(value, offset, length);
                 }
                 SetBytes(0, bytes, 0, bytes.Length);
                 SetBytesLength(bytes.Length);
@@ -376,7 +375,7 @@ internal void SetString(string value, int offset, int length)
                     }
                     else
                     {
-                        bytes = _stateObj.Parser._defaultEncoding.GetBytes(value.ToCharArray(offset, length));
+                        bytes = _stateObj.Parser._defaultEncoding.GetBytes(value, offset, length);
                     }
                     _stateObj.Parser.WriteSqlVariantHeader(9 + bytes.Length, TdsEnums.SQLBIGVARCHAR, 7, _stateObj);
                     _stateObj.Parser.WriteUnsignedInt(collation._info, _stateObj); // propbytes: collation.Info
diff --git a/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs b/src/Microsoft.Data.SqlClient/src/System/Text/EncodingExtensions.netfx.cs
@@ -0,0 +1,112 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#if NETFRAMEWORK
+
+using System.Diagnostics;
+
+#nullable enable
+
+namespace System.Text;
+
+/// <summary>
+/// Polyfills for the <see cref="Encoding"/> overloads which operate on a slice of a string,
+/// avoiding an intermediate <c>char[]</c> copy of that slice.
+/// </summary>
+internal static class EncodingExtensions
+{
+    /// <summary>
+    /// Calculates the number of bytes produced by encoding <paramref name="count"/> characters of
+    /// <paramref name="s"/>, starting at <paramref name="offset"/>.
+    /// </summary>
+    /// <param name="encoding">The encoding used to measure the characters.</param>
+    /// <param name="s">The string containing the characters to encode.</param>
+    /// <param name="offset">The index of the first character to encode.</param>
+    /// <param name="count">The number of characters to encode.</param>
+    /// <returns>The encoded length in bytes, or 0 when the slice is empty.</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
+    /// <exception cref="ArgumentOutOfRangeException">
+    /// <paramref name="offset"/> and <paramref name="count"/> do not describe a range inside <paramref name="s"/>.
+    /// </exception>
+    public static int GetByteCount(this Encoding encoding, string? s, int offset, int count)
+    {
+        if (s is null)
+        {
+            throw new ArgumentNullException(nameof(s));
+        }
+
+        // AsSpan performs the bounds checks on offset and count.
+        ReadOnlySpan<char> slicedString = s.AsSpan(offset, count);
+
+        if (slicedString.Length == 0)
+        {
+            return 0;
+        }
+
+        unsafe
+        {
+            fixed (char* str = slicedString)
+            {
+                return encoding.GetByteCount(str, slicedString.Length);
+            }
+        }
+    }
+
+    /// <summary>
+    /// Encodes <paramref name="count"/> characters of <paramref name="s"/>, starting at
+    /// <paramref name="index"/>, into a newly allocated byte array.
+    /// </summary>
+    /// <param name="encoding">The encoding used to convert the characters.</param>
+    /// <param name="s">The string containing the characters to encode.</param>
+    /// <param name="index">The index of the first character to encode.</param>
+    /// <param name="count">The number of characters to encode.</param>
+    /// <returns>The encoded bytes, or an empty array when nothing is encoded.</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
+    /// <exception cref="ArgumentOutOfRangeException">
+    /// <paramref name="index"/> and <paramref name="count"/> do not describe a range inside <paramref name="s"/>.
+    /// </exception>
+    public static byte[] GetBytes(this Encoding encoding, string? s, int index, int count)
+    {
+        if (s is null)
+        {
+            throw new ArgumentNullException(nameof(s));
+        }
+
+        // AsSpan performs the bounds checks on index and count.
+        ReadOnlySpan<char> slicedString = s.AsSpan(index, count);
+
+        if (slicedString.Length == 0)
+        {
+            return Array.Empty<byte>();
+        }
+
+        unsafe
+        {
+            fixed (char* str = slicedString)
+            {
+                int byteCount = encoding.GetByteCount(str, slicedString.Length);
+
+                // A non-empty slice can still encode to zero bytes (e.g. an encoder fallback
+                // with an empty replacement string). &bytes[0] on a zero-length array would
+                // throw IndexOutOfRangeException, so return before pinning.
+                if (byteCount == 0)
+                {
+                    return Array.Empty<byte>();
+                }
+
+                byte[] bytes = new byte[byteCount];
+
+                fixed (byte* destArray = &bytes[0])
+                {
+                    int bytesWritten = encoding.GetBytes(str, slicedString.Length, destArray, bytes.Length);
+
+                    Debug.Assert(bytesWritten == byteCount);
+                    return bytes;
+                }
+            }
+        }
+    }
+}
+
+#endif
diff --git a/src/Microsoft.Data.SqlClient/tests/UnitTests/System/Text/EncodingTest.cs b/src/Microsoft.Data.SqlClient/tests/UnitTests/System/Text/EncodingTest.cs
@@ -0,0 +1,161 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Xunit;
+
+namespace System.Text.UnitTests;
+
+/// <summary>
+/// Tests that the Encoding polyfills in netfx operate correctly and handle
+/// invalid parameter values.
+/// </summary>
+/// <remarks>
+/// In the netcore cases, we're testing the built-in GetBytes and GetByteCount
+/// methods. The contract for our extension polyfills must match these implementations.
+/// </remarks>
+public class EncodingTest
+{
+    private const string ExampleStringValue = "ABCDéFG1234567abcdefg";
+
+    /// <summary>
+    /// Represents a series of invalid [offset, count] pairs into the <see cref="ExampleStringValue"/>
+    /// constant.
+    /// </summary>
+    public static TheoryData<int, int> InvalidOffsetsAndCounts =>
+        new()
+        {
+            // Group 1: offset starts before the string.
+            // * Count extends beyond it.
+            { -1, 999 },
+            // * Count is valid.
+            { -1, 5 },
+            // Group 2: offset is valid.
+            // * Count extends beyond the end of it.
+            { 0, 999 },
+            // * Count extends backwards to the start of it.
+            { 5, -5 },
+            // Group 3: offset starts beyond the end of the string.
+            // * Count extends beyond the end of it.
+            { 999, 999 },
+            // * Count extends backwards into the string.
+            { 999, -1005 }
+        };
+
+    #if NET
+    static EncodingTest()
+    {
+        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
+    }
+#endif
+
+    /// <summary>
+    /// Verifies that GetByteCount throws an ArgumentNullException when passed a null string.
+    /// </summary>
+    [Fact]
+    public void GetByteCount_ThrowsOnNullString()
+    {
+        string nullString = null!;
+        Action act = () => Encoding.Unicode.GetByteCount(nullString, 0, 0);
+
+        Assert.Throws<ArgumentNullException>(act);
+    }
+
+    /// <summary>
+    /// Verifies that GetBytes throws an ArgumentNullException when passed a null string.
+    /// </summary>
+    [Fact]
+    public void GetBytes_ThrowsOnNullString()
+    {
+        string nullString = null!;
+        Action act = () => Encoding.Unicode.GetBytes(nullString, 0, 0);
+
+        Assert.Throws<ArgumentNullException>(act);
+    }
+
+    /// <summary>
+    /// Verifies that GetByteCount throws an ArgumentOutOfRangeException when passed an offset
+    /// or count which is outside of the string.
+    /// </summary>
+    /// <param name="offset">offset parameter of GetByteCount.</param>
+    /// <param name="count">count parameter of GetByteCount.</param>
+    /// <seealso cref="InvalidOffsetsAndCounts"/>
+    [Theory]
+    [MemberData(nameof(InvalidOffsetsAndCounts))]
+    public void GetByteCount_ThrowsOnOutOfRangeOffsetOrCount(int offset, int count)
+    {
+        Action act = () => Encoding.Unicode.GetByteCount(ExampleStringValue, offset, count);
+
+        Assert.Throws<ArgumentOutOfRangeException>(act);
+    }
+
+    /// <summary>
+    /// Verifies that GetBytes throws an ArgumentOutOfRangeException when passed an offset
+    /// or count which is outside of the string.
+    /// </summary>
+    /// <param name="offset">offset parameter of GetBytes.</param>
+    /// <param name="count">count parameter of GetBytes.</param>
+    [Theory]
+    [MemberData(nameof(InvalidOffsetsAndCounts))]
+    public void GetBytes_ThrowsOnOutOfRangeOffsetOrCount(int offset, int count)
+    {
+        Action act = () => Encoding.Unicode.GetBytes(ExampleStringValue, offset, count);
+
+        Assert.Throws<ArgumentOutOfRangeException>(act);
+    }
+
+    /// <summary>
+    /// Verifies that when using the new GetByteCount and GetBytes polyfills to encode the entire string, the return
+    /// value is equal to passing the string as-is to GetByteCount(string) and GetBytes(string).
+    /// </summary>
+    [Fact]
+    public void GetBytesOfFullStringByLength_MatchesGetBytesOfFullString()
+    {
+        byte[] fullStringBytes = Encoding.Unicode.GetBytes(ExampleStringValue);
+        int fullStringByteCount = Encoding.Unicode.GetByteCount(ExampleStringValue);
+
+        byte[] partialStringBytes = Encoding.Unicode.GetBytes(ExampleStringValue, 0, ExampleStringValue.Length);
+        int partialStringByteCount = Encoding.Unicode.GetByteCount(ExampleStringValue, 0, ExampleStringValue.Length);
+
+        Assert.Equal(fullStringByteCount, partialStringByteCount);
+        Assert.Equal(fullStringByteCount, partialStringBytes.Length);
+        Assert.Equal(fullStringBytes, partialStringBytes);
+    }
+
+    /// <summary>
+    /// Verifies that encoding a specific substring returns a byte array which can be decoded into the same string, in
+    /// various code pages.
+    /// </summary>
+    /// <param name="codePage">The code page identifier to use for transcoding.</param>
+    [Theory]
+    // Unicode
+    [InlineData(1200)]
+    // UTF8
+    [InlineData(65001)]
+    public void GetBytes_Roundtrips(int codePage)
+    {
+        Encoding encoding = Encoding.GetEncoding(codePage);
+        byte[] partialStringBytes = encoding.GetBytes(ExampleStringValue, 4, 5);
+        string expectedRoundtrippedValue = ExampleStringValue.Substring(4, 5);
+        string roundtrip = encoding.GetString(partialStringBytes);
+
+        Assert.Equal(expectedRoundtrippedValue, roundtrip);
+    }
+
+    /// <summary>
+    /// Verifies that when a string contains a multibyte character, GetByteCount returns the correct number of
+    /// bytes for the encoding.
+    /// </summary>
+    [Fact]
+    public void GetByteCount_ReturnsCorrectValueOnMultiCharacterRune()
+    {
+        // The character é is two bytes in UTF8.
+        Assert.Equal(6, Encoding.UTF8.GetByteCount(ExampleStringValue, 4, 5));
+
+        // All Unicode characters in our sample string are two bytes long.
+        Assert.Equal(10, Encoding.Unicode.GetByteCount(ExampleStringValue, 4, 5));
+
+        // Code page 1251 does not have the é character, so treats it as the single-byte character "e".
+        Assert.Equal(5, Encoding.GetEncoding(1251).GetByteCount(ExampleStringValue, 4, 5));
+    }
+}

Original file line number	Diff line number	Diff line change
`@@ -354,8 +354,7 @@ internal void SetString(string value, int offset, int length)`
`354`	`354`	`}`
`355`	`355`	`else`
`356`	`356`	`{`
`357`		`- char[] chars = value.ToCharArray(offset, length);`
`358`		`- bytes = _stateObj.Parser._defaultEncoding.GetBytes(chars);`
	`357`	`+ bytes = _stateObj.Parser._defaultEncoding.GetBytes(value, offset, length);`
`359`	`358`	`}`
`360`	`359`	`SetBytes(0, bytes, 0, bytes.Length);`
`361`	`360`	`SetBytesLength(bytes.Length);`
`@@ -376,7 +375,7 @@ internal void SetString(string value, int offset, int length)`
`376`	`375`	`}`
`377`	`376`	`else`
`378`	`377`	`{`
`379`		`- bytes = _stateObj.Parser._defaultEncoding.GetBytes(value.ToCharArray(offset, length));`
	`378`	`+ bytes = _stateObj.Parser._defaultEncoding.GetBytes(value, offset, length);`
`380`	`379`	`}`
`381`	`380`	`_stateObj.Parser.WriteSqlVariantHeader(9 + bytes.Length, TdsEnums.SQLBIGVARCHAR, 7, _stateObj);`
`382`	`381`	`_stateObj.Parser.WriteUnsignedInt(collation._info, _stateObj); // propbytes: collation.Info`