@@ -1751,35 +1751,61 @@ def test_basic(self):
17511751 writer .write_utf8 (b'var' , - 1 )
17521752
17531753 # test PyUnicodeWriter_WriteChar()
1754- writer .write_char ('=' )
1754+ writer .write_char (ord ( '=' ) )
17551755
17561756 # test PyUnicodeWriter_WriteSubstring()
17571757 writer .write_substring ("[long]" , 1 , 5 )
1758+ # CRASHES writer.write_substring(NULL, 0, 0)
17581759
17591760 # test PyUnicodeWriter_WriteStr()
17601761 writer .write_str (" value " )
1762+ # CRASHES writer.write_str(NULL)
17611763
17621764 # test PyUnicodeWriter_WriteRepr()
17631765 writer .write_repr ("repr" )
17641766
17651767 self .assertEqual (writer .finish (),
17661768 "var=long value 'repr'" )
17671769
1770+ def test_repr_null (self ):
1771+ writer = self .create_writer (0 )
1772+ writer .write_utf8 (b'var=' , - 1 )
1773+ writer .write_repr (NULL )
1774+ self .assertEqual (writer .finish (),
1775+ "var=<NULL>" )
1776+
1777+ def test_write_char (self ):
1778+ writer = self .create_writer (0 )
1779+ writer .write_char (0 )
1780+ writer .write_char (ord ('$' ))
1781+ writer .write_char (0x20ac )
1782+ writer .write_char (0x10_ffff )
1783+ self .assertRaises (ValueError , writer .write_char , 0x11_0000 )
1784+ self .assertRaises (ValueError , writer .write_char , 0xFFFF_FFFF )
1785+ self .assertEqual (writer .finish (),
1786+ "\0 $\u20AC \U0010FFFF " )
1787+
17681788 def test_utf8 (self ):
17691789 writer = self .create_writer (0 )
17701790 writer .write_utf8 (b"ascii" , - 1 )
1771- writer .write_char ('-' )
1791+ writer .write_char (ord ( '-' ) )
17721792 writer .write_utf8 (b"latin1=\xC3 \xA9 " , - 1 )
1773- writer .write_char ('-' )
1793+ writer .write_char (ord ( '-' ) )
17741794 writer .write_utf8 (b"euro=\xE2 \x82 \xAC " , - 1 )
1775- writer .write_char ('.' )
1795+ writer .write_char (ord ('.' ))
1796+ writer .write_utf8 (NULL , 0 )
1797+ # CRASHES writer.write_utf8(NULL, 1)
1798+ # CRASHES writer.write_utf8(NULL, -1)
17761799 self .assertEqual (writer .finish (),
17771800 "ascii-latin1=\xE9 -euro=\u20AC ." )
17781801
17791802 def test_ascii (self ):
17801803 writer = self .create_writer (0 )
17811804 writer .write_ascii (b"Hello " , - 1 )
17821805 writer .write_ascii (b"" , 0 )
1806+ writer .write_ascii (NULL , 0 )
1807+ # CRASHES writer.write_ascii(NULL, 1)
1808+ # CRASHES writer.write_ascii(NULL, -1)
17831809 writer .write_ascii (b"Python! <truncated>" , 6 )
17841810 self .assertEqual (writer .finish (), "Hello Python" )
17851811
@@ -1796,6 +1822,9 @@ def test_recover_utf8_error(self):
17961822 # write fails with an invalid string
17971823 with self .assertRaises (UnicodeDecodeError ):
17981824 writer .write_utf8 (b"invalid\xFF " , - 1 )
1825+ with self .assertRaises (UnicodeDecodeError ):
1826+ s = "truncated\u20AC " .encode ()
1827+ writer .write_utf8 (s , len (s ) - 1 )
17991828
18001829 # retry write with a valid string
18011830 writer .write_utf8 (b"valid" , - 1 )
@@ -1807,13 +1836,19 @@ def test_decode_utf8(self):
18071836 # test PyUnicodeWriter_DecodeUTF8Stateful()
18081837 writer = self .create_writer (0 )
18091838 writer .decodeutf8stateful (b"ign\xFF ore" , - 1 , b"ignore" )
1810- writer .write_char ('-' )
1839+ writer .write_char (ord ( '-' ) )
18111840 writer .decodeutf8stateful (b"replace\xFF " , - 1 , b"replace" )
1812- writer .write_char ('-' )
1841+ writer .write_char (ord ( '-' ) )
18131842
18141843 # incomplete trailing UTF-8 sequence
18151844 writer .decodeutf8stateful (b"incomplete\xC3 " , - 1 , b"replace" )
18161845
1846+ writer .decodeutf8stateful (NULL , 0 , b"replace" )
1847+ # CRASHES writer.decodeutf8stateful(NULL, 1, b"replace")
1848+ # CRASHES writer.decodeutf8stateful(NULL, -1, b"replace")
1849+ with self .assertRaises (UnicodeDecodeError ):
1850+ writer .decodeutf8stateful (b"default\xFF " , - 1 , NULL )
1851+
18171852 self .assertEqual (writer .finish (),
18181853 "ignore-replace\uFFFD -incomplete\uFFFD " )
18191854
@@ -1824,12 +1859,12 @@ def test_decode_utf8_consumed(self):
18241859 # valid string
18251860 consumed = writer .decodeutf8stateful (b"text" , - 1 , b"strict" , True )
18261861 self .assertEqual (consumed , 4 )
1827- writer .write_char ('-' )
1862+ writer .write_char (ord ( '-' ) )
18281863
18291864 # non-ASCII
18301865 consumed = writer .decodeutf8stateful (b"\xC3 \xA9 -\xE2 \x82 \xAC " , 6 , b"strict" , True )
18311866 self .assertEqual (consumed , 6 )
1832- writer .write_char ('-' )
1867+ writer .write_char (ord ( '-' ) )
18331868
18341869 # invalid UTF-8 (consumed is 0 on error)
18351870 with self .assertRaises (UnicodeDecodeError ):
@@ -1838,54 +1873,92 @@ def test_decode_utf8_consumed(self):
18381873 # ignore error handler
18391874 consumed = writer .decodeutf8stateful (b"more\xFF " , - 1 , b"ignore" , True )
18401875 self .assertEqual (consumed , 5 )
1841- writer .write_char ('-' )
1876+ writer .write_char (ord ( '-' ) )
18421877
18431878 # incomplete trailing UTF-8 sequence
18441879 consumed = writer .decodeutf8stateful (b"incomplete\xC3 " , - 1 , b"ignore" , True )
18451880 self .assertEqual (consumed , 10 )
1881+ writer .write_char (ord ('-' ))
1882+
1883+ consumed = writer .decodeutf8stateful (NULL , 0 , b"replace" , True )
1884+ self .assertEqual (consumed , 0 )
1885+ # CRASHES writer.decodeutf8stateful(NULL, 1, b"replace", True)
1886+ # CRASHES writer.decodeutf8stateful(NULL, -1, b"replace", True)
1887+ consumed = writer .decodeutf8stateful (b"default\xC3 " , - 1 , NULL , True )
1888+ self .assertEqual (consumed , 7 )
18461889
1847- self .assertEqual (writer .finish (), "text-\xE9 -\u20AC -more-incomplete" )
1890+ self .assertEqual (writer .finish (), "text-\xE9 -\u20AC -more-incomplete-default " )
18481891
18491892 def test_widechar (self ):
1893+ from _testcapi import SIZEOF_WCHAR_T
1894+
1895+ if SIZEOF_WCHAR_T == 2 :
1896+ encoding = 'utf-16le' if sys .byteorder == 'little' else 'utf-16be'
1897+ elif SIZEOF_WCHAR_T == 4 :
1898+ encoding = 'utf-32le' if sys .byteorder == 'little' else 'utf-32be'
1899+
18501900 writer = self .create_writer (0 )
1851- writer .write_widechar ("latin1=\xE9 " )
1852- writer .write_widechar ("-" )
1853- writer .write_widechar ("euro=\u20AC " )
1854- writer .write_char ("-" )
1855- writer .write_widechar ("max=\U0010ffff " )
1856- writer .write_char ('.' )
1901+ writer .write_widechar ("latin1=\xE9 " .encode (encoding ))
1902+ writer .write_char (ord ("-" ))
1903+ writer .write_widechar ("euro=\u20AC " .encode (encoding ))
1904+ writer .write_char (ord ("-" ))
1905+ writer .write_widechar ("max=\U0010ffff " .encode (encoding ))
1906+ writer .write_char (ord ("-" ))
1907+ writer .write_widechar ("zeroes=" .encode (encoding ).ljust (SIZEOF_WCHAR_T * 10 , b'\0 ' ),
1908+ 10 )
1909+ writer .write_char (ord ('.' ))
1910+
1911+ if SIZEOF_WCHAR_T == 4 :
1912+ invalid = (b'\x00 \x00 \x11 \x00 ' if sys .byteorder == 'little' else
1913+ b'\x00 \x11 \x00 \x00 ' )
1914+ with self .assertRaises (ValueError ):
1915+ writer .write_widechar ("invalid=" .encode (encoding ) + invalid )
1916+ writer .write_widechar (b'' , - 5 )
1917+ writer .write_widechar (NULL , 0 )
1918+ # CRASHES writer.write_widechar(NULL, 1)
1919+ # CRASHES writer.write_widechar(NULL, -1)
1920+
18571921 self .assertEqual (writer .finish (),
1858- "latin1=\xE9 -euro=\u20AC -max=\U0010ffff ." )
1922+ "latin1=\xE9 -euro=\u20AC -max=\U0010ffff -zeroes= \0 \0 \0 ." )
18591923
18601924 def test_ucs4 (self ):
1925+ encoding = 'utf-32le' if sys .byteorder == 'little' else 'utf-32be'
1926+
18611927 writer = self .create_writer (0 )
1862- writer .write_ucs4 ("ascii IGNORED" , 5 )
1863- writer .write_char ("-" )
1864- writer .write_ucs4 ("latin1=\xe9 " , 8 )
1865- writer .write_char ("-" )
1866- writer .write_ucs4 ("euro=\u20ac " , 6 )
1867- writer .write_char ("-" )
1868- writer .write_ucs4 ("max=\U0010ffff " , 5 )
1869- writer .write_char ("." )
1928+ writer .write_ucs4 ("ascii IGNORED" . encode ( encoding ) , 5 )
1929+ writer .write_char (ord ( "-" ) )
1930+ writer .write_ucs4 ("latin1=\xe9 " . encode ( encoding ) )
1931+ writer .write_char (ord ( "-" ) )
1932+ writer .write_ucs4 ("euro=\u20ac " . encode ( encoding ) )
1933+ writer .write_char (ord ( "-" ) )
1934+ writer .write_ucs4 ("max=\U0010ffff " . encode ( encoding ) )
1935+ writer .write_char (ord ( "." ) )
18701936 self .assertEqual (writer .finish (),
18711937 "ascii-latin1=\xE9 -euro=\u20AC -max=\U0010ffff ." )
18721938
18731939 # Test some special characters
18741940 writer = self .create_writer (0 )
18751941 # Lone surrogate character
1876- writer .write_ucs4 ("lone\uDC80 " , 5 )
1877- writer .write_char ("-" )
1942+ writer .write_ucs4 ("lone\uDC80 " . encode ( encoding , 'surrogatepass' ) )
1943+ writer .write_char (ord ( "-" ) )
18781944 # Surrogate pair
1879- writer .write_ucs4 ("pair\uDBFF \uDFFF " , 5 )
1880- writer .write_char ("-" )
1881- writer .write_ucs4 ("null[\0 ]" , 7 )
1945+ writer .write_ucs4 ("pair\uD83D \uDC0D " .encode (encoding , 'surrogatepass' ))
1946+ writer .write_char (ord ("-" ))
1947+ writer .write_ucs4 ("null[\0 ]" .encode (encoding ), 7 )
1948+ invalid = (b'\x00 \x00 \x11 \x00 ' if sys .byteorder == 'little' else
1949+ b'\x00 \x11 \x00 \x00 ' )
1950+ # CRASHES writer.write_ucs4("invalid".encode(encoding) + invalid)
1951+ writer .write_ucs4 (NULL , 0 )
1952+ # CRASHES writer.write_ucs4(NULL, 1)
18821953 self .assertEqual (writer .finish (),
1883- "lone\udc80 -pair\udbff -null[\0 ]" )
1954+ "lone\udc80 -pair\ud83d \udc0d -null[\x00 ]" )
18841955
18851956 # invalid size
18861957 writer = self .create_writer (0 )
18871958 with self .assertRaises (ValueError ):
1888- writer .write_ucs4 ("text" , - 1 )
1959+ writer .write_ucs4 ("text" .encode (encoding ), - 1 )
1960+ self .assertRaises (ValueError , writer .write_ucs4 , b'' , - 1 )
1961+ self .assertRaises (ValueError , writer .write_ucs4 , NULL , - 1 )
18891962
18901963 def test_substring_empty (self ):
18911964 writer = self .create_writer (0 )
@@ -1911,7 +1984,7 @@ def test_format(self):
19111984 from ctypes import c_int
19121985 writer = self .create_writer (0 )
19131986 self .writer_format (writer , b'%s %i' , b'abc' , c_int (123 ))
1914- writer .write_char ('.' )
1987+ writer .write_char (ord ( '.' ) )
19151988 self .assertEqual (writer .finish (), 'abc 123.' )
19161989
19171990 def test_recover_error (self ):
0 commit comments