11from __future__ import absolute_import, division, unicode_literals
22
3- from . import support # flake8: noqa
3+ from . import support # noqa
4+
45import codecs
56from io import BytesIO
6- import socket
77
88import six
99from six.moves import http_client, urllib
1010
1111from html5lib.inputstream import (BufferedStream, HTMLInputStream,
1212 HTMLUnicodeInputStream, HTMLBinaryInputStream)
1313
14+
def test_basic():
    """Reading more bytes than the source holds returns the whole payload."""
    payload = b"abc"
    stream = BufferedStream(BytesIO(payload))
    assert stream.read(10) == payload
1920
21+
2022def test_read_length():
2123 fp = BufferedStream(BytesIO(b"abcdef"))
2224 read1 = fp.read(1)
@@ -28,17 +30,23 @@ def test_read_length():
2830 read4 = fp.read(4)
2931 assert read4 == b""
3032
33+
def test_tell():
    """Check tell() after each successive read, including a read past the end."""
    stream = BufferedStream(BytesIO(b"abcdef"))
    # (bytes requested, chunk expected back, offset expected afterwards)
    steps = (
        (1, b"a", 1),
        (2, b"bc", 3),
        (3, b"def", 6),
        (4, b"", 6),  # nothing left: empty read, offset unchanged
    )
    for size, expected_chunk, expected_offset in steps:
        assert stream.read(size) == expected_chunk
        assert stream.tell() == expected_offset
4148
49+
4250def test_seek():
4351 fp = BufferedStream(BytesIO(b"abcdef"))
4452 read1 = fp.read(1)
@@ -55,20 +63,26 @@ def test_seek():
5563 read5 = fp.read(2)
5664 assert read5 == b"ef"
5765
66+
def test_seek_tell():
    """Interleave seek() and read() calls and check the data and tell() each time."""
    stream = BufferedStream(BytesIO(b"abcdef"))
    # (offset to seek to first, or None for no seek;
    #  bytes requested; chunk expected; offset expected afterwards)
    steps = (
        (None, 1, b"a", 1),
        (0, 1, b"a", 1),
        (None, 2, b"bc", 3),
        (2, 2, b"cd", 4),
        (4, 2, b"ef", 6),
    )
    for seek_to, size, expected_chunk, expected_offset in steps:
        if seek_to is not None:
            stream.seek(seek_to)
        assert stream.read(size) == expected_chunk
        assert stream.tell() == expected_offset
7387
7488
@@ -85,28 +99,33 @@ def test_char_ascii():
8599 assert stream.charEncoding[0].name == 'windows-1252'
86100 assert stream.char() == "'"
87101
102+
def test_char_utf8():
    """An explicit utf-8 encoding is reported and the character decodes back."""
    left_quote = '\u2018'
    stream = HTMLInputStream(left_quote.encode('utf-8'), encoding='utf-8')
    detected = stream.charEncoding[0]
    assert detected.name == 'utf-8'
    assert stream.char() == left_quote
92107
108+
def test_char_win1252():
    """Bytes with no declared encoding are read back as windows-1252 characters."""
    text = "\xa9\xf1\u2019"
    stream = HTMLInputStream(text.encode('windows-1252'))
    assert stream.charEncoding[0].name == 'windows-1252'
    # Each call to char() must yield the next character of the original text.
    for expected in text:
        assert stream.char() == expected
99115
116+
def test_bom():
    """A leading UTF-8 BOM selects utf-8 and is not returned as a character."""
    payload = codecs.BOM_UTF8 + b"'"
    stream = HTMLInputStream(payload)
    detected = stream.charEncoding[0]
    assert detected.name == 'utf-8'
    assert stream.char() == "'"
104121
122+
def test_utf_16():
    """UTF-16 input is detected as utf-16le or utf-16be and every space is read back."""
    count = 1025
    encoded = (' ' * count).encode('utf-16')
    stream = HTMLInputStream(encoded)
    assert stream.charEncoding[0].name in ['utf-16le', 'utf-16be']
    # The second argument inverts the match, so this consumes the run of spaces.
    consumed = stream.charsUntil(' ', True)
    assert len(consumed) == count
109127
128+
110129def test_newlines():
111130 stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\r\nccc\rddddxe")
112131 assert stream.position() == (1, 0)
@@ -117,11 +136,13 @@ def test_newlines():
117136 assert stream.charsUntil('e') == "x"
118137 assert stream.position() == (4, 5)
119138
139+
def test_newlines2():
    """A default-chunk-size run of "\\r" plus one "\\n" reads back as that many "\\n"."""
    chunk_size = HTMLUnicodeInputStream._defaultChunkSize
    source = "\r" * chunk_size + "\n"
    stream = HTMLInputStream(source)
    expected = "\n" * chunk_size
    assert stream.charsUntil('x') == expected
124144
145+
125146def test_position():
126147 stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\nccc\nddde\nf\ngh")
127148 assert stream.position() == (1, 0)
@@ -140,6 +161,7 @@ def test_position():
140161 assert stream.charsUntil('h') == "e\nf\ng"
141162 assert stream.position() == (6, 1)
142163
164+
143165def test_position2():
144166 stream = HTMLUnicodeInputStreamShortChunk("abc\nd")
145167 assert stream.position() == (1, 0)
@@ -154,6 +176,7 @@ def test_position2():
154176 assert stream.char() == "d"
155177 assert stream.position() == (2, 1)
156178
179+
157180def test_python_issue_20007():
158181 """
159182 Make sure we have a work-around for Python bug #20007
@@ -168,6 +191,7 @@ def makefile(self, _mode, _bufsize=None):
168191 stream = HTMLInputStream(source)
169192 assert stream.charsUntil(" ") == "Text"
170193
194+
171195def test_python_issue_20007_b():
172196 """
173197 Make sure we have a work-around for Python bug #20007
0 commit comments