Skip to content

Commit d31fb08

Browse files
authored
Merge pull request #84 from python-hyper/strategies
Provide Hypothesis strategies
2 parents 86c7051 + 5762dfd commit d31fb08

8 files changed

Lines changed: 594 additions & 3 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ pip-log.txt
3131

3232
# Testing
3333
/.tox/
34+
/.hypothesis/
3435
nosetests.xml
3536

3637
# Coverage

LICENSE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ Jean Paul Calderone
55
Adi Roiban
66
Amber Hawkie Brown
77
Mahmoud Hashemi
8+
Wilfredo Sanchez Vega
89

910
and others that have contributed code to the public domain.
1011

MANIFEST.in

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1-
include README.md LICENSE CHANGELOG.md tox.ini pyproject.toml .coveragerc Makefile pytest.ini .tox-coveragerc
2-
exclude TODO.md .appveyor.yml
1+
include README.md LICENSE CHANGELOG.md
2+
include tox.ini pytest.ini .coveragerc
3+
exclude TODO.md
4+
exclude .appveyor.yml
5+
6+
include src/hyperlink/idna-tables-properties.csv.gz
37

48
graft docs
59
prune docs/_build

src/hyperlink/hypothesis.py

Lines changed: 320 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,320 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Hypothesis strategies.
4+
"""
5+
from __future__ import absolute_import
6+
7+
try:
8+
import hypothesis
9+
10+
del hypothesis
11+
except ImportError:
12+
from typing import Tuple
13+
14+
__all__ = () # type: Tuple[str, ...]
15+
else:
16+
from csv import reader as csv_reader
17+
from os.path import dirname, join
18+
from string import ascii_letters, digits
19+
from sys import maxunicode
20+
from typing import (
21+
Callable,
22+
Iterable,
23+
List,
24+
Optional,
25+
Sequence,
26+
Text,
27+
TypeVar,
28+
cast,
29+
)
30+
from gzip import open as open_gzip
31+
32+
from . import DecodedURL, EncodedURL
33+
34+
from hypothesis import assume
35+
from hypothesis.strategies import (
36+
composite,
37+
integers,
38+
lists,
39+
sampled_from,
40+
text,
41+
)
42+
43+
from idna import IDNAError, check_label, encode as idna_encode
44+
45+
__all__ = (
46+
"decoded_urls",
47+
"encoded_urls",
48+
"hostname_labels",
49+
"hostnames",
50+
"idna_text",
51+
"paths",
52+
"port_numbers",
53+
)
54+
55+
T = TypeVar("T")
56+
DrawCallable = Callable[[Callable[..., T]], T]
57+
58+
try:
59+
unichr
60+
except NameError: # Py3
61+
unichr = chr # type: Callable[[int], Text]
62+
63+
def idna_characters():
    # type: () -> Text
    """
    Returns a string containing IDNA characters.

    Only code points whose property in the bundled
    C{idna-tables-properties.csv.gz} table is C{PVALID} are included.  The
    string is built on the first call and kept in a module-level cache for
    subsequent calls.
    """
    global _idnaCharacters

    if _idnaCharacters:
        return _idnaCharacters

    # Data source "IDNA Derived Properties":
    # https://www.iana.org/assignments/idna-tables-6.3.0/
    # idna-tables-6.3.0.xhtml#idna-tables-properties
    tablePath = join(dirname(__file__), "idna-tables-properties.csv.gz")
    characters = []

    with open_gzip(tablePath) as table:
        # The gzip stream yields bytes; the CSV reader wants text.
        rows = csv_reader(
            (raw.decode("utf-8") for raw in table), delimiter=",",
        )
        next(rows)  # Skip header row

        for codes, prop, _description in rows:
            if prop != "PVALID":
                # CONTEXTO or CONTEXTJ are also allowed, but they come
                # with rules, so we're punting on those here.
                # See: https://tools.ietf.org/html/rfc5892
                continue

            # ``codes`` is either a single hex code point or "START-END".
            first, dash, last = codes.partition("-")
            if not dash:
                # No end of range given; use start
                last = first
            start = int(first, 16)
            end = int(last, 16)

            # Clamp to maxunicode: the table can exceed it when using Py2
            # on Windows.
            characters.extend(
                unichr(codePoint)
                for codePoint in range(start, min(end, maxunicode) + 1)
            )

    _idnaCharacters = u"".join(characters)
    return _idnaCharacters


_idnaCharacters = ""  # type: Text
109+
110+
@composite
def idna_text(draw, min_size=1, max_size=None):
    # type: (DrawCallable, int, Optional[int]) -> Text
    """
    A strategy which generates IDNA-encodable text.

    @param min_size: The minimum number of characters in the text.
        Must be at least C{1}.

    @param max_size: The maximum number of characters in the text.
        Use C{None} for an unbounded size; otherwise must be at least C{1}.
    """
    alphabet = idna_characters()

    assert min_size >= 1
    assert max_size is None or max_size >= 1

    candidate = cast(
        Text,
        draw(text(alphabet=alphabet, min_size=min_size, max_size=max_size)),
    )

    # FIXME: There should be a more efficient way to ensure we produce
    # valid IDNA text.
    # Drawing from valid characters is not enough for the whole string to
    # encode, so reject any example the encoder refuses.
    try:
        idna_encode(candidate)
    except IDNAError:
        assume(False)

    return candidate
142+
143+
@composite
def port_numbers(draw, allow_zero=False):
    # type: (DrawCallable, bool) -> int
    """
    A strategy which generates port numbers.

    Values are drawn from C{1} through C{65535} inclusive, or from C{0}
    through C{65535} when C{allow_zero} is true.

    @param allow_zero: Whether to allow port C{0} as a possible value.
    """
    lowest = 0 if allow_zero else 1
    return cast(int, draw(integers(min_value=lowest, max_value=65535)))
157+
158+
@composite
def hostname_labels(draw, allow_idn=True):
    # type: (DrawCallable, bool) -> Text
    """
    A strategy which generates host name labels.

    Labels are between 1 and 63 characters long.  Any label rejected by
    C{check_label} is discarded.

    @param allow_idn: Whether to allow non-ASCII characters as allowed by
        internationalized domain names (IDNs).
    """
    if not allow_idn:
        asciiAlphabet = Text(ascii_letters + digits + u"-")
        label = cast(
            Text, draw(text(alphabet=asciiAlphabet, min_size=1, max_size=63)),
        )
    else:
        label = cast(Text, draw(idna_text(min_size=1, max_size=63)))

        if any(ord(character) > 0x7F for character in label):
            # The label doesn't encode to ASCII, so what counts is its
            # length after encoding to punycode and adding the xn-- prefix.
            punycodeBudget = 63 - len("xn--")

            # Rather than bombing out, just trim from the end until it is
            # short enough, so hypothesis doesn't have to generate new data.
            while len(label.encode("punycode")) > punycodeBudget:
                label = label[:-1]

    # Filter invalid labels.
    # It would be better to reliably avoid generation of bogus labels in
    # the first place, but it's hard...
    try:
        check_label(label)
    except UnicodeError:  # pragma: no cover (not always drawn)
        assume(False)

    return label
203+
204+
@composite
def hostnames(draw, allow_leading_digit=True, allow_idn=True):
    # type: (DrawCallable, bool, bool) -> Text
    """
    A strategy which generates host names.

    A host name is built from one leading label plus one to four further
    labels, joined with dots.

    @param allow_leading_digit: Whether to allow a leading digit in host
        names; they were not allowed prior to RFC 1123.

    @param allow_idn: Whether to allow non-ASCII characters as allowed by
        internationalized domain names (IDNs).
    """

    def firstLabelIsAcceptable(candidate):
        # type: (Text) -> bool
        # Only the very first character of the host name is restricted.
        return allow_leading_digit or candidate[0] not in digits

    # Draw first label, filtering out labels with leading digits if needed
    firstLabel = cast(
        Text,
        draw(hostname_labels(allow_idn=allow_idn).filter(firstLabelIsAcceptable)),
    )

    # Draw remaining labels
    remainingLabels = cast(
        List[Text],
        draw(
            lists(hostname_labels(allow_idn=allow_idn), min_size=1, max_size=4)
        ),
    )

    labels = [firstLabel]
    labels.extend(remainingLabels)

    # Trim off labels until the total host name length fits in 252
    # characters.  This avoids having to filter the data.
    while len(u".".join(labels)) > 252:
        labels.pop()

    return u".".join(labels)
247+
248+
def path_characters():
    # type: () -> str
    """
    Returns a string containing valid URL path characters.

    Every code point from C{0} through C{sys.maxunicode} inclusive is
    considered.  The reserved characters C{#}, C{/} and C{?} are left out,
    as is anything that cannot be encoded as UTF-8.  The string is built on
    the first call and kept in a module-level cache for subsequent calls.
    """
    global _path_characters

    if _path_characters is None:

        def chars():
            # type: () -> Iterable[Text]
            # maxunicode is itself a valid code point, so the upper bound
            # has to be inclusive.
            for i in range(maxunicode + 1):
                c = unichr(i)

                # Exclude reserved characters
                if c in "#/?":
                    continue

                # Exclude anything not UTF-8 compatible
                try:
                    c.encode("utf-8")
                except UnicodeEncodeError:
                    continue

                yield c

        _path_characters = "".join(chars())

    return _path_characters


_path_characters = None  # type: Optional[str]
279+
280+
@composite
def paths(draw):
    # type: (DrawCallable) -> Sequence[Text]
    """
    A strategy which generates URL paths as lists of path segments.

    A path has at most 10 segments.  Each segment is non-empty text drawn
    from the characters returned by L{path_characters}.
    """
    segments = text(min_size=1, alphabet=path_characters())
    return cast(List[Text], draw(lists(segments, max_size=10)))
289+
290+
@composite
def encoded_urls(draw):
    # type: (DrawCallable) -> EncodedURL
    """
    A strategy which generates L{EncodedURL}s.

    Each URL has an C{http} or C{https} scheme, a generated host name and
    path, and either a port number or no port at all.

    Call the L{EncodedURL.to_uri} method on each URL to get an HTTP
    protocol-friendly URI.
    """
    drawnPort = cast(int, draw(port_numbers(allow_zero=True)))
    host = cast(Text, draw(hostnames()))
    path = cast(Sequence[Text], draw(paths()))
    scheme = cast(Text, draw(sampled_from((u"http", u"https"))))

    # A drawn port of 0 stands for "no explicit port".
    port = drawnPort or None  # type: Optional[int]

    return EncodedURL(scheme=scheme, host=host, port=port, path=path)
311+
312+
@composite
def decoded_urls(draw):
    # type: (DrawCallable) -> DecodedURL
    """
    A strategy which generates L{DecodedURL}s.

    Each value is a L{DecodedURL} wrapping a URL drawn from
    L{encoded_urls}.
    """
    encoded = draw(encoded_urls())
    return DecodedURL(encoded)
25 KB
Binary file not shown.

src/hyperlink/test/__init__.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Tests for hyperlink
4+
"""
5+
6+
# This package exports no public names.
__all__ = ()
7+
8+
9+
def _init_hypothesis():
    # type: () -> None
    """
    Register and load a more tolerant Hypothesis profile when the C{CI}
    environment variable is set.

    The C{"patience"} profile suppresses the C{too_slow} and
    C{filter_too_much} health checks.  Nothing happens when C{CI} is not
    set or when Hypothesis cannot be imported.
    """
    from os import environ

    if "CI" not in environ:
        return

    try:
        from hypothesis import HealthCheck, settings
    except ImportError:
        # Hypothesis is optional; without it there is nothing to configure.
        return

    suppressed = [HealthCheck.too_slow, HealthCheck.filter_too_much]
    settings.register_profile(
        "patience", settings(suppress_health_check=suppressed),
    )
    settings.load_profile("patience")


_init_hypothesis()

0 commit comments

Comments
 (0)