Skip to content

Commit c5df3a4

Browse files
authored
Merge pull request #3262 from piyusht-square/add-fetch-server-tests
test(fetch): add unit tests for fetch MCP server
2 parents e6b0b0f + 71d5d71 commit c5df3a4

4 files changed

Lines changed: 1120 additions & 656 deletions

File tree

src/fetch/pyproject.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,8 @@ requires = ["hatchling"]
3333
build-backend = "hatchling.build"
3434

3535
[tool.uv]
36-
dev-dependencies = ["pyright>=1.1.389", "ruff>=0.7.3"]
36+
dev-dependencies = ["pyright>=1.1.389", "ruff>=0.7.3", "pytest>=8.0.0", "pytest-asyncio>=0.21.0"]
37+
38+
[tool.pytest.ini_options]
39+
testpaths = ["tests"]
40+
asyncio_mode = "auto"

src/fetch/tests/__init__.py

Whitespace-only changes.

src/fetch/tests/test_server.py

Lines changed: 326 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,326 @@
1+
"""Tests for the fetch MCP server."""
2+
3+
import pytest
4+
from unittest.mock import AsyncMock, patch, MagicMock
5+
from mcp.shared.exceptions import McpError
6+
7+
from mcp_server_fetch.server import (
8+
extract_content_from_html,
9+
get_robots_txt_url,
10+
check_may_autonomously_fetch_url,
11+
fetch_url,
12+
DEFAULT_USER_AGENT_AUTONOMOUS,
13+
)
14+
15+
16+
class TestGetRobotsTxtUrl:
    """Checks how get_robots_txt_url derives a robots.txt address from a page URL.

    Each case asserts that the scheme and host (including an explicit port)
    are kept, while the path, query string and fragment of the input are
    replaced by ``/robots.txt``.
    """

    def test_simple_url(self):
        """A single-segment path is replaced by /robots.txt."""
        expected = "https://example.com/robots.txt"
        assert get_robots_txt_url("https://example.com/page") == expected

    def test_url_with_path(self):
        """A deeply nested path is dropped entirely."""
        expected = "https://example.com/robots.txt"
        page_url = "https://example.com/some/deep/path/page.html"
        assert get_robots_txt_url(page_url) == expected

    def test_url_with_query_params(self):
        """The query string does not leak into the robots.txt URL."""
        expected = "https://example.com/robots.txt"
        page_url = "https://example.com/page?foo=bar&baz=qux"
        assert get_robots_txt_url(page_url) == expected

    def test_url_with_port(self):
        """An explicit port number is carried over to the robots.txt URL."""
        expected = "https://example.com:8080/robots.txt"
        assert get_robots_txt_url("https://example.com:8080/page") == expected

    def test_url_with_fragment(self):
        """The fragment does not leak into the robots.txt URL."""
        expected = "https://example.com/robots.txt"
        assert get_robots_txt_url("https://example.com/page#section") == expected

    def test_http_url(self):
        """A plain-HTTP scheme is kept rather than upgraded to HTTPS."""
        expected = "http://example.com/robots.txt"
        assert get_robots_txt_url("http://example.com/page") == expected
48+
49+
50+
class TestExtractContentFromHtml:
    """Exercises extract_content_from_html on small hand-written documents."""

    def test_simple_html(self):
        """Paragraph text inside an <article> shows up in the extracted output."""
        page = """
        <html>
        <head><title>Test Page</title></head>
        <body>
            <article>
                <h1>Hello World</h1>
                <p>This is a test paragraph.</p>
            </article>
        </body>
        </html>
        """
        extracted = extract_content_from_html(page)
        # Only the paragraph text is checked: readabilipy may extract
        # different parts of the document depending on the content.
        assert "test paragraph" in extracted

    def test_html_with_links(self):
        """The visible text of a link is kept in the extracted output.

        NOTE(review): only the anchor text is asserted here, not the markdown
        link syntax or the target URL.
        """
        page = """
        <html>
        <body>
            <article>
                <p>Visit <a href="https://example.com">Example</a> for more.</p>
            </article>
        </body>
        </html>
        """
        extracted = extract_content_from_html(page)
        assert "Example" in extracted

    def test_empty_content_returns_error(self):
        """An empty document yields output containing an ``<error>`` tag."""
        extracted = extract_content_from_html("")
        assert "<error>" in extracted
90+
91+
class TestCheckMayAutonomouslyFetchUrl:
    """Tests for check_may_autonomously_fetch_url function.

    Every test patches ``httpx.AsyncClient`` and controls only the status code
    (and, for 200 replies, the body) of the mocked robots.txt response. The
    mock plumbing shared by all tests lives in the two private helpers below.
    """

    @staticmethod
    def _robots_response(status_code, text=None):
        """Build a stand-in HTTP response for the robots.txt request.

        Args:
            status_code: Value exposed as ``response.status_code``.
            text: Body exposed as ``response.text``. Left unset when ``None``,
                so the attribute stays an auto-created ``MagicMock``.

        Returns:
            The configured ``MagicMock``.
        """
        response = MagicMock()
        response.status_code = status_code
        if text is not None:
            response.text = text
        return response

    @staticmethod
    def _wire_client(client_cls, response):
        """Make ``async with client_cls(...)`` yield a client returning *response*.

        Args:
            client_cls: The mock that ``patch("httpx.AsyncClient")`` produced.
            response: Object returned by every awaited ``client.get(...)``.

        Returns:
            The ``AsyncMock`` client handed out by the async context manager.
        """
        client = AsyncMock()
        client.get = AsyncMock(return_value=response)
        # Configure both halves of the async context-manager protocol on the
        # instance that calling the patched class returns.
        client_cls.return_value.__aenter__ = AsyncMock(return_value=client)
        client_cls.return_value.__aexit__ = AsyncMock(return_value=None)
        return client

    @pytest.mark.asyncio
    async def test_allows_when_robots_txt_404(self):
        """Test that fetching is allowed when robots.txt returns 404."""
        response = self._robots_response(404)

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            # Should not raise
            await check_may_autonomously_fetch_url(
                "https://example.com/page",
                DEFAULT_USER_AGENT_AUTONOMOUS,
            )

    @pytest.mark.asyncio
    async def test_blocks_when_robots_txt_401(self):
        """Test that fetching is blocked when robots.txt returns 401."""
        response = self._robots_response(401)

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            with pytest.raises(McpError):
                await check_may_autonomously_fetch_url(
                    "https://example.com/page",
                    DEFAULT_USER_AGENT_AUTONOMOUS,
                )

    @pytest.mark.asyncio
    async def test_blocks_when_robots_txt_403(self):
        """Test that fetching is blocked when robots.txt returns 403."""
        response = self._robots_response(403)

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            with pytest.raises(McpError):
                await check_may_autonomously_fetch_url(
                    "https://example.com/page",
                    DEFAULT_USER_AGENT_AUTONOMOUS,
                )

    @pytest.mark.asyncio
    async def test_allows_when_robots_txt_allows_all(self):
        """Test that fetching is allowed when robots.txt allows all."""
        response = self._robots_response(200, "User-agent: *\nAllow: /")

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            # Should not raise
            await check_may_autonomously_fetch_url(
                "https://example.com/page",
                DEFAULT_USER_AGENT_AUTONOMOUS,
            )

    @pytest.mark.asyncio
    async def test_blocks_when_robots_txt_disallows_all(self):
        """Test that fetching is blocked when robots.txt disallows all."""
        response = self._robots_response(200, "User-agent: *\nDisallow: /")

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            with pytest.raises(McpError):
                await check_may_autonomously_fetch_url(
                    "https://example.com/page",
                    DEFAULT_USER_AGENT_AUTONOMOUS,
                )
185+
186+
187+
class TestFetchUrl:
    """Tests for fetch_url function.

    Every test patches ``httpx.AsyncClient`` and feeds fetch_url a mocked
    response. The mock plumbing shared by all tests lives in the two private
    helpers below.
    """

    @staticmethod
    def _response(status_code, text=None, content_type=None):
        """Build a stand-in HTTP response for the page request.

        Args:
            status_code: Value exposed as ``response.status_code``.
            text: Body exposed as ``response.text``; left unset when ``None``.
            content_type: When given, ``response.headers`` becomes a dict with
                this value under the ``"content-type"`` key; otherwise the
                attribute is left unset.

        Returns:
            The configured ``MagicMock``.
        """
        response = MagicMock()
        response.status_code = status_code
        if text is not None:
            response.text = text
        if content_type is not None:
            response.headers = {"content-type": content_type}
        return response

    @staticmethod
    def _wire_client(client_cls, response):
        """Make ``async with client_cls(...)`` yield a client returning *response*.

        Args:
            client_cls: The mock that ``patch("httpx.AsyncClient")`` produced.
            response: Object returned by every awaited ``client.get(...)``.

        Returns:
            The ``AsyncMock`` client handed out by the async context manager.
        """
        client = AsyncMock()
        client.get = AsyncMock(return_value=response)
        # Configure both halves of the async context-manager protocol on the
        # instance that calling the patched class returns.
        client_cls.return_value.__aenter__ = AsyncMock(return_value=client)
        client_cls.return_value.__aexit__ = AsyncMock(return_value=None)
        return client

    @pytest.mark.asyncio
    async def test_fetch_html_page(self):
        """Test fetching an HTML page returns a string and an empty prefix."""
        response = self._response(
            200,
            text="""
            <html>
            <body>
                <article>
                    <h1>Test Page</h1>
                    <p>Hello World</p>
                </article>
            </body>
            </html>
            """,
            content_type="text/html",
        )

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            content, prefix = await fetch_url(
                "https://example.com/page",
                DEFAULT_USER_AGENT_AUTONOMOUS,
            )

            # HTML is processed, so we check it returns something
            assert isinstance(content, str)
            assert prefix == ""

    @pytest.mark.asyncio
    async def test_fetch_html_page_raw(self):
        """Test fetching an HTML page with force_raw=True returns original HTML."""
        html_content = "<html><body><h1>Test</h1></body></html>"
        response = self._response(200, text=html_content, content_type="text/html")

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            content, prefix = await fetch_url(
                "https://example.com/page",
                DEFAULT_USER_AGENT_AUTONOMOUS,
                force_raw=True,
            )

            assert content == html_content
            assert "cannot be simplified" in prefix

    @pytest.mark.asyncio
    async def test_fetch_json_returns_raw(self):
        """Test fetching JSON content returns raw content."""
        json_content = '{"key": "value"}'
        response = self._response(
            200, text=json_content, content_type="application/json"
        )

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            content, prefix = await fetch_url(
                "https://api.example.com/data",
                DEFAULT_USER_AGENT_AUTONOMOUS,
            )

            assert content == json_content
            assert "cannot be simplified" in prefix

    @pytest.mark.asyncio
    async def test_fetch_404_raises_error(self):
        """Test that 404 response raises McpError."""
        response = self._response(404)

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            with pytest.raises(McpError):
                await fetch_url(
                    "https://example.com/notfound",
                    DEFAULT_USER_AGENT_AUTONOMOUS,
                )

    @pytest.mark.asyncio
    async def test_fetch_500_raises_error(self):
        """Test that 500 response raises McpError."""
        response = self._response(500)

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            with pytest.raises(McpError):
                await fetch_url(
                    "https://example.com/error",
                    DEFAULT_USER_AGENT_AUTONOMOUS,
                )

    @pytest.mark.asyncio
    async def test_fetch_with_proxy(self):
        """Test that proxy URL is passed to client."""
        response = self._response(
            200, text='{"data": "test"}', content_type="application/json"
        )

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            await fetch_url(
                "https://example.com/data",
                DEFAULT_USER_AGENT_AUTONOMOUS,
                proxy_url="http://proxy.example.com:8080",
            )

            # Verify AsyncClient was called with proxy
            client_cls.assert_called_once_with(proxies="http://proxy.example.com:8080")

0 commit comments

Comments
 (0)