
Commit 10b557f

1 parent 18fff53 commit 10b557f

3 files changed

Lines changed: 240 additions & 0 deletions


Lines changed: 65 additions & 0 deletions
@@ -0,0 +1,65 @@
{
"schema_version": "1.4.0",
"id": "GHSA-pf3h-qjgv-vcpr",
"modified": "2026-04-03T21:51:00Z",
"published": "2026-04-03T21:51:00Z",
"aliases": [
"CVE-2026-34753"
],
"summary": "vLLM: Server-Side Request Forgery (SSRF) in `download_bytes_from_url`",
"details": "### Summary\n\nA Server Side Request Forgery (SSRF) vulnerability in `download_bytes_from_url` allows any actor who can control batch input JSON to make the vLLM batch runner issue arbitrary HTTP/HTTPS requests from the server, without any URL validation or domain restrictions.\n\nThis can be used to target internal services (e.g. cloud metadata endpoints or internal HTTP APIs) reachable from the vLLM host.\n\n------\n\n### Details\n\n#### Vulnerable component\n\nThe vulnerable logic is in the batch runner entrypoint `vllm/entrypoints/openai/run_batch.py`, function `download_bytes_from_url`:\n\n```\n# run_batch.py Lines 442-482\nasync def download_bytes_from_url(url: str) -> bytes:\n \"\"\"\n Download data from a URL or decode from a data URL.\n\n Args:\n url: Either an HTTP/HTTPS URL or a data URL (data:...;base64,...)\n\n Returns:\n Data as bytes\n \"\"\"\n parsed = urlparse(url)\n\n # Handle data URLs (base64 encoded)\n if parsed.scheme == \"data\":\n # Format: data:...;base64,<base64_data>\n if \",\" in url:\n header, data = url.split(\",\", 1)\n if \"base64\" in header:\n return base64.b64decode(data)\n else:\n raise ValueError(f\"Unsupported data URL encoding: {header}\")\n else:\n raise ValueError(f\"Invalid data URL format: {url}\")\n\n # Handle HTTP/HTTPS URLs\n elif parsed.scheme in (\"http\", \"https\"):\n async with (\n aiohttp.ClientSession() as session,\n session.get(url) as resp,\n ):\n if resp.status != 200:\n raise Exception(\n f\"Failed to download data from URL: {url}. Status: {resp.status}\"\n )\n return await resp.read()\n\n else:\n raise ValueError(\n f\"Unsupported URL scheme: {parsed.scheme}. \"\n \"Supported schemes: http, https, data\"\n )\n```\n\nKey properties:\n\n- The function only parses the URL to dispatch on the scheme (`data`, `http`, `https`).\n- For `http` / `https`, it directly calls `session.get(url)` on the provided string.\n- There is no validation of:\n - hostname or IP address,\n - whether the target is internal or external,\n - port number,\n - path, query, or redirect target.\n- This is in contrast to the multimodal media path (`MediaConnector`), which implements an explicit domain allowlist. `download_bytes_from_url` does not reuse that protection.\n\n#### URL controllability\n\nThe `url` argument is fully controlled by batch input JSON via the `file_url` field of `BatchTranscriptionRequest` / `BatchTranslationRequest`.\n\n1. Batch request body type:\n\n```\n# run_batch.py Line 67-80\nclass BatchTranscriptionRequest(TranscriptionRequest):\n \"\"\"\n Batch transcription request that uses file_url instead of file.\n\n This class extends TranscriptionRequest but replaces the file field\n with file_url to support batch processing from audio files written in JSON format.\n \"\"\"\n\n file_url: str = Field(\n ...,\n description=(\n \"Either a URL of the audio or a data URL with base64 encoded audio data. \"\n ),\n )\n```\n\n```\n# run_batch.py Line 98-111\nclass BatchTranslationRequest(TranslationRequest):\n \"\"\"\n Batch translation request that uses file_url instead of file.\n\n This class extends TranslationRequest but replaces the file field\n with file_url to support batch processing from audio files written in JSON format.\n \"\"\"\n\n file_url: str = Field(\n ...,\n description=(\n \"Either a URL of the audio or a data URL with base64 encoded audio data. \"\n ),\n )\n```\n\nThere is no restriction on the domain, IP, or port of `file_url` in these models.\n\n1. 
Batch input is parsed directly from the batch file:\n\n```\n# run_batch.py Line 139-179\nclass BatchRequestInput(OpenAIBaseModel):\n ...\n url: str\n body: BatchRequestInputBody\n @field_validator(\"body\", mode=\"plain\")\n @classmethod\n def check_type_for_url(cls, value: Any, info: ValidationInfo):\n url: str = info.data[\"url\"]\n ...\n if url == \"/v1/audio/transcriptions\":\n return BatchTranscriptionRequest.model_validate(value)\n if url == \"/v1/audio/translations\":\n return BatchTranslationRequest.model_validate(value)\n```\n\n```\n# run_batch.py Line 770-781\n logger.info(\"Reading batch from %s...\", args.input_file)\n\n # Submit all requests in the file to the engine \"concurrently\".\n response_futures: list[Awaitable[BatchRequestOutput]] = []\n for request_json in (await read_file(args.input_file)).strip().split(\"\\n\"):\n # Skip empty lines.\n request_json = request_json.strip()\n if not request_json:\n continue\n\n request = BatchRequestInput.model_validate_json(request_json)\n```\n\nThe batch runner reads each line of the input file (`args.input_file`), parses it as JSON, and constructs a `BatchTranscriptionRequest` / `BatchTranslationRequest`. Whatever `file_url` appears in that JSON line becomes `batch_request_body.file_url`.\n\n1. `file_url` is passed directly into `download_bytes_from_url`:\n\n```\n# run_batch.py Line 610-623\ndef wrapper(handler_fn: Callable):\n async def transcription_wrapper(\n batch_request_body: (BatchTranscriptionRequest | BatchTranslationRequest),\n ) -> (\n TranscriptionResponse\n | TranscriptionResponseVerbose\n | TranslationResponse\n | TranslationResponseVerbose\n | ErrorResponse\n ):\n try:\n # Download data from URL\n audio_data = await download_bytes_from_url(batch_request_body.file_url)\n```\n\nSo the data flow is:\n\n1. Attacker supplies JSON line in the batch input file with arbitrary `body.file_url`.\n2. `BatchRequestInput` / `BatchTranscriptionRequest` / `BatchTranslationRequest` parse that JSON and store `file_url` verbatim.\n3. `make_transcription_wrapper` calls `download_bytes_from_url(batch_request_body.file_url)`.\n4. `download_bytes_from_url`’s HTTP/HTTPS branch issues `aiohttp.ClientSession().get(url)` to that attacker-controlled URL with no further validation.\n\nThis is a classic SSRF pattern: a server-side component makes arbitrary HTTP requests to a URL string taken from untrusted input.\n\n#### Comparison with safer code\n\nThe project already contains a safer URL-handling path for multimodal media in `vllm/multimodal/media/connector.py`, which demonstrates the intent to mitigate SSRF via domain allowlists and URL normalization:\n\n```\n# connector.py Lines 169-189\n def load_from_url(\n self,\n url: str,\n media_io: MediaIO[_M],\n *,\n fetch_timeout: int | None = None,\n ) -> _M: # type: ignore[type-var]\n url_spec = parse_url(url)\n\n if url_spec.scheme and url_spec.scheme.startswith(\"http\"):\n self._assert_url_in_allowed_media_domains(url_spec)\n\n connection = self.connection\n data = connection.get_bytes(\n url_spec.url,\n timeout=fetch_timeout,\n allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS,\n )\n\n return media_io.load_bytes(data)\n```\n\nand:\n\n```\n# connector.py Lines 158-167\n def _assert_url_in_allowed_media_domains(self, url_spec: Url) -> None:\n if (\n self.allowed_media_domains\n and url_spec.hostname not in self.allowed_media_domains\n ):\n raise ValueError(\n f\"The URL must be from one of the allowed domains: \"\n f\"{self.allowed_media_domains}. 
Input URL domain: \"\n f\"{url_spec.hostname}\"\n )\n```\n\n`download_bytes_from_url` does not reuse this allowlist or any equivalent validation, even though it also fetches user-provided URLs.",
"severity": [
{
"type": "CVSS_V3",
"score": "CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:U/C:L/I:N/A:L"
}
],
"affected": [
{
"package": {
"ecosystem": "PyPI",
"name": "vllm"
},
"ranges": [
{
"type": "ECOSYSTEM",
"events": [
{
"introduced": "0.16.0"
},
{
"fixed": "0.19.0"
}
]
}
]
}
],
"references": [
{
"type": "WEB",
"url": "https://github.com/vllm-project/vllm/security/advisories/GHSA-pf3h-qjgv-vcpr"
},
{
"type": "WEB",
"url": "https://github.com/vllm-project/vllm/pull/38482"
},
{
"type": "WEB",
"url": "https://github.com/vllm-project/vllm/commit/57861ae48d3493fa48b4d7d830b7ec9f995783e7"
},
{
"type": "PACKAGE",
"url": "https://github.com/vllm-project/vllm"
}
],
"database_specific": {
"cwe_ids": [
"CWE-918"
],
"severity": "MODERATE",
"github_reviewed": true,
"github_reviewed_at": "2026-04-03T21:51:00Z",
"nvd_published_at": null
}
}
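
As a sketch of the kind of validation the advisory contrasts against the `MediaConnector` allowlist, the snippet below restricts which hosts the batch download path may contact before issuing any request. The wrapper name `download_bytes_from_url_restricted` and the `allowed_domains` parameter are illustrative assumptions, not vLLM's actual API or the change made in the referenced pull request.

```python
# Illustrative sketch only: an allowlist check in the spirit of
# MediaConnector._assert_url_in_allowed_media_domains. The function name and
# the allowed_domains parameter are hypothetical, not part of vLLM.
import base64
from urllib.parse import urlparse

import aiohttp


async def download_bytes_from_url_restricted(url, allowed_domains=None):
    parsed = urlparse(url)

    # Data URLs carry their payload inline and never touch the network.
    if parsed.scheme == "data":
        if "," not in url:
            raise ValueError(f"Invalid data URL format: {url}")
        header, data = url.split(",", 1)
        if "base64" not in header:
            raise ValueError(f"Unsupported data URL encoding: {header}")
        return base64.b64decode(data)

    if parsed.scheme in ("http", "https"):
        # Reject hosts outside the allowlist before any request is made,
        # so internal services and metadata endpoints cannot be targeted.
        if allowed_domains is not None and parsed.hostname not in allowed_domains:
            raise ValueError(
                f"URL host {parsed.hostname!r} is not in the allowed domains: "
                f"{sorted(allowed_domains)}"
            )
        async with aiohttp.ClientSession() as session, session.get(url) as resp:
            if resp.status != 200:
                raise RuntimeError(f"Failed to download {url}: status {resp.status}")
            return await resp.read()

    raise ValueError(f"Unsupported URL scheme: {parsed.scheme}. Supported: http, https, data")
```

Note that `aiohttp` follows redirects on GET requests by default, so a permitted host could still redirect to an internal address; the `MediaConnector` path controls this via its `allow_redirects` setting, and a hardened batch path would need the same consideration.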
Lines changed: 65 additions & 0 deletions
@@ -0,0 +1,65 @@
{
"schema_version": "1.4.0",
"id": "GHSA-pq5c-rjhq-qp7p",
"modified": "2026-04-03T21:51:35Z",
"published": "2026-04-03T21:51:35Z",
"aliases": [
"CVE-2026-34755"
],
"summary": "vLLM: Denial of Service via Unbounded Frame Count in video/jpeg Base64 Processing",
"details": "## Summary\n\nThe `VideoMediaIO.load_base64()` method at `vllm/multimodal/media/video.py:51-62` splits `video/jpeg` data URLs by comma to extract individual JPEG frames, but does not enforce a frame count limit. The `num_frames` parameter (default: 32), which is enforced by the `load_bytes()` code path at line 47-48, is completely bypassed in the `video/jpeg` base64 path. An attacker can send a single API request containing thousands of comma-separated base64-encoded JPEG frames, causing the server to decode all frames into memory and crash with OOM.\n\n## Details\n\n### Vulnerable code\n\n```python\n# video.py:51-62\ndef load_base64(self, media_type: str, data: str) -> tuple[npt.NDArray, dict[str, Any]]:\n if media_type.lower() == \"video/jpeg\":\n load_frame = partial(self.image_io.load_base64, \"image/jpeg\")\n return np.stack(\n [np.asarray(load_frame(frame_data)) for frame_data in data.split(\",\")]\n # ^^^^^^^^^^\n # Unbounded split — no frame count limit\n ), {}\n return self.load_bytes(base64.b64decode(data))\n```\n\nThe `load_bytes()` path (line 47-48) properly delegates to a video loader that respects `self.num_frames` (default 32). The `load_base64(\"video/jpeg\", ...)` path bypasses this limit entirely — `data.split(\",\")` produces an unbounded list and every frame is decoded into a numpy array.\n\n### video/jpeg is part of vLLM's public API\n\n`video/jpeg` is a vLLM-specific MIME type, not IANA-registered. However it is part of the public API surface:\n\n- `encode_video_url()` at `vllm/multimodal/utils.py:96-108` generates `data:video/jpeg;base64,...` URLs\n- Official test suites at `tests/entrypoints/openai/test_video.py:62` and `tests/entrypoints/test_chat_utils.py:153` both use this format\n\n### Memory amplification\n\nEach JPEG frame decodes to a full numpy array. For 640x480 RGB images, each frame is ~921 KB decoded. 5000 frames = ~4.6 GB. `np.stack()` then creates an additional copy. The compressed JPEG payload is small (~100 KB for 5000 frames) but decompresses to gigabytes.\n\n### Data flow\n\n```\nPOST /v1/chat/completions\n → chat_utils.py:1434 video_url type → mm_parser.parse_video()\n → chat_utils.py:872 parse_video() → self._connector.fetch_video()\n → connector.py:295 fetch_video() → load_from_url(url, self.video_io)\n → connector.py:91 _load_data_url(): url_spec.path.split(\",\", 1)\n → media_type = \"video/jpeg\"\n → data = \"<frame1>,<frame2>,...,<frame10000>\"\n → connector.py:100 media_io.load_base64(\"video/jpeg\", data)\n → video.py:54 data.split(\",\") ← UNBOUNDED\n → video.py:55-57 all frames decoded into numpy arrays\n → video.py:56 np.stack([...]) ← massive combined array → OOM\n```\n\n`connector.py:91` uses `split(\",\", 1)` which splits on only the first comma. All remaining commas stay in `data` and are later split by `video.py:54`.\n\n### Comparison with existing protections\n\n| Code Path | Frame Limit | File |\n|-----------|-------------|------|\n| `load_bytes()` (binary video) | Yes — `num_frames` (default 32) | video.py:46-49 |\n| `load_base64(\"video/jpeg\", ...)` | No — unlimited `data.split(\",\")` | video.py:51-62 |",
"severity": [
{
"type": "CVSS_V3",
"score": "CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:U/C:N/I:N/A:H"
}
],
"affected": [
{
"package": {
"ecosystem": "PyPI",
"name": "vllm"
},
"ranges": [
{
"type": "ECOSYSTEM",
"events": [
{
"introduced": "0.7.0"
},
{
"fixed": "0.19.0"
}
]
}
]
}
],
"references": [
{
"type": "WEB",
"url": "https://github.com/vllm-project/vllm/security/advisories/GHSA-pq5c-rjhq-qp7p"
},
{
"type": "WEB",
"url": "https://github.com/vllm-project/vllm/pull/38636"
},
{
"type": "WEB",
"url": "https://github.com/vllm-project/vllm/commit/58ee61422169ce17e08248f8efa1e9df434fe395"
},
{
"type": "PACKAGE",
"url": "https://github.com/vllm-project/vllm"
}
],
"database_specific": {
"cwe_ids": [
"CWE-770"
],
"severity": "MODERATE",
"github_reviewed": true,
"github_reviewed_at": "2026-04-03T21:51:35Z",
"nvd_published_at": null
}
}
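
To make the missing bound concrete, here is a minimal sketch of capping the frame count before decoding on the `video/jpeg` base64 path. The standalone function, its PIL-based frame decode, and the `num_frames` default of 32 are illustrative assumptions rather than the upstream implementation or fix.

```python
# Illustrative sketch only: enforce a frame-count limit before decoding.
# The PIL-based decode stands in for vLLM's image decoding path.
import base64
import io

import numpy as np
from PIL import Image


def load_video_jpeg_base64(data: str, num_frames: int = 32) -> np.ndarray:
    frames = data.split(",")
    # Reject oversized payloads up front instead of decoding them into memory.
    if len(frames) > num_frames:
        raise ValueError(
            f"video/jpeg payload has {len(frames)} frames, exceeding the "
            f"limit of {num_frames}"
        )
    decoded = [
        np.asarray(Image.open(io.BytesIO(base64.b64decode(frame))).convert("RGB"))
        for frame in frames
    ]
    return np.stack(decoded)
```

Subsampling down to `num_frames` instead of rejecting would also keep the decode loop bounded; either way, the limit already applied on the `load_bytes()` path needs an equivalent on the base64 path.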
Lines changed: 110 additions & 0 deletions
@@ -0,0 +1,110 @@
{
"schema_version": "1.4.0",
"id": "GHSA-vc68-257w-m432",
"modified": "2026-04-03T21:50:15Z",
"published": "2026-04-03T21:50:14Z",
"aliases": [
"CVE-2026-34543"
],
"summary": "OpenEXR: Heap information disclosure in PXR24 decompression via unchecked decompressed size (undo_pxr24_impl)",
"details": "### Summary\nThe PXR24 decompression function undo_pxr24_impl in OpenEXR (internal_pxr24.c) ignores the actual decompressed size (outSize) returned by exr_uncompress_buffer() and instead reads from the scratch buffer based solely on the expected size (uncompressed_size) derived from the header metadata.\n\nAdditionally, exr_uncompress_buffer() (compression.c:202) treats LIBDEFLATE_SHORT_OUTPUT (where the compressed stream decompresses to fewer bytes than expected) as a successful result rather than an error.\n\nWhen these two issues are combined, an attacker can craft a PXR24 EXR file containing a valid but truncated zlib stream. As a result, the decoder reads uninitialized heap memory and incorporates it into the output pixel data.\n\n### Details\nThis issue occurs due to the combination of two flaws.\n\n1. compression.c:202–205 — LIBDEFLATE_SHORT_OUTPUT treated as success\n```\nelse if (res == LIBDEFLATE_SHORT_OUTPUT)\n{\n /* TODO: is this an error? */\n return EXR_ERR_SUCCESS;\n}\n```\nlibdeflate_zlib_decompress_ex() returns LIBDEFLATE_SHORT_OUTPUT when the compressed stream is successfully decompressed but the resulting output size is smaller than the provided output buffer size. In this case, the actual number of decompressed bytes is written to actual_out. However, the function does not treat this condition as an error and instead returns success.\n\n2. internal_pxr24.c:279–287 — outSize return value ignored\n```\nrstat = exr_uncompress_buffer(\n decode->context, compressed_data, comp_buf_size,\n scratch_data, scratch_size, &outSize); // outSize = actual bytes written\n\nif (rstat != EXR_ERR_SUCCESS) return rstat;\n\n// outSize is never referenced afterwards.\n// The loop below reads the entire scratch_data buffer based on\n// uncompressed_size (the header-derived expected size).\nfor (int y = 0; y < decode->chunk.height; ++y) { ... }\n```\nAfter exr_uncompress_buffer() returns success, the code does not verify whether the actual decompressed size (outSize) matches the expected size (uncompressed_size). The subsequent byte-plane reconstruction loop reads from the scratch buffer up to uncompressed_size bytes. As a result, the region between outSize and uncompressed_size consists of uninitialized heap memory, which is then read by the decoder.\n\n**Affected component**\n- src/lib/OpenEXRCore/internal_pxr24.c — undo_pxr24_impl() (line 261–399)\n- src/lib/OpenEXRCore/compression.c — exr_uncompress_buffer() (line 202–205)\n\n### PoC\nPlease refer to the atta\n[poc.zip](https://github.com/user-attachments/files/26002361/poc.zip)\nched archive file and proceed after extracting it.\n\n1. git clone https://github.com/AcademySoftwareFoundation/openexr.git\n2. mv poc openexr/\n3. cd openexr\n4. docker build -f poc/Dockerfile -t pxr24-poc .\n5. docker run --rm pxr24-poc\n\n<img width=\"858\" height=\"155\" alt=\"스크린샷 2026-03-15 오후 4 38 18\" src=\"https://github.com/user-attachments/assets/ded9eab6-9b92-40f7-9a0d-7b00db7e6088\" />\n\n\n### Impact\n* Sensitive information from heap memory may be leaked through the decoded pixel data (information disclosure).\nTrigger Condition: Occurs under default settings; simply reading a malicious EXR file is sufficient to trigger the issue, without any user interaction.",
"severity": [
{
"type": "CVSS_V4",
"score": "CVSS:4.0/AV:N/AC:L/AT:N/PR:N/UI:N/VC:H/VI:N/VA:N/SC:N/SI:N/SA:N"
}
],
"affected": [
{
"package": {
"ecosystem": "PyPI",
"name": "openexr"
},
"ranges": [
{
"type": "ECOSYSTEM",
"events": [
{
"introduced": "3.4.0"
},
{
"fixed": "3.4.8"
}
]
}
],
"database_specific": {
"last_known_affected_version_range": "<= 3.4.7"
}
},
{
"package": {
"ecosystem": "PyPI",
"name": "openexr"
},
"ranges": [
{
"type": "ECOSYSTEM",
"events": [
{
"introduced": "3.3.0"
},
{
"last_affected": "3.3.8"
}
]
}
]
},
{
"package": {
"ecosystem": "PyPI",
"name": "openexr"
},
"ranges": [
{
"type": "ECOSYSTEM",
"events": [
{
"introduced": "3.2.0"
},
{
"last_affected": "3.2.6"
}
]
}
]
}
],
"references": [
{
"type": "WEB",
"url": "https://github.com/AcademySoftwareFoundation/openexr/security/advisories/GHSA-vc68-257w-m432"
},
{
"type": "ADVISORY",
"url": "https://nvd.nist.gov/vuln/detail/CVE-2026-34543"
},
{
"type": "WEB",
"url": "https://github.com/AcademySoftwareFoundation/openexr/commit/5f6d0aaa9e43802917af7db90f181e88e083d3b8"
},
{
"type": "PACKAGE",
"url": "https://github.com/AcademySoftwareFoundation/openexr"
},
{
"type": "WEB",
"url": "https://github.com/AcademySoftwareFoundation/openexr/releases/tag/v3.4.8"
}
],
"database_specific": {
"cwe_ids": [
"CWE-908"
],
"severity": "HIGH",
"github_reviewed": true,
"github_reviewed_at": "2026-04-03T21:50:14Z",
"nvd_published_at": "2026-04-01T21:17:01Z"
}
}
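
To illustrate the invariant this advisory describes in a self-contained way, here is a short Python sketch using `zlib` in place of the OpenEXRCore C code: a valid but truncated stream decompresses successfully to fewer bytes than the header claims, so the consumer must compare the actual size against the expected size before using the buffer. The `decompress_exact` helper is illustrative only, not the upstream fix.

```python
# Illustrative sketch only: treat a short decompressed output as an error,
# analogous to rejecting LIBDEFLATE_SHORT_OUTPUT / a mismatched outSize
# instead of reading scratch memory beyond the bytes actually written.
import zlib


def decompress_exact(compressed: bytes, expected_size: int) -> bytes:
    out = zlib.decompress(compressed)
    if len(out) != expected_size:
        raise ValueError(
            f"decompressed {len(out)} bytes, expected {expected_size}"
        )
    return out


payload = zlib.compress(b"truncated stream stand-in")  # decompresses to 25 bytes
decompress_exact(payload, expected_size=25)            # passes the check
# decompress_exact(payload, expected_size=4096)        # would raise ValueError
```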
