Skip to content

Commit 9894894

Browse files
authored
fix(config): handle corrupted config.toml in credential functions (#481)
* fix(config): handle corrupted config.toml in get_credentials and set_credentials get_credentials() called toml.load() without exception handling, causing a raw TOMLDecodeError to crash any code that imports the runpod package when ~/.runpod/config.toml is malformed. Return None on parse failure, matching the existing pattern in check_credentials(). * fix(test): use module-scoped patches and mock filesystem calls Address review feedback: - Patch os.path.exists via module path for consistency with existing tests - Patch os.makedirs and Path.touch in set_credentials tests for hermeticity * chore: remove broken cleanup-endpoints workflow Fails with 403 on the Runpod GraphQL API. * fix(e2e): replace flash undeploy with direct GraphQL endpoint deletion flash undeploy looks up endpoints in .runpod/resources.pkl which does not exist in CI. Use the Runpod GraphQL API to query endpoints by name and delete them directly. * fix(e2e): make endpoint cleanup non-fatal The RUNPOD_API_KEY in CI lacks GraphQL API access, causing 403 on cleanup. Catch the error so test results are not masked by teardown failures. * fix(e2e): use flash undeploy --all --force for cleanup
1 parent 3fc3df1 commit 9894894

File tree

4 files changed

+81
-136
lines changed

4 files changed

+81
-136
lines changed

.github/workflows/cleanup-endpoints.yml

Lines changed: 0 additions & 110 deletions
This file was deleted.

runpod/cli/groups/config/functions.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,15 @@ def set_credentials(api_key: str, profile: str = "default", overwrite=False) ->
3131
Path(CREDENTIAL_FILE).touch(exist_ok=True)
3232

3333
if not overwrite:
34-
with open(CREDENTIAL_FILE, "rb") as cred_file:
35-
if profile in toml.load(cred_file):
36-
raise ValueError(
37-
"Profile already exists. Use `update_credentials` instead."
38-
)
34+
try:
35+
with open(CREDENTIAL_FILE, "rb") as cred_file:
36+
existing = toml.load(cred_file)
37+
except (TypeError, ValueError):
38+
existing = {}
39+
if profile in existing:
40+
raise ValueError(
41+
"Profile already exists. Use `update_credentials` instead."
42+
)
3943

4044
with open(CREDENTIAL_FILE, "w", encoding="UTF-8") as cred_file:
4145
cred_file.write("[" + profile + "]\n")
@@ -72,12 +76,18 @@ def check_credentials(profile: str = "default"):
7276
def get_credentials(profile="default"):
7377
"""
7478
Returns the credentials for the specified profile from ~/.runpod/config.toml
79+
80+
Returns None if the file does not exist, is not valid TOML, or does not
81+
contain the requested profile.
7582
"""
7683
if not os.path.exists(CREDENTIAL_FILE):
7784
return None
7885

79-
with open(CREDENTIAL_FILE, "rb") as cred_file:
80-
credentials = toml.load(cred_file)
86+
try:
87+
with open(CREDENTIAL_FILE, "rb") as cred_file:
88+
credentials = toml.load(cred_file)
89+
except (TypeError, ValueError):
90+
return None
8191

8292
if profile not in credentials:
8393
return None

tests/e2e/conftest.py

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -57,22 +57,17 @@ def endpoints(require_api_key, test_cases):
5757
log.info("Endpoint ready: name=%s image=%s template.dockerArgs=%s", ep.name, ep.image, ep.template.dockerArgs if ep.template else "N/A")
5858
yield eps
5959

60-
# Undeploy only the endpoints provisioned by this test run.
61-
# Uses by-name undeploy to avoid tearing down unrelated endpoints
62-
# sharing the same API key (parallel CI runs, developer endpoints).
63-
endpoint_names = [ep.name for ep in eps.values()]
64-
log.info("Cleaning up %d provisioned endpoints: %s", len(endpoint_names), endpoint_names)
65-
for name in endpoint_names:
66-
try:
67-
result = subprocess.run(
68-
["flash", "undeploy", name, "--force"],
69-
capture_output=True,
70-
text=True,
71-
timeout=60,
72-
)
73-
if result.returncode == 0:
74-
log.info("Undeployed %s", name)
75-
else:
76-
log.warning("flash undeploy %s failed (rc=%d): %s", name, result.returncode, result.stderr)
77-
except Exception:
78-
log.exception("Failed to undeploy %s", name)
60+
log.info("Cleaning up all provisioned endpoints")
61+
try:
62+
result = subprocess.run(
63+
["flash", "undeploy", "--all", "--force"],
64+
capture_output=True,
65+
text=True,
66+
timeout=120,
67+
)
68+
if result.returncode == 0:
69+
log.info("Undeployed all endpoints")
70+
else:
71+
log.warning("flash undeploy --all --force failed (rc=%d): %s", result.returncode, result.stderr)
72+
except Exception:
73+
log.exception("Failed to undeploy endpoints")

tests/test_cli/test_cli_groups/test_config_functions.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,53 @@ def test_get_credentials_non_existent_profile(
9797
assert result is None
9898
assert mock_open_call.called
9999
assert mock_exists.called
100+
101+
@patch("runpod.cli.groups.config.functions.os.path.exists", return_value=True)
102+
@patch(
103+
"runpod.cli.groups.config.functions.toml.load",
104+
side_effect=ValueError("Invalid value"),
105+
)
106+
@patch("builtins.open", new_callable=mock_open)
107+
def test_get_credentials_corrupted_toml(
108+
self, _mock_open_call, _mock_toml_load, _mock_exists
109+
):
110+
"""get_credentials returns None when config.toml contains invalid TOML."""
111+
result = functions.get_credentials("default")
112+
assert result is None
113+
114+
@patch("runpod.cli.groups.config.functions.os.path.exists", return_value=True)
115+
@patch(
116+
"runpod.cli.groups.config.functions.toml.load",
117+
side_effect=TypeError("bad type"),
118+
)
119+
@patch("builtins.open", new_callable=mock_open)
120+
def test_get_credentials_type_error(
121+
self, _mock_open_call, _mock_toml_load, _mock_exists
122+
):
123+
"""get_credentials returns None on TypeError from corrupted file."""
124+
result = functions.get_credentials("default")
125+
assert result is None
126+
127+
@patch("runpod.cli.groups.config.functions.Path.touch")
128+
@patch("runpod.cli.groups.config.functions.os.makedirs")
129+
@patch("runpod.cli.groups.config.functions.toml.load")
130+
@patch("builtins.open", new_callable=mock_open())
131+
def test_set_credentials_corrupted_toml_allows_overwrite(
132+
self, _mock_file, mock_toml_load, _mock_makedirs, _mock_touch
133+
):
134+
"""set_credentials with overwrite=True ignores corrupted existing file."""
135+
mock_toml_load.side_effect = ValueError("Invalid TOML")
136+
# overwrite=True skips the toml.load check entirely
137+
functions.set_credentials("NEW_KEY", overwrite=True)
138+
139+
@patch("runpod.cli.groups.config.functions.Path.touch")
140+
@patch("runpod.cli.groups.config.functions.os.makedirs")
141+
@patch("runpod.cli.groups.config.functions.toml.load")
142+
@patch("builtins.open", new_callable=mock_open())
143+
def test_set_credentials_corrupted_toml_no_overwrite(
144+
self, _mock_file, mock_toml_load, _mock_makedirs, _mock_touch
145+
):
146+
"""set_credentials without overwrite treats corrupted file as empty."""
147+
mock_toml_load.side_effect = ValueError("Invalid TOML")
148+
# Should not raise — corrupted file is treated as having no profiles
149+
functions.set_credentials("NEW_KEY", overwrite=False)

0 commit comments

Comments
 (0)