Skip to content

Commit 4f61fa1

Browse files
committed
Fix scan
1 parent 6a4ece4 commit 4f61fa1

4 files changed

Lines changed: 238 additions & 8 deletions

File tree

README.md

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,9 @@ After modifying the `entrypoint.py` as needed, using any dependencies you add in
7777
4. The SDK automatically packages all dependencies when you run `datacustomcode zip`
7878

7979
```zsh
80+
cd my_package
8081
datacustomcode scan ./payload/entrypoint.py
82+
datacustomcode zip --path ./payload
8183
datacustomcode deploy --path ./payload --name my_custom_script --cpu-size CPU_L
8284
```
8385

@@ -183,7 +185,7 @@ Options:
183185

184186

185187
#### `datacustomcode init`
186-
Initialize a new development environment with a template.
188+
Initialize a new development environment with a code package template.
187189

188190
Argument:
189191
- `DIRECTORY`: Directory to create project in (default: ".")
@@ -213,19 +215,19 @@ Options:
213215

214216

215217
#### `datacustomcode zip`
216-
Zip a transformation job in preparation to upload to Data Cloud.
218+
Zip a transformation job in preparation to upload to Data Cloud. Make sure to change directory into your code package folder (e.g., `my_package`) before running this command.
217219

218220
Options:
219-
- `--path TEXT`: Path to the code directory (default: ".")
221+
- `--path TEXT`: Path to the code directory, i.e., the payload folder (default: ".")
220222
- `--network TEXT`: docker network (default: "default")
221223

222224

223225
#### `datacustomcode deploy`
224-
Deploy a transformation job to Data Cloud.
226+
Deploy a transformation job to Data Cloud. Note that this command takes care of creating a zip file from the provided path before deployment. Make sure to change directory into your code package folder (e.g., `my_package`) before running this command.
225227

226228
Options:
227229
- `--profile TEXT`: Credential profile name (default: "default")
228-
- `--path TEXT`: Path to the code directory (default: ".")
230+
- `--path TEXT`: Path to the code directory, i.e., the payload folder (default: ".")
229231
- `--name TEXT`: Name of the transformation job [required]
230232
- `--version TEXT`: Version of the transformation job (default: "0.0.1")
231233
- `--description TEXT`: Description of the transformation job (default: "")

src/datacustomcode/deploy.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,29 @@ def _make_api_call(
9191
logger.debug(f"Request params: {kwargs}")
9292

9393
response = requests.request(method=method, url=url, headers=headers, **kwargs)
94-
json_response = response.json()
9594
if response.status_code >= 400:
96-
logger.debug(f"Error Response: {json_response}")
95+
logger.debug(f"Error Response Status: {response.status_code}")
96+
logger.debug(f"Error Response Headers: {response.headers}")
97+
logger.debug(f"Error Response Text: {response.text[:500]}")
98+
99+
if not response.text or response.text.strip() == "":
100+
response.raise_for_status()
101+
raise ValueError(
102+
f"Received empty response from {method} {url}. "
103+
f"Status code: {response.status_code}"
104+
)
105+
106+
try:
107+
json_response = response.json()
108+
except requests.exceptions.JSONDecodeError as e:
109+
logger.error(f"Failed to parse JSON response. Status: {response.status_code}")
110+
logger.error(f"Response text: {response.text[:500]}")
111+
raise ValueError(
112+
f"Invalid JSON response from {method} {url}. "
113+
f"Status code: {response.status_code}, "
114+
f"Response: {response.text[:200]}"
115+
) from e
116+
97117
response.raise_for_status()
98118
assert isinstance(
99119
json_response, dict

src/datacustomcode/scan.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
from __future__ import annotations
1616

1717
import ast
18+
import json
19+
import logging
1820
import os
1921
import sys
2022
from typing import (
@@ -29,12 +31,14 @@
2931

3032
from datacustomcode.version import get_version
3133

34+
logger = logging.getLogger(__name__)
35+
3236
DATA_ACCESS_METHODS = ["read_dlo", "read_dmo", "write_to_dlo", "write_to_dmo"]
3337

3438
DATA_TRANSFORM_CONFIG_TEMPLATE = {
3539
"sdkVersion": get_version(),
3640
"entryPoint": "",
37-
"dataspace": "default",
41+
"dataspace": "",
3842
"permissions": {
3943
"read": {},
4044
"write": {},
@@ -232,6 +236,40 @@ def dc_config_json_from_file(file_path: str) -> dict[str, Any]:
232236
config = DATA_TRANSFORM_CONFIG_TEMPLATE.copy()
233237
config["entryPoint"] = file_path.rpartition("/")[-1]
234238

239+
file_dir = os.path.dirname(file_path)
240+
config_json_path = os.path.join(file_dir, "config.json")
241+
242+
if os.path.exists(config_json_path) and os.path.isfile(config_json_path):
243+
try:
244+
with open(config_json_path, "r") as f:
245+
existing_config = json.load(f)
246+
247+
if "dataspace" in existing_config:
248+
dataspace_value = existing_config["dataspace"]
249+
if not dataspace_value or (
250+
isinstance(dataspace_value, str) and dataspace_value.strip() == ""
251+
):
252+
logger.error(
253+
f"dataspace in {config_json_path} is empty or None."
254+
f"Updating config file to use dataspace 'default'."
255+
)
256+
config["dataspace"] = "default"
257+
else:
258+
config["dataspace"] = dataspace_value
259+
else:
260+
raise ValueError(
261+
f"dataspace must be defined in {config_json_path}. "
262+
f"Please add a 'dataspace' field to the config.json file."
263+
)
264+
except json.JSONDecodeError as e:
265+
raise ValueError(
266+
f"Failed to parse JSON from {config_json_path}: {e}"
267+
) from e
268+
except OSError as e:
269+
raise OSError(f"Failed to read config file {config_json_path}: {e}") from e
270+
else:
271+
config["dataspace"] = "default"
272+
235273
read: dict[str, list[str]] = {}
236274
if output.read_dlo:
237275
read["dlo"] = list(output.read_dlo)
@@ -244,4 +282,5 @@ def dc_config_json_from_file(file_path: str) -> dict[str, Any]:
244282
write["dmo"] = list(output.write_to_dmo)
245283

246284
config["permissions"] = {"read": read, "write": write}
285+
247286
return config

tests/test_scan.py

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,175 @@ def test_dmo_to_dmo_config(self):
358358
finally:
359359
os.remove(temp_path)
360360

361+
@patch(
    "datacustomcode.scan.DATA_TRANSFORM_CONFIG_TEMPLATE",
    {
        "sdkVersion": "1.2.3",
        "entryPoint": "",
        "dataspace": "",
        "permissions": {"read": {}, "write": {}},
    },
)
def test_preserves_existing_dataspace(self):
    """A non-empty dataspace in a sibling config.json is carried over.

    The permissions, by contrast, are expected to reflect the scanned
    script rather than the stale values stored in the existing config.json.
    """
    import json

    script_path = create_test_script(
        textwrap.dedent(
            """
            from datacustomcode.client import Client

            client = Client()
            df = client.read_dlo("input_dlo")
            client.write_to_dlo("output_dlo", df, "overwrite")
            """
        )
    )
    sibling_config = os.path.join(os.path.dirname(script_path), "config.json")

    try:
        # Seed a config.json next to the script with a custom dataspace and
        # permissions that deliberately differ from what the script uses.
        stale_config = {
            "sdkVersion": "1.0.0",
            "entryPoint": "test.py",
            "dataspace": "my_custom_dataspace",
            "permissions": {
                "read": {"dlo": ["old_dlo"]},
                "write": {"dlo": ["old_output"]},
            },
        }
        with open(sibling_config, "w") as handle:
            json.dump(stale_config, handle)

        generated = dc_config_json_from_file(script_path)
        granted = generated["permissions"]

        assert generated["dataspace"] == "my_custom_dataspace"
        assert granted["read"]["dlo"] == ["input_dlo"]
        assert granted["write"]["dlo"] == ["output_dlo"]
    finally:
        # The script always exists here; the config may not if the write failed.
        os.remove(script_path)
        if os.path.exists(sibling_config):
            os.remove(sibling_config)
413+
414+
@patch(
    "datacustomcode.scan.DATA_TRANSFORM_CONFIG_TEMPLATE",
    {
        "sdkVersion": "1.2.3",
        "entryPoint": "",
        "dataspace": "",
        "permissions": {
            "read": {},
            "write": {},
        },
    },
)
def test_rejects_empty_dataspace(self):
    """Blank dataspace values in config.json fall back to "default".

    Despite the test's name, nothing is rejected: an empty string, a
    whitespace-only string and ``None`` (JSON ``null``) are all expected to
    be replaced by "default" without raising. The accompanying log message
    is not asserted here.
    """
    import json

    content = textwrap.dedent(
        """
        from datacustomcode.client import Client

        client = Client()
        df = client.read_dlo("input_dlo")
        client.write_to_dlo("output_dlo", df, "overwrite")
        """
    )
    temp_path = create_test_script(content)
    file_dir = os.path.dirname(temp_path)
    config_path = os.path.join(file_dir, "config.json")

    try:
        # Exercise every flavour of "blank" the fallback is meant to cover,
        # rewriting config.json before each scan.
        for blank_dataspace in ("", "   ", None):
            existing_config = {
                "sdkVersion": "1.0.0",
                "entryPoint": "test.py",
                "dataspace": blank_dataspace,
                "permissions": {
                    "read": {"dlo": ["old_dlo"]},
                    "write": {"dlo": ["old_output"]},
                },
            }
            with open(config_path, "w") as f:
                json.dump(existing_config, f)

            # Should use "default" for a blank dataspace (not raise an error)
            result = dc_config_json_from_file(temp_path)
            assert (
                result["dataspace"] == "default"
            ), f"dataspace {blank_dataspace!r} did not fall back to 'default'"
            assert result["permissions"]["read"]["dlo"] == ["input_dlo"]
            assert result["permissions"]["write"]["dlo"] == ["output_dlo"]
    finally:
        os.remove(temp_path)
        if os.path.exists(config_path):
            os.remove(config_path)
466+
467+
@patch(
    "datacustomcode.scan.DATA_TRANSFORM_CONFIG_TEMPLATE",
    {
        "sdkVersion": "1.2.3",
        "entryPoint": "",
        "dataspace": "",
        "permissions": {
            "read": {},
            "write": {},
        },
    },
)
def test_rejects_missing_dataspace(self):
    """With no config.json beside the script, dataspace is "default".

    Despite the test's name, nothing is rejected; the absence of a
    config.json is expected to yield the "default" dataspace.
    """
    import tempfile

    content = textwrap.dedent(
        """
        from datacustomcode.client import Client

        client = Client()
        df = client.read_dlo("input_dlo")
        client.write_to_dlo("output_dlo", df, "overwrite")
        """
    )

    # Write the script into a directory owned by this test alone. Sibling
    # tests create a config.json next to their scripts, so only a private,
    # freshly created directory guarantees that no config.json is present.
    with tempfile.TemporaryDirectory() as script_dir:
        script_path = os.path.join(script_dir, "entrypoint.py")
        with open(script_path, "w", encoding="utf-8") as script_file:
            script_file.write(content)

        # No existing config.json - should use "default" dataspace
        result = dc_config_json_from_file(script_path)

    assert result["dataspace"] == "default"
    assert result["permissions"]["read"]["dlo"] == ["input_dlo"]
    assert result["permissions"]["write"]["dlo"] == ["output_dlo"]
500+
501+
def test_raises_error_on_invalid_json(self):
    """A malformed config.json beside the script surfaces as a ValueError."""
    script_path = create_test_script(
        textwrap.dedent(
            """
            from datacustomcode.client import Client

            client = Client()
            df = client.read_dlo("input_dlo")
            client.write_to_dlo("output_dlo", df, "overwrite")
            """
        )
    )
    broken_config = os.path.join(os.path.dirname(script_path), "config.json")

    try:
        # Plant a config.json whose contents cannot be parsed as JSON.
        with open(broken_config, "w") as handle:
            handle.write("{ invalid json }")

        # The parse failure must be reported as a ValueError with this prefix.
        with pytest.raises(ValueError, match="Failed to parse JSON"):
            dc_config_json_from_file(script_path)
    finally:
        os.remove(script_path)
        if os.path.exists(broken_config):
            os.remove(broken_config)
529+
361530

362531
class TestDataAccessLayerCalls:
363532
"""Tests for the DataAccessLayerCalls class directly."""

0 commit comments

Comments
 (0)