
Commit f99846c

Several bug fixes in the deploy command, and a better starting point for entrypoint.py & config.json
1 parent 45893b8 commit f99846c

6 files changed

Lines changed: 235 additions & 97 deletions


src/datacustomcode/cli.py

Lines changed: 13 additions & 1 deletion
@@ -101,13 +101,25 @@ def deploy(profile: str, path: str, name: str, version: str, description: str):
 @cli.command()
 @click.argument("directory", default=".")
 def init(directory: str):
+    from datacustomcode.scan import dc_config_json_from_file
     from datacustomcode.template import copy_template
 
     click.echo("Copying template to " + click.style(directory, fg="blue", bold=True))
     copy_template(directory)
+    entrypoint_path = os.path.join(directory, "payload", "entrypoint.py")
+    config_location = os.path.join(os.path.dirname(entrypoint_path), "config.json")
+    config_json = dc_config_json_from_file(entrypoint_path)
+    with open(config_location, "w") as f:
+        json.dump(config_json, f, indent=2)
+
     click.echo(
         "Start developing by updating the code in "
-        + click.style(f"{directory}/payload/entrypoint.py", fg="blue", bold=True)
+        + click.style(entrypoint_path, fg="blue", bold=True)
+    )
+    click.echo(
+        "You can run "
+        + click.style(f"datacustomcode scan {entrypoint_path}", fg="blue", bold=True)
+        + " to automatically update config.json when you make changes to your code"
     )
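With this change, init both copies the template and scans the new payload/entrypoint.py to write a starter config.json beside it. A minimal sketch of roughly what ends up on disk, assuming dc_config_json_from_file returns the shape exercised by the new test below (all concrete values and the "my_project" directory are illustrative):

    import json
    import os

    # Illustrative return value of dc_config_json_from_file(entrypoint_path);
    # the real values depend on the scanned entrypoint and the installed SDK.
    config_json = {
        "sdkVersion": "1.0.0",
        "entryPoint": "entrypoint.py",
        "dataspace": "default",
        "permissions": {
            "read": {"dlo": ["Account_Home__dll"]},
            "write": {"dlo": ["Account_Home_copy__dll"]},
        },
    }

    config_location = os.path.join("my_project", "payload", "config.json")
    with open(config_location, "w") as f:
        json.dump(config_json, f, indent=2)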
src/datacustomcode/deploy.py

Lines changed: 60 additions & 66 deletions
@@ -15,6 +15,7 @@
 from __future__ import annotations
 
 from html import unescape
+import json
 import os
 import shutil
 import tarfile
@@ -30,11 +31,11 @@
 )
 
 from loguru import logger
+import pydantic
 from pydantic import BaseModel
 import requests
 
 from datacustomcode.cmd import cmd_output
-from datacustomcode.scan import scan_file
 
 if TYPE_CHECKING:
     from datacustomcode.credentials import Credentials
@@ -77,8 +78,10 @@ def _make_api_call(
     logger.debug(f"Request params: {kwargs}")
 
     response = requests.request(method=method, url=url, headers=headers, **kwargs)
-    response.raise_for_status()
     json_response = response.json()
+    if response.status_code >= 400:
+        logger.debug(f"Error Response: {json_response}")
+        response.raise_for_status()
     assert isinstance(
         json_response, dict
     ), f"Unexpected response type: {type(json_response)}"
@@ -223,77 +226,65 @@ def wait_for_deployment(
         callback(status)
         if status == "Deployed":
             logger.debug(
-                "Deployment completed, Elapsed time: {time.time() - start_time}"
+                f"Deployment completed.\nElapsed time: {time.time() - start_time}"
             )
             break
         time.sleep(1)
 
 
 DATA_TRANSFORM_REQUEST_TEMPLATE: dict[str, Any] = {
-    "metadata": {
-        "dbt_schema_version": "https://schemas.getdbt.com/dbt/manifest/v8.json",
-        "dbt_version": "1.4.6",
-        "generated_at": "2023-04-25T18:54:11.375589Z",
-        "invocation_id": "d6c68c69-533a-4d54-861e-1493d6cd8092",
-        "env": {},
-        "project_id": "jaffle_shop",
-        "user_id": "1ca8403c-a1a5-43af-8b88-9265e948b9d2",
-        "send_anonymous_usage_stats": True,
-        "adapter_type": "spark",
-    },
-    "nodes": {
-        "model.dcexample.dim_listings_w_hosts": {
-            "name": "dim_listings_w_hosts",
-            "resource_type": "model",
-            "relation_name": "{OUTPUT_DLO}",
-            "config": {"materialized": "table"},
-            "compiled_code": "",
-            "depends_on": {"nodes": []},
-        }
-    },
-    "sources": {
-        "source.dcexample.listings": {
-            "name": "listings",
-            "resource_type": "source",
-            "relation_name": "{INPUT_DLO}",
-            "identifier": "{INPUT_DLO}",
-        }
-    },
+    "nodes": {},
+    "sources": {},
     "macros": {
-        "macro.dcexample.byoc": {
-            "name": "byoc_example",
-            "resource_type": "macro",
-            "path": "",
-            "original_file_path": "",
-            "unique_id": "unique id",
-            "macro_sql": "",
-            "supported_languages": None,
+        "macro.byoc": {
             "arguments": [{"name": "{SCRIPT_NAME}", "type": "BYOC_SCRIPT"}],
         }
     },
 }
 
 
 class DataTransformConfig(BaseModel):
-    input: Union[str, list[str]]
-    output: Union[str, list[str]]
+    sdkVersion: str
+    entryPoint: str
+    dataspace: str
+    permissions: Permissions
 
 
-def get_data_transform_config(directory: str) -> DataTransformConfig:
-    """Get the data transform config from the entrypoint.py file."""
-    entrypoint_file = os.path.join(directory, "entrypoint.py")
-    data_access_layer_calls = scan_file(entrypoint_file)
-    input_ = data_access_layer_calls.input_str
-    output = data_access_layer_calls.output_str
-    return DataTransformConfig(input=input_, output=output)
+class Permissions(BaseModel):
+    read: Union[DloPermission]
+    write: Union[DloPermission]
 
 
-def verify_data_transform_config(directory: str) -> None:
-    """Verify that the data transform config.json file exists in the directory."""
+class DloPermission(BaseModel):
+    dlo: list[str]
+
+
+def get_data_transform_config(directory: str) -> DataTransformConfig:
+    """Get the data transform config from the config.json file."""
     config_path = os.path.join(directory, "config.json")
-    if not os.path.exists(config_path):
-        raise FileNotFoundError(f"config.json not found in {directory}")
+    try:
+        with open(config_path, "r") as f:
+            config = json.loads(f.read())
+            return DataTransformConfig(**config)
+    except FileNotFoundError as err:
+        raise FileNotFoundError(
+            f"config.json not found in {config_path}"
+        ) from err
+    except json.JSONDecodeError as err:
+        raise ValueError(
+            f"config.json in {config_path} is not valid JSON"
+        ) from err
+    except pydantic.ValidationError as err:
+        missing_fields = [str(err["loc"][0]) for err in err.errors()]
+        raise ValueError(
+            f"config.json in {config_path} is missing required "
+            f"fields: {', '.join(missing_fields)}"
+        ) from err
 
+
+def verify_data_transform_config(directory: str) -> None:
+    """Verify the data transform config.json contents."""
+    get_data_transform_config(directory)
     logger.debug(f"Verified data transform config in {directory}")
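config.json is now read and validated with pydantic models rather than derived by scanning entrypoint.py, so a missing or malformed file fails fast with a specific error. A hedged sketch of a payload these models accept, mirroring the fixture in tests/test_cli.py:

    from datacustomcode.deploy import DataTransformConfig

    example_config = {
        "sdkVersion": "1.0.0",
        "entryPoint": "entrypoint.py",
        "dataspace": "default",
        "permissions": {
            "read": {"dlo": ["input_dlo"]},
            "write": {"dlo": ["output_dlo"]},
        },
    }

    config = DataTransformConfig(**example_config)
    print(config.permissions.read.dlo)   # ['input_dlo']
    print(config.permissions.write.dlo)  # ['output_dlo']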
@@ -306,28 +297,31 @@ def create_data_transform(
     script_name = metadata.name
     data_transform_config = get_data_transform_config(directory)
     request_hydrated = DATA_TRANSFORM_REQUEST_TEMPLATE.copy()
-    request_hydrated["nodes"]["model.dcexample.dim_listings_w_hosts"][
-        "relation_name"
-    ] = data_transform_config.input
-    request_hydrated["sources"]["source.dcexample.listings"][
-        "relation_name"
-    ] = data_transform_config.output
-    request_hydrated["sources"]["source.dcexample.listings"][
-        "identifier"
-    ] = data_transform_config.output
-    request_hydrated["macros"]["macro.dcexample.byoc"]["arguments"][0][
-        "name"
-    ] = script_name
+
+    # Add nodes for each write DLO
+    for i, dlo in enumerate(data_transform_config.permissions.write.dlo, 1):
+        request_hydrated["nodes"][f"node{i}"] = {
+            "relation_name": dlo,
+            "config": {"materialized": "table"},
+            "compiled_code": "",
+        }
+
+    # Add sources for each read DLO
+    for i, dlo in enumerate(data_transform_config.permissions.read.dlo, 1):
+        request_hydrated["sources"][f"source{i}"] = {"relation_name": dlo}
+
+    request_hydrated["macros"]["macro.byoc"]["arguments"][0]["name"] = script_name
 
     body = {
         "definition": {
-            "type": "DBT",
+            "type": "DCSQL",
             "manifest": request_hydrated,
             "version": "56.0",
         },
         "label": f"{metadata.name}",
         "name": f"{metadata.name}",
         "type": "BATCH",
+        "dataSpaceName": data_transform_config.dataspace,
     }
 
     url = _join_strip_url(access_token.instance_url, DATA_TRANSFORMS_PATH)
src/datacustomcode/templates/payload/entrypoint.py

Lines changed: 17 additions & 2 deletions
@@ -1,9 +1,24 @@
+from pyspark.sql.functions import col, upper
+
 from datacustomcode.client import Client
+from datacustomcode.io.writer.base import WriteMode
 
 
 def main():
-    client = Client()  # noqa: F841
-    # TODO: Add your custom code here
+    client = Client()
+
+    df = client.read_dlo("Account_Home__dll")
+
+    # Perform transformations on the DataFrame
+    df_upper1 = df.withColumn("Description__c", upper(col("Description__c")))
+
+    # Drop specific columns related to relationships
+    df_upper1 = df_upper1.drop("KQ_ParentId__c")
+    df_upper1 = df_upper1.drop("KQ_Id__c")
+
+    # Save the transformed DataFrame
+    dlo_name = "Account_Home_copy__dll"
+    client.write_to_dlo(dlo_name, df_upper1, write_mode=WriteMode.APPEND)
 
 
 if __name__ == "__main__":
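The template entrypoint now shows a full read / transform / write round trip instead of a bare TODO. For a quick local sanity check of the transformation itself, the same column operations can be run against a throwaway DataFrame without a Data Cloud client (a sketch using a plain local SparkSession; the sample row is made up):

    from pyspark.sql import SparkSession
    from pyspark.sql.functions import col, upper

    spark = SparkSession.builder.master("local[1]").appName("template-demo").getOrCreate()
    df = spark.createDataFrame(
        [("first account", "parent-1", "id-1")],
        ["Description__c", "KQ_ParentId__c", "KQ_Id__c"],
    )

    out = df.withColumn("Description__c", upper(col("Description__c")))
    out = out.drop("KQ_ParentId__c").drop("KQ_Id__c")
    out.show()  # Description__c is upper-cased and the KQ_* columns are gone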

tests/test_cli.py

Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
+import json
+import os
+from unittest.mock import mock_open, patch
+
+from click.testing import CliRunner
+
+from datacustomcode.cli import init
+
+
+class TestInit:
+    @patch("datacustomcode.template.copy_template")
+    @patch("datacustomcode.scan.dc_config_json_from_file")
+    @patch("builtins.open", new_callable=mock_open)
+    def test_init_command(self, mock_file, mock_scan, mock_copy):
+        """Test init command."""
+        mock_scan.return_value = {
+            "sdkVersion": "1.0.0",
+            "entryPoint": "entrypoint.py",
+            "dataspace": "default",
+            "permissions": {
+                "read": {"dlo": ["input_dlo"]},
+                "write": {"dlo": ["output_dlo"]},
+            },
+        }
+
+        runner = CliRunner()
+        with runner.isolated_filesystem():
+            # Create test directory structure
+            os.makedirs(os.path.join("test_dir", "payload"), exist_ok=True)
+
+            result = runner.invoke(init, ["test_dir"])
+
+            assert result.exit_code == 0
+            mock_copy.assert_called_once_with("test_dir")
+            mock_scan.assert_called_once_with(
+                os.path.join("test_dir", "payload", "entrypoint.py")
+            )
+
+            # Verify the config.json was written with the correct content
+            mock_file.assert_any_call(
+                os.path.join("test_dir", "payload", "config.json"), "w"
+            )
+
+            # Get all write calls and join them to get the complete written content
+            written_content = "".join(
+                call.args[0] for call in mock_file().write.call_args_list
+            )
+            expected_content = json.dumps(mock_scan.return_value, indent=2)
+            assert written_content == expected_content
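To run just this new test locally, a standard pytest invocation through its Python entry point should do (assuming pytest and click are installed in the dev environment):

    import pytest

    raise SystemExit(pytest.main(["tests/test_cli.py::TestInit::test_init_command", "-q"]))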
