-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy path test_client.py
More file actions
369 lines (276 loc) · 13.5 KB
/
test_client.py
File metadata and controls
369 lines (276 loc) · 13.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
from __future__ import annotations
from unittest.mock import MagicMock, patch
from pyspark.sql import DataFrame, SparkSession
import pytest
from datacustomcode.client import (
Client,
DataCloudAccessLayerException,
DataCloudObjectType,
)
from datacustomcode.config import (
AccessLayerObjectConfig,
ClientConfig,
SparkConfig,
)
from datacustomcode.io.reader.base import BaseDataCloudReader
from datacustomcode.io.writer.base import BaseDataCloudWriter, WriteMode
from datacustomcode.proxy.client.base import BaseProxyClient
class MockDataCloudReader(BaseDataCloudReader):
    """Stub reader used in tests; every read returns a mocked DataFrame."""

    CONFIG_NAME = "MockDataCloudReader"

    def read_dlo(self, name: str, schema=None, row_limit: int = 1000) -> DataFrame:
        """Return a fresh DataFrame mock regardless of the requested DLO."""
        return MagicMock(spec=DataFrame)

    def read_dmo(self, name: str, schema=None, row_limit: int = 1000) -> DataFrame:
        """Return a fresh DataFrame mock regardless of the requested DMO."""
        return MagicMock(spec=DataFrame)
class MockDataCloudWriter(BaseDataCloudWriter):
    """Stub writer used in tests; writes are accepted and discarded."""

    CONFIG_NAME = "MockDataCloudWriter"

    def write_to_dlo(
        self, name: str, dataframe: DataFrame, write_mode: WriteMode, **kwargs
    ) -> None:
        """No-op DLO write."""

    def write_to_dmo(
        self, name: str, dataframe: DataFrame, write_mode: WriteMode, **kwargs
    ) -> None:
        """No-op DMO write."""
@pytest.fixture
def mock_spark():
    """Provide a MagicMock standing in for a SparkSession."""
    session = MagicMock(spec=SparkSession)
    return session
@pytest.fixture
def mock_config(mock_spark):
    """Assemble a ClientConfig pointing at the mock reader/writer classes."""
    return ClientConfig(
        reader_config=AccessLayerObjectConfig(
            type_config_name="MockDataCloudReader", options={}, force=True
        ),
        writer_config=AccessLayerObjectConfig(
            type_config_name="MockDataCloudWriter", options={}, force=True
        ),
        spark_config=SparkConfig(
            app_name="test-app", master="local[1]", options={}, force=True
        ),
    )
@pytest.fixture
def mock_proxy():
    """Mock proxy client to avoid starting Spark when reader/writer are provided."""
    return MagicMock(spec=BaseProxyClient)
@pytest.fixture
def reset_client():
    """Reset the Client singleton between tests."""
    # Clear before the test so earlier tests can't leak an instance in,
    # and again after the yield (pytest teardown) so this test can't leak out.
    Client._instance = None
    yield
    Client._instance = None
class TestClient:
    """Unit tests for Client: singleton behavior, config-driven init, and the
    rule that a single run may touch DLOs or DMOs but never both."""

    def test_singleton_pattern(self, reset_client, mock_spark, mock_proxy):
        """Test that Client behaves as a singleton."""
        reader = MockDataCloudReader(mock_spark)
        writer = MockDataCloudWriter(mock_spark)
        client1 = Client(reader=reader, writer=writer, proxy=mock_proxy)
        client2 = Client()
        assert client1 is client2
        # Attempting to re-configure an already-built singleton must fail.
        with pytest.raises(ValueError):
            Client(reader=MagicMock(spec=BaseDataCloudReader))

    @patch("datacustomcode.client.config")
    def test_initialization_with_config(self, mock_config, reset_client, mock_spark):
        """Test client initialization using configuration."""
        from datacustomcode.spark.default import DefaultSparkSessionProvider

        # `patch` is already imported at module level (unittest.mock); the
        # previous redundant local `from unittest.mock import patch as
        # mock_patch` has been removed.
        with patch.object(
            DefaultSparkSessionProvider, "get_session"
        ) as mock_get_session:
            mock_get_session.return_value = mock_spark
            mock_reader = MagicMock(spec=BaseDataCloudReader)
            mock_reader_config = MagicMock()
            mock_reader_config.to_object.return_value = mock_reader
            mock_reader_config.force = False
            mock_writer = MagicMock(spec=BaseDataCloudWriter)
            mock_writer_config = MagicMock()
            mock_writer_config.to_object.return_value = mock_writer
            mock_writer_config.force = False
            mock_spark_config = MagicMock(spec=SparkConfig)
            mock_config.spark_provider_config = None
            mock_config.reader_config = mock_reader_config
            mock_config.writer_config = mock_writer_config
            mock_config.spark_config = mock_spark_config
            client = Client()
            # The session comes from the provider, and reader/writer are
            # materialized from their configs with that session.
            mock_get_session.assert_called_once_with(mock_spark_config)
            mock_reader_config.to_object.assert_called_once_with(mock_spark)
            mock_writer_config.to_object.assert_called_once_with(mock_spark)
            assert client._reader is mock_reader
            assert client._writer is mock_writer

    def test_read_dlo(self, reset_client, mock_spark, mock_proxy):
        """read_dlo delegates to the reader and records the DLO access."""
        reader = MagicMock(spec=BaseDataCloudReader)
        writer = MagicMock(spec=BaseDataCloudWriter)
        mock_df = MagicMock(spec=DataFrame)
        reader.read_dlo.return_value = mock_df
        client = Client(reader=reader, writer=writer, proxy=mock_proxy)
        result = client.read_dlo("test_dlo")
        reader.read_dlo.assert_called_once_with("test_dlo", row_limit=1000)
        assert result is mock_df
        assert "test_dlo" in client._data_layer_history[DataCloudObjectType.DLO]

    def test_read_dmo(self, reset_client, mock_spark, mock_proxy):
        """read_dmo delegates to the reader and records the DMO access."""
        reader = MagicMock(spec=BaseDataCloudReader)
        writer = MagicMock(spec=BaseDataCloudWriter)
        mock_df = MagicMock(spec=DataFrame)
        reader.read_dmo.return_value = mock_df
        client = Client(reader=reader, writer=writer, proxy=mock_proxy)
        result = client.read_dmo("test_dmo")
        reader.read_dmo.assert_called_once_with("test_dmo", row_limit=1000)
        assert result is mock_df
        assert "test_dmo" in client._data_layer_history[DataCloudObjectType.DMO]

    def test_write_to_dlo(self, reset_client, mock_spark, mock_proxy):
        """write_to_dlo forwards name, frame, mode, and kwargs to the writer."""
        reader = MagicMock(spec=BaseDataCloudReader)
        writer = MagicMock(spec=BaseDataCloudWriter)
        mock_df = MagicMock(spec=DataFrame)
        client = Client(reader=reader, writer=writer, proxy=mock_proxy)
        client._record_dlo_access("some_dlo")
        client.write_to_dlo("test_dlo", mock_df, WriteMode.APPEND, extra_param=True)
        writer.write_to_dlo.assert_called_once_with(
            "test_dlo", mock_df, WriteMode.APPEND, extra_param=True
        )

    def test_write_to_dmo(self, reset_client, mock_spark, mock_proxy):
        """write_to_dmo forwards name, frame, mode, and kwargs to the writer."""
        reader = MagicMock(spec=BaseDataCloudReader)
        writer = MagicMock(spec=BaseDataCloudWriter)
        mock_df = MagicMock(spec=DataFrame)
        client = Client(reader=reader, writer=writer, proxy=mock_proxy)
        client._record_dmo_access("some_dmo")
        client.write_to_dmo("test_dmo", mock_df, WriteMode.OVERWRITE, extra_param=True)
        writer.write_to_dmo.assert_called_once_with(
            "test_dmo", mock_df, WriteMode.OVERWRITE, extra_param=True
        )

    def test_mixed_dlo_dmo_raises_exception(self, reset_client, mock_spark, mock_proxy):
        """Test that mixing DLOs and DMOs raises an exception."""
        reader = MagicMock(spec=BaseDataCloudReader)
        writer = MagicMock(spec=BaseDataCloudWriter)
        mock_df = MagicMock(spec=DataFrame)
        client = Client(reader=reader, writer=writer, proxy=mock_proxy)
        client._record_dlo_access("test_dlo")
        with pytest.raises(DataCloudAccessLayerException) as exc_info:
            client.write_to_dmo("test_dmo", mock_df, WriteMode.APPEND)
        # The offending prior DLO access should be named in the message.
        assert "test_dlo" in str(exc_info.value)

    def test_mixed_dmo_dlo_raises_exception(self, reset_client, mock_spark, mock_proxy):
        """Test that mixing DMOs and DLOs raises an exception (converse case)."""
        reader = MagicMock(spec=BaseDataCloudReader)
        writer = MagicMock(spec=BaseDataCloudWriter)
        mock_df = MagicMock(spec=DataFrame)
        client = Client(reader=reader, writer=writer, proxy=mock_proxy)
        client._record_dmo_access("test_dmo")
        with pytest.raises(DataCloudAccessLayerException) as exc_info:
            client.write_to_dlo("test_dlo", mock_df, WriteMode.APPEND)
        assert "test_dmo" in str(exc_info.value)

    def test_read_pattern_flow(self, reset_client, mock_spark, mock_proxy):
        """Test a complete flow of reading and writing within the same object type."""
        reader = MagicMock(spec=BaseDataCloudReader)
        writer = MagicMock(spec=BaseDataCloudWriter)
        mock_df = MagicMock(spec=DataFrame)
        reader.read_dlo.return_value = mock_df
        client = Client(reader=reader, writer=writer, proxy=mock_proxy)
        df = client.read_dlo("source_dlo")
        client.write_to_dlo("target_dlo", df, WriteMode.APPEND)
        reader.read_dlo.assert_called_once_with("source_dlo", row_limit=1000)
        writer.write_to_dlo.assert_called_once_with(
            "target_dlo", mock_df, WriteMode.APPEND
        )
        assert "source_dlo" in client._data_layer_history[DataCloudObjectType.DLO]
        # Reset for DMO test
        Client._instance = None
        client = Client(reader=reader, writer=writer, proxy=mock_proxy)
        reader.read_dmo.return_value = mock_df
        df = client.read_dmo("source_dmo")
        client.write_to_dmo("target_dmo", df, WriteMode.MERGE)
        reader.read_dmo.assert_called_once_with("source_dmo", row_limit=1000)
        writer.write_to_dmo.assert_called_once_with(
            "target_dmo", mock_df, WriteMode.MERGE
        )
        assert "source_dmo" in client._data_layer_history[DataCloudObjectType.DMO]

    def test_read_dlo_with_row_limit(self, reset_client, mock_spark, mock_proxy):
        """Test that row_limit parameter is passed through to reader."""
        reader = MagicMock(spec=BaseDataCloudReader)
        writer = MagicMock(spec=BaseDataCloudWriter)
        mock_df = MagicMock(spec=DataFrame)
        reader.read_dlo.return_value = mock_df
        client = Client(reader=reader, writer=writer, proxy=mock_proxy)
        result = client.read_dlo("test_dlo", row_limit=500)
        reader.read_dlo.assert_called_once_with("test_dlo", row_limit=500)
        assert result is mock_df
        assert "test_dlo" in client._data_layer_history[DataCloudObjectType.DLO]

    def test_read_dmo_with_row_limit(self, reset_client, mock_spark, mock_proxy):
        """Test that row_limit parameter is passed through to reader."""
        reader = MagicMock(spec=BaseDataCloudReader)
        writer = MagicMock(spec=BaseDataCloudWriter)
        mock_df = MagicMock(spec=DataFrame)
        reader.read_dmo.return_value = mock_df
        client = Client(reader=reader, writer=writer, proxy=mock_proxy)
        result = client.read_dmo("test_dmo", row_limit=100)
        reader.read_dmo.assert_called_once_with("test_dmo", row_limit=100)
        assert result is mock_df
        assert "test_dmo" in client._data_layer_history[DataCloudObjectType.DMO]
# Tests for DefaultSparkSessionProvider
class TestDefaultSparkSessionProvider:
    """Verify DefaultSparkSessionProvider drives the SparkSession builder
    chain (master/appName/config/getOrCreate) according to SparkConfig."""

    @patch("pyspark.sql.SparkSession")
    def test_get_session_with_master(self, mock_spark_session):
        """Test DefaultSparkSessionProvider with master specified"""
        # One mock per stage of the fluent builder chain.
        builder = MagicMock()
        after_master = MagicMock()
        after_app_name = MagicMock()
        after_config = MagicMock()
        session = MagicMock()
        mock_spark_session.builder = builder
        builder.master.return_value = after_master
        after_master.appName.return_value = after_app_name
        after_app_name.config.return_value = after_config
        after_config.getOrCreate.return_value = session

        from datacustomcode.spark.default import DefaultSparkSessionProvider

        spark_config = SparkConfig(
            app_name="test-app",
            master="local[1]",
            options={"spark.executor.memory": "1g"},
        )
        provider = DefaultSparkSessionProvider()
        result = provider.get_session(spark_config)

        # The chain must be master -> appName -> config -> getOrCreate.
        builder.master.assert_called_once_with("local[1]")
        after_master.appName.assert_called_once_with("test-app")
        after_app_name.config.assert_called_once_with(
            "spark.executor.memory", "1g"
        )
        after_config.getOrCreate.assert_called_once()
        assert result is session

    @patch("pyspark.sql.SparkSession")
    def test_get_session_with_multiple_options(self, mock_spark_session):
        """Test DefaultSparkSessionProvider with multiple config options"""
        # No master here: the chain starts at appName, then one .config()
        # hop per option before getOrCreate.
        builder = MagicMock()
        after_app_name = MagicMock()
        after_opt1 = MagicMock()
        after_opt2 = MagicMock()
        after_opt3 = MagicMock()
        session = MagicMock()
        mock_spark_session.builder = builder
        builder.appName.return_value = after_app_name
        after_app_name.config.return_value = after_opt1
        after_opt1.config.return_value = after_opt2
        after_opt2.config.return_value = after_opt3
        after_opt3.getOrCreate.return_value = session

        from datacustomcode.spark.default import DefaultSparkSessionProvider

        spark_config = SparkConfig(
            app_name="test-app",
            master=None,
            options={
                "spark.executor.memory": "1g",
                "spark.executor.cores": "2",
                "spark.driver.memory": "2g",
            },
        )
        provider = DefaultSparkSessionProvider()
        result = provider.get_session(spark_config)

        builder.appName.assert_called_once_with("test-app")
        # Check config was called for each option (order not guaranteed)
        assert after_app_name.config.call_count == 1
        assert after_opt1.config.call_count == 1
        assert after_opt2.config.call_count == 1
        after_opt3.getOrCreate.assert_called_once()
        assert result is session