Skip to content

Commit eccd34c

Browse files
fix: Allow overriding column definitions in schema inheritance (#302)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent c059946 commit eccd34c

File tree

2 files changed

+50
-1
lines changed

2 files changed

+50
-1
lines changed

dataframely/_base_schema.py

Lines changed: 31 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -119,7 +119,9 @@ def __new__(
119119
result = Metadata()
120120
for base in bases:
121121
result.update(mcs._get_metadata_recursively(base))
122-
result.update(mcs._get_metadata(namespace))
122+
namespace_metadata = mcs._get_metadata(namespace)
123+
mcs._remove_overridden_columns(result, namespace, bases)
124+
result.update(namespace_metadata)
123125
namespace[_COLUMN_ATTR] = result.columns
124126
cls = super().__new__(mcs, name, bases, namespace, *args, **kwargs)
125127

@@ -207,6 +209,34 @@ def __getattribute__(cls, name: str) -> Any:
207209
val._name = val.alias or name
208210
return val
209211

212+
@staticmethod
def _remove_overridden_columns(
    result: Metadata,
    namespace: dict[str, Any],
    bases: tuple[type[object], ...],
) -> None:
    """Drop inherited columns that the class body being created redefines.

    For every attribute in ``namespace`` whose value is a ``Column``, look up
    the same attribute name in the ``__dict__`` of each class on the MRO of
    each base. Every ``Column`` found that way is considered overridden, and
    its key (``alias`` if set, otherwise the attribute name) is removed from
    ``result.columns``.

    All MROs are scanned rather than stopping at the first hit, because with
    multiple inheritance the same attribute name can be declared by several
    ancestors under different aliases; each of those keys has to go.

    Columns are matched by attribute name only. An inherited column declared
    under a *different* attribute name is left untouched, even if its key
    equals that of a column in ``namespace``.

    Args:
        result: Metadata accumulated from the bases; ``result.columns`` is
            mutated in place.
        namespace: The namespace of the class currently being created.
        bases: The direct base classes of the class currently being created.
    """
    for attr, value in namespace.items():
        if isinstance(value, Column):
            # Gather the key of every ancestor column declared under this
            # attribute name; a set de-duplicates ancestors reachable through
            # more than one base.
            stale_keys = {
                inherited.alias or attr
                for base in bases
                for ancestor in base.__mro__
                if isinstance(inherited := ancestor.__dict__.get(attr), Column)
            }
            for key in stale_keys:
                # A key may already be absent, hence the default.
                result.columns.pop(key, None)

210240
@staticmethod
211241
def _get_metadata_recursively(kls: type[object]) -> Metadata:
212242
result = Metadata()

tests/schema/test_base.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -141,3 +141,22 @@ def test_user_error_polars_datatype_type() -> None:
141141
class MySchemaWithPolarsDataTypeType(dy.Schema):
142142
a = dy.Int32(nullable=False)
143143
b = pl.String # User error: Used pl.String instead of dy.String()
144+
145+
146+
def test_override() -> None:
    """A subclass can redefine an inherited column under the same name.

    The child schema re-declares ``x`` with ``nullable=True``. Both schemas
    must expose exactly one column ``x``; the parent keeps its own definition
    and the child sees the overriding one.
    """

    class BaseSchema(dy.Schema):
        x = dy.Int64()

    class DerivedSchema(BaseSchema):
        x = dy.Int64(nullable=True)

    base_cols = BaseSchema.columns()
    derived_cols = DerivedSchema.columns()

    # Each schema exposes only ``x``: the override neither duplicates nor
    # drops the column.
    for cols in (base_cols, derived_cols):
        assert set(cols) == {"x"}

    # The parent is unaffected by the override; the child picks it up.
    for cols, expected_nullable in ((base_cols, False), (derived_cols, True)):
        assert cols["x"].nullable is expected_nullable

    # The override changes the column's settings, not its column class.
    assert type(derived_cols["x"]) is type(base_cols["x"])

0 commit comments

Comments
 (0)