Skip to content

Commit 4adee6f

Browse files
gab23r, gabriel, AndreasAlbertQC
authored
fix: Raise on duplicated aliases (#291)
Co-authored-by: gabriel <gabriel.g.robin@airbus.com>
Co-authored-by: Andreas Albert <103571926+AndreasAlbertQC@users.noreply.github.com>
Co-authored-by: Andreas Albert <andreas.albert@quantco.com>
1 parent b3edd6a commit 4adee6f

File tree

2 files changed

+42
-1
lines changed

2 files changed

+42
-1
lines changed

dataframely/_base_schema.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,12 @@
2121
else:
2222
from typing_extensions import Self
2323

24-
2524
_COLUMN_ATTR = "__dataframely_columns__"
2625
_RULE_ATTR = "__dataframely_rules__"
2726

2827
ORIGINAL_COLUMN_PREFIX = "__DATAFRAMELY_ORIGINAL__"
2928

29+
3030
# --------------------------------------- UTILS -------------------------------------- #
3131

3232

@@ -84,6 +84,25 @@ class Metadata:
8484
rules: dict[str, RuleFactory] = field(default_factory=dict)
8585

8686
def update(self, other: Self) -> None:
    """Fold the columns and rules of ``other`` into this instance, in place.

    A column name present on both sides is tolerated only when both sides hold
    the very same ``Column`` object (identity, not equality), e.g. when a shared
    base schema is reached more than once through multiple inheritance.

    Rules are not conflict-checked: they are merged with ``dict.update``, so an
    entry from ``other`` replaces an existing rule of the same name.

    Raises:
        ImplementationError: If any shared column name maps to two different
            ``Column`` objects. Nothing is merged in that case.
    """
    # Collect every shared name whose two definitions are distinct objects.
    conflicting_columns: set[str] = set()
    for name in self.columns.keys() & other.columns.keys():
        if self.columns[name] is other.columns[name]:
            # Same object seen twice -- harmless re-visit of a common base.
            continue
        conflicting_columns.add(name)

    if conflicting_columns:
        raise ImplementationError(
            f"Columns {conflicting_columns} are duplicated with conflicting definitions."
        )

    # No conflicts: merge both mappings.
    self.columns.update(other.columns)
    self.rules.update(other.rules)
89108

@@ -203,6 +222,8 @@ def _get_metadata(source: dict[str, Any]) -> Metadata:
203222
k: v for k, v in source.items() if not k.startswith("__")
204223
}.items():
205224
if isinstance(value, Column):
225+
if (col_name := value.alias or attr) in result.columns:
226+
raise ImplementationError(f"Column {col_name!r} is duplicated.")
206227
result.columns[value.alias or attr] = value
207228
if isinstance(value, RuleFactory):
208229
# We must ensure that custom rules do not clash with internal rules.

tests/columns/test_alias.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22
# SPDX-License-Identifier: BSD-3-Clause
33

44
import polars as pl
5+
import pytest
56

67
import dataframely as dy
8+
from dataframely.exc import ImplementationError
79

810

911
class AliasSchema(dy.Schema):
@@ -36,3 +38,21 @@ def test_alias_unset() -> None:
3638
no_alias_col = dy.Int32()
3739
assert no_alias_col.alias is None
3840
assert no_alias_col.name == ""
41+
42+
43+
def test_duplicate_alias_same_schema() -> None:
    """Defining two columns with alias "a" in one schema raises on class creation."""
    expected_failure = pytest.raises(ImplementationError, match="'a' is duplicated")
    with expected_failure:

        class MySchema(dy.Schema):
            a = dy.Int64(alias="a")
            b = dy.String(alias="a")
51+
def test_duplicate_alias_inherited_schema() -> None:
    """A subclass column reusing an inherited column's alias raises on class creation."""

    # The parent schema on its own is valid and must be created without error.
    class MySchema(dy.Schema):
        a = dy.Int64(alias="a")

    expected_failure = pytest.raises(ImplementationError, match="'a'.*duplicated")
    with expected_failure:

        # The child adds a different column object under the already-used alias "a".
        class MySchema2(MySchema):
            b = dy.Int64(alias="a")

0 commit comments

Comments
 (0)