From e919fb2396b6aad59f4ba53052895be8b26917b8 Mon Sep 17 00:00:00 2001 From: Andreas Albert Date: Tue, 24 Mar 2026 17:22:31 +0100 Subject: [PATCH 1/3] fix: Allow overriding column definitions in schema inheritance The duplicate alias check from #291 also rejected intentional column overrides in subclasses. Before merging the child namespace, walk the parent MRO to remove columns that share the same attribute name so the child definition takes precedence, while still detecting genuine alias conflicts. Co-Authored-By: Claude Opus 4.6 --- dataframely/_base_schema.py | 20 +++++++++++++++++++- tests/schema/test_base.py | 8 ++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/dataframely/_base_schema.py b/dataframely/_base_schema.py index b875e6bf..bcf9aee2 100644 --- a/dataframely/_base_schema.py +++ b/dataframely/_base_schema.py @@ -119,7 +119,25 @@ def __new__( result = Metadata() for base in bases: result.update(mcs._get_metadata_recursively(base)) - result.update(mcs._get_metadata(namespace)) + # Before merging the child namespace, remove any parent columns that the + # child explicitly overrides (same attribute name). This allows subclasses to + # redefine inherited columns while still detecting genuine alias conflicts. + namespace_metadata = mcs._get_metadata(namespace) + for attr, value in namespace.items(): + if not isinstance(value, Column): + continue + # Walk parent MRO to find if this attribute was a Column in a parent class. + for base in bases: + for parent_cls in base.__mro__: + parent_col = parent_cls.__dict__.get(attr) + if parent_col is not None and isinstance(parent_col, Column): + parent_key = parent_col.alias or attr + result.columns.pop(parent_key, None) + break + else: + continue + break + result.update(namespace_metadata) namespace[_COLUMN_ATTR] = result.columns cls = super().__new__(mcs, name, bases, namespace, *args, **kwargs) diff --git a/tests/schema/test_base.py b/tests/schema/test_base.py index 6eb20849..0477630a 100644 --- a/tests/schema/test_base.py +++ b/tests/schema/test_base.py @@ -141,3 +141,11 @@ def test_user_error_polars_datatype_type() -> None: class MySchemaWithPolarsDataTypeType(dy.Schema): a = dy.Int32(nullable=False) b = pl.String # User error: Used pl.String instead of dy.String() + + +def test_override() -> None: + class FirstSchema(dy.Schema): + x = dy.Int64() + + class SecondSchema(FirstSchema): + x = dy.Int64(nullable=True) From 13f740399d90fa557e666b95b7c6a8e93e97f536 Mon Sep 17 00:00:00 2001 From: Andreas Albert Date: Tue, 24 Mar 2026 17:53:21 +0100 Subject: [PATCH 2/3] fix: Address review feedback - Scan all bases/MROs instead of breaking after the first match, so multiple-inheritance overrides remove all inherited columns - Add assertions to test_override to verify the child definition actually takes precedence Co-Authored-By: Claude Opus 4.6 --- dataframely/_base_schema.py | 14 +++++++------- tests/schema/test_base.py | 11 +++++++++++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/dataframely/_base_schema.py b/dataframely/_base_schema.py index bcf9aee2..068943c5 100644 --- a/dataframely/_base_schema.py +++ b/dataframely/_base_schema.py @@ -126,17 +126,17 @@ def __new__( for attr, value in namespace.items(): if not isinstance(value, Column): continue - # Walk parent MRO to find if this attribute was a Column in a parent class. + # Walk all parent MROs to find if this attribute was a Column in any + # parent class. In multiple-inheritance scenarios, the same attribute + # name may appear in more than one base with different aliases. + keys_to_remove: set[str] = set() for base in bases: for parent_cls in base.__mro__: parent_col = parent_cls.__dict__.get(attr) if parent_col is not None and isinstance(parent_col, Column): - parent_key = parent_col.alias or attr - result.columns.pop(parent_key, None) - break - else: - continue - break + keys_to_remove.add(parent_col.alias or attr) + for parent_key in keys_to_remove: + result.columns.pop(parent_key, None) result.update(namespace_metadata) namespace[_COLUMN_ATTR] = result.columns cls = super().__new__(mcs, name, bases, namespace, *args, **kwargs) diff --git a/tests/schema/test_base.py b/tests/schema/test_base.py index 0477630a..eb86ca38 100644 --- a/tests/schema/test_base.py +++ b/tests/schema/test_base.py @@ -149,3 +149,14 @@ class FirstSchema(dy.Schema): class SecondSchema(FirstSchema): x = dy.Int64(nullable=True) + + first_columns = FirstSchema.columns() + second_columns = SecondSchema.columns() + + assert set(first_columns) == {"x"} + assert set(second_columns) == {"x"} + + assert first_columns["x"].nullable is False + assert second_columns["x"].nullable is True + + assert type(second_columns["x"]) is type(first_columns["x"]) From 99b142f79dca77d0169c6c39c54de5fdd7856961 Mon Sep 17 00:00:00 2001 From: Andreas Albert Date: Tue, 31 Mar 2026 18:42:04 +0200 Subject: [PATCH 3/3] refactor: Extract column override logic into `_remove_overridden_columns` Co-Authored-By: Claude Opus 4.6 --- dataframely/_base_schema.py | 46 +++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/dataframely/_base_schema.py b/dataframely/_base_schema.py index 068943c5..f1506fd5 100644 --- a/dataframely/_base_schema.py +++ b/dataframely/_base_schema.py @@ -119,24 +119,8 @@ def __new__( result = Metadata() for base in bases: result.update(mcs._get_metadata_recursively(base)) - # Before merging the child namespace, remove any parent columns that the - # child explicitly overrides (same attribute name). This allows subclasses to - # redefine inherited columns while still detecting genuine alias conflicts. namespace_metadata = mcs._get_metadata(namespace) - for attr, value in namespace.items(): - if not isinstance(value, Column): - continue - # Walk all parent MROs to find if this attribute was a Column in any - # parent class. In multiple-inheritance scenarios, the same attribute - # name may appear in more than one base with different aliases. - keys_to_remove: set[str] = set() - for base in bases: - for parent_cls in base.__mro__: - parent_col = parent_cls.__dict__.get(attr) - if parent_col is not None and isinstance(parent_col, Column): - keys_to_remove.add(parent_col.alias or attr) - for parent_key in keys_to_remove: - result.columns.pop(parent_key, None) + mcs._remove_overridden_columns(result, namespace, bases) result.update(namespace_metadata) namespace[_COLUMN_ATTR] = result.columns cls = super().__new__(mcs, name, bases, namespace, *args, **kwargs) @@ -225,6 +209,34 @@ def __getattribute__(cls, name: str) -> Any: val._name = val.alias or name return val + @staticmethod + def _remove_overridden_columns( + result: Metadata, + namespace: dict[str, Any], + bases: tuple[type[object], ...], + ) -> None: + """Remove inherited columns that the child namespace explicitly overrides. + + Before merging the child namespace, we must drop any parent columns whose + attribute name is redefined in the child. This allows subclasses to redefine + inherited columns while still detecting genuine alias conflicts. + + In multiple-inheritance scenarios, the same attribute name may appear in more + than one base with different aliases, so we walk all parent MROs and collect + every matching column key to remove. + """ + for attr, value in namespace.items(): + if not isinstance(value, Column): + continue + keys_to_remove: set[str] = set() + for base in bases: + for parent_cls in base.__mro__: + parent_col = parent_cls.__dict__.get(attr) + if parent_col is not None and isinstance(parent_col, Column): + keys_to_remove.add(parent_col.alias or attr) + for parent_key in keys_to_remove: + result.columns.pop(parent_key, None) + @staticmethod def _get_metadata_recursively(kls: type[object]) -> Metadata: result = Metadata()