From 6f9744df75a16f6174c025dcedcc92d9b0b6e3d7 Mon Sep 17 00:00:00 2001 From: nkwork9999 <143652584+nkwork9999@users.noreply.github.com> Date: Tue, 30 Jun 2026 22:56:23 +0900 Subject: [PATCH] Fix seed boolean null handling Signed-off-by: nkwork9999 <143652584+nkwork9999@users.noreply.github.com> --- sqlmesh/core/model/definition.py | 5 ++--- tests/dbt/test_transformation.py | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/sqlmesh/core/model/definition.py b/sqlmesh/core/model/definition.py index a059fa4e87..4936e9b176 100644 --- a/sqlmesh/core/model/definition.py +++ b/sqlmesh/core/model/definition.py @@ -1706,6 +1706,7 @@ def render( def render_seed(self) -> t.Iterator[QueryOrDF]: import numpy as np + import pandas as pd self._ensure_hydrated() @@ -1746,8 +1747,6 @@ def render_seed(self) -> t.Iterator[QueryOrDF]: # convert all date/time types to native pandas timestamp for column in [*date_columns, *datetime_columns]: - import pandas as pd - df[column] = pd.to_datetime(df[column], infer_datetime_format=True, errors="ignore") # type: ignore # extract datetime.date from pandas timestamp for DATE columns @@ -1763,7 +1762,7 @@ def render_seed(self) -> t.Iterator[QueryOrDF]: ) for column in bool_columns: - df[column] = df[column].apply(lambda i: str_to_bool(str(i))) + df[column] = df[column].apply(lambda i: None if pd.isna(i) else str_to_bool(str(i))) df.loc[:, string_columns] = df[string_columns].mask( cond=lambda x: x.notna(), # type: ignore diff --git a/tests/dbt/test_transformation.py b/tests/dbt/test_transformation.py index fe6073dfad..934b8f64b8 100644 --- a/tests/dbt/test_transformation.py +++ b/tests/dbt/test_transformation.py @@ -947,6 +947,31 @@ def test_seed_single_whitespace_is_na(tmp_path): assert df["col_b"].to_list() == [1, None] +def test_seed_boolean_nulls_are_preserved(tmp_path): + seed_csv = tmp_path / "seed.csv" + with open(seed_csv, "w", encoding="utf-8") as fd: + fd.write("id,test_ind\n") + fd.write("1,null\n") + fd.write("2,false\n") + fd.write("3,true\n") + fd.write("4,null\n") + + seed = SeedConfig( + name="test_model", + package="foo", + path=Path(seed_csv), + column_types={"test_ind": "boolean"}, + ) + + context = DbtContext() + context.project_name = "foo" + context.target = DuckDbConfig(name="target", schema="test") + sqlmesh_seed = seed.to_sqlmesh(context) + + df = next(sqlmesh_seed.render_seed()) + assert df["test_ind"].to_list() == [None, False, True, None] + + def test_seed_partial_column_inference(tmp_path): seed_csv = tmp_path / "seed.csv" with open(seed_csv, "w", encoding="utf-8") as fd: