From 2e03cca1c436b8df325799e9201a071c161ef060 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 7 Apr 2026 21:46:51 +0800 Subject: [PATCH 1/9] Update cast handling in planner for field-aware types Refactor logical Expr::Cast to use field-aware CastExpr, ensuring target FieldRef metadata is preserved. Enhance tests to confirm metadata retention, validate that same-type casts aren't elided for fields with semantics, and ensure existing TryCast rejection for extension types remains effective. --- datafusion/physical-expr/src/planner.rs | 95 ++++++++++++++++++------- 1 file changed, 68 insertions(+), 27 deletions(-) diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index fd2de812e4664..174db5e33820d 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -289,23 +289,17 @@ pub fn create_physical_expr( Ok(expressions::case(expr, when_then_expr, else_expr)?) } Expr::Cast(Cast { expr, field }) => { - if !field.metadata().is_empty() { - let (_, src_field) = expr.to_field(input_dfschema)?; - return plan_err!( - "Cast from {} to {} is not supported", - format_type_and_metadata( - src_field.data_type(), - Some(src_field.metadata()), - ), - format_type_and_metadata(field.data_type(), Some(field.metadata())) - ); - } + let expr = create_physical_expr(expr, input_dfschema, execution_props)?; - expressions::cast( - create_physical_expr(expr, input_dfschema, execution_props)?, - input_schema, - field.data_type().clone(), - ) + // Reuse the standard CAST validation path, but preserve the logical + // target field instead of lowering to a type-only physical cast. + expressions::cast(Arc::clone(&expr), input_schema, field.data_type().clone())?; + + Ok(Arc::new(expressions::CastExpr::new_with_target_field( + expr, + Arc::clone(field), + None, + ))) } Expr::TryCast(TryCast { expr, field }) => { if !field.metadata().is_empty() { @@ -476,7 +470,63 @@ mod tests { } #[test] - fn test_cast_to_extension_type() -> Result<()> { + fn test_cast_lowering_preserves_target_field_metadata() -> Result<()> { + let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); + let df_schema = DFSchema::try_from(schema.clone())?; + let target_field = Arc::new( + Field::new("cast_target", DataType::Int64, true).with_metadata( + [("target_meta".to_string(), "1".to_string())].into(), + ), + ); + let cast_expr = Expr::Cast(Cast::new_from_field( + Box::new(col("a")), + Arc::clone(&target_field), + )); + + let physical = + create_physical_expr(&cast_expr, &df_schema, &ExecutionProps::new())?; + let cast = physical + .as_any() + .downcast_ref::() + .expect("planner should lower logical CAST to CastExpr"); + + assert_eq!(cast.target_field(), &target_field); + assert_eq!(physical.return_field(&schema)?, target_field); + assert!(physical.nullable(&schema)?); + + Ok(()) + } + + #[test] + fn test_cast_lowering_preserves_same_type_field_semantics() -> Result<()> { + let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); + let df_schema = DFSchema::try_from(schema.clone())?; + let target_field = Arc::new( + Field::new("same_type_cast", DataType::Int32, true).with_metadata( + [("target_meta".to_string(), "same-type".to_string())].into(), + ), + ); + let cast_expr = Expr::Cast(Cast::new_from_field( + Box::new(col("a")), + Arc::clone(&target_field), + )); + + let physical = + create_physical_expr(&cast_expr, &df_schema, &ExecutionProps::new())?; + let cast = physical + .as_any() + .downcast_ref::() + .expect("same-type casts should not be elided when the target field carries semantics"); + + assert_eq!(cast.target_field(), &target_field); + assert_eq!(physical.return_field(&schema)?, target_field); + assert!(physical.nullable(&schema)?); + + Ok(()) + } + + #[test] + fn test_try_cast_to_extension_type_is_rejected() -> Result<()> { let extension_field_type = Arc::new( DataType::FixedSizeBinary(16) .into_nullable_field() @@ -486,17 +536,8 @@ mod tests { ), ); let expr = lit("3230e5d4-888e-408b-b09b-831f44aa0c58"); - let cast_expr = Expr::Cast(Cast::new_from_field( - Box::new(expr.clone()), - Arc::clone(&extension_field_type), - )); - let err = - create_physical_expr(&cast_expr, &DFSchema::empty(), &ExecutionProps::new()) - .unwrap_err(); - assert!(err.message().contains("arrow.uuid")); - let try_cast_expr = Expr::TryCast(TryCast::new_from_field( - Box::new(expr.clone()), + Box::new(expr), Arc::clone(&extension_field_type), )); let err = create_physical_expr( From 96a3f32474fcbc0d661c0db38b1269da98deaffb Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 7 Apr 2026 21:52:26 +0800 Subject: [PATCH 2/9] Refactor cast expression handling in planner Expose shared cast_with_target_field helper to validate and build field-aware CastExprs in one place. Update planner to directly call this helper, removing the need for temporary type-only casts. Add regression tests to cover standard casts, metadata-bearing casts, and same-type semantic-preserving casts. --- .../physical-expr/src/expressions/cast.rs | 54 ++++++++++++++++--- .../physical-expr/src/expressions/mod.rs | 2 +- datafusion/physical-expr/src/planner.rs | 34 ++++++++---- 3 files changed, 73 insertions(+), 17 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/cast.rs b/datafusion/physical-expr/src/expressions/cast.rs index 24e486f8050fe..344a5f55bcdc2 100644 --- a/datafusion/physical-expr/src/expressions/cast.rs +++ b/datafusion/physical-expr/src/expressions/cast.rs @@ -201,6 +201,12 @@ impl CastExpr { } } +fn is_default_target_field(target_field: &FieldRef) -> bool { + target_field.name().is_empty() + && target_field.is_nullable() + && target_field.metadata().is_empty() +} + pub(crate) fn is_order_preserving_cast_family( source_type: &DataType, target_type: &DataType, @@ -315,23 +321,57 @@ pub fn cast_with_options( input_schema: &Schema, cast_type: DataType, cast_options: Option>, +) -> Result> { + cast_with_target_field( + expr, + input_schema, + cast_type.into_nullable_field_ref(), + cast_options, + ) +} + +/// Return a PhysicalExpression representing `expr` casted to `target_field`, +/// preserving any explicit field semantics such as name, nullability, and +/// metadata. +pub fn cast_with_target_field( + expr: Arc, + input_schema: &Schema, + target_field: FieldRef, + cast_options: Option>, ) -> Result> { let expr_type = expr.data_type(input_schema)?; - if expr_type == cast_type { - Ok(Arc::clone(&expr)) - } else if requires_nested_struct_cast(&expr_type, &cast_type) { - if can_cast_named_struct_types(&expr_type, &cast_type) { + let cast_type = target_field.data_type(); + if expr_type == *cast_type { + if is_default_target_field(&target_field) { + return Ok(Arc::clone(&expr)); + } + + Ok(Arc::new(CastExpr::new_with_target_field( + expr, + target_field, + cast_options, + ))) + } else if requires_nested_struct_cast(&expr_type, cast_type) { + if can_cast_named_struct_types(&expr_type, cast_type) { // Allow casts involving structs (including nested inside Lists, Dictionaries, // etc.) that pass name-based compatibility validation. This validation is // applied at planning time (now) to fail fast, rather than deferring errors // to execution time. The name-based casting logic will be executed at runtime // via ColumnarValue::cast_to. - Ok(Arc::new(CastExpr::new(expr, cast_type, cast_options))) + Ok(Arc::new(CastExpr::new_with_target_field( + expr, + target_field, + cast_options, + ))) } else { not_impl_err!("Unsupported CAST from {expr_type} to {cast_type}") } - } else if can_cast_types(&expr_type, &cast_type) { - Ok(Arc::new(CastExpr::new(expr, cast_type, cast_options))) + } else if can_cast_types(&expr_type, cast_type) { + Ok(Arc::new(CastExpr::new_with_target_field( + expr, + target_field, + cast_options, + ))) } else { not_impl_err!("Unsupported CAST from {expr_type} to {cast_type}") } diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index c9e02708d6c28..ef8218c814887 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ b/datafusion/physical-expr/src/expressions/mod.rs @@ -41,7 +41,7 @@ pub use crate::aggregate::stats::StatsType; pub use binary::{BinaryExpr, binary, similar_to}; pub use case::{CaseExpr, case}; -pub use cast::{CastExpr, cast}; +pub use cast::{CastExpr, cast, cast_with_target_field}; pub use cast_column::CastColumnExpr; pub use column::{Column, col, with_new_schema}; pub use datafusion_expr::utils::format_state_name; diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index 174db5e33820d..df80a45e58209 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -289,17 +289,12 @@ pub fn create_physical_expr( Ok(expressions::case(expr, when_then_expr, else_expr)?) } Expr::Cast(Cast { expr, field }) => { - let expr = create_physical_expr(expr, input_dfschema, execution_props)?; - - // Reuse the standard CAST validation path, but preserve the logical - // target field instead of lowering to a type-only physical cast. - expressions::cast(Arc::clone(&expr), input_schema, field.data_type().clone())?; - - Ok(Arc::new(expressions::CastExpr::new_with_target_field( - expr, + expressions::cast_with_target_field( + create_physical_expr(expr, input_dfschema, execution_props)?, + input_schema, Arc::clone(field), None, - ))) + ) } Expr::TryCast(TryCast { expr, field }) => { if !field.metadata().is_empty() { @@ -497,6 +492,27 @@ mod tests { Ok(()) } + #[test] + fn test_cast_lowering_preserves_standard_cast_semantics() -> Result<()> { + let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); + let df_schema = DFSchema::try_from(schema.clone())?; + let cast_expr = Expr::Cast(Cast::new(Box::new(col("a")), DataType::Int64)); + + let physical = + create_physical_expr(&cast_expr, &df_schema, &ExecutionProps::new())?; + let cast = physical + .as_any() + .downcast_ref::() + .expect("planner should lower ordinary CAST to CastExpr"); + + assert_eq!(cast.cast_type(), &DataType::Int64); + assert_eq!(physical.return_field(&schema)?.name(), "a"); + assert_eq!(physical.return_field(&schema)?.data_type(), &DataType::Int64); + assert!(!physical.nullable(&schema)?); + + Ok(()) + } + #[test] fn test_cast_lowering_preserves_same_type_field_semantics() -> Result<()> { let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); From 12acf9b7fea83a4428e6a336ceefe321c0e80f59 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 7 Apr 2026 21:58:54 +0800 Subject: [PATCH 3/9] Refactor cast expression handling Consolidate default-target-field predicate and success construction path in cast_with_target_field to reduce duplicate code in cast.rs. Simplify tests in planner.rs by implementing shared setup helpers and caching return_field() results for standard casts. --- .../physical-expr/src/expressions/cast.rs | 54 +++++++------------ datafusion/physical-expr/src/planner.rs | 32 ++++++----- 2 files changed, 38 insertions(+), 48 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/cast.rs b/datafusion/physical-expr/src/expressions/cast.rs index 344a5f55bcdc2..dfb20725c7a91 100644 --- a/datafusion/physical-expr/src/expressions/cast.rs +++ b/datafusion/physical-expr/src/expressions/cast.rs @@ -151,14 +151,8 @@ impl CastExpr { &self.cast_options } - fn is_default_target_field(&self) -> bool { - self.target_field.name().is_empty() - && self.target_field.is_nullable() - && self.target_field.metadata().is_empty() - } - fn resolved_target_field(&self, input_schema: &Schema) -> Result { - if self.is_default_target_field() { + if is_default_target_field(&self.target_field) { self.expr.return_field(input_schema).map(|field| { Arc::new( field @@ -345,36 +339,28 @@ pub fn cast_with_target_field( if is_default_target_field(&target_field) { return Ok(Arc::clone(&expr)); } + } - Ok(Arc::new(CastExpr::new_with_target_field( - expr, - target_field, - cast_options, - ))) - } else if requires_nested_struct_cast(&expr_type, cast_type) { - if can_cast_named_struct_types(&expr_type, cast_type) { - // Allow casts involving structs (including nested inside Lists, Dictionaries, - // etc.) that pass name-based compatibility validation. This validation is - // applied at planning time (now) to fail fast, rather than deferring errors - // to execution time. The name-based casting logic will be executed at runtime - // via ColumnarValue::cast_to. - Ok(Arc::new(CastExpr::new_with_target_field( - expr, - target_field, - cast_options, - ))) - } else { - not_impl_err!("Unsupported CAST from {expr_type} to {cast_type}") - } - } else if can_cast_types(&expr_type, cast_type) { - Ok(Arc::new(CastExpr::new_with_target_field( - expr, - target_field, - cast_options, - ))) + let can_build_cast = if requires_nested_struct_cast(&expr_type, cast_type) { + // Allow casts involving structs (including nested inside Lists, Dictionaries, + // etc.) that pass name-based compatibility validation. This validation is + // applied at planning time (now) to fail fast, rather than deferring errors + // to execution time. The name-based casting logic will be executed at runtime + // via ColumnarValue::cast_to. + can_cast_named_struct_types(&expr_type, cast_type) } else { - not_impl_err!("Unsupported CAST from {expr_type} to {cast_type}") + can_cast_types(&expr_type, cast_type) + }; + + if !can_build_cast { + return not_impl_err!("Unsupported CAST from {expr_type} to {cast_type}"); } + + Ok(Arc::new(CastExpr::new_with_target_field( + expr, + target_field, + cast_options, + ))) } /// Return a PhysicalExpression representing `expr` casted to diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index df80a45e58209..90f52bebc3274 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -439,6 +439,15 @@ mod tests { use super::*; + fn test_cast_schema() -> Schema { + Schema::new(vec![Field::new("a", DataType::Int32, false)]) + } + + fn lower_cast_expr(expr: &Expr, schema: &Schema) -> Result> { + let df_schema = DFSchema::try_from(schema.clone())?; + create_physical_expr(expr, &df_schema, &ExecutionProps::new()) + } + #[test] fn test_create_physical_expr_scalar_input_output() -> Result<()> { let expr = col("letter").eq(lit("A")); @@ -466,8 +475,7 @@ mod tests { #[test] fn test_cast_lowering_preserves_target_field_metadata() -> Result<()> { - let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); - let df_schema = DFSchema::try_from(schema.clone())?; + let schema = test_cast_schema(); let target_field = Arc::new( Field::new("cast_target", DataType::Int64, true).with_metadata( [("target_meta".to_string(), "1".to_string())].into(), @@ -478,8 +486,7 @@ mod tests { Arc::clone(&target_field), )); - let physical = - create_physical_expr(&cast_expr, &df_schema, &ExecutionProps::new())?; + let physical = lower_cast_expr(&cast_expr, &schema)?; let cast = physical .as_any() .downcast_ref::() @@ -494,20 +501,19 @@ mod tests { #[test] fn test_cast_lowering_preserves_standard_cast_semantics() -> Result<()> { - let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); - let df_schema = DFSchema::try_from(schema.clone())?; + let schema = test_cast_schema(); let cast_expr = Expr::Cast(Cast::new(Box::new(col("a")), DataType::Int64)); - let physical = - create_physical_expr(&cast_expr, &df_schema, &ExecutionProps::new())?; + let physical = lower_cast_expr(&cast_expr, &schema)?; let cast = physical .as_any() .downcast_ref::() .expect("planner should lower ordinary CAST to CastExpr"); + let returned_field = physical.return_field(&schema)?; assert_eq!(cast.cast_type(), &DataType::Int64); - assert_eq!(physical.return_field(&schema)?.name(), "a"); - assert_eq!(physical.return_field(&schema)?.data_type(), &DataType::Int64); + assert_eq!(returned_field.name(), "a"); + assert_eq!(returned_field.data_type(), &DataType::Int64); assert!(!physical.nullable(&schema)?); Ok(()) @@ -515,8 +521,7 @@ mod tests { #[test] fn test_cast_lowering_preserves_same_type_field_semantics() -> Result<()> { - let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); - let df_schema = DFSchema::try_from(schema.clone())?; + let schema = test_cast_schema(); let target_field = Arc::new( Field::new("same_type_cast", DataType::Int32, true).with_metadata( [("target_meta".to_string(), "same-type".to_string())].into(), @@ -527,8 +532,7 @@ mod tests { Arc::clone(&target_field), )); - let physical = - create_physical_expr(&cast_expr, &df_schema, &ExecutionProps::new())?; + let physical = lower_cast_expr(&cast_expr, &schema)?; let cast = physical .as_any() .downcast_ref::() From 7b0c2d1638781024739734162fdac56d784aeefd Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 7 Apr 2026 22:06:15 +0800 Subject: [PATCH 4/9] Restrict cast_with_target_field to crate-only Narrow cast_with_target_field from a public re-export to a crate-only re-export in datafusion/physical-expr/src/expressions/mod.rs. This change allows the planner to still utilize it while reducing the public expressions API surface. --- datafusion/physical-expr/src/expressions/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index ef8218c814887..8a27009280b0e 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ b/datafusion/physical-expr/src/expressions/mod.rs @@ -41,7 +41,7 @@ pub use crate::aggregate::stats::StatsType; pub use binary::{BinaryExpr, binary, similar_to}; pub use case::{CaseExpr, case}; -pub use cast::{CastExpr, cast, cast_with_target_field}; +pub use cast::{CastExpr, cast}; pub use cast_column::CastColumnExpr; pub use column::{Column, col, with_new_schema}; pub use datafusion_expr::utils::format_state_name; @@ -56,3 +56,5 @@ pub use no_op::NoOp; pub use not::{NotExpr, not}; pub use try_cast::{TryCastExpr, try_cast}; pub use unknown_column::UnKnownColumn; + +pub(crate) use cast::cast_with_target_field; From 95a7e7e6250b8ea8823ae710de5a20ed3c930f69 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 7 Apr 2026 22:09:58 +0800 Subject: [PATCH 5/9] Refactor planner tests and document cast behavior Use as_planner_cast(...) helper in planner tests to eliminate repeated downcasting. Update cast_with_target_field documentation to clarify that default synthesized fields are elided while explicit field semantics are preserved. --- .../physical-expr/src/expressions/cast.rs | 5 +++++ datafusion/physical-expr/src/planner.rs | 22 +++++++++---------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/cast.rs b/datafusion/physical-expr/src/expressions/cast.rs index dfb20725c7a91..d66a3d7a316b3 100644 --- a/datafusion/physical-expr/src/expressions/cast.rs +++ b/datafusion/physical-expr/src/expressions/cast.rs @@ -327,6 +327,11 @@ pub fn cast_with_options( /// Return a PhysicalExpression representing `expr` casted to `target_field`, /// preserving any explicit field semantics such as name, nullability, and /// metadata. +/// +/// If the input expression already has the same data type, this helper still +/// preserves an explicit `target_field` by constructing a field-aware +/// [`CastExpr`]. Only the default synthesized field created by the legacy +/// type-only API is elided back to the original child expression. pub fn cast_with_target_field( expr: Arc, input_schema: &Schema, diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index 90f52bebc3274..de8a5e90bc0d9 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -448,6 +448,13 @@ mod tests { create_physical_expr(expr, &df_schema, &ExecutionProps::new()) } + fn as_planner_cast(physical: &Arc) -> &expressions::CastExpr { + physical + .as_any() + .downcast_ref::() + .expect("planner should lower logical CAST to CastExpr") + } + #[test] fn test_create_physical_expr_scalar_input_output() -> Result<()> { let expr = col("letter").eq(lit("A")); @@ -487,10 +494,7 @@ mod tests { )); let physical = lower_cast_expr(&cast_expr, &schema)?; - let cast = physical - .as_any() - .downcast_ref::() - .expect("planner should lower logical CAST to CastExpr"); + let cast = as_planner_cast(&physical); assert_eq!(cast.target_field(), &target_field); assert_eq!(physical.return_field(&schema)?, target_field); @@ -505,10 +509,7 @@ mod tests { let cast_expr = Expr::Cast(Cast::new(Box::new(col("a")), DataType::Int64)); let physical = lower_cast_expr(&cast_expr, &schema)?; - let cast = physical - .as_any() - .downcast_ref::() - .expect("planner should lower ordinary CAST to CastExpr"); + let cast = as_planner_cast(&physical); let returned_field = physical.return_field(&schema)?; assert_eq!(cast.cast_type(), &DataType::Int64); @@ -533,10 +534,7 @@ mod tests { )); let physical = lower_cast_expr(&cast_expr, &schema)?; - let cast = physical - .as_any() - .downcast_ref::() - .expect("same-type casts should not be elided when the target field carries semantics"); + let cast = as_planner_cast(&physical); assert_eq!(cast.target_field(), &target_field); assert_eq!(physical.return_field(&schema)?, target_field); From efa0e3fa08d2e2a9613edf12fdf67586468b7fda Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 7 Apr 2026 22:28:14 +0800 Subject: [PATCH 6/9] feat: simplify Cast expression handling in planner.rs - Refactored the handling of `Expr::Cast` to remove unnecessary line breaks and improve readability in the `create_physical_expr` function. - Modified the test for cast lowering to maintain consistent formatting while preserving target field metadata. --- datafusion/physical-expr/src/planner.rs | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index de8a5e90bc0d9..5a7a858af0541 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -288,14 +288,12 @@ pub fn create_physical_expr( }; Ok(expressions::case(expr, when_then_expr, else_expr)?) } - Expr::Cast(Cast { expr, field }) => { - expressions::cast_with_target_field( - create_physical_expr(expr, input_dfschema, execution_props)?, - input_schema, - Arc::clone(field), - None, - ) - } + Expr::Cast(Cast { expr, field }) => expressions::cast_with_target_field( + create_physical_expr(expr, input_dfschema, execution_props)?, + input_schema, + Arc::clone(field), + None, + ), Expr::TryCast(TryCast { expr, field }) => { if !field.metadata().is_empty() { let (_, src_field) = expr.to_field(input_dfschema)?; @@ -484,9 +482,8 @@ mod tests { fn test_cast_lowering_preserves_target_field_metadata() -> Result<()> { let schema = test_cast_schema(); let target_field = Arc::new( - Field::new("cast_target", DataType::Int64, true).with_metadata( - [("target_meta".to_string(), "1".to_string())].into(), - ), + Field::new("cast_target", DataType::Int64, true) + .with_metadata([("target_meta".to_string(), "1".to_string())].into()), ); let cast_expr = Expr::Cast(Cast::new_from_field( Box::new(col("a")), From 442fedd88614aaf600c710b351eca1d987354677 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 7 Apr 2026 22:30:44 +0800 Subject: [PATCH 7/9] Refactor cast.rs to collapse nested if statements Collapse the nested if statements in datafusion/physical-expr/src/expressions/cast.rs to satisfy Clippy's collapsible_if lint. This change does not alter any existing behavior. --- datafusion/physical-expr/src/expressions/cast.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/cast.rs b/datafusion/physical-expr/src/expressions/cast.rs index d66a3d7a316b3..7c4ac60419ec5 100644 --- a/datafusion/physical-expr/src/expressions/cast.rs +++ b/datafusion/physical-expr/src/expressions/cast.rs @@ -340,10 +340,8 @@ pub fn cast_with_target_field( ) -> Result> { let expr_type = expr.data_type(input_schema)?; let cast_type = target_field.data_type(); - if expr_type == *cast_type { - if is_default_target_field(&target_field) { - return Ok(Arc::clone(&expr)); - } + if expr_type == *cast_type && is_default_target_field(&target_field) { + return Ok(Arc::clone(&expr)); } let can_build_cast = if requires_nested_struct_cast(&expr_type, cast_type) { From af7616e6af0808e9946f809e5af6a87b0ad113bf Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Wed, 8 Apr 2026 14:40:15 +0800 Subject: [PATCH 8/9] Add SLT support for UUID target-field metadata Introduce TypePlanner hook in test_context.rs for file-specific SLT to handle UUID target-field metadata. Added cast_extension_type_metadata.slt to cover new test cases for CAST and TRY_CAST with FixedSizeBinary. Ensure target metadata is preserved and acknowledge unchanged rejection path for TRY_CAST. --- datafusion/sqllogictest/src/test_context.rs | 31 +++++++++++- .../cast_extension_type_metadata.slt | 49 +++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 datafusion/sqllogictest/test_files/cast_extension_type_metadata.slt diff --git a/datafusion/sqllogictest/src/test_context.rs b/datafusion/sqllogictest/src/test_context.rs index 773f61655e41f..23bbf1c951446 100644 --- a/datafusion/sqllogictest/src/test_context.rs +++ b/datafusion/sqllogictest/src/test_context.rs @@ -28,7 +28,9 @@ use arrow::array::{ LargeStringArray, StringArray, TimestampNanosecondArray, UnionArray, }; use arrow::buffer::ScalarBuffer; -use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit, UnionFields}; +use arrow::datatypes::{ + DataType, Field, FieldRef, Schema, SchemaRef, TimeUnit, UnionFields, +}; use arrow::record_batch::RecordBatch; use datafusion::catalog::{ CatalogProvider, MemoryCatalogProvider, MemorySchemaProvider, SchemaProvider, Session, @@ -36,6 +38,7 @@ use datafusion::catalog::{ use datafusion::common::{DataFusionError, Result, not_impl_err}; use datafusion::functions::math::abs; use datafusion::logical_expr::async_udf::{AsyncScalarUDF, AsyncScalarUDFImpl}; +use datafusion::logical_expr::planner::TypePlanner; use datafusion::logical_expr::{ ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, Volatility, create_udf, @@ -54,6 +57,7 @@ use datafusion::common::cast::as_float64_array; use datafusion::execution::SessionStateBuilder; use datafusion::execution::runtime_env::RuntimeEnv; use log::info; +use sqlparser::ast; use tempfile::TempDir; /// Context for running tests @@ -64,6 +68,23 @@ pub struct TestContext { test_dir: Option, } +#[derive(Debug)] +struct SqlLogicTestTypePlanner; + +impl TypePlanner for SqlLogicTestTypePlanner { + fn plan_type_field(&self, sql_type: &ast::DataType) -> Result> { + match sql_type { + ast::DataType::Uuid => Ok(Some(Arc::new( + Field::new("", DataType::FixedSizeBinary(16), true).with_metadata( + [("ARROW:extension:name".to_string(), "arrow.uuid".to_string())] + .into(), + ), + ))), + _ => Ok(None), + } + } +} + impl TestContext { pub fn new(ctx: SessionContext) -> Self { Self { @@ -92,6 +113,14 @@ impl TestContext { state_builder = state_builder.with_spark_features(); } + if matches!( + relative_path.file_name().and_then(|name| name.to_str()), + Some("cast_extension_type_metadata.slt") + ) { + state_builder = + state_builder.with_type_planner(Arc::new(SqlLogicTestTypePlanner)); + } + let state = state_builder.build(); let mut test_ctx = TestContext::new(SessionContext::new_with_state(state)); diff --git a/datafusion/sqllogictest/test_files/cast_extension_type_metadata.slt b/datafusion/sqllogictest/test_files/cast_extension_type_metadata.slt new file mode 100644 index 0000000000000..425d8ac16eaee --- /dev/null +++ b/datafusion/sqllogictest/test_files/cast_extension_type_metadata.slt @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Regression tests for logical CAST targets that carry explicit field metadata. + +query ?T +SELECT + CAST( + arrow_cast(X'00010203040506070809000102030506', 'FixedSizeBinary(16)') + AS UUID + ), + arrow_metadata( + CAST( + arrow_cast(X'00010203040506070809000102030506', 'FixedSizeBinary(16)') + AS UUID + ), + 'ARROW:extension:name' + ); +---- +00010203040506070809000102030506 arrow.uuid + +query ?T +SELECT + CAST(raw AS UUID), + arrow_metadata(CAST(raw AS UUID), 'ARROW:extension:name') +FROM ( + VALUES ( + arrow_cast(X'00010203040506070809000102030506', 'FixedSizeBinary(16)') + ) +) AS uuids(raw); +---- +00010203040506070809000102030506 arrow.uuid + +statement error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*TryCast from FixedSizeBinary\(16\) to FixedSizeBinary\(16\)<\{"ARROW:extension:name": "arrow\.uuid"\}> is not supported +SELECT TRY_CAST(arrow_cast(X'00010203040506070809000102030506', 'FixedSizeBinary(16)') AS UUID); From 2eaca26ef8204d1253fb17adcbf133db1d3bbdb6 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Wed, 8 Apr 2026 14:47:23 +0800 Subject: [PATCH 9/9] Remove redundant Rust test for SLT coverage Eliminate the test_try_cast_to_extension_type_is_rejected from planner.rs as the new SLT now directly covers this case. This cleanup ensures better maintainability and reduces test duplication. --- datafusion/physical-expr/src/planner.rs | 27 ------------------------- 1 file changed, 27 deletions(-) diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index 5a7a858af0541..3227e20f68def 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -432,7 +432,6 @@ pub fn logical2physical(expr: &Expr, schema: &Schema) -> Arc { mod tests { use arrow::array::{ArrayRef, BooleanArray, RecordBatch, StringArray}; use arrow::datatypes::{DataType, Field}; - use datafusion_common::datatype::DataTypeExt; use datafusion_expr::col; use super::*; @@ -540,32 +539,6 @@ mod tests { Ok(()) } - #[test] - fn test_try_cast_to_extension_type_is_rejected() -> Result<()> { - let extension_field_type = Arc::new( - DataType::FixedSizeBinary(16) - .into_nullable_field() - .with_metadata( - [("ARROW:extension:name".to_string(), "arrow.uuid".to_string())] - .into(), - ), - ); - let expr = lit("3230e5d4-888e-408b-b09b-831f44aa0c58"); - let try_cast_expr = Expr::TryCast(TryCast::new_from_field( - Box::new(expr), - Arc::clone(&extension_field_type), - )); - let err = create_physical_expr( - &try_cast_expr, - &DFSchema::empty(), - &ExecutionProps::new(), - ) - .unwrap_err(); - assert!(err.message().contains("arrow.uuid")); - - Ok(()) - } - /// Test that deeply nested expressions do not cause a stack overflow. /// /// This test only runs when the `recursive_protection` feature is enabled,