Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
cb7765b
[TESTS][CONNECT] Expand Connect-specific tests for DataFrame column r…
zhengruifeng May 18, 2026
2a0af65
Group divergence tests in SparkConnectColumnResolutionTests with Clas…
zhengruifeng May 18, 2026
28bf33b
Rewrite layered tests in Reyden-style with deeper layered SQL
zhengruifeng May 18, 2026
3350fa5
Drop external-project reference from layered test section comment
zhengruifeng May 18, 2026
0576487
Use pyspark.errors.AnalysisException for both Classic and Connect blocks
zhengruifeng May 18, 2026
bc88433
Rewrite layered tests in DataFrame API; correct Classic/Connect parit…
zhengruifeng May 18, 2026
060e9a6
Rename Connect-side df variables to cdf; drop redundant type imports
zhengruifeng May 18, 2026
1d586fa
Add tests for cases documented in ColumnResolutionHelper.scala
zhengruifeng May 18, 2026
4b50bb1
Move layered tests to ColumnTestsMixin; drop outer shadow
zhengruifeng May 28, 2026
061d37b
Use df.col_name getattr form in layered tests
zhengruifeng May 29, 2026
29f715b
Move resolution tests to ColumnTestsMixin; override divergences
zhengruifeng May 29, 2026
59d2d3b
Hoist Window import to module head; note Connect divergences
zhengruifeng May 29, 2026
e2ff1ae
Fix agg_alias_shadow: use df["c"] since c is not on source df
zhengruifeng May 30, 2026
64e5d39
Fix agg_alias_shadow: use df with column c so df.c works
zhengruifeng May 30, 2026
6e5088a
Correct the union/intersect divergence comments
cloud-fan May 31, 2026
2f08dc1
Reword cube comment: 'rollup' -> 'subtotal' for the (cat, status, NUL…
cloud-fan May 31, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions python/pyspark/sql/tests/connect/test_parity_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

import unittest

from pyspark.errors import AnalysisException
from pyspark.sql import functions as sf
from pyspark.sql.tests.test_column import ColumnTestsMixin
from pyspark.testing.connectutils import ReusedConnectTestCase

Expand All @@ -38,6 +40,16 @@ def tearDownClass(cls):
def test_validate_column_types(self):
# Runs the inherited test_validate_column_types unchanged by delegating to super().
# NOTE(review): the reason for re-declaring it in this class is not visible here - confirm.
super().test_validate_column_types()

def test_resolve_after_union(self):
    # Classic and Connect disagree on this case. While walking the plan tree
    # for plan-id resolution, Connect treats Union as a leaf, so the plan id
    # of the left-side input is never found. CANNOT_RESOLVE_DATAFRAME_COLUMN
    # is therefore thrown before any name-based fallback, in strict and
    # lenient modes alike.
    left = self.spark.sql("SELECT 1 AS c")
    right = self.spark.sql("SELECT 2 AS c")
    with self.assertRaisesRegex(AnalysisException, "CANNOT_RESOLVE_DATAFRAME_COLUMN"):
        # Build the union inside the context so every step that could raise
        # is covered by the assertion.
        unioned = left.union(right)
        unioned.select(left.c).collect()

def test_df_col_resolution_mode(self):
self.assertEqual(
self.spark.conf.get("spark.sql.analyzer.strictDataFrameColumnResolution"),
Expand Down Expand Up @@ -68,6 +80,30 @@ def test_df_col_resolution_mode(self):
"false",
)

# The three shadowing tests below diverge in lenient mode: where Classic and
# strict-mode Connect raise, lenient mode resolves the tagged reference by
# name against the current (shadowed) output.

def test_resolve_after_chained_withcolumn_shadow(self):
    # Column `c` is replaced twice via withColumn (cast to string, then cast
    # to int) before it is selected through the original DataFrame's `df.c`.
    df = self.spark.sql("SELECT 1 AS c")
    as_string = df.withColumn("c", sf.col("c").cast("string"))
    as_int = as_string.withColumn("c", sf.col("c").cast("int"))
    collected = as_int.select(df.c).collect()
    self.assertEqual([row.c for row in collected], [1])

def test_resolve_after_select_alias_shadow(self):
    # The first select re-aliases a string cast of `c` back to the name `c`;
    # the second select then refers to `c` through the original DataFrame.
    df = self.spark.sql("SELECT 1 AS c")
    shadowed = df.select(df.c.cast("string").alias("c"))
    collected = shadowed.select(df.c).collect()
    values = [row.c for row in collected]
    self.assertEqual(values, ["1"])

def test_resolve_after_agg_alias_shadow(self):
    # A global aggregate (empty groupBy) aliases sum(c) to the name `c`; the
    # following select refers to `c` through the original DataFrame.
    df = self.spark.sql("SELECT 1 AS c")
    grouped = df.groupBy()
    aggregated = grouped.agg(sf.sum("c").alias("c"))
    collected = aggregated.select(df.c).collect()
    values = [row.c for row in collected]
    self.assertEqual(values, [1])


if __name__ == "__main__":
from pyspark.testing import main
Expand Down
Loading