diff --git a/python/pyspark/sql/tests/test_catalog.py b/python/pyspark/sql/tests/test_catalog.py index 92ffea233215f..d832a9ffa7d04 100644 --- a/python/pyspark/sql/tests/test_catalog.py +++ b/python/pyspark/sql/tests/test_catalog.py @@ -588,6 +588,47 @@ def test_catalog_analyze_table(self): spark.sql(f"INSERT INTO {t} VALUES (1)") spark.catalog.analyzeTable(t, noScan=True) + def test_path_current_path_disabled(self): + # current_path() is a regular builtin and resolves even when + # spark.sql.path.enabled is false. The DataFrame and SQL surfaces must agree. + from pyspark.sql.functions import current_path + + spark = self.spark + with self.sql_conf({"spark.sql.path.enabled": False}): + sql_form = spark.sql("SELECT current_path()").collect()[0][0] + self.assertIsInstance(sql_form, str) + self.assertNotEqual(sql_form, "") + api_form = spark.range(1).select(current_path()).collect()[0][0] + self.assertEqual(sql_form, api_form) + + def test_path_set_path_and_current_path(self): + # SET PATH is parsed and applied; current_path() reflects it + # over both the SQL and DataFrame surfaces. Restores DEFAULT_PATH on exit. + from pyspark.sql.functions import current_path + + spark = self.spark + with self.sql_conf({"spark.sql.path.enabled": True}): + try: + spark.sql("SET PATH = spark_catalog.default, system.builtin") + sql_form = spark.sql("SELECT current_path()").collect()[0][0] + self.assertEqual(sql_form, "spark_catalog.default,system.builtin") + api_form = spark.range(1).select(current_path()).collect()[0][0] + self.assertEqual(sql_form, api_form) + finally: + spark.sql("SET PATH = DEFAULT_PATH") + + def test_path_set_path_rejected_when_disabled(self): + # SET PATH must raise UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED + # when the feature flag is off (covers both classic and Connect error paths). 
+ spark = self.spark + with self.sql_conf({"spark.sql.path.enabled": False}): + with self.assertRaises(AnalysisException) as ctx: + spark.sql("SET PATH = spark_catalog.default") + self.assertEqual( + ctx.exception.getCondition(), + "UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED", + ) + class CatalogTests(CatalogTestsMixin, ReusedSQLTestCase): pass diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 1e3acbc001b3c..5761028f60234 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -217,6 +217,10 @@ singleTableSchema : colTypeList EOF ; +singlePathElementList + : pathElement (COMMA pathElement)* EOF + ; + singleRoutineParamList : colDefinitionList EOF ; diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/CatalogPlugin.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/CatalogPlugin.java index 23f3acc7230fa..20586f57bcfdd 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/CatalogPlugin.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/CatalogPlugin.java @@ -28,9 +28,10 @@ * views, and functions. *
<p>
* Catalog implementations must implement this marker interface to be loaded by - * {@link Catalogs#load(String, SQLConf)}. The loader will instantiate catalog classes using the + * {@link org.apache.spark.sql.connector.catalog.Catalogs#load(String,SQLConf)}. + * The loader will instantiate catalog classes using the * required public no-arg constructor. After creating an instance, it will be configured by calling - * {@link #initialize(String, CaseInsensitiveStringMap)}. + * {@link #initialize(String,CaseInsensitiveStringMap)}. *
<p>
* Catalog implementations are registered to a name by adding a configuration option to Spark: * {@code spark.sql.catalog.catalog-name=com.example.YourCatalogClass}. All configuration properties @@ -56,8 +57,8 @@ public interface CatalogPlugin { /** * Called to get this catalog's name. *
<p>
- * This method is only called after {@link #initialize(String, CaseInsensitiveStringMap)} is - * called to pass the catalog's name. + * This method is only called after + * {@link #initialize(String,CaseInsensitiveStringMap)} is called to pass the catalog's name. */ String name(); diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index faa78e0306364..838cda21b4c39 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -338,10 +338,7 @@ class Analyzer( AnalysisContext.reset() try { AnalysisHelper.markInAnalyzer { - sessionConf match { - case Some(c) => SQLConf.withExistingConf(c) { runAnalysis() } - case None => runAnalysis() - } + runWithSessionConf(runAnalysis()) } } finally { AnalysisContext.reset() @@ -349,16 +346,29 @@ class Analyzer( } else { AnalysisContext.withNewAnalysisContext { AnalysisHelper.markInAnalyzer { - sessionConf match { - case Some(c) => SQLConf.withExistingConf(c) { runAnalysis() } - case None => runAnalysis() - } + runWithSessionConf(runAnalysis()) } } } } } + /** + * Runs `thunk` under the analyzer's [[sessionConf]] for analyzer isolation, but yields to any + * outer [[SQLConf.withExistingConf]] scope (e.g. a SQL UDF / view body that pinned the + * creation-time configs). Falls through unchanged when [[sessionConf]] is unset, or when the + * outer scope already installed a different conf -- otherwise the outer scope's conf would be + * silently clobbered. 
+ */ + private def runWithSessionConf[T](thunk: => T): T = sessionConf match { + case None => thunk + case Some(c) => + SQLConf.getExistingConfIfSet match { + case Some(outer) if outer ne c => thunk + case _ => SQLConf.withExistingConf(c) { thunk } + } + } + /** * Returns a copy of this analyzer that uses the given [[CatalogManager]] for all catalog * lookups. All other configuration (extended rules, checks, etc.) is preserved. Used by @@ -392,13 +402,8 @@ class Analyzer( } } - private def executeSameContext(plan: LogicalPlan): LogicalPlan = sessionConf match { - // Respect explicit nested SQLConf overrides (e.g. persisted SQL UDF/view configs). - // Otherwise, run analysis with the captured session conf for analyzer isolation. - case Some(c) if SQLConf.get ne c => super.execute(plan) - case Some(c) => SQLConf.withExistingConf(c) { super.execute(plan) } - case None => super.execute(plan) - } + private def executeSameContext(plan: LogicalPlan): LogicalPlan = + runWithSessionConf(super.execute(plan)) def resolver: Resolver = conf.resolver @@ -1977,14 +1982,15 @@ class Analyzer( * This is used for special syntax transformations (e.g., COUNT(*) -> COUNT(1)) that * should only apply to builtin functions, not to user-defined functions. * - * In legacy mode (sessionOrder="first"), temp functions shadow builtins, so an - * unqualified name that matches a temp function should NOT be treated as builtin. + * When the effective SQL PATH puts `system.session` before `system.builtin`, temp + * functions shadow builtins, so an unqualified name that matches a temp function + * should NOT be treated as builtin. 
*/ private def matchesFunctionName(nameParts: Seq[String], expectedName: String): Boolean = { if (!FunctionResolution.isUnqualifiedOrBuiltinFunctionName(nameParts, expectedName)) { return false } - if (nameParts.size == 1 && conf.sessionFunctionResolutionOrder == "first") { + if (nameParts.size == 1 && functionResolution.isSessionBeforeBuiltinInPath) { val v1Catalog = catalogManager.v1SessionCatalog !v1Catalog.isTemporaryFunction(FunctionIdentifier(nameParts.head)) } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionResolution.scala index 8f8c77f38feac..4f6aee03967cb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionResolution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionResolution.scala @@ -71,18 +71,27 @@ class FunctionResolution( private val trimWarningEnabled = new AtomicBoolean(true) - /** Returns the current catalog path, preferring the view's context if resolving a view. */ - private def currentCatalogPath: Seq[String] = { - val ctx = AnalysisContext.get.catalogAndNamespace - if (ctx.nonEmpty) ctx - else (Seq(catalogManager.currentCatalog.name) ++ catalogManager.currentNamespace).toSeq - } - /** True if nameParts is 3-part and the first part is the system catalog name. */ private def isSystemCatalogQualified(nameParts: Seq[String]): Boolean = nameParts.length == 3 && nameParts.head.equalsIgnoreCase(CatalogManager.SYSTEM_CATALOG_NAME) + /** + * True iff `system.session` is searched before `system.builtin` in the effective SQL PATH. + * + * Drives the `count(*) -> count(1)` rewrite (which must skip transformation when a temp + * `count` shadows the builtin) and the `SessionCatalog` security check that blocks creating + * a temp function with a builtin's name. 
Reads the live PATH via `CatalogManager` and + * applies the same kinds extraction that drives `SessionCatalog`'s fast-path provider, so + * the predicate stays in sync with the lookup loop's actual order. Uses the consolidated + * snapshot helper (SPARK-56939) so the (catalog, namespace, path) triple is observed + * atomically. + */ + def isSessionBeforeBuiltinInPath: Boolean = { + catalogManager.sessionFunctionKindsForUnqualifiedResolution().headOption + .contains(org.apache.spark.sql.catalyst.catalog.SessionCatalog.Temp) + } + /** * Produces the ordered list of candidate names for resolution. Expansion happens in two cases: * @@ -101,18 +110,10 @@ class FunctionResolution( * directly, matching [[RelationResolution.relationResolutionEntries]] so routine order stays * aligned with relation order. */ - private[analysis] def sqlResolutionPathEntriesForAnalysis: Seq[Seq[String]] = { - AnalysisContext.get.resolutionPathEntries match { - case Some(entries) if conf.pathEnabled => entries - case _ => - val pathDefault = currentCatalogPath - catalogManager.sqlResolutionPathEntries( - pathDefault.head, - pathDefault.tail.toSeq, - catalogManager.currentCatalog.name, - catalogManager.currentNamespace.toSeq) - } - } + private[analysis] def sqlResolutionPathEntriesForAnalysis: Seq[Seq[String]] = + catalogManager.resolutionPathEntriesForAnalysis( + AnalysisContext.get.resolutionPathEntries, + AnalysisContext.get.catalogAndNamespace) private def resolutionCandidates(nameParts: Seq[String]): Seq[Seq[String]] = { if (nameParts.size == 1) { @@ -370,7 +371,20 @@ class FunctionResolution( if (nameParts.length == 1) { // Must match [[resolutionCandidates]] / [[resolveFunction]]: single-part names use PATH + // session order, not only the current namespace (LookupCatalog single-part rule). 
- for (candidate <- resolutionCandidates(nameParts)) { + // `system.session.` and `system.builtin.` candidates were already resolved by + // [[lookupBuiltinOrTempFunction]] / [[lookupBuiltinOrTempTableFunction]] above (they + // route through `identifierFromSystemNameParts`, which only accepts those two + // namespaces); skip them here to avoid redundant catalog calls. Other `system.` + // namespaces -- if any are ever added -- still go through persistent lookup. + val persistentCandidates = resolutionCandidates(nameParts).filterNot { c => + c.length >= 2 && + c.head.equalsIgnoreCase(CatalogManager.SYSTEM_CATALOG_NAME) && { + val ns = c(1) + ns.equalsIgnoreCase(CatalogManager.SESSION_NAMESPACE) || + ns.equalsIgnoreCase(CatalogManager.BUILTIN_NAMESPACE) + } + } + for (candidate <- persistentCandidates) { try { candidate match { case CatalogAndIdentifier(catalog, ident) => @@ -380,7 +394,12 @@ class FunctionResolution( case _ => } } catch { - case NonFatal(_) => + // Only treat explicit "not found" / "forbidden" signals as a miss. Any other failure + // (e.g. permission denied, transient catalog error) propagates. + case _: NoSuchFunctionException + | _: NoSuchNamespaceException + | _: CatalogNotFoundException => + case e: AnalysisException if e.getCondition == "FORBIDDEN_OPERATION" => } } return FunctionType.NotFound diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala index ef5862547574b..2a3ed248aa6d7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala @@ -130,25 +130,9 @@ class RelationResolution( * When PATH is disabled, legacy resolution rules apply. 
*/ private def relationResolutionEntries: Seq[Seq[String]] = { - val pinned = AnalysisContext.get.resolutionPathEntries - if (pinned.isDefined && conf.pathEnabled) { - pinned.get - } else { - val expandCatalog = catalogManager.currentCatalog.name - val expandNamespace = catalogManager.currentNamespace.toSeq - val (pathCatalog, pathNamespace) = - if (isResolvingView) { - val p = AnalysisContext.get.catalogAndNamespace - (p.head, p.tail.toSeq) - } else { - (expandCatalog, expandNamespace) - } - catalogManager.sqlResolutionPathEntries( - pathCatalog, - pathNamespace, - expandCatalog, - expandNamespace) - } + catalogManager.resolutionPathEntriesForAnalysis( + AnalysisContext.get.resolutionPathEntries, + AnalysisContext.get.catalogAndNamespace) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index f7319e9b03e84..185a5503b1107 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -44,7 +44,7 @@ class ResolveCatalogs(val catalogManager: CatalogManager) case c @ CreateVariable(identifiers, _, _) => // We resolve only UnresolvedIdentifiers, and pass on the other nodes val resolved = identifiers.map { - case UnresolvedIdentifier(nameParts, _) => + case u @ UnresolvedIdentifier(nameParts, _) => if (withinLocalVariableScope) { if (c.replace) { throw new AnalysisException( @@ -67,26 +67,22 @@ class ResolveCatalogs(val catalogManager: CatalogManager) val resolvedIdentifier = catalogManager.tempVariableManager.qualify(nameParts.last) - assertValidSessionVariableNameParts(nameParts, resolvedIdentifier) + assertValidSessionVariableNameParts(nameParts, resolvedIdentifier, u.origin) resolvedIdentifier } case plan => plan } c.copy(names = resolved) - case d @ 
DropVariable(UnresolvedIdentifier(nameParts, _), _) => + case d @ DropVariable(u @ UnresolvedIdentifier(nameParts, _), _) => if (withinLocalVariableScope) { throw new AnalysisException( "UNSUPPORTED_FEATURE.SQL_SCRIPTING_DROP_TEMPORARY_VARIABLE", Map.empty) } - if (nameParts.length == 1 && - !catalogManager.sessionScopeUnqualifiedAllowed( - catalogManager.currentCatalog.name(), - catalogManager.currentNamespace.toSeq)) { - throw QueryCompilationErrors.unresolvedVariableError(nameParts, Seq("SYSTEM", "SESSION")) - } + // DDL on session variables targets `system.session` directly; the SQL path only applies + // to DML (see [[VariableResolution.allowUnqualifiedSessionTempVariableLookup]]). val resolved = catalogManager.tempVariableManager.qualify(nameParts.last) - assertValidSessionVariableNameParts(nameParts, resolved) + assertValidSessionVariableNameParts(nameParts, resolved, u.origin) d.copy(name = resolved) case CreateFunction(UnresolvedIdentifier(nameParts, _), _, _, _, _) @@ -221,13 +217,15 @@ class ResolveCatalogs(val catalogManager: CatalogManager) private def assertValidSessionVariableNameParts( nameParts: Seq[String], - resolvedIdentifier: ResolvedIdentifier): Unit = { + resolvedIdentifier: ResolvedIdentifier, + origin: Origin): Unit = { if (!validSessionVariableName(nameParts)) { throw QueryCompilationErrors.unresolvedVariableError( nameParts, - Seq( + Seq(Seq( resolvedIdentifier.catalog.name(), - resolvedIdentifier.identifier.namespace().head) + resolvedIdentifier.identifier.namespace().head)), + origin ) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveFetchCursor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveFetchCursor.scala index b47332ace2b85..34942fcf08bcc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveFetchCursor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveFetchCursor.scala @@ -64,7 +64,8 @@ 
class ResolveFetchCursor(val catalogManager: CatalogManager) extends Rule[Logica nameParts = u.nameParts ) match { case Some(variable) => variable.copy(canFold = false) - case _ => throw unresolvedVariableError(u.nameParts, Seq("SYSTEM", "SESSION")) + case _ => throw unresolvedVariableError( + u.nameParts, variableResolution.searchPathEntriesForError, u.origin) } case other => throw SparkException.internalError( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSetVariable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSetVariable.scala index 6ecbc87d35530..ab80fc829cf47 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSetVariable.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSetVariable.scala @@ -62,7 +62,11 @@ class ResolveSetVariable(val catalogManager: CatalogManager) extends Rule[Logica nameParts = u.nameParts ) match { case Some(variable) => variable.copy(canFold = false) - case _ => throw unresolvedVariableError(u.nameParts, Seq("SYSTEM", "SESSION")) + case _ => + throw unresolvedVariableError( + u.nameParts, + variableResolution.searchPathEntriesForError, + u.origin) } case other => throw SparkException.internalError( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/VariableResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/VariableResolution.scala index f8cce0d6f821e..bc85ccfee34c1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/VariableResolution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/VariableResolution.scala @@ -43,10 +43,25 @@ class VariableResolution( * (PATH enabled and explicitly set). 
*/ private def allowUnqualifiedSessionTempVariableLookup(nameParts: Seq[String]): Boolean = { - if (nameParts.length != 1) return true - catalogManager.sessionScopeUnqualifiedAllowed( - catalogManager.currentCatalog.name(), - catalogManager.currentNamespace.toSeq) + nameParts.length != 1 || catalogManager.isSystemSessionOnPath + } + + /** + * Search-path entries to report in `UNRESOLVED_VARIABLE` for DML lookups (`SET VAR`, + * `FETCH ... INTO`). The full SQL path is reported regardless of how the name was + * qualified, matching the convention used by `TABLE_OR_VIEW_NOT_FOUND` and + * `UNRESOLVED_ROUTINE`. Keeping the rendering qualification-independent also avoids + * re-shaping the error if Spark ever grows struct-field assignment, where 2-part forms + * become genuinely ambiguous. + * + * DDL paths (`DECLARE` / `DROP` name validation in + * [[org.apache.spark.sql.catalyst.analysis.ResolveCatalogs]]) do not consult the SQL path + * and report `[system.session]` directly at their throw site. 
+ */ + def searchPathEntriesForError: Seq[Seq[String]] = { + catalogManager.resolutionPathEntriesForAnalysis( + AnalysisContext.get.resolutionPathEntries, + AnalysisContext.get.catalogAndNamespace) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/FunctionResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/FunctionResolver.scala index 1a8658bb764d5..dd70963a79841 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/FunctionResolver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/FunctionResolver.scala @@ -60,7 +60,7 @@ import org.apache.spark.sql.catalyst.util.CollationFactory */ class FunctionResolver( expressionResolver: ExpressionResolver, - functionResolution: FunctionResolution, + protected val functionResolution: FunctionResolution, aggregateExpressionResolver: AggregateExpressionResolver, binaryArithmeticResolver: BinaryArithmeticResolver) extends TreeNodeResolver[UnresolvedFunction, Expression] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/FunctionResolverUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/FunctionResolverUtils.scala index 503c94fc9cdf6..3c5a3f1832e8d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/FunctionResolverUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/FunctionResolverUtils.scala @@ -19,7 +19,9 @@ package org.apache.spark.sql.catalyst.analysis.resolver import java.util.Locale +import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.analysis.{ + FunctionResolution, ResolvedStar, Star, UnresolvedFunction, @@ -35,6 +37,7 @@ import org.apache.spark.sql.internal.SQLConf */ trait FunctionResolverUtils { protected def expressionResolver: ExpressionResolver + protected def 
functionResolution: FunctionResolution protected def conf: SQLConf private val scopes = expressionResolver.getNameScopes @@ -99,7 +102,21 @@ trait FunctionResolverUtils { unresolvedFunction: UnresolvedFunction, normalizeFunctionName: Boolean = true ): Boolean = { - !unresolvedFunction.isDistinct && isCount(unresolvedFunction, normalizeFunctionName) + !unresolvedFunction.isDistinct && + isCount(unresolvedFunction, normalizeFunctionName) && + !isUnqualifiedCountShadowedByTemp(unresolvedFunction) + } + + /** + * Keep single-pass behavior aligned with fixed-point: when PATH puts system.session before + * system.builtin and a temp `count` exists, unqualified `count(*)` must not be rewritten to + * `count(1)`. + */ + private def isUnqualifiedCountShadowedByTemp(unresolvedFunction: UnresolvedFunction): Boolean = { + unresolvedFunction.nameParts.length == 1 && + functionResolution.isSessionBeforeBuiltinInPath && + functionResolution.catalogManager.v1SessionCatalog + .isTemporaryFunction(FunctionIdentifier(unresolvedFunction.nameParts.head)) } private def isCount( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/HigherOrderFunctionResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/HigherOrderFunctionResolver.scala index 6b90a5c05baf1..676ef381f2f17 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/HigherOrderFunctionResolver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/HigherOrderFunctionResolver.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.errors.QueryCompilationErrors */ class HigherOrderFunctionResolver( protected val expressionResolver: ExpressionResolver, - functionResolution: FunctionResolution) + protected val functionResolution: FunctionResolution) extends TreeNodeResolver[UnresolvedFunction, Expression] with ProducesUnresolvedSubtree with CoercesExpressionTypes diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 9c863a7b55fe7..9e5a2176612cd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -113,18 +113,73 @@ class SessionCatalog( identifier.copy(funcName = "") == SESSION_NAMESPACE_TEMPLATE /** - * Session function kinds in resolution order for unqualified lookups. - * Matches [[SQLConf.sessionFunctionResolutionOrder]]: "first" (session first), - * "second" (default), "last" (builtin only; session tried after persistent). + * When set, unqualified builtin/temp function resolution uses this fixed kind order instead of + * [[catalogManagerForSessionFunctionKinds]] / [[SQLConf.systemPathOrder]]. For unit tests only; + * production relies on the catalog manager binding. */ - private def sessionFunctionKindsInResolutionOrder: Seq[SessionFunctionKind] = { - conf.sessionFunctionResolutionOrder match { - case "first" => Seq(Temp, Builtin) - case "last" => Seq(Builtin) - case _ => Seq(Builtin, Temp) // "second" (default) - } + @volatile private var sessionFunctionKindsTestOverride: Option[Seq[SessionFunctionKind]] = None + + /** + * Live PATH for session function kinds. Set from + * [[org.apache.spark.sql.connector.catalog.CatalogManager]]'s constructor via + * [[bindCatalogManagerForSessionFunctionKinds]] so unqualified lookups and the security check + * that blocks temp functions from shadowing builtins read the effective SQL PATH (post-`SET + * PATH`, with [[SQLConf.DEFAULT_PATH]] and [[SQLConf.defaultPathOrder]] fallbacks already + * applied). + * + * When unset (e.g. 
standalone [[SessionCatalog]] in tests), kinds derive from + * [[SQLConf.systemPathOrder]] -- the seeded default path -- without assuming other legacy + * resolution-order conf beyond seeding `defaultPathOrder`. + */ + @volatile private var catalogManagerForSessionFunctionKinds: Option[CatalogManager] = None + + /** + * Wire live PATH-derived session function kinds from the session [[CatalogManager]]. + * Called once from [[org.apache.spark.sql.connector.catalog.CatalogManager]]'s constructor. + */ + private[sql] def bindCatalogManagerForSessionFunctionKinds(cm: CatalogManager): Unit = { + catalogManagerForSessionFunctionKinds = Some(cm) + } + + /** + * Pin session function kinds for tests (`None` clears). Uses `private[sql]` so tests under the + * `org.apache.spark.sql` package can control ordering without a public catalog API. + */ + private[sql] def setSessionFunctionKindsTestOverride( + kinds: Option[Seq[SessionFunctionKind]]): Unit = { + sessionFunctionKindsTestOverride = kinds } + /** + * Session function kinds in resolution order for unqualified lookups: test override if set, + * else live PATH from [[catalogManagerForSessionFunctionKinds]], else + * [[SQLConf.systemPathOrder]]. + * + * MUST NOT be called while holding [[SessionCatalog]]'s intrinsic lock (see SPARK-56939): + * the path-driven branch delegates to [[CatalogManager]], which has its own intrinsic lock + * and re-enters this catalog through `USE` paths, so nesting the two locks here would + * deadlock. + */ + private def sessionFunctionKindsInResolutionOrder: Seq[SessionFunctionKind] = + sessionFunctionKindsTestOverride.getOrElse { + catalogManagerForSessionFunctionKinds match { + case Some(cm) => + // Use the consolidated helper so unqualified resolution observes a consistent + // (currentCatalog, currentNamespace, path) triple in a single critical section. 
+ cm.sessionFunctionKindsForUnqualifiedResolution() + case None => + CatalogManager.systemFunctionKindsFromPath(conf.systemPathOrder) + } + } + + /** + * True iff the effective SQL PATH searches `system.session` before `system.builtin`. Used + * to gate the security check that blocks temporary functions from silently shadowing a + * builtin of the same name. + */ + private def sessionFirstInPath: Boolean = + sessionFunctionKindsInResolutionOrder.headOption.contains(Temp) + /** * Checks if a namespace represents temporary functions. */ @@ -2081,12 +2136,11 @@ class SessionCatalog( qualifyIdentifier(func) } - // Security check: When legacy mode is enabled, block SQL-created temporary functions - // from shadowing builtin functions (to preserve master behavior) - // Scala UDFs are still allowed to shadow in legacy mode - // We throw ROUTINE_ALREADY_EXISTS to indicate the builtin function already exists - val sessionFirst = conf.sessionFunctionResolutionOrder == "first" - if (func.database.isEmpty && sessionFirst && !overrideIfExists) { + // Security check: when the effective SQL PATH searches `system.session` before + // `system.builtin`, block creating an unqualified temporary function whose name + // collides with a builtin so it cannot silently shadow that builtin via unqualified + // resolution. We throw ROUTINE_ALREADY_EXISTS to indicate the conflict. 
+ if (func.database.isEmpty && sessionFirstInPath && !overrideIfExists) { val funcName = func.funcName // Check if function exists in builtin namespace (extensions are stored as builtins) val builtinIdent = FunctionRegistry.builtinFunctionIdentifier(funcName) @@ -2206,10 +2260,11 @@ class SessionCatalog( // Use FunctionIdentifier with session namespace for temporary functions val tempIdentifier = tempFunctionIdentifier(function.name.funcName) - // Security check: When legacy mode is enabled, block SQL-created temporary functions - // from shadowing builtin functions (including extensions) as a safeguard - // We throw ROUTINE_ALREADY_EXISTS to indicate the builtin function already exists - if ((conf.sessionFunctionResolutionOrder == "first") && !overrideIfExists) { + // Security check: when the effective SQL PATH searches `system.session` before + // `system.builtin`, block creating an unqualified temporary function whose name + // collides with a builtin (including extensions) so it cannot silently shadow that + // builtin via unqualified resolution. + if (sessionFirstInPath && !overrideIfExists) { val funcName = function.name.funcName // Check if function exists in builtin namespace (extensions are stored as builtins) val builtinIdent = FunctionRegistry.builtinFunctionIdentifier(funcName) @@ -2515,7 +2570,14 @@ class SessionCatalog( * Look up the `ExpressionInfo` of the given function by name. * Resolution order follows the configured path (e.g. builtin then session). */ - def lookupBuiltinOrTempTableFunction(name: String): Option[ExpressionInfo] = synchronized { + def lookupBuiltinOrTempTableFunction(name: String): Option[ExpressionInfo] = { + // Intentionally not `synchronized` on this [[SessionCatalog]]: resolution order may call + // into [[CatalogManager]] (e.g. [[CatalogManager.sqlResolutionPathEntries]] via + // [[sessionFunctionKindsInResolutionOrder]]), which synchronizes on the manager. 
The + // SPARK-56939 fix removed the reverse `CatalogManager -> SessionCatalog` nest from the + // `USE`-style mutators that previously closed the deadlock cycle; keeping this method + // un-synchronized preserves the `SessionCatalog -> CatalogManager` direction as the + // single allowed ordering, so the invariant survives future regressions. lookupFunctionWithShadowing(name, tableFunctionRegistry, checkBuiltinOperators = false) } @@ -2667,7 +2729,12 @@ class SessionCatalog( /** * Look up the [[ExpressionInfo]] associated with the specified function, assuming it exists. */ - def lookupFunctionInfo(name: FunctionIdentifier): ExpressionInfo = synchronized { + def lookupFunctionInfo(name: FunctionIdentifier): ExpressionInfo = { + // Intentionally not `synchronized` on this [[SessionCatalog]] (see + // [[lookupBuiltinOrTempTableFunction]]): unqualified builtin/temp resolution uses + // [[sessionFunctionKindsInResolutionOrder]] / [[CatalogManager]], and SPARK-56939 + // requires this catalog's intrinsic lock to NEVER be held when reaching into + // [[CatalogManager]] from a function-resolution path. 
if (name.database.isEmpty) { lookupBuiltinOrTempFunction(name.funcName) .orElse(lookupBuiltinOrTempTableFunction(name.funcName)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/VariableManager.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/VariableManager.scala index 4c7d8db6604b6..e9edd45fae514 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/VariableManager.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/VariableManager.scala @@ -24,6 +24,7 @@ import scala.collection.mutable import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{FakeSystemCatalog, ResolvedIdentifier} import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.catalyst.trees.Origin import org.apache.spark.sql.connector.catalog.{CatalogManager, Identifier} import org.apache.spark.sql.connector.catalog.CatalogManager.{SESSION_NAMESPACE, SYSTEM_CATALOG_NAME} import org.apache.spark.sql.errors.DataTypeErrorsBase @@ -49,8 +50,11 @@ trait VariableManager { * * @param nameParts Name parts of the variable. * @param varDef The new VariableDefinition of the variable. + * @param origin Origin of the SET reference, used in + * [[org.apache.spark.sql.errors.QueryCompilationErrors.unresolvedVariableError]] + * if the variable is unexpectedly absent at execution time. */ - def set(nameParts: Seq[String], varDef: VariableDefinition): Unit + def set(nameParts: Seq[String], varDef: VariableDefinition, origin: Origin): Unit /** * Get an existing variable. 
@@ -130,11 +134,14 @@ class TempVariableManager extends VariableManager with DataTypeErrorsBase { variables.put(name, varDef) } - override def set(nameParts: Seq[String], varDef: VariableDefinition): Unit = synchronized { + override def set( + nameParts: Seq[String], + varDef: VariableDefinition, + origin: Origin): Unit = synchronized { val name = nameParts.last // Sanity check as this is already checked in ResolveSetVariable. if (!variables.contains(name)) { - throw unresolvedVariableError(nameParts, Seq("SYSTEM", "SESSION")) + throw unresolvedVariableError(nameParts, Seq(Seq("SYSTEM", "SESSION")), origin) } variables.put(name, varDef) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala index 216136d8a7c82..29bf924f244e8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala @@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.parser.ParserUtils.withOrigin import org.apache.spark.sql.catalyst.plans.logical.{CompoundPlanStatement, LogicalPlan} import org.apache.spark.sql.catalyst.trees.Origin +import org.apache.spark.sql.connector.catalog.PathElement import org.apache.spark.sql.errors.QueryParsingErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType @@ -110,6 +111,18 @@ abstract class AbstractSqlParser extends AbstractParser with ParserInterface { } } + /** + * Parse the right-hand side of `SET PATH = ...` (a comma-separated list of path elements). + * Used by [[org.apache.spark.sql.connector.catalog.CatalogManager]] to honor the + * [[SQLConf.DEFAULT_PATH]] conf without re-implementing the SET PATH grammar. 
+ */ + private[sql] def parsePathElements(sqlText: String): Seq[PathElement] = parse(sqlText) { parser => + val ctx = parser.singlePathElementList() + withErrorHandling(ctx, Some(sqlText)) { + astBuilder.visitSinglePathElementList(ctx) + } + } + def withErrorHandling[T](ctx: ParserRuleContext, sqlText: Option[String])(toResult: => T): T = { withOrigin(ctx, sqlText) { try { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 2b282467b305b..cdb01c36d744b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -47,7 +47,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern.PARAMETER import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, CollationFactory, DateTimeUtils, EvaluateUnresolvedInlineTable, IntervalUtils} import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, getZoneId, stringToDate, stringToTime, stringToTimestamp, stringToTimestampWithoutTimeZone} -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, ChangelogInfo, SupportsNamespaces, TableCatalog, TableWritePrivilege} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, ChangelogInfo, PathElement, SupportsNamespaces, TableCatalog, TableWritePrivilege} import org.apache.spark.sql.connector.catalog.ChangelogRange.{TimestampRange, UnboundedRange, VersionRange} import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, Expression => V2Expression, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform} @@ -708,6 +708,26 @@ class 
AstBuilder extends DataTypeAstBuilder visitMultipartIdentifier(ctx.multipartIdentifier) } + override def visitSinglePathElementList( + ctx: SinglePathElementListContext): Seq[PathElement] = withOrigin(ctx) { + ctx.pathElement().asScala.map(visitPathElement).toSeq + } + + override def visitPathElement(ctx: PathElementContext): PathElement = withOrigin(ctx) { + if (ctx.DEFAULT_PATH() != null) PathElement.DefaultPath + else if (ctx.SYSTEM_PATH() != null) PathElement.SystemPath + else if (ctx.PATH() != null) PathElement.PathRef + else if (ctx.CURRENT_DATABASE() != null || ctx.CURRENT_SCHEMA() != null) { + PathElement.CurrentSchema + } else { + val parts = visitMultipartIdentifier(ctx.multipartIdentifier()) + if (parts.length < 2) { + throw QueryCompilationErrors.invalidSqlPathSchemaReferenceError(parts.mkString(".")) + } + PathElement.SchemaInPath(parts) + } + } + override def visitSingleDataType(ctx: SingleDataTypeContext): DataType = withOrigin(ctx) { typedVisit[DataType](ctx.dataType) } @@ -7140,11 +7160,13 @@ class AstBuilder extends DataTypeAstBuilder dataTypeOpt.map { dt => default.copy(child = Cast(default.child, dt)) }.getOrElse(default) } CreateVariable( - ctx.identifierReferences.asScala.map ( - identifierReference => { - withIdentClause(identifierReference, UnresolvedIdentifier(_)) - } - ).toSeq, + ctx.identifierReferences.asScala.map { identifierReference => + // Give each `UnresolvedIdentifier` its own origin pointing at the variable name + // fragment so analyzer-time errors (e.g. UNRESOLVED_VARIABLE) can highlight just + // that identifier rather than the whole `DECLARE ...` statement. 
+ withIdentClause(identifierReference, parts => + withOrigin(identifierReference) { UnresolvedIdentifier(parts) }) + }.toSeq, defaultExpression, ctx.REPLACE() != null ) @@ -7160,7 +7182,8 @@ class AstBuilder extends DataTypeAstBuilder */ override def visitDropVariable(ctx: DropVariableContext): LogicalPlan = withOrigin(ctx) { DropVariable( - withIdentClause(ctx.identifierReference(), UnresolvedIdentifier(_)), + withIdentClause(ctx.identifierReference(), parts => + withOrigin(ctx.identifierReference()) { UnresolvedIdentifier(parts) }), ctx.EXISTS() != null ) } @@ -7285,7 +7308,7 @@ class AstBuilder extends DataTypeAstBuilder // The SET variable source is a query val variables = multipartIdentifierList.multipartIdentifier.asScala.map { variableIdent => val varName = visitMultipartIdentifier(variableIdent) - UnresolvedAttribute(varName) + withOrigin(variableIdent) { UnresolvedAttribute(varName) } }.toSeq SetVariable(variables, visitQuery(query)) } else { @@ -7297,7 +7320,7 @@ class AstBuilder extends DataTypeAstBuilder case n: NamedExpression => n case e => Alias(e, varIdent.last)() } - (UnresolvedAttribute(varIdent), varNamedExpr) + (withOrigin(assign.key) { UnresolvedAttribute(varIdent) }, varNamedExpr) }.toSeq.unzip SetVariable(variables, Project(values, OneRowRelation())) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala index 0a2ad28051dd3..3aad52dbd1d01 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.connector.catalog +import java.util.concurrent.atomic.AtomicReference + import scala.collection.mutable import scala.util.Try @@ -24,6 +26,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException 
import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.catalog.{SessionCatalog, TempVariableManager} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.util.StringUtils import org.apache.spark.sql.connector.catalog.transactions.Transaction import org.apache.spark.sql.errors.QueryCompilationErrors @@ -52,6 +55,12 @@ class CatalogManager( // TODO: create a real SYSTEM catalog to host `TempVariableManager` under the SESSION namespace. val tempVariableManager: TempVariableManager = new TempVariableManager + // Wire `SessionCatalog`'s fast-path kinds to the live SQL PATH. The kinds list itself is + // pure data conversion (system entries from the path, in path order); the *decision* to use + // path-order kinds for unqualified lookups lives at the Strategy layer (see callers of + // [[CatalogManager.systemFunctionKindsFromPath]]). + v1SessionCatalog.bindCatalogManagerForSessionFunctionKinds(this) + def catalog(name: String): CatalogPlugin = synchronized { if (name.equalsIgnoreCase(SESSION_CATALOG_NAME)) { v2SessionCatalog @@ -123,23 +132,109 @@ class CatalogManager( } } - def setCurrentNamespace(namespace: Array[String]): Unit = synchronized { - if (isSessionCatalog(currentCatalog) && namespace.length == 1) { + def setCurrentNamespace(namespace: Array[String]): Unit = { + // SPARK-56939: do NOT hold [[CatalogManager]]'s intrinsic lock across the callbacks below. + // [[v1SessionCatalog.setCurrentDatabaseWithNameCheck]] briefly synchronizes on + // [[SessionCatalog]], and concurrent unqualified function resolution acquires the + // [[SessionCatalog]] lock and then reaches into [[CatalogManager]] via + // [[sqlResolutionPathEntries]]; nesting the manager lock outside the catalog lock here + // would invert that order and deadlock. Snapshot the dispatch decision under the lock, + // run callbacks outside it, then publish the new namespace under the lock again. 
+ // + // Concurrency trade-offs versus the pre-SPARK-56939 atomic version (v1-side and + // CM-side drift modes): + // + // (a) v1-side drift. The `isSession` snapshot can drift if a concurrent + // [[setCurrentCatalog]] switches to a v2 catalog between this read and the v1 + // callback below -- the callback would still touch `v1.currentDb` even though + // the active catalog is no longer the session catalog. A later switch back to + // the session catalog resets `v1.currentDb` to `default` (see + // [[setCurrentCatalog]]), so long-term state remains consistent; only the + // intermediate observation is novel. + // + // (b) CM-side publish-overwrite drift (sticky). Between the v1 callback returning + // and the publish below, a concurrent [[setCurrentCatalog]] can complete fully + // -- switching `_currentCatalogName` to (say) a v2 catalog and clearing + // `_currentNamespace = None` -- before this method's publish overwrites that + // with `Some(namespace)`. End state: `_currentNamespace = Some(namespace)` is + // published under a different `_currentCatalogName` than the one observed when + // [[isSession]] was snapshotted at the top. Unlike (a) there is no analogous + // auto-recovery; the mismatch sticks until the next `USE`. This is still + // last-writer-wins for two racing `USE` commands, which is the conventional + // expectation, so it is accepted as a trade-off against the deadlock alternative. + val isSession = synchronized(isSessionCatalog(currentCatalog)) + if (isSession && namespace.length == 1) { v1SessionCatalog.setCurrentDatabaseWithNameCheck( namespace.head, _ => assertNamespaceExist(namespace)) } else { assertNamespaceExist(namespace) } - _currentNamespace = Some(namespace) + synchronized { + _currentNamespace = Some(namespace) + } } import CatalogManager.SessionPathEntry private var _sessionPath: Option[Seq[SessionPathEntry]] = None - /** Returns the raw stored session path entries, or None if no path is set. 
*/ - def sessionPathEntries: Option[Seq[SessionPathEntry]] = synchronized { _sessionPath } + /** + * Cache for [[confDefaultPathEntries]]: stores the expanded [[SessionPathEntry]] list keyed + * on the trimmed [[SQLConf#DEFAULT_PATH]] string and + * [[SQLConf#SESSION_FUNCTION_RESOLUTION_ORDER]] value (the only conf that affects the + * expansion of `DEFAULT_PATH` / `SYSTEM_PATH` tokens). + * `CurrentSchemaEntry` markers are preserved unresolved so the cache stays valid across + * `USE SCHEMA`. + */ + private val confDefaultPathCache = + new AtomicReference[Option[(String, String, Seq[SessionPathEntry])]](None) + + /** + * Returns the effective session path entries: the explicit `SET PATH` value if stored, + * else the parsed [[SQLConf#DEFAULT_PATH]] conf if non-empty (mirroring how + * [[currentCatalog]] falls back to [[SQLConf#DEFAULT_CATALOG]]). Returns `None` when + * [[SQLConf#PATH_ENABLED]] is false or both sources are empty. + */ + def sessionPathEntries: Option[Seq[SessionPathEntry]] = synchronized { + if (!conf.pathEnabled) None + else _sessionPath.orElse(confDefaultPathEntries) + } + + /** Raw `_sessionPath` (post-`SET PATH`), without the [[SQLConf#DEFAULT_PATH]] fallback. */ + def storedSessionPathEntries: Option[Seq[SessionPathEntry]] = synchronized { _sessionPath } + + /** + * Parsed and expanded [[SQLConf#DEFAULT_PATH]] value, or `None` when the conf is empty. + * Reuses the SET PATH grammar via + * [[org.apache.spark.sql.catalyst.parser.AbstractSqlParser#parsePathElements]] (via + * [[org.apache.spark.sql.catalyst.parser.CatalystSqlParser]]). An inner + * `DEFAULT_PATH` token resolves to the spark-builtin default ordering (cycle break). + * + * Unlike `SET PATH`, this does NOT run a duplicate check: lookup uses first-match + * resolution, so any redundant entry (including ones that only collide after a later + * `USE SCHEMA`) is dead code rather than an error. Cached so the hot path is a single + * atomic load on conf-stable sessions. 
+ */ + def confDefaultPathEntries: Option[Seq[SessionPathEntry]] = { + val confValue = conf.defaultPath + if (confValue == null || confValue.trim.isEmpty) { + confDefaultPathCache.set(None) + None + } else { + val trimmed = confValue.trim + val sessionOrder = conf.sessionFunctionResolutionOrder + val expanded = confDefaultPathCache.get() match { + case Some((k, ord, cached)) if k == trimmed && ord == sessionOrder => cached + case _ => + val elements = CatalystSqlParser.parsePathElements(trimmed) + val computed = PathElement.expand(elements, conf, this, isConfDefaultExpansion = true) + confDefaultPathCache.set(Some((trimmed, sessionOrder, computed))) + computed + } + if (expanded.isEmpty) None else Some(expanded) + } + } def setSessionPath(entries: Seq[SessionPathEntry]): Unit = synchronized { _sessionPath = Some(entries) @@ -150,18 +245,27 @@ class CatalogManager( } private[sql] def copySessionPathFrom(other: CatalogManager): Unit = synchronized { - _sessionPath = other.sessionPathEntries + _sessionPath = other.storedSessionPathEntries } /** * String form of the current resolution path for CURRENT_PATH(). - * When PATH is enabled and a session path is stored, formats the effective path entries - * with markers expanded. Otherwise falls back to the legacy resolutionSearchPath. + * When PATH is enabled and a session path is in effect (stored or via + * [[SQLConf#DEFAULT_PATH]]), formats the resolved entries. Otherwise falls back to the legacy + * resolutionSearchPath. + * + * SPARK-56939 note: this is currently the only intentional `CatalogManager.synchronized -> + * SessionCatalog.synchronized` nest left in this class. The transitive call into + * [[v1SessionCatalog.getCurrentDatabase]] happens via [[currentNamespace]], which fetches + * the v1 current database under the CM lock. It is safe today because no code path holds + * [[SessionCatalog]]'s intrinsic lock while waiting on [[CatalogManager]]'s -- the + * SPARK-56939 fix removed every such SC->CM ordering. 
Any future change that introduces a + * new SC->CM ordering must take `currentPathString` (or any other CM->SC nest) into + * account to avoid resurrecting the deadlock. */ def currentPathString: String = synchronized { import CatalogV2Implicits._ - val stored = if (conf.pathEnabled) _sessionPath else None - stored match { + sessionPathEntries match { case Some(entries) => val resolved = CatalogManager.resolvePathEntries( entries, currentCatalog.name(), currentNamespace.toSeq) @@ -174,8 +278,9 @@ class CatalogManager( /** * Ordered catalog/schema path entries for resolving unqualified SQL object names. - * When PATH is off or unset, applies [[SQLConf.defaultPathOrder]] (legacy). - * When PATH is explicitly set, uses the resolved stored path entries. + * When PATH is off or unset, applies [[SQLConf#defaultPathOrder]] (legacy). + * When PATH is in effect (stored or via the [[SQLConf#DEFAULT_PATH]] conf), uses the + * resolved entries. */ def sqlResolutionPathEntries( pathDefaultCatalog: String, @@ -185,8 +290,7 @@ class CatalogManager( val defaultEntry = if (pathDefaultNamespace.isEmpty) Seq(pathDefaultCatalog) else pathDefaultCatalog +: pathDefaultNamespace - val stored = if (conf.pathEnabled) _sessionPath else None - stored match { + sessionPathEntries match { case Some(entries) => CatalogManager.resolvePathEntries(entries, expandCatalog, expandNamespace) case None => @@ -202,12 +306,106 @@ class CatalogManager( currentCatalog, currentNamespace, currentCatalog, currentNamespace) - /** True if [[sqlResolutionPathEntries]] includes `system.session`. */ - def sessionScopeUnqualifiedAllowed( - currentCatalog: String, - currentNamespace: Seq[String]): Boolean = - sqlResolutionPathEntries(currentCatalog, currentNamespace) - .exists(CatalogManager.isSystemSessionPathEntry) + /** + * Snapshot the live PATH-derived [[SessionCatalog.SessionFunctionKind]] order used by + * unqualified function/table-function resolution. 
+ * + * The `(currentCatalog, _currentNamespace, sessionPath)` triple is read together inside a + * single CM critical section so a concurrent `USE` / `SET PATH` cannot return a torn + * snapshot for those three fields (e.g. catalog from one observation, explicit namespace + * from another). + * + * The `v1SessionCatalog.getCurrentDatabase` read needed for the default-namespace fallback + * is taken OUTSIDE the CM lock and is therefore intentionally racy w.r.t. a concurrent + * `USE SCHEMA`. That staleness is harmless for this helper's output: this method consumes + * `effectiveNs` only to expand `CURRENT_SCHEMA` markers in the SQL path, and + * [[CatalogManager.systemFunctionKindsFromPath]] only retains literal `system.builtin` / + * `system.session` entries from the resolved path -- it never inspects any + * `(catalog, namespace)` derived from `v1`. So if `v1CurrentDb` lags by one `USE SCHEMA`, + * a `CURRENT_SCHEMA` entry might briefly resolve to the previous database, but the kinds + * list (the only thing returned here) is unaffected. Moving the read inside the CM lock + * would re-introduce the SPARK-56939 lock-order inversion this helper exists to avoid. + * + * Callers (e.g. [[SessionCatalog.sessionFunctionKindsInResolutionOrder]], + * [[org.apache.spark.sql.catalyst.analysis.FunctionResolution.isSessionBeforeBuiltinInPath]]) + * MUST NOT hold [[SessionCatalog]]'s intrinsic lock when invoking this method. + */ + def sessionFunctionKindsForUnqualifiedResolution(): Seq[SessionCatalog.SessionFunctionKind] = { + // SPARK-56939: read v1's current database before taking the CM lock; see the method + // doc for why the resulting staleness is harmless for the kinds list. 
+ val v1CurrentDb = v1SessionCatalog.getCurrentDatabase + val pathEntries = synchronized { + val catName = currentCatalog.name() + val effectiveNs: Seq[String] = _currentNamespace.map(_.toSeq).getOrElse { + if (catName == SESSION_CATALOG_NAME) { + Seq(v1CurrentDb) + } else { + currentCatalog.defaultNamespace().toSeq + } + } + sqlResolutionPathEntries(catName, effectiveNs) + } + CatalogManager.systemFunctionKindsFromPath(pathEntries) + } + + /** + * True if `system.session` is on the SQL path. Only literal path entries can match: the + * [[org.apache.spark.sql.connector.catalog.CatalogManager.CurrentSchemaEntry$]] marker expands to + * `currentCatalog.name() +: currentNamespace`, and + * `system` is not a registered catalog (it is a synthetic namespace served via + * [[org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog]] / `lookupBuiltinOrTempFunction`, + * not loadable via [[catalog]]), so `currentCatalog.name()` cannot be `"system"`. If that + * invariant ever changes, this short-circuit must be revisited. + * Inspecting effective entries directly avoids loading the configured default catalog. + */ + def isSystemSessionOnPath: Boolean = synchronized { + if (!conf.pathEnabled) return true + sessionPathEntries match { + case None => true + case Some(entries) => entries.exists { + case CatalogManager.LiteralPathEntry(parts) => + CatalogManager.isSystemSessionPathEntry(parts) + case _ => false + } + } + } + + /** + * Single source of truth for analysis-time resolution path entries used by relation, routine, + * and procedure resolution. When `pinnedEntries` are set (a view or SQL function body's + * persisted frozen path) and PATH is enabled, returns them as-is so unqualified lookups follow + * the creation-time path. 
Otherwise falls back to [[sqlResolutionPathEntries]] using the view's + * catalog/namespace as the path default (so unqualified names inside a view body see the view's + * home schema first), while always expanding markers like CURRENT_SCHEMA against the live + * session catalog/namespace. + * + * @param pinnedEntries persisted frozen path entries from view / SQL function metadata + * (typically `AnalysisContext.resolutionPathEntries`). + * @param viewCatalogAndNamespace the view's catalog and namespace + * (typically `AnalysisContext.catalogAndNamespace`); empty when + * not resolving a view body. + */ + def resolutionPathEntriesForAnalysis( + pinnedEntries: Option[Seq[Seq[String]]], + viewCatalogAndNamespace: Seq[String]): Seq[Seq[String]] = { + pinnedEntries match { + case Some(entries) if conf.pathEnabled => entries + case _ => + val expandCatalog = currentCatalog.name() + val expandNamespace = currentNamespace.toSeq + val (pathCatalog, pathNamespace) = + if (viewCatalogAndNamespace.nonEmpty) { + (viewCatalogAndNamespace.head, viewCatalogAndNamespace.tail.toSeq) + } else { + (expandCatalog, expandNamespace) + } + sqlResolutionPathEntries( + pathCatalog, + pathNamespace, + expandCatalog, + expandNamespace) + } + } private var _currentCatalogName: Option[String] = None @@ -215,15 +413,41 @@ class CatalogManager( catalog(_currentCatalogName.getOrElse(conf.getConf(SQLConf.DEFAULT_CATALOG))) } - def setCurrentCatalog(catalogName: String): Unit = synchronized { - // `setCurrentCatalog` is noop if it doesn't switch to a different catalog. - if (currentCatalog.name() != catalogName) { - catalog(catalogName) - _currentCatalogName = Some(catalogName) - _currentNamespace = None + def setCurrentCatalog(catalogName: String): Unit = { + // SPARK-56939: see [[setCurrentNamespace]]. 
Avoid nesting [[CatalogManager]]'s lock + // across [[v1SessionCatalog.setCurrentDatabase]] (which synchronizes on + // [[SessionCatalog]]) to prevent a lock-order inversion with concurrent unqualified + // function resolution. + val needsSwitch = synchronized { + // `setCurrentCatalog` is noop if it doesn't switch to a different catalog. + if (currentCatalog.name() != catalogName) { + // Force-load the named catalog while holding the manager lock to keep the + // not-found error semantics; if loading fails, throw before mutating state. + catalog(catalogName) + true + } else { + false + } + } + if (needsSwitch) { // Reset the current database of v1 `SessionCatalog` when switching current catalog, so that // when we switch back to session catalog, the current namespace definitely is ["default"]. + // Run this BEFORE publishing the new catalog name so that if a reader observes the new + // catalog, the v1 state is already consistent with it. + // + // Concurrency trade-off versus the pre-SPARK-56939 atomic version: between this v1 write + // and the publish below, a concurrent reader of `currentNamespace` sees + // `(oldCatalog, v1.currentDb = default)`. When the old catalog is the session catalog + // (the common case for `USE CATALOG`), the user's previous namespace is briefly invisible + // to that reader until the new name is published. The opposite torn observation + // (`newCatalog`, stale `v1.currentDb`) is avoided by this ordering. This trade-off + // (transient invisibility instead of transient inconsistency, exchanged for breaking the + // deadlock cycle) is accepted; the long-term post-switch state is the same as before. v1SessionCatalog.setCurrentDatabase(conf.defaultDatabase) + synchronized { + _currentCatalogName = Some(catalogName) + _currentNamespace = None + } } } @@ -233,11 +457,20 @@ class CatalogManager( } // Clear all the registered catalogs. Only used in tests. 
- private[sql] def reset(): Unit = synchronized { - catalogs.clear() - _currentNamespace = None - _currentCatalogName = None - _sessionPath = None + // + // SPARK-56939: apply the same split-lock pattern as [[setCurrentNamespace]] / + // [[setCurrentCatalog]] so the locking contract is uniform across every CM mutator that + // calls back into [[v1SessionCatalog]]. Test-only callers don't race against unqualified + // function resolution today, but keeping the contract symmetric prevents future test + // helpers (e.g. session reset in a concurrent harness) from reintroducing the cycle. + private[sql] def reset(): Unit = { + synchronized { + catalogs.clear() + _currentNamespace = None + _currentCatalogName = None + _sessionPath = None + confDefaultPathCache.set(None) + } v1SessionCatalog.setCurrentDatabase(conf.defaultDatabase) } } @@ -282,16 +515,37 @@ private[sql] object CatalogManager extends Logging { /** * True if the multipart name uses the session temp view namespace: two-part `session.view` * or three-part `system.session.view`. The two-part form can also denote a persistent relation - * in schema `session`; resolution order is controlled by [[SQLConf.prioritizeSystemCatalog]]. + * in schema `session`; resolution order is controlled by [[SQLConf#prioritizeSystemCatalog]]. */ def isSessionQualifiedViewName(nameParts: Seq[String]): Boolean = { (nameParts.length == 2 && nameParts.head.equalsIgnoreCase(SESSION_NAMESPACE)) || isFullyQualifiedSystemSessionViewName(nameParts) } - /** True if a SQL path entry is the well-known `system.session` entry. */ + /** True if a SQL path entry is the well-known `system.session` entry (case-insensitive). 
*/ def isSystemSessionPathEntry(parts: Seq[String]): Boolean = - parts == Seq(SYSTEM_CATALOG_NAME, SESSION_NAMESPACE) + parts.length == 2 && + parts.head.equalsIgnoreCase(SYSTEM_CATALOG_NAME) && + parts(1).equalsIgnoreCase(SESSION_NAMESPACE) + + /** True if a SQL path entry is the well-known `system.builtin` entry (case-insensitive). */ + def isSystemBuiltinPathEntry(parts: Seq[String]): Boolean = + parts.length == 2 && + parts.head.equalsIgnoreCase(SYSTEM_CATALOG_NAME) && + parts(1).equalsIgnoreCase(BUILTIN_NAMESPACE) + + /** + * Extract `system.builtin` / `system.session` entries from a resolved PATH, mapped to + * [[SessionCatalog.SessionFunctionKind]] in path order. Pure data conversion -- callers + * decide whether and how to use this list. + */ + def systemFunctionKindsFromPath( + path: Seq[Seq[String]]): Seq[SessionCatalog.SessionFunctionKind] = + path.flatMap { e => + if (isSystemBuiltinPathEntry(e)) Some(SessionCatalog.Builtin) + else if (isSystemSessionPathEntry(e)) Some(SessionCatalog.Temp) + else None + } /** * A single entry in the session SQL path: either a literal schema diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/PathElement.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/PathElement.scala new file mode 100644 index 0000000000000..ee9959762da9e --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/PathElement.scala @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.catalog + +import java.util.Locale + +import scala.collection.mutable + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.connector.catalog.CatalogManager.{ + CurrentSchemaEntry, LiteralPathEntry, SessionPathEntry +} +import org.apache.spark.sql.internal.SQLConf + +/** + * One element on the right-hand side of `SET PATH = ...`: either a well-known shortcut + * keyword (DEFAULT_PATH, SYSTEM_PATH, PATH, CURRENT_SCHEMA / CURRENT_DATABASE) or a + * fully qualified schema reference (`catalog.namespace...` with at least 2 parts). + * + * The same grammar is reused to parse the + * [[org.apache.spark.sql.internal.SQLConf#DEFAULT_PATH()]] conf value, so this + * AST node lives in catalyst beside [[CatalogManager]] rather than in the runtime + * [[org.apache.spark.sql.execution.command.SetPathCommand]]. + */ +private[sql] sealed trait PathElement + +private[sql] object PathElement { + case object DefaultPath extends PathElement + case object SystemPath extends PathElement + case object PathRef extends PathElement + + /** + * Current database/schema (SQL aliases). Stored as the + * [[org.apache.spark.sql.connector.catalog.CatalogManager.CurrentSchemaEntry$]] + * marker so resolution candidates expand against the live `USE SCHEMA`. + */ + case object CurrentSchema extends PathElement + + /** Fully qualified schema reference (`catalog.namespace...`). Must have at least 2 parts. 
*/ + case class SchemaInPath(parts: Seq[String]) extends PathElement + + /** + * Expand a parsed [[PathElement]] list into concrete [[SessionPathEntry]] entries + * suitable for storing in [[CatalogManager._sessionPath]] or returning from + * [[CatalogManager#sessionPathEntries]]. + * + * @param isConfDefaultExpansion when true, an inner [[DefaultPath]] token resolves + * to the spark-builtin default ordering (cycle break) + * rather than reading + * [[org.apache.spark.sql.internal.SQLConf#DEFAULT_PATH()]] again. + * Set to true when this method is invoked while + * parsing [[org.apache.spark.sql.internal.SQLConf#DEFAULT_PATH()]] + * itself. + */ + def expand( + elements: Seq[PathElement], + conf: SQLConf, + catalogManager: CatalogManager, + isConfDefaultExpansion: Boolean = false): Seq[SessionPathEntry] = { + val currentSchemaSentinel = Seq("__current_schema__") + + def toEntries(parts: Seq[Seq[String]]): Seq[SessionPathEntry] = parts.map { + case p if p == currentSchemaSentinel => CurrentSchemaEntry + case p => LiteralPathEntry(p) + } + + def builtinDefaultWithCurrentSchema: Seq[SessionPathEntry] = + toEntries(conf.defaultPathOrder(Seq(currentSchemaSentinel))) + + def defaultPathExpansion: Seq[SessionPathEntry] = { + if (isConfDefaultExpansion) { + // Cycle break: inner DEFAULT_PATH inside the conf default value falls back to the + // spark-builtin default ordering instead of recursing. 
+ builtinDefaultWithCurrentSchema + } else { + catalogManager.confDefaultPathEntries.getOrElse(builtinDefaultWithCurrentSchema) + } + } + + elements.flatMap { + case DefaultPath => + defaultPathExpansion + case SystemPath => + toEntries(conf.systemPathOrder) + case CurrentSchema => + Seq(CurrentSchemaEntry) + case PathRef => + catalogManager.storedSessionPathEntries.getOrElse(defaultPathExpansion) + case SchemaInPath(parts) => + Seq(LiteralPathEntry(parts)) + } + } + + /** + * Reject *static* duplicates in a SET PATH entry list: identical + * [[CatalogManager#LiteralPathEntry]] parts and repeated + * [[org.apache.spark.sql.connector.catalog.CatalogManager.CurrentSchemaEntry$]] markers + * (the `current_schema` / `current_database` + * cross-alias case). Used for the interactive `SET PATH` form to surface user typos at + * statement time. + * + * Deliberately does NOT compare a [[CatalogManager#LiteralPathEntry]] against a + * [[org.apache.spark.sql.connector.catalog.CatalogManager.CurrentSchemaEntry$]]: such a + * "duplicate" depends on the live `USE SCHEMA` + * and is harmless at lookup (first-match resolution skips the dead literal). + * [[org.apache.spark.sql.internal.SQLConf#DEFAULT_PATH()]] expansion skips this check + * entirely so transient `USE`-induced + * collisions don't wedge unqualified resolution. 
+ */ + def validateNoStaticDuplicates( + entries: Seq[SessionPathEntry], + caseSensitive: Boolean): Seq[SessionPathEntry] = { + val seenLiterals = new mutable.HashSet[Seq[String]] + var seenCurrentSchema = false + entries.foreach { + case CurrentSchemaEntry => + if (seenCurrentSchema) { + throw new AnalysisException( + errorClass = "DUPLICATE_SQL_PATH_ENTRY", + messageParameters = Map("pathEntry" -> "current_schema")) + } + seenCurrentSchema = true + case LiteralPathEntry(parts) => + val key = if (caseSensitive) parts else parts.map(_.toLowerCase(Locale.ROOT)) + if (!seenLiterals.add(key)) { + throw new AnalysisException( + errorClass = "DUPLICATE_SQL_PATH_ENTRY", + messageParameters = Map( + "pathEntry" -> + parts.map(p => if (p.contains(".")) s"`$p`" else p).mkString("."))) + } + } + entries + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 5cfdbc66e3f42..9b899867a9e37 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -909,23 +909,15 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat messageParameters = Map("dt" -> dt.toString)) } - def unresolvedVariableError(name: Seq[String], searchPath: Seq[String]): Throwable = { - new AnalysisException( - errorClass = "UNRESOLVED_VARIABLE", - messageParameters = Map( - "variableName" -> toSQLId(name), - "searchPath" -> toSQLId(searchPath))) - } - def unresolvedVariableError( name: Seq[String], - searchPath: Seq[String], + pathEntries: Seq[Seq[String]], origin: Origin): Throwable = { new AnalysisException( errorClass = "UNRESOLVED_VARIABLE", messageParameters = Map( "variableName" -> toSQLId(name), - "searchPath" -> toSQLId(searchPath)), + "searchPath" -> pathEntries.map(toSQLId).mkString("[", ", ", "]")), 
origin = origin) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 03d6fe96c33bf..77ef8bb600f9c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -43,9 +43,11 @@ import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.analysis.{HintErrorLogger, Resolver} import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.plans.logical.HintErrorHandler import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME +import org.apache.spark.sql.connector.catalog.PathElement.PathRef import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.types.{AtomicType, TimestampNTZType, TimestampType} import org.apache.spark.storage.{StorageLevel, StorageLevelMapper} @@ -155,6 +157,13 @@ object SQLConf { override def initialValue: SQLConf = null } + /** + * Returns the [[SQLConf]] installed by an outer [[withExistingConf]] scope, or [[None]] if + * there is no such scope. Unlike [[get]], this peeks directly at the threadlocal so callers + * can distinguish "no outer scope" from "outer scope happens to install the same conf". 
+ */ + def getExistingConfIfSet: Option[SQLConf] = Option(existingConf.get()) + def withExistingConf[T](conf: SQLConf)(f: => T): T = { val old = existingConf.get() existingConf.set(conf) @@ -2443,6 +2452,29 @@ object SQLConf { .booleanConf .createWithDefault(false) + val DEFAULT_PATH = + buildConf("spark.sql.defaultPath") + .version("4.2.0") + .doc("Default SQL PATH used when no SET PATH has been issued in the session; this is " + + "also the value to which `SET PATH = DEFAULT_PATH` expands. Accepts the full SET PATH " + + "grammar; an inner DEFAULT_PATH token resolves to the spark-builtin default ordering. " + + "The PATH keyword is not allowed in this conf value. " + + "When empty, the spark-builtin default ordering controlled by " + + "`spark.sql.functionResolution.sessionOrder` applies. Validated for syntax at set time; " + + "redundant entries are tolerated (lookup uses first-match resolution). The interactive " + + "SET PATH form still rejects static duplicates as a typo guard.") + .withBindingPolicy(ConfigBindingPolicy.SESSION) + .stringConf + .checkValue( + v => + v == null || v.trim.isEmpty || + Try(CatalystSqlParser.parsePathElements(v.trim)) + .toOption + .exists(!_.contains(PathRef)), + "The value must be empty or a comma-separated SET PATH element list " + + "(same grammar as SET PATH, except PATH is not allowed).") + .createWithDefault("") + // Whether to retain group by columns or not in GroupedData.agg. val DATAFRAME_RETAIN_GROUP_COLUMNS = buildConf("spark.sql.retainGroupColumns") .version("1.4.0") @@ -8525,6 +8557,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf { def pathEnabled: Boolean = getConf(SQLConf.PATH_ENABLED) + def defaultPath: String = getConf(SQLConf.DEFAULT_PATH) + /** * Returns the resolution search path for error messages and resolution order. * This is the single source of truth for the search path used for functions, tables, and views. 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TableLookupCacheSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TableLookupCacheSuite.scala index 63d5523be072d..75846aa49616c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TableLookupCacheSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TableLookupCacheSuite.scala @@ -83,6 +83,9 @@ class TableLookupCacheSuite extends AnalysisTest with Matchers { .thenReturn(defaultPath) when(catalogManager.sqlResolutionPathEntries(any[String], any[Seq[String]])) .thenReturn(defaultPath) + when(catalogManager.resolutionPathEntriesForAnalysis( + any[Option[Seq[Seq[String]]]], any[Seq[String]])) + .thenReturn(defaultPath) new Analyzer(catalogManager) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SqlPathFormatSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SqlPathFormatSuite.scala new file mode 100644 index 0000000000000..0ed3bcfb19639 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SqlPathFormatSuite.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.catalog + +import org.json4s.JsonAST.{JArray, JObject, JString} +import org.json4s.jackson.JsonMethods.{compact, render} + +import org.apache.spark.SparkFunSuite + +/** + * Unit tests for [[SqlPathFormat]] -- the helper that converts the raw JSON-array-of-arrays + * path stored on view / SQL function metadata into the JSON-object form used by DESCRIBE + * AS JSON and the human-readable form used by DESCRIBE EXTENDED. + */ +class SqlPathFormatSuite extends SparkFunSuite { + + private def compactJson(v: JArray): String = compact(render(v)) + + test("toDescribeJson: maps each [catalog, ns...] entry to a JSON object") { + val stored = + """[["spark_catalog","default"],["system","builtin"]]""" + val result = SqlPathFormat.toDescribeJson(stored) + .getOrElse(fail(s"Expected a JSON value, got None for: $stored")) + val expected = JArray(List( + JObject("catalog_name" -> JString("spark_catalog"), + "namespace" -> JArray(List(JString("default")))), + JObject("catalog_name" -> JString("system"), + "namespace" -> JArray(List(JString("builtin")))))) + assert(compactJson(result.asInstanceOf[JArray]) == compactJson(expected)) + } + + test("toDescribeJson: multi-level namespace becomes [head, tail...]") { + val stored = """[["cat1","db","sub"]]""" + val result = SqlPathFormat.toDescribeJson(stored) + .getOrElse(fail("Expected a JSON value")) + val expected = JArray(List( + JObject("catalog_name" -> JString("cat1"), + "namespace" -> JArray(List(JString("db"), JString("sub")))))) + assert(compactJson(result.asInstanceOf[JArray]) == compactJson(expected)) + } + + test("toDescribeJson: empty array returns None") { + assert(SqlPathFormat.toDescribeJson("[]").isEmpty) + } + + test("toDescribeJson: malformed payloads return None") { + Seq( + "", + "not_json", + "{}", + """{"foo":1}""", + """[1, 2, 3]""" + ).foreach { payload => + 
assert(SqlPathFormat.toDescribeJson(payload).isEmpty, s"payload=$payload") + } + } + + test("formatForDisplay: renders plain identifiers without backticks") { + val json = SqlPathFormat.toDescribeJson( + """[["spark_catalog","default"],["system","builtin"]]""") + .getOrElse(fail("Expected a JSON value")) + val rendered = SqlPathFormat.formatForDisplay(json) + .getOrElse(fail("Expected a display string")) + assert(rendered == "spark_catalog.default, system.builtin") + } + + test("formatForDisplay: backticks identifiers that need quoting") { + val json = SqlPathFormat.toDescribeJson( + """[["spark_catalog","weird.schema"]]""") + .getOrElse(fail("Expected a JSON value")) + val rendered = SqlPathFormat.formatForDisplay(json) + .getOrElse(fail("Expected a display string")) + assert(rendered == "spark_catalog.`weird.schema`") + } + + test("formatForDisplay: round-trips multi-level namespaces") { + val json = SqlPathFormat.toDescribeJson("""[["cat","db","ns"]]""") + .getOrElse(fail("Expected a JSON value")) + val rendered = SqlPathFormat.formatForDisplay(json) + .getOrElse(fail("Expected a display string")) + assert(rendered == "cat.db.ns") + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala index acf86aae1eea3..64b2ac91fbd61 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala @@ -22,9 +22,11 @@ import java.net.URI import scala.jdk.CollectionConverters._ import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{EmptyFunctionRegistry, FakeV2SessionCatalog, NoSuchNamespaceException} import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog => V1InMemoryCatalog, SessionCatalog} 
import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.connector.catalog.CatalogManager.{CurrentSchemaEntry, LiteralPathEntry} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -150,6 +152,115 @@ class CatalogManagerSuite extends SparkFunSuite with SQLHelper { assert(CatalogManager.deserializePathEntries(payload).isEmpty, s"payload=$payload") } } + + test("serializePathEntries round-trips through deserialize for typical inputs") { + val cases = Seq( + Seq(Seq("spark_catalog", "default"), Seq("system", "builtin")), + Seq(Seq("system", "session")), + Seq.empty[Seq[String]]) + cases.foreach { entries => + val payload = CatalogManager.serializePathEntries(entries) + val parsed = CatalogManager.deserializePathEntries(payload) + .getOrElse(fail(s"Expected payload to round-trip: $payload")) + assert(parsed === entries, s"Round-trip mismatch for $entries; got $parsed") + } + } + + test("serializePathEntries round-trips multi-level and quoted identifiers") { + val entries = Seq( + Seq("cat", "ns1", "ns2"), + Seq("spark_catalog", "sch.with.dots"), + Seq("spark_catalog", "schema with spaces")) + val payload = CatalogManager.serializePathEntries(entries) + val parsed = CatalogManager.deserializePathEntries(payload) + .getOrElse(fail(s"Expected payload to round-trip: $payload")) + assert(parsed === entries) + } + + test("deserializePathEntriesOrFail raises a clear AnalysisException for bad payloads") { + val e = intercept[AnalysisException] { + CatalogManager.deserializePathEntriesOrFail( + storedPathStr = "{bad-json", + objectType = "view", + objectName = "default.v_broken") + } + assert(e.getMessage.contains("Invalid stored SQL path metadata for view")) + assert(e.getMessage.contains("default.v_broken")) + } + + // --------------------------------------------------------------------------- + // Direct unit tests for [[PathElement.validateNoStaticDuplicates]]. 
The end-to-end + // `SetPathSuite` exercises this via SQL, but the duplicate-detection rules + // (literal-vs-literal, current_schema-vs-current_schema, case-sensitivity) are pure + // data and benefit from focused tests close to the implementation. + // --------------------------------------------------------------------------- + + private def literalEntry(parts: String*): LiteralPathEntry = LiteralPathEntry(parts.toSeq) + + test("validateNoStaticDuplicates: no duplicates returns the input unchanged") { + val entries = Seq( + literalEntry("spark_catalog", "default"), + literalEntry("system", "builtin"), + CurrentSchemaEntry) + assert(PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) === entries) + } + + test("validateNoStaticDuplicates: duplicate literal under case-insensitive collation") { + val entries = Seq( + literalEntry("spark_catalog", "default"), + literalEntry("Spark_Catalog", "DEFAULT")) + val e = intercept[AnalysisException] { + PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) + } + assert(e.getCondition == "DUPLICATE_SQL_PATH_ENTRY") + assert(e.getMessageParameters.get("pathEntry") == "Spark_Catalog.DEFAULT") + } + + test("validateNoStaticDuplicates: case-sensitive mode keeps differently cased entries") { + val entries = Seq( + literalEntry("spark_catalog", "DEFAULT"), + literalEntry("spark_catalog", "default")) + assert(PathElement.validateNoStaticDuplicates(entries, caseSensitive = true) === entries) + } + + test("validateNoStaticDuplicates: repeated CurrentSchemaEntry is rejected") { + val entries = Seq(CurrentSchemaEntry, CurrentSchemaEntry) + val e = intercept[AnalysisException] { + PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) + } + assert(e.getCondition == "DUPLICATE_SQL_PATH_ENTRY") + assert(e.getMessageParameters.get("pathEntry") == "current_schema") + } + + test("validateNoStaticDuplicates: literal-vs-CurrentSchemaEntry collision is tolerated") { + // The CurrentSchemaEntry 
marker resolves dynamically against USE SCHEMA, so a literal + // that happens to match the live current schema is intentionally not flagged here. + val entries = Seq( + literalEntry("spark_catalog", "default"), + CurrentSchemaEntry, + literalEntry("system", "builtin")) + assert(PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) === entries) + } + + test("validateNoStaticDuplicates: identifier containing a dot is quoted in the error") { + val entries = Seq( + literalEntry("spark_catalog", "weird.schema"), + literalEntry("spark_catalog", "weird.schema")) + val e = intercept[AnalysisException] { + PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) + } + assert(e.getMessageParameters.get("pathEntry") == "spark_catalog.`weird.schema`") + } + + test("validateNoStaticDuplicates: multi-level namespace duplicate is flagged") { + val entries = Seq( + literalEntry("cat", "db", "ns"), + literalEntry("cat", "db", "ns")) + val e = intercept[AnalysisException] { + PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) + } + assert(e.getMessageParameters.get("pathEntry") == "cat.db.ns") + } } class DummyCatalog extends CatalogPlugin { diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SqlPathE2ETestSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SqlPathE2ETestSuite.scala new file mode 100644 index 0000000000000..88ed1f31c86ae --- /dev/null +++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SqlPathE2ETestSuite.scala @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.connect + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.connect.test.{ConnectFunSuite, RemoteSparkSession, SQLHelper} +import org.apache.spark.sql.functions.current_path + +/** + * End-to-end coverage for the SQL Standard PATH feature over Spark Connect. + * + * SET PATH and the frozen-path semantics for persisted views / SQL functions are implemented + * entirely server-side, but the analyzer state (`AnalysisContext`) that carries the pinned path + * must survive plan reification across the gRPC boundary. These tests run the public surface over + * a real Connect client so regressions there are caught: + * - `SET PATH = ...` is parsed and applied to the session, + * - `current_path()` (SQL and the DataFrame builtin) reflects it, + * - a persisted view created under one path resolves its body under the frozen path even when + * the invoker switches the session path. + */ +class SqlPathE2ETestSuite extends ConnectFunSuite with RemoteSparkSession with SQLHelper { + + test("SET PATH and current_path() round-trip over Connect") { + withSQLConf("spark.sql.path.enabled" -> "true") { + try { + spark.sql("SET PATH = spark_catalog.default, system.builtin") + val sqlPath = spark.sql("SELECT current_path()").head().getString(0) + assert( + sqlPath == "spark_catalog.default,system.builtin", + s"current_path() over Connect should reflect SET PATH; got: $sqlPath") + + // DataFrame builtin should agree with the SQL form. 
+ val apiPath = spark.range(1).select(current_path()).head().getString(0) + assert( + apiPath == sqlPath, + s"functions.current_path() should match SQL current_path(); got: $apiPath vs $sqlPath") + } finally { + spark.sql("SET PATH = DEFAULT_PATH") + } + } + } + + test("Persisted view body uses frozen path over Connect") { + withSQLConf("spark.sql.path.enabled" -> "true") { + withDatabase("connect_path_a", "connect_path_b") { + spark.sql("CREATE DATABASE connect_path_a") + spark.sql("CREATE DATABASE connect_path_b") + spark.sql("CREATE TABLE connect_path_a.frozen_t USING parquet AS SELECT 1 AS id") + spark.sql("CREATE TABLE connect_path_b.frozen_t USING parquet AS SELECT 2 AS id") + withView("default.v_path_connect") { + try { + // Create the view under PATH=a. + spark.sql("SET PATH = spark_catalog.connect_path_a, system.builtin") + spark.sql("CREATE VIEW default.v_path_connect AS SELECT id FROM frozen_t") + + // Switch the session path to b; bare `frozen_t` now resolves through b, + // but the view's frozen path keeps it pinned to a. 
+ spark.sql("SET PATH = spark_catalog.connect_path_b, system.builtin") + val bare = spark.sql("SELECT id FROM frozen_t").head().getInt(0) + assert(bare == 2, s"Bare `frozen_t` should follow live PATH=b; got: $bare") + val viaView = spark.sql("SELECT id FROM default.v_path_connect").head().getInt(0) + assert( + viaView == 1, + s"View body should resolve via the frozen creation-time PATH; got: $viaView") + } finally { + spark.sql("SET PATH = DEFAULT_PATH") + } + } + } + } + } + + test("SET PATH is rejected over Connect when feature is disabled") { + withSQLConf("spark.sql.path.enabled" -> "false") { + val ex = intercept[AnalysisException] { + spark.sql("SET PATH = spark_catalog.default") + } + assert( + ex.getCondition == "UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED", + s"Expected SET_PATH_WHEN_DISABLED, got: ${ex.getCondition}") + } + } +} diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/ProtoToParsedPlanTestSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/ProtoToParsedPlanTestSuite.scala index 8f20b277ddeb8..8cfa219d370c9 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/ProtoToParsedPlanTestSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/ProtoToParsedPlanTestSuite.scala @@ -132,17 +132,16 @@ class ProtoToParsedPlanTestSuite extends SharedSparkSession with ResourceHelper /** * Isolated from [[SharedSparkSession]] so PATH / session path settings do not affect catalog. + * Cloned from the test session's conf so all sparkConf overrides (ANSI, alias config, etc.) are + * preserved automatically; only the genuine isolation knob is overridden explicitly. 
*/ - private val analyzerIsolationConf: SQLConf = { - val c = new SQLConf() + private lazy val analyzerIsolationConf: SQLConf = { + val c = spark.sessionState.conf.clone() c.setConf(SQLConf.PATH_ENABLED, false) - // Match [[sparkConf]]: a bare SQLConf defaults ANSI_ENABLED to true, which changes - // function signatures in analyzed plans (e.g. make_date) vs golden files. - c.setConf(SQLConf.ANSI_ENABLED, false) c } - private val analyzer = { + private lazy val analyzer = { val inMemoryCatalog = new InMemoryChangelogCatalog // Name must match [[CatalogManager.SESSION_CATALOG_NAME]]: path entries use // [[currentCatalog.name()]], then resolution calls [[catalogManager.catalog]] on that segment. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 8f4c77840f0cc..b4ece7329094e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -359,15 +359,7 @@ class SparkSqlAstBuilder extends AstBuilder { * }}} */ override def visitSetPath(ctx: SetPathContext): LogicalPlan = withOrigin(ctx) { - val elements = ctx.pathElement().asScala.map { pe => - if (pe.DEFAULT_PATH() != null) PathElement.DefaultPath - else if (pe.SYSTEM_PATH() != null) PathElement.SystemPath - else if (pe.PATH() != null) PathElement.PathRef - else if (pe.CURRENT_DATABASE() != null) PathElement.CurrentDatabase - else if (pe.CURRENT_SCHEMA() != null) PathElement.CurrentSchema - else PathElement.SchemaInPath(visitMultipartIdentifier(pe.multipartIdentifier())) - }.toSeq - SetPathCommand(elements) + SetPathCommand(ctx.pathElement().asScala.map(visitPathElement).toSeq) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetPathCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetPathCommand.scala index 
70538160eefdb..82ab46ec9b140 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetPathCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetPathCommand.scala @@ -17,40 +17,17 @@ package org.apache.spark.sql.execution.command -import java.util.Locale - import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.connector.catalog.CatalogManager -import org.apache.spark.sql.connector.catalog.CatalogManager.{ - CurrentSchemaEntry, LiteralPathEntry, SessionPathEntry -} -import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.connector.catalog.PathElement import org.apache.spark.sql.internal.SQLConf -/** - * Path element for SET PATH: either a well-known shortcut or a fully qualified schema reference. - * SchemaInPath requires at least 2 parts (catalog.namespace); multi-level namespaces are allowed. - */ -sealed trait PathElement - -object PathElement { - case object DefaultPath extends PathElement - case object SystemPath extends PathElement - case object PathRef extends PathElement - /** - * Current database/schema (SQL aliases). Stored as system.current_schema; expands when - * building resolution candidates so later USE SCHEMA is reflected. - */ - case object CurrentDatabase extends PathElement - case object CurrentSchema extends PathElement - /** Fully qualified schema reference (catalog.namespace...). Must have at least 2 parts. */ - case class SchemaInPath(parts: Seq[String]) extends PathElement -} - /** * Command for SET PATH = pathElement (, pathElement)* * Expands shortcuts at run time, validates no duplicates, and sets the internal session path. + * + * The [[PathElement]] AST and its expansion live in catalyst so that the same grammar can be + * reused to parse the [[SQLConf.DEFAULT_PATH]] conf value. 
*/ case class SetPathCommand(elements: Seq[PathElement]) extends LeafRunnableCommand { @@ -64,23 +41,9 @@ case class SetPathCommand(elements: Seq[PathElement]) extends LeafRunnableComman } val conf = sparkSession.sessionState.conf val catalogManager = sparkSession.sessionState.catalogManager - val currentCatalog = catalogManager.currentCatalog.name - val currentNamespace = catalogManager.currentNamespace.toSeq - val caseSensitive = conf.caseSensitiveAnalysis - val expanded = expandPathElements(elements, conf, catalogManager) - val seen = new scala.collection.mutable.HashSet[Seq[String]] - expanded.foreach { entry => - val concrete = entry.resolve(currentCatalog, currentNamespace) - def normalize(s: String): String = if (caseSensitive) s else s.toLowerCase(Locale.ROOT) - val key = concrete.map(normalize) - if (!seen.add(key)) { - throw new AnalysisException( - errorClass = "DUPLICATE_SQL_PATH_ENTRY", - messageParameters = Map("pathEntry" -> - concrete.map(p => if (p.contains(".")) s"`$p`" else p).mkString("."))) - } - } + val expanded0 = PathElement.expand(elements, conf, catalogManager) + val expanded = PathElement.validateNoStaticDuplicates(expanded0, conf.caseSensitiveAnalysis) if (expanded.isEmpty) { catalogManager.clearSessionPath() @@ -89,36 +52,4 @@ case class SetPathCommand(elements: Seq[PathElement]) extends LeafRunnableComman } Seq.empty } - - private def expandPathElements( - elements: Seq[PathElement], - conf: SQLConf, - catalogManager: CatalogManager): Seq[SessionPathEntry] = { - val currentSchemaSentinel = Seq("__current_schema__") - - def toEntries(parts: Seq[Seq[String]]): Seq[SessionPathEntry] = parts.map { - case p if p == currentSchemaSentinel => CurrentSchemaEntry - case p => LiteralPathEntry(p) - } - - def defaultWithCurrentSchema: Seq[SessionPathEntry] = - toEntries(conf.defaultPathOrder(Seq(currentSchemaSentinel))) - - elements.flatMap { - case PathElement.DefaultPath => - defaultWithCurrentSchema - case PathElement.SystemPath => - 
toEntries(conf.systemPathOrder) - case PathElement.CurrentDatabase | PathElement.CurrentSchema => - Seq(CurrentSchemaEntry) - case PathElement.PathRef => - catalogManager.sessionPathEntries.getOrElse(defaultWithCurrentSchema) - case PathElement.SchemaInPath(parts) => - if (parts.length < 2) { - throw QueryCompilationErrors.invalidSqlPathSchemaReferenceError(parts.mkString(".")) - } - Seq(LiteralPathEntry(parts)) - } - } - } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala index 9839a3edbbab5..79db97744496e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala @@ -254,9 +254,13 @@ case class DropFunctionCommand( identifier.funcName } - // Check if temp function exists first - if it does, allow dropping it even if a builtin - // with the same name exists (shadowing case) - if (!catalog.isTemporaryFunction(FunctionIdentifier(funcName)) && + // Keep DROP TEMPORARY FUNCTION semantics consistent for unqualified names: + // - builtin name, no temp present, no IF EXISTS => FORBIDDEN_OPERATION + // - IF EXISTS => no-op + // Qualified temp namespaces (session / system.session) always target temp functions. 
+ if (identifier.database.isEmpty && + !ifExists && + !catalog.isTemporaryFunction(FunctionIdentifier(funcName)) && catalog.isBuiltinFunction(funcName)) { throw QueryCompilationErrors.cannotDropBuiltinFuncError(funcName) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/FetchCursorExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/FetchCursorExec.scala index ad867e6537671..dc28d4a7e7f14 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/FetchCursorExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/FetchCursorExec.scala @@ -194,7 +194,8 @@ case class FetchCursorExec( case FakeLocalCatalog => scriptingVariableManager.get case FakeSystemCatalog if tempVariableManager.get(namePartsCaseAdjusted).isEmpty => - throw unresolvedVariableError(namePartsCaseAdjusted, Seq("SYSTEM", "SESSION")) + throw unresolvedVariableError( + namePartsCaseAdjusted, Seq(Seq("SYSTEM", "SESSION")), varRef.origin) case FakeSystemCatalog => tempVariableManager @@ -207,7 +208,7 @@ case class FetchCursorExec( Literal(value, varRef.dataType) ) - variableManager.set(namePartsCaseAdjusted, varDef) + variableManager.set(namePartsCaseAdjusted, varDef, varRef.origin) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/SetVariableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/SetVariableExec.scala index ef8e238832b35..9861bd77616ac 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/SetVariableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/SetVariableExec.scala @@ -80,7 +80,8 @@ case class SetVariableExec(variables: Seq[VariableReference], query: SparkPlan) case FakeLocalCatalog => scriptingVariableManager.get case FakeSystemCatalog if tempVariableManager.get(namePartsCaseAdjusted).isEmpty => - throw unresolvedVariableError(namePartsCaseAdjusted, Seq("SYSTEM", 
"SESSION")) + throw unresolvedVariableError( + namePartsCaseAdjusted, Seq(Seq("SYSTEM", "SESSION")), variable.origin) case FakeSystemCatalog => tempVariableManager @@ -90,7 +91,7 @@ case class SetVariableExec(variables: Seq[VariableReference], query: SparkPlan) val varDef = VariableDefinition( variable.identifier, variable.varDef.defaultValueSQL, Literal(value, variable.dataType)) - variableManager.set(namePartsCaseAdjusted, varDef) + variableManager.set(namePartsCaseAdjusted, varDef, variable.origin) } override def output: Seq[Attribute] = Seq.empty diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/VariableAssignmentUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/VariableAssignmentUtils.scala index 3a4d55169d900..d99ddae538a67 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/VariableAssignmentUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/VariableAssignmentUtils.scala @@ -68,7 +68,8 @@ object VariableAssignmentUtils { case FakeLocalCatalog => scriptingVariableManager.get case FakeSystemCatalog if tempVariableManager.get(namePartsCaseAdjusted).isEmpty => - throw unresolvedVariableError(namePartsCaseAdjusted, Seq("SYSTEM", "SESSION")) + throw unresolvedVariableError( + namePartsCaseAdjusted, Seq(Seq("SYSTEM", "SESSION")), varRef.origin) case FakeSystemCatalog => tempVariableManager @@ -81,6 +82,6 @@ object VariableAssignmentUtils { Literal(value, varRef.dataType) ) - variableManager.set(namePartsCaseAdjusted, varDef) + variableManager.set(namePartsCaseAdjusted, varDef, varRef.origin) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingLocalVariableManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingLocalVariableManager.scala index 0ad1974b7d769..c8a893f374f39 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingLocalVariableManager.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingLocalVariableManager.scala @@ -21,6 +21,7 @@ import org.apache.spark.SparkException import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{FakeLocalCatalog, ResolvedIdentifier} import org.apache.spark.sql.catalyst.catalog.{VariableDefinition, VariableManager} +import org.apache.spark.sql.catalyst.trees.Origin import org.apache.spark.sql.connector.catalog.Identifier import org.apache.spark.sql.errors.DataTypeErrorsBase import org.apache.spark.sql.errors.QueryCompilationErrors.unresolvedVariableError @@ -47,13 +48,18 @@ class SqlScriptingLocalVariableManager(context: SqlScriptingExecutionContext) context.currentScope.variables.put(name, varDef) } - override def set(nameParts: Seq[String], varDef: VariableDefinition): Unit = { + override def set( + nameParts: Seq[String], + varDef: VariableDefinition, + origin: Origin): Unit = { val scope = findScopeOfVariable(nameParts) .getOrElse( - throw unresolvedVariableError(nameParts, varDef.identifier.namespace().toIndexedSeq)) + throw unresolvedVariableError( + nameParts, Seq(varDef.identifier.namespace().toIndexedSeq), origin)) if (!scope.variables.contains(nameParts.last)) { - throw unresolvedVariableError(nameParts, varDef.identifier.namespace().toIndexedSeq) + throw unresolvedVariableError( + nameParts, Seq(varDef.identifier.namespace().toIndexedSeq), origin) } scope.variables.put(nameParts.last, varDef) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-path.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-path.sql.out new file mode 100644 index 0000000000000..3a494d1cd3b74 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-path.sql.out @@ -0,0 +1,1041 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT current_path() +-- !query analysis +Project [current_path() AS current_path()#x] ++- OneRowRelation + + +-- !query +SET PATH = 
spark_catalog.default, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT current_path() +-- !query analysis +Project [current_path() AS current_path()#x] ++- OneRowRelation + + +-- !query +SET PATH = Spark_Catalog.Default, System.Builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(Spark_Catalog, Default)), SchemaInPath(List(System, Builtin))] + + +-- !query +SELECT current_path() +-- !query analysis +Project [current_path() AS current_path()#x] ++- OneRowRelation + + +-- !query +SET PATH = spark_catalog.`sch.b`, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sch.b)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT current_path() +-- !query analysis +Project [current_path() AS current_path()#x] ++- OneRowRelation + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +SELECT current_path() +-- !query analysis +Project [current_path() AS current_path()#x] ++- OneRowRelation + + +-- !query +SET PATH = SYSTEM_PATH +-- !query analysis +SetPathCommand [SystemPath] + + +-- !query +SELECT current_path() +-- !query analysis +Project [current_path() AS current_path()#x] ++- OneRowRelation + + +-- !query +SET PATH = spark_catalog.default, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))] + + +-- !query +SET PATH = PATH, system.session +-- !query analysis +SetPathCommand [PathRef, SchemaInPath(List(system, session))] + + +-- !query +SELECT current_path() +-- !query analysis +Project [current_path() AS current_path()#x] ++- OneRowRelation + + +-- !query +USE spark_catalog.default +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [default] + + +-- 
!query +SET PATH = current_schema, system.builtin +-- !query analysis +SetPathCommand [CurrentSchema, SchemaInPath(List(system, builtin))] + + +-- !query +SELECT current_path() +-- !query analysis +Project [current_path() AS current_path()#x] ++- OneRowRelation + + +-- !query +SET PATH = current_database, system.builtin +-- !query analysis +SetPathCommand [CurrentSchema, SchemaInPath(List(system, builtin))] + + +-- !query +SELECT current_path() +-- !query analysis +Project [current_path() AS current_path()#x] ++- OneRowRelation + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +SET PATH = spark_catalog.default, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT CURRENT_PATH = current_path() AS ansi_form_matches +-- !query analysis +Project [(current_path() = current_path()) AS ansi_form_matches#x] ++- OneRowRelation + + +-- !query +SELECT current_path(1) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "1", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "0", + "functionName" : "`current_path`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 22, + "fragment" : "current_path(1)" + } ] +} + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +SET PATH = spark_catalog.default, spark_catalog.default +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "spark_catalog.default" + } +} + + +-- !query +SET PATH = spark_catalog.DEFAULT, spark_catalog.default +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : 
"DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "spark_catalog.default" + } +} + + +-- !query +SET PATH = DEFAULT_PATH, system.builtin +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "system.builtin" + } +} + + +-- !query +SET PATH = SYSTEM_PATH, SYSTEM_PATH +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "system.builtin" + } +} + + +-- !query +SET PATH = current_database, current_schema +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "current_schema" + } +} + + +-- !query +SET PATH = my_schema_no_catalog +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "INVALID_SQL_PATH_SCHEMA_REFERENCE", + "sqlState" : "42601", + "messageParameters" : { + "qualifiedName" : "my_schema_no_catalog" + } +} + + +-- !query +CREATE SCHEMA sql_path_routines +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_routines] + + +-- !query +CREATE FUNCTION sql_path_routines.pick() RETURNS INT RETURN 7 +-- !query analysis +CreateSQLFunctionCommand spark_catalog.sql_path_routines.pick, INT, 7, false, false, false, false + + +-- !query +SET PATH = spark_catalog.sql_path_routines, spark_catalog.default, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_routines)), SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT pick() +-- !query analysis +Project [spark_catalog.sql_path_routines.pick() AS spark_catalog.sql_path_routines.pick()#x] ++- Project + +- OneRowRelation + + +-- !query +SET PATH 
= DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +CREATE FUNCTION sql_path_routines.pick_tvf() +RETURNS TABLE(val INT) +RETURN SELECT 7 AS val +-- !query analysis +CreateSQLFunctionCommand spark_catalog.sql_path_routines.pick_tvf, val INT, SELECT 7 AS val, true, false, false, false + + +-- !query +SET PATH = spark_catalog.sql_path_routines, spark_catalog.default, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_routines)), SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT * FROM pick_tvf() +-- !query analysis +Project [val#x] ++- SQLFunctionNode spark_catalog.sql_path_routines.pick_tvf + +- SubqueryAlias pick_tvf + +- Project [cast(val#x as int) AS val#x] + +- Project [7 AS val#x] + +- OneRowRelation + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +CREATE SCHEMA sql_path_routines_b +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_routines_b] + + +-- !query +CREATE FUNCTION sql_path_routines_b.pick() RETURNS INT RETURN 11 +-- !query analysis +CreateSQLFunctionCommand spark_catalog.sql_path_routines_b.pick, INT, 11, false, false, false, false + + +-- !query +SET PATH = spark_catalog.sql_path_routines, spark_catalog.sql_path_routines_b, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_routines)), SchemaInPath(List(spark_catalog, sql_path_routines_b)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT pick() AS from_first_schema +-- !query analysis +Project [spark_catalog.sql_path_routines.pick() AS from_first_schema#x] ++- Project + +- OneRowRelation + + +-- !query +SET PATH = spark_catalog.sql_path_routines_b, spark_catalog.sql_path_routines, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_routines_b)), 
SchemaInPath(List(spark_catalog, sql_path_routines)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT pick() AS from_first_schema +-- !query analysis +Project [spark_catalog.sql_path_routines_b.pick() AS from_first_schema#x] ++- Project + +- OneRowRelation + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +SET PATH = spark_catalog.default, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT pick() +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_ROUTINE", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`pick`", + "searchPath" : "[`spark_catalog`.`default`, `system`.`builtin`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 13, + "fragment" : "pick()" + } ] +} + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +DROP FUNCTION sql_path_routines.pick +-- !query analysis +DropFunctionCommand spark_catalog.sql_path_routines.pick, false, false + + +-- !query +DROP FUNCTION sql_path_routines.pick_tvf +-- !query analysis +DropFunctionCommand spark_catalog.sql_path_routines.pick_tvf, false, false + + +-- !query +DROP FUNCTION sql_path_routines_b.pick +-- !query analysis +DropFunctionCommand spark_catalog.sql_path_routines_b.pick, false, false + + +-- !query +DROP SCHEMA sql_path_routines +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_routines] + + +-- !query +DROP SCHEMA sql_path_routines_b +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_routines_b] + + +-- !query +CREATE SCHEMA sql_path_relations_a +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), 
[sql_path_relations_a] + + +-- !query +CREATE SCHEMA sql_path_relations_b +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_relations_b] + + +-- !query +CREATE TABLE sql_path_relations_a.tbl USING parquet AS SELECT 1 AS id +-- !query analysis +CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_path_relations_a`.`tbl`, ErrorIfExists, [id] + +- Project [1 AS id#x] + +- OneRowRelation + + +-- !query +CREATE TABLE sql_path_relations_b.tbl USING parquet AS SELECT 2 AS id +-- !query analysis +CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_path_relations_b`.`tbl`, ErrorIfExists, [id] + +- Project [2 AS id#x] + +- OneRowRelation + + +-- !query +SET PATH = spark_catalog.sql_path_relations_a, spark_catalog.sql_path_relations_b, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_relations_a)), SchemaInPath(List(spark_catalog, sql_path_relations_b)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT id FROM tbl AS from_first_schema +-- !query analysis +Project [id#x] ++- SubqueryAlias from_first_schema + +- SubqueryAlias spark_catalog.sql_path_relations_a.tbl + +- Relation spark_catalog.sql_path_relations_a.tbl[id#x] parquet + + +-- !query +SET PATH = spark_catalog.sql_path_relations_b, spark_catalog.sql_path_relations_a, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_relations_b)), SchemaInPath(List(spark_catalog, sql_path_relations_a)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT id FROM tbl AS from_first_schema +-- !query analysis +Project [id#x] ++- SubqueryAlias from_first_schema + +- SubqueryAlias spark_catalog.sql_path_relations_b.tbl + +- Relation spark_catalog.sql_path_relations_b.tbl[id#x] parquet + + +-- !query +SET PATH = spark_catalog.default, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))] + 
+ +-- !query +SELECT id FROM tbl +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`tbl`", + "searchPath" : "[`spark_catalog`.`default`, `system`.`builtin`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 16, + "stopIndex" : 18, + "fragment" : "tbl" + } ] +} + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +DROP TABLE sql_path_relations_a.tbl +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_relations_a.tbl + + +-- !query +DROP TABLE sql_path_relations_b.tbl +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_relations_b.tbl + + +-- !query +DROP SCHEMA sql_path_relations_a +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_relations_a] + + +-- !query +DROP SCHEMA sql_path_relations_b +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_relations_b] + + +-- !query +CREATE SCHEMA sql_path_views_a +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_views_a] + + +-- !query +CREATE SCHEMA sql_path_views_b +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_views_b] + + +-- !query +CREATE TABLE sql_path_views_a.frozen_t USING parquet AS SELECT 1 AS id +-- !query analysis +CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_path_views_a`.`frozen_t`, ErrorIfExists, [id] + +- Project [1 AS id#x] + +- OneRowRelation + + +-- !query +CREATE TABLE sql_path_views_b.frozen_t USING parquet AS SELECT 2 AS id +-- !query analysis +CreateDataSourceTableAsSelectCommand 
`spark_catalog`.`sql_path_views_b`.`frozen_t`, ErrorIfExists, [id] + +- Project [2 AS id#x] + +- OneRowRelation + + +-- !query +SET PATH = spark_catalog.sql_path_views_a, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_views_a)), SchemaInPath(List(system, builtin))] + + +-- !query +CREATE VIEW default.v_path_frozen AS SELECT id FROM frozen_t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v_path_frozen`, SELECT id FROM frozen_t, false, false, PersistedView, COMPENSATION, true + +- Project [id#x] + +- SubqueryAlias spark_catalog.sql_path_views_a.frozen_t + +- Relation spark_catalog.sql_path_views_a.frozen_t[id#x] parquet + + +-- !query +SET PATH = spark_catalog.sql_path_views_b, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_views_b)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT id FROM frozen_t AS bare_lookup_uses_live_path +-- !query analysis +Project [id#x] ++- SubqueryAlias bare_lookup_uses_live_path + +- SubqueryAlias spark_catalog.sql_path_views_b.frozen_t + +- Relation spark_catalog.sql_path_views_b.frozen_t[id#x] parquet + + +-- !query +SELECT id FROM default.v_path_frozen AS view_body_uses_frozen_path +-- !query analysis +Project [id#x] ++- SubqueryAlias view_body_uses_frozen_path + +- SubqueryAlias spark_catalog.default.v_path_frozen + +- View (`spark_catalog`.`default`.`v_path_frozen`, [id#x]) + +- Project [cast(id#x as int) AS id#x] + +- Project [id#x] + +- SubqueryAlias spark_catalog.sql_path_views_a.frozen_t + +- Relation spark_catalog.sql_path_views_a.frozen_t[id#x] parquet + + +-- !query +USE spark_catalog.sql_path_views_a +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_views_a] + + +-- !query +CREATE VIEW sql_path_views_a.v_ctx AS +SELECT current_schema() AS cs, current_path() AS cp +-- !query analysis +CreateViewCommand `spark_catalog`.`sql_path_views_a`.`v_ctx`, 
SELECT current_schema() AS cs, current_path() AS cp, false, false, PersistedView, COMPENSATION, true + +- Project [current_schema() AS cs#x, current_path() AS cp#x] + +- OneRowRelation + + +-- !query +USE spark_catalog.sql_path_views_b +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_views_b] + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +SELECT cs, cp FROM sql_path_views_a.v_ctx +-- !query analysis +Project [cs#x, cp#x] ++- SubqueryAlias spark_catalog.sql_path_views_a.v_ctx + +- View (`spark_catalog`.`sql_path_views_a`.`v_ctx`, [cs#x, cp#x]) + +- Project [cast(cs#x as string) AS cs#x, cast(cp#x as string) AS cp#x] + +- Project [current_schema() AS cs#x, current_path() AS cp#x] + +- OneRowRelation + + +-- !query +USE spark_catalog.default +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [default] + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +DROP VIEW default.v_path_frozen +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`v_path_frozen`, false, true, false + + +-- !query +DROP VIEW sql_path_views_a.v_ctx +-- !query analysis +DropTableCommand `spark_catalog`.`sql_path_views_a`.`v_ctx`, false, true, false + + +-- !query +DROP TABLE sql_path_views_a.frozen_t +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_views_a.frozen_t + + +-- !query +DROP TABLE sql_path_views_b.frozen_t +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_views_b.frozen_t + + +-- !query +DROP SCHEMA sql_path_views_a +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_views_a] + + +-- !query +DROP SCHEMA sql_path_views_b +-- !query analysis +DropNamespace false, false ++- 
ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_views_b] + + +-- !query +CREATE SCHEMA sql_path_fn_a +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_fn_a] + + +-- !query +CREATE SCHEMA sql_path_fn_b +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_fn_b] + + +-- !query +CREATE TABLE sql_path_fn_a.frozen_t USING parquet AS SELECT 10 AS id +-- !query analysis +CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_path_fn_a`.`frozen_t`, ErrorIfExists, [id] + +- Project [10 AS id#x] + +- OneRowRelation + + +-- !query +CREATE TABLE sql_path_fn_b.frozen_t USING parquet AS SELECT 20 AS id +-- !query analysis +CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_path_fn_b`.`frozen_t`, ErrorIfExists, [id] + +- Project [20 AS id#x] + +- OneRowRelation + + +-- !query +SET PATH = spark_catalog.sql_path_fn_a, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_fn_a)), SchemaInPath(List(system, builtin))] + + +-- !query +CREATE FUNCTION default.frozen_fn() +RETURNS INT +RETURN (SELECT MAX(id) FROM frozen_t) +-- !query analysis +CreateSQLFunctionCommand spark_catalog.default.frozen_fn, INT, (SELECT MAX(id) FROM frozen_t), false, false, false, false + + +-- !query +SET PATH = spark_catalog.sql_path_fn_b, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_fn_b)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT MAX(id) FROM frozen_t AS bare_lookup_uses_live_path +-- !query analysis +Aggregate [max(id#x) AS max(id)#x] ++- SubqueryAlias bare_lookup_uses_live_path + +- SubqueryAlias spark_catalog.sql_path_fn_b.frozen_t + +- Relation spark_catalog.sql_path_fn_b.frozen_t[id#x] parquet + + +-- !query +SELECT default.frozen_fn() AS scalar_body_uses_frozen_path +-- !query analysis +Project [spark_catalog.default.frozen_fn() AS scalar_body_uses_frozen_path#x] 
+: +- Aggregate [max(id#x) AS max(id)#x] +: +- SubqueryAlias spark_catalog.sql_path_fn_a.frozen_t +: +- Relation spark_catalog.sql_path_fn_a.frozen_t[id#x] parquet ++- Project + +- OneRowRelation + + +-- !query +SET PATH = spark_catalog.sql_path_fn_a, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_fn_a)), SchemaInPath(List(system, builtin))] + + +-- !query +CREATE FUNCTION default.frozen_tvf() +RETURNS TABLE(id INT) +RETURN SELECT MAX(id) AS id FROM frozen_t +-- !query analysis +CreateSQLFunctionCommand spark_catalog.default.frozen_tvf, id INT, SELECT MAX(id) AS id FROM frozen_t, true, false, false, false + + +-- !query +SET PATH = spark_catalog.sql_path_fn_b, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_fn_b)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT * FROM default.frozen_tvf() AS table_body_uses_frozen_path +-- !query analysis +Project [id#x] ++- SubqueryAlias table_body_uses_frozen_path + +- SQLFunctionNode spark_catalog.default.frozen_tvf + +- SubqueryAlias frozen_tvf + +- Project [cast(id#x as int) AS id#x] + +- Aggregate [max(id#x) AS id#x] + +- SubqueryAlias spark_catalog.sql_path_fn_a.frozen_t + +- Relation spark_catalog.sql_path_fn_a.frozen_t[id#x] parquet + + +-- !query +USE spark_catalog.sql_path_fn_a +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_fn_a] + + +-- !query +CREATE FUNCTION sql_path_fn_a.f_ctx() +RETURNS STRING +RETURN concat(current_schema(), '::', current_path()) +-- !query analysis +CreateSQLFunctionCommand spark_catalog.sql_path_fn_a.f_ctx, STRING, concat(current_schema(), '::', current_path()), false, false, false, false + + +-- !query +USE spark_catalog.sql_path_fn_b +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_fn_b] + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand 
[DefaultPath] + + +-- !query +SELECT sql_path_fn_a.f_ctx() AS invoker_context +-- !query analysis +Project [spark_catalog.sql_path_fn_a.f_ctx() AS invoker_context#x] ++- Project + +- OneRowRelation + + +-- !query +USE spark_catalog.default +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [default] + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +DROP FUNCTION default.frozen_fn +-- !query analysis +DropFunctionCommand spark_catalog.default.frozen_fn, false, false + + +-- !query +DROP FUNCTION default.frozen_tvf +-- !query analysis +DropFunctionCommand spark_catalog.default.frozen_tvf, false, false + + +-- !query +DROP FUNCTION sql_path_fn_a.f_ctx +-- !query analysis +DropFunctionCommand spark_catalog.sql_path_fn_a.f_ctx, false, false + + +-- !query +DROP TABLE sql_path_fn_a.frozen_t +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_fn_a.frozen_t + + +-- !query +DROP TABLE sql_path_fn_b.frozen_t +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_fn_b.frozen_t + + +-- !query +DROP SCHEMA sql_path_fn_a +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_fn_a] + + +-- !query +DROP SCHEMA sql_path_fn_b +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_fn_b] + + +-- !query +SET spark.sql.defaultPath = system.session, system.builtin +-- !query analysis +SetCommand (spark.sql.defaultPath,Some(system.session, system.builtin)) + + +-- !query +SET PATH = system.builtin, system.session +-- !query analysis +SetPathCommand [SchemaInPath(List(system, builtin)), SchemaInPath(List(system, session))] + + +-- !query +SELECT current_path() AS explicit_set_path_wins_over_conf +-- !query analysis +Project [current_path() AS 
explicit_set_path_wins_over_conf#x] ++- OneRowRelation + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +RESET spark.sql.defaultPath +-- !query analysis +ResetCommand spark.sql.defaultPath + + +-- !query +SET spark.sql.defaultPath = system.session, system.builtin, current_schema +-- !query analysis +SetCommand (spark.sql.defaultPath,Some(system.session, system.builtin, current_schema)) + + +-- !query +USE spark_catalog.default +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [default] + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +SELECT current_path() AS default_path_expands_to_conf +-- !query analysis +Project [current_path() AS default_path_expands_to_conf#x] ++- OneRowRelation + + +-- !query +RESET spark.sql.defaultPath +-- !query analysis +ResetCommand spark.sql.defaultPath + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +SET spark.sql.defaultPath = this is not a path +-- !query analysis +org.apache.spark.SparkIllegalArgumentException +{ + "errorClass" : "INVALID_CONF_VALUE.REQUIREMENT", + "sqlState" : "22022", + "messageParameters" : { + "confName" : "spark.sql.defaultPath", + "confRequirement" : "The value must be empty or a comma-separated SET PATH element list (same grammar as SET PATH, except PATH is not allowed).", + "confValue" : "this is not a path" + } +} + + +-- !query +SET spark.sql.defaultPath = PATH, system.builtin +-- !query analysis +org.apache.spark.SparkIllegalArgumentException +{ + "errorClass" : "INVALID_CONF_VALUE.REQUIREMENT", + "sqlState" : "22022", + "messageParameters" : { + "confName" : "spark.sql.defaultPath", + "confRequirement" : "The value must be empty or a comma-separated SET PATH element list (same grammar as SET PATH, except PATH is not allowed).", + "confValue" : "PATH, system.builtin" + } +} + + +-- !query 
+SET spark.sql.path.enabled = false +-- !query analysis +SetCommand (spark.sql.path.enabled,Some(false)) + + +-- !query +SELECT current_path() IS NOT NULL AS has_path +-- !query analysis +Project [isnotnull(current_path()) AS has_path#x] ++- OneRowRelation + + +-- !query +SET PATH = spark_catalog.default +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED", + "sqlState" : "0A000", + "messageParameters" : { + "config" : "spark.sql.path.enabled" + } +} diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out index fdb7c8adf2826..4b6fc5d450140 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out @@ -504,9 +504,58 @@ Project [scalar-subquery#x [title#x] AS scalarsubquery(title)#xL] -- !query -SET VARIABLE title = 'Test qualifiers - fail' +SET VARIABLE title = 'Dropped struct variable -- field access vs qualified name' -- !query analysis SetVariable [variablereference(system.session.title='Test variable in aggregate')] ++- Project [Dropped struct variable -- field access vs qualified name AS title#x] + +- OneRowRelation + + +-- !query +DECLARE OR REPLACE VARIABLE session STRUCT = NAMED_STRUCT('a', 1) +-- !query analysis +CreateVariable default(cast(named_struct(a, 1) as struct), sql='NAMED_STRUCT('a', 1)'), true ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.session + + +-- !query +SELECT session.a +-- !query analysis +Project [variablereference(system.session.session=NAMED_STRUCT('a', 1)).a AS a#x] ++- OneRowRelation + + +-- !query +DROP TEMPORARY VARIABLE session +-- !query analysis +DropVariable false ++- ResolvedIdentifier 
org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.session + + +-- !query +SELECT session.a +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`session`.`a`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 16, + "fragment" : "session.a" + } ] +} + + +-- !query +SET VARIABLE title = 'Test qualifiers - fail' +-- !query analysis +SetVariable [variablereference(system.session.title='Dropped struct variable -- field access vs qualified name')] +- Project [Test qualifiers - fail AS title#x] +- OneRowRelation @@ -519,9 +568,16 @@ org.apache.spark.sql.AnalysisException "errorClass" : "UNRESOLVED_VARIABLE", "sqlState" : "42883", "messageParameters" : { - "searchPath" : "`system`.`session`", + "searchPath" : "[`system`.`session`]", "variableName" : "`builtin`.`var1`" - } + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 29, + "stopIndex" : 40, + "fragment" : "builtin.var1" + } ] } @@ -533,9 +589,16 @@ org.apache.spark.sql.AnalysisException "errorClass" : "UNRESOLVED_VARIABLE", "sqlState" : "42883", "messageParameters" : { - "searchPath" : "`system`.`session`", + "searchPath" : "[`system`.`session`]", "variableName" : "`system`.`sesion`.`var1`" - } + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 29, + "stopIndex" : 46, + "fragment" : "system.sesion.var1" + } ] } @@ -547,9 +610,16 @@ org.apache.spark.sql.AnalysisException "errorClass" : "UNRESOLVED_VARIABLE", "sqlState" : "42883", "messageParameters" : { - "searchPath" : "`system`.`session`", + "searchPath" : "[`system`.`session`]", "variableName" : "`sys`.`session`.`var1`" - } + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 29, + "stopIndex" : 44, + "fragment" : "sys.session.var1" 
+ } ] } @@ -648,9 +718,16 @@ org.apache.spark.sql.AnalysisException "errorClass" : "UNRESOLVED_VARIABLE", "sqlState" : "42883", "messageParameters" : { - "searchPath" : "`SYSTEM`.`SESSION`", + "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]", "variableName" : "`ses`.`var1`" - } + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 14, + "stopIndex" : 21, + "fragment" : "ses.var1" + } ] } @@ -662,9 +739,16 @@ org.apache.spark.sql.AnalysisException "errorClass" : "UNRESOLVED_VARIABLE", "sqlState" : "42883", "messageParameters" : { - "searchPath" : "`SYSTEM`.`SESSION`", + "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]", "variableName" : "`builtn`.`session`.`var1`" - } + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 14, + "stopIndex" : 32, + "fragment" : "builtn.session.var1" + } ] } diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-path.sql b/sql/core/src/test/resources/sql-tests/inputs/sql-path.sql new file mode 100644 index 0000000000000..e9d1d149e7fa3 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/sql-path.sql @@ -0,0 +1,410 @@ +-- ============================================================================ +-- SQL Standard PATH golden coverage +-- ============================================================================ +-- +-- This file is the readable, SQL-level reference for what the PATH feature +-- does. It is the primary place to look up "how does SET PATH behave when +-- I write ..." before reaching for the Scala unit suites. Tests that need +-- features not expressible in pure SQL (multi-threaded execution, session +-- cloning, view-metadata inspection, Connect/PySpark plumbing) live in the +-- matching Scala / Python suites. +-- +-- Table of Contents +-- ----------------- +-- 1. Default path observability (no SET PATH issued) +-- 2. 
SET PATH grammar +-- 2.1 Literal schema entries; case preservation; backtick quoting +-- 2.2 DEFAULT_PATH shortcut +-- 2.3 SYSTEM_PATH shortcut +-- 2.4 PATH keyword (append to live path) +-- 2.5 current_schema / current_database shortcuts +-- 3. CURRENT_PATH() builtin +-- 3.1 ANSI no-parens form equals current_path() +-- 3.2 Argument-count validation +-- 4. Static error conditions at SET PATH +-- 4.1 Literal duplicate +-- 4.2 DEFAULT_PATH expansion duplicate +-- 4.3 SYSTEM_PATH expansion duplicate +-- 4.4 current_database vs current_schema cross-alias duplicate +-- 4.5 Single-part schema reference rejected +-- 5. Routine resolution via PATH +-- 5.1 Persistent scalar function follows PATH +-- 5.2 Persistent table function follows PATH +-- 5.3 First-match ordering across two schemas on PATH +-- 5.4 Unqualified miss when schema is not on PATH +-- 6. Relation resolution via PATH +-- 6.1 Table resolved via PATH; first-match ordering +-- 6.2 Unqualified miss when schema is not on PATH +-- 7. Persisted view frozen-path behavior +-- 7.1 View body resolves via creation-time PATH (not invoker PATH) +-- 7.2 current_schema / current_path in view body use invoker context +-- 8. SQL function frozen-path behavior +-- 8.1 Scalar function body resolves via creation-time PATH +-- 8.2 Table function body resolves via creation-time PATH +-- 8.3 current_schema / current_path in function body use invoker context +-- 9. DEFAULT_PATH conf (spark.sql.defaultPath) +-- 9.1 Explicit SET PATH overrides the conf +-- 9.2 SET PATH = DEFAULT_PATH expands to the conf value +-- 9.3 Invalid conf value rejected at SET time +-- 10. PATH disabled +-- 10.1 current_path() still resolves (regular builtin) +-- 10.2 SET PATH itself is rejected +-- ============================================================================ + +--SET spark.sql.path.enabled=true + + +-- ============================================================================ +-- 1.
Default path observability (no SET PATH issued) +-- ============================================================================ + +-- The session was opened with PATH enabled and no `SET PATH` issued, so the +-- effective path is the built-in default ordering: system.builtin, then +-- system.session, then the current schema (spark_catalog.default here). +SELECT current_path(); + + +-- ============================================================================ +-- 2. SET PATH grammar +-- ============================================================================ + +-- 2.1 Literal schema entries; case preservation; backtick quoting ------------- + +SET PATH = spark_catalog.default, system.builtin; +SELECT current_path(); + +-- Case is preserved exactly as typed. +SET PATH = Spark_Catalog.Default, System.Builtin; +SELECT current_path(); + +-- Backtick-quoted identifiers that contain dots round-trip with quoting. +SET PATH = spark_catalog.`sch.b`, system.builtin; +SELECT current_path(); + +-- Multi-level namespace (3+ parts) is accepted by the grammar. The stored entry +-- is verified at the Scala layer (SetPathSuite) because the session catalog +-- only supports single-part namespaces, so calling current_path() while a +-- multi-level entry is on the path would surface that catalog limitation +-- rather than the PATH grammar property under test here.
+ +SET PATH = DEFAULT_PATH; + + +-- 2.2 DEFAULT_PATH shortcut --------------------------------------------------- + +SET PATH = DEFAULT_PATH; +SELECT current_path(); + + +-- 2.3 SYSTEM_PATH shortcut ---------------------------------------------------- + +SET PATH = SYSTEM_PATH; +SELECT current_path(); + + +-- 2.4 PATH keyword (append to live path) -------------------------------------- + +SET PATH = spark_catalog.default, system.builtin; +SET PATH = PATH, system.session; +SELECT current_path(); + + +-- 2.5 current_schema / current_database shortcuts ----------------------------- + +USE spark_catalog.default; +SET PATH = current_schema, system.builtin; +SELECT current_path(); + +-- current_database is a SQL alias for current_schema. +SET PATH = current_database, system.builtin; +SELECT current_path(); + +SET PATH = DEFAULT_PATH; + + +-- ============================================================================ +-- 3. CURRENT_PATH() builtin +-- ============================================================================ + +-- 3.1 ANSI no-parens form equals current_path() ------------------------------ + +SET PATH = spark_catalog.default, system.builtin; +SELECT CURRENT_PATH = current_path() AS ansi_form_matches; + + +-- 3.2 Argument-count validation ---------------------------------------------- + +SELECT current_path(1); + +SET PATH = DEFAULT_PATH; + + +-- ============================================================================ +-- 4. Static error conditions at SET PATH +-- ============================================================================ + +-- 4.1 Literal duplicate ------------------------------------------------------- + +SET PATH = spark_catalog.default, spark_catalog.default; + +-- Case-insensitive duplicate is still flagged. 
+SET PATH = spark_catalog.DEFAULT, spark_catalog.default; + + +-- 4.2 DEFAULT_PATH expansion duplicate ---------------------------------------- + +-- DEFAULT_PATH already contains system.builtin; listing it again is a duplicate +-- after expansion. +SET PATH = DEFAULT_PATH, system.builtin; + + +-- 4.3 SYSTEM_PATH expansion duplicate ----------------------------------------- + +SET PATH = SYSTEM_PATH, SYSTEM_PATH; + + +-- 4.4 current_database vs current_schema cross-alias duplicate ---------------- + +SET PATH = current_database, current_schema; + + +-- 4.5 Single-part schema reference rejected ----------------------------------- + +SET PATH = my_schema_no_catalog; + + +-- ============================================================================ +-- 5. Routine resolution via PATH +-- ============================================================================ + +-- 5.1 Persistent scalar function follows PATH --------------------------------- + +CREATE SCHEMA sql_path_routines; +CREATE FUNCTION sql_path_routines.pick() RETURNS INT RETURN 7; +SET PATH = spark_catalog.sql_path_routines, spark_catalog.default, system.builtin; +SELECT pick(); +SET PATH = DEFAULT_PATH; + + +-- 5.2 Persistent table function follows PATH ---------------------------------- + +CREATE FUNCTION sql_path_routines.pick_tvf() +RETURNS TABLE(val INT) +RETURN SELECT 7 AS val; +SET PATH = spark_catalog.sql_path_routines, spark_catalog.default, system.builtin; +SELECT * FROM pick_tvf(); +SET PATH = DEFAULT_PATH; + + +-- 5.3 First-match ordering across two schemas on PATH ------------------------ + +CREATE SCHEMA sql_path_routines_b; +CREATE FUNCTION sql_path_routines_b.pick() RETURNS INT RETURN 11; + +SET PATH = spark_catalog.sql_path_routines, spark_catalog.sql_path_routines_b, system.builtin; +SELECT pick() AS from_first_schema; +SET PATH = spark_catalog.sql_path_routines_b, spark_catalog.sql_path_routines, system.builtin; +SELECT pick() AS from_first_schema; +SET PATH = DEFAULT_PATH; + + +-- 5.4 
Unqualified miss when schema is not on PATH ----------------------------- + +SET PATH = spark_catalog.default, system.builtin; +SELECT pick(); + +-- Cleanup section 5. +SET PATH = DEFAULT_PATH; +DROP FUNCTION sql_path_routines.pick; +DROP FUNCTION sql_path_routines.pick_tvf; +DROP FUNCTION sql_path_routines_b.pick; +DROP SCHEMA sql_path_routines; +DROP SCHEMA sql_path_routines_b; + + +-- ============================================================================ +-- 6. Relation resolution via PATH +-- ============================================================================ + +CREATE SCHEMA sql_path_relations_a; +CREATE SCHEMA sql_path_relations_b; +CREATE TABLE sql_path_relations_a.tbl USING parquet AS SELECT 1 AS id; +CREATE TABLE sql_path_relations_b.tbl USING parquet AS SELECT 2 AS id; + +-- 6.1 First-match ordering ---------------------------------------------------- + +SET PATH = spark_catalog.sql_path_relations_a, spark_catalog.sql_path_relations_b, system.builtin; +SELECT id FROM tbl AS from_first_schema; +SET PATH = spark_catalog.sql_path_relations_b, spark_catalog.sql_path_relations_a, system.builtin; +SELECT id FROM tbl AS from_first_schema; + + +-- 6.2 Unqualified miss when schema is not on PATH ----------------------------- + +SET PATH = spark_catalog.default, system.builtin; +SELECT id FROM tbl; + +-- Cleanup section 6. +SET PATH = DEFAULT_PATH; +DROP TABLE sql_path_relations_a.tbl; +DROP TABLE sql_path_relations_b.tbl; +DROP SCHEMA sql_path_relations_a; +DROP SCHEMA sql_path_relations_b; + + +-- ============================================================================ +-- 7. 
Persisted view frozen-path behavior +-- ============================================================================ + +CREATE SCHEMA sql_path_views_a; +CREATE SCHEMA sql_path_views_b; +CREATE TABLE sql_path_views_a.frozen_t USING parquet AS SELECT 1 AS id; +CREATE TABLE sql_path_views_b.frozen_t USING parquet AS SELECT 2 AS id; + +-- 7.1 View body resolves via creation-time PATH (not invoker PATH) ------------ + +SET PATH = spark_catalog.sql_path_views_a, system.builtin; +CREATE VIEW default.v_path_frozen AS SELECT id FROM frozen_t; + +-- Flip the live PATH; the view body's unqualified `frozen_t` must still +-- resolve through the schema captured at CREATE VIEW (sql_path_views_a, id=1). +-- A bare query against `frozen_t` from the session follows the LIVE PATH and +-- returns the other table's row (id=2). +SET PATH = spark_catalog.sql_path_views_b, system.builtin; +SELECT id FROM frozen_t AS bare_lookup_uses_live_path; +SELECT id FROM default.v_path_frozen AS view_body_uses_frozen_path; + + +-- 7.2 current_schema / current_path in view body use invoker context ---------- + +USE spark_catalog.sql_path_views_a; +CREATE VIEW sql_path_views_a.v_ctx AS +SELECT current_schema() AS cs, current_path() AS cp; + +USE spark_catalog.sql_path_views_b; +SET PATH = DEFAULT_PATH; +-- The view body re-evaluates current_schema() / current_path() on every +-- invocation against the INVOKER's context, not the creator's. The result +-- here must reflect sql_path_views_b (the invoker), not sql_path_views_a +-- (the creator's schema at CREATE VIEW). +SELECT cs, cp FROM sql_path_views_a.v_ctx; + +-- Cleanup section 7. +USE spark_catalog.default; +SET PATH = DEFAULT_PATH; +DROP VIEW default.v_path_frozen; +DROP VIEW sql_path_views_a.v_ctx; +DROP TABLE sql_path_views_a.frozen_t; +DROP TABLE sql_path_views_b.frozen_t; +DROP SCHEMA sql_path_views_a; +DROP SCHEMA sql_path_views_b; + + +-- ============================================================================ +-- 8. 
SQL function frozen-path behavior +-- ============================================================================ + +CREATE SCHEMA sql_path_fn_a; +CREATE SCHEMA sql_path_fn_b; +CREATE TABLE sql_path_fn_a.frozen_t USING parquet AS SELECT 10 AS id; +CREATE TABLE sql_path_fn_b.frozen_t USING parquet AS SELECT 20 AS id; + +-- 8.1 Scalar function body resolves via creation-time PATH -------------------- + +SET PATH = spark_catalog.sql_path_fn_a, system.builtin; +CREATE FUNCTION default.frozen_fn() +RETURNS INT +RETURN (SELECT MAX(id) FROM frozen_t); + +SET PATH = spark_catalog.sql_path_fn_b, system.builtin; +SELECT MAX(id) FROM frozen_t AS bare_lookup_uses_live_path; +SELECT default.frozen_fn() AS scalar_body_uses_frozen_path; + + +-- 8.2 Table function body resolves via creation-time PATH --------------------- + +SET PATH = spark_catalog.sql_path_fn_a, system.builtin; +CREATE FUNCTION default.frozen_tvf() +RETURNS TABLE(id INT) +RETURN SELECT MAX(id) AS id FROM frozen_t; + +SET PATH = spark_catalog.sql_path_fn_b, system.builtin; +SELECT * FROM default.frozen_tvf() AS table_body_uses_frozen_path; + + +-- 8.3 current_schema / current_path in function body use invoker context ----- + +USE spark_catalog.sql_path_fn_a; +CREATE FUNCTION sql_path_fn_a.f_ctx() +RETURNS STRING +RETURN concat(current_schema(), '::', current_path()); + +USE spark_catalog.sql_path_fn_b; +SET PATH = DEFAULT_PATH; +-- Like 7.2: current_schema() / current_path() in a SQL function body bind to +-- the INVOKER's context, not the creator's. +SELECT sql_path_fn_a.f_ctx() AS invoker_context; + +-- Cleanup section 8. +USE spark_catalog.default; +SET PATH = DEFAULT_PATH; +DROP FUNCTION default.frozen_fn; +DROP FUNCTION default.frozen_tvf; +DROP FUNCTION sql_path_fn_a.f_ctx; +DROP TABLE sql_path_fn_a.frozen_t; +DROP TABLE sql_path_fn_b.frozen_t; +DROP SCHEMA sql_path_fn_a; +DROP SCHEMA sql_path_fn_b; + + +-- ============================================================================ +-- 9. 
DEFAULT_PATH conf (spark.sql.defaultPath) +-- ============================================================================ +-- +-- The conf's RHS is captured as a raw string by the SQL `SET key = value` +-- form; keywords like `current_schema` and shortcut tokens like `SYSTEM_PATH` +-- must be written WITHOUT backticks so the conf's SET-PATH-grammar validator +-- recognizes them as path tokens rather than 1-part quoted identifiers. + +-- 9.1 Explicit SET PATH overrides the conf ------------------------------------ + +SET spark.sql.defaultPath = system.session, system.builtin; +SET PATH = system.builtin, system.session; +SELECT current_path() AS explicit_set_path_wins_over_conf; +SET PATH = DEFAULT_PATH; +RESET spark.sql.defaultPath; + + +-- 9.2 SET PATH = DEFAULT_PATH expands to the conf value ----------------------- + +SET spark.sql.defaultPath = system.session, system.builtin, current_schema; +USE spark_catalog.default; +SET PATH = DEFAULT_PATH; +SELECT current_path() AS default_path_expands_to_conf; +RESET spark.sql.defaultPath; +SET PATH = DEFAULT_PATH; + + +-- 9.3 Invalid conf value rejected at SET time --------------------------------- + +SET spark.sql.defaultPath = this is not a path; + +-- The PATH keyword is not allowed in the conf value (it would create a cycle). +SET spark.sql.defaultPath = PATH, system.builtin; + + +-- ============================================================================ +-- 10. 
PATH disabled +-- ============================================================================ + +SET spark.sql.path.enabled = false; + + +-- 10.1 current_path() still resolves (regular builtin) ------------------------ + +SELECT current_path() IS NOT NULL AS has_path; + + +-- 10.2 SET PATH itself is rejected -------------------------------------------- + +SET PATH = spark_catalog.default; diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql b/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql index 2e4eaa1f8f6ca..86cd70cfbf981 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql @@ -83,6 +83,19 @@ DROP TEMPORARY VARIABLE var1; SET VARIABLE title = 'Test variable in aggregate'; SELECT (SELECT MAX(id) FROM RANGE(10) WHERE id < title) FROM VALUES 1, 2 AS t(title); +SET VARIABLE title = 'Dropped struct variable -- field access vs qualified name'; +-- `session.a` is ambiguous: (a) 2-part qualified variable, or (b) field `a` of a 1-part +-- variable `session`. Variable resolution tries (a) first via longest match, falls back to +-- (b). With `session` declared as a struct, (b) succeeds. After the variable is dropped, +-- both interpretations fail and the SELECT falls through to column resolution, which +-- reports `UNRESOLVED_COLUMN`. Because either interpretation could have been intended, +-- the variable error path (when reached) must dump the full SQL path -- see +-- `VariableResolution.searchPathEntriesForError`. 
+DECLARE OR REPLACE VARIABLE session STRUCT = NAMED_STRUCT('a', 1); +SELECT session.a; +DROP TEMPORARY VARIABLE session; +SELECT session.a; + SET VARIABLE title = 'Test qualifiers - fail'; DECLARE OR REPLACE VARIABLE builtin.var1 INT; DECLARE OR REPLACE VARIABLE system.sesion.var1 INT; diff --git a/sql/core/src/test/resources/sql-tests/results/sql-path.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-path.sql.out new file mode 100644 index 0000000000000..52d01ccb80bad --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/sql-path.sql.out @@ -0,0 +1,1202 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT current_path() +-- !query schema +struct +-- !query output +system.builtin,system.session,spark_catalog.default + + +-- !query +SET PATH = spark_catalog.default, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() +-- !query schema +struct +-- !query output +spark_catalog.default,system.builtin + + +-- !query +SET PATH = Spark_Catalog.Default, System.Builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() +-- !query schema +struct +-- !query output +Spark_Catalog.Default,System.Builtin + + +-- !query +SET PATH = spark_catalog.`sch.b`, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() +-- !query schema +struct +-- !query output +spark_catalog.`sch.b`,system.builtin + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() +-- !query schema +struct +-- !query output +system.builtin,system.session,spark_catalog.default + + +-- !query +SET PATH = SYSTEM_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() +-- !query schema +struct +-- !query output +system.builtin,system.session + + +-- !query +SET 
PATH = spark_catalog.default, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = PATH, system.session +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() +-- !query schema +struct +-- !query output +spark_catalog.default,system.builtin,system.session + + +-- !query +USE spark_catalog.default +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = current_schema, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() +-- !query schema +struct +-- !query output +spark_catalog.default,system.builtin + + +-- !query +SET PATH = current_database, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() +-- !query schema +struct +-- !query output +spark_catalog.default,system.builtin + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.default, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT CURRENT_PATH = current_path() AS ansi_form_matches +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT current_path(1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "1", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "0", + "functionName" : "`current_path`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 22, + "fragment" : "current_path(1)" + } ] +} + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.default, spark_catalog.default +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : 
"DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "spark_catalog.default" + } +} + + +-- !query +SET PATH = spark_catalog.DEFAULT, spark_catalog.default +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "spark_catalog.default" + } +} + + +-- !query +SET PATH = DEFAULT_PATH, system.builtin +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "system.builtin" + } +} + + +-- !query +SET PATH = SYSTEM_PATH, SYSTEM_PATH +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "system.builtin" + } +} + + +-- !query +SET PATH = current_database, current_schema +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "current_schema" + } +} + + +-- !query +SET PATH = my_schema_no_catalog +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "INVALID_SQL_PATH_SCHEMA_REFERENCE", + "sqlState" : "42601", + "messageParameters" : { + "qualifiedName" : "my_schema_no_catalog" + } +} + + +-- !query +CREATE SCHEMA sql_path_routines +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE FUNCTION sql_path_routines.pick() RETURNS INT RETURN 7 +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.sql_path_routines, spark_catalog.default, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT pick() +-- !query schema +struct +-- 
!query output +7 + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE FUNCTION sql_path_routines.pick_tvf() +RETURNS TABLE(val INT) +RETURN SELECT 7 AS val +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.sql_path_routines, spark_catalog.default, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM pick_tvf() +-- !query schema +struct +-- !query output +7 + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA sql_path_routines_b +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE FUNCTION sql_path_routines_b.pick() RETURNS INT RETURN 11 +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.sql_path_routines, spark_catalog.sql_path_routines_b, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT pick() AS from_first_schema +-- !query schema +struct +-- !query output +7 + + +-- !query +SET PATH = spark_catalog.sql_path_routines_b, spark_catalog.sql_path_routines, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT pick() AS from_first_schema +-- !query schema +struct +-- !query output +11 + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.default, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT pick() +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_ROUTINE", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`pick`", + "searchPath" : "[`spark_catalog`.`default`, `system`.`builtin`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 13, + "fragment" : "pick()" + } ] +} + + +-- !query +SET PATH = 
DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP FUNCTION sql_path_routines.pick +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP FUNCTION sql_path_routines.pick_tvf +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP FUNCTION sql_path_routines_b.pick +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA sql_path_routines +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA sql_path_routines_b +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA sql_path_relations_a +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA sql_path_relations_b +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE sql_path_relations_a.tbl USING parquet AS SELECT 1 AS id +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE sql_path_relations_b.tbl USING parquet AS SELECT 2 AS id +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.sql_path_relations_a, spark_catalog.sql_path_relations_b, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT id FROM tbl AS from_first_schema +-- !query schema +struct +-- !query output +1 + + +-- !query +SET PATH = spark_catalog.sql_path_relations_b, spark_catalog.sql_path_relations_a, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT id FROM tbl AS from_first_schema +-- !query schema +struct +-- !query output +2 + + +-- !query +SET PATH = spark_catalog.default, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT id FROM tbl +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`tbl`", + "searchPath" : "[`spark_catalog`.`default`, `system`.`builtin`]" 
+ }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 16, + "stopIndex" : 18, + "fragment" : "tbl" + } ] +} + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE sql_path_relations_a.tbl +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE sql_path_relations_b.tbl +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA sql_path_relations_a +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA sql_path_relations_b +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA sql_path_views_a +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA sql_path_views_b +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE sql_path_views_a.frozen_t USING parquet AS SELECT 1 AS id +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE sql_path_views_b.frozen_t USING parquet AS SELECT 2 AS id +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.sql_path_views_a, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW default.v_path_frozen AS SELECT id FROM frozen_t +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.sql_path_views_b, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT id FROM frozen_t AS bare_lookup_uses_live_path +-- !query schema +struct +-- !query output +2 + + +-- !query +SELECT id FROM default.v_path_frozen AS view_body_uses_frozen_path +-- !query schema +struct +-- !query output +1 + + +-- !query +USE spark_catalog.sql_path_views_a +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW sql_path_views_a.v_ctx AS +SELECT current_schema() AS cs, current_path() AS cp +-- !query schema +struct<> +-- !query output + + + +-- !query +USE 
spark_catalog.sql_path_views_b +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT cs, cp FROM sql_path_views_a.v_ctx +-- !query schema +struct +-- !query output +sql_path_views_b system.builtin,system.session,spark_catalog.sql_path_views_b + + +-- !query +USE spark_catalog.default +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW default.v_path_frozen +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW sql_path_views_a.v_ctx +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE sql_path_views_a.frozen_t +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE sql_path_views_b.frozen_t +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA sql_path_views_a +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA sql_path_views_b +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA sql_path_fn_a +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA sql_path_fn_b +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE sql_path_fn_a.frozen_t USING parquet AS SELECT 10 AS id +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE sql_path_fn_b.frozen_t USING parquet AS SELECT 20 AS id +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.sql_path_fn_a, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE FUNCTION default.frozen_fn() +RETURNS INT +RETURN (SELECT MAX(id) FROM frozen_t) +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.sql_path_fn_b, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT MAX(id) FROM frozen_t AS 
bare_lookup_uses_live_path +-- !query schema +struct +-- !query output +20 + + +-- !query +SELECT default.frozen_fn() AS scalar_body_uses_frozen_path +-- !query schema +struct +-- !query output +10 + + +-- !query +SET PATH = spark_catalog.sql_path_fn_a, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE FUNCTION default.frozen_tvf() +RETURNS TABLE(id INT) +RETURN SELECT MAX(id) AS id FROM frozen_t +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.sql_path_fn_b, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM default.frozen_tvf() AS table_body_uses_frozen_path +-- !query schema +struct +-- !query output +10 + + +-- !query +USE spark_catalog.sql_path_fn_a +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE FUNCTION sql_path_fn_a.f_ctx() +RETURNS STRING +RETURN concat(current_schema(), '::', current_path()) +-- !query schema +struct<> +-- !query output + + + +-- !query +USE spark_catalog.sql_path_fn_b +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT sql_path_fn_a.f_ctx() AS invoker_context +-- !query schema +struct +-- !query output +sql_path_fn_b::system.builtin,system.session,spark_catalog.sql_path_fn_b + + +-- !query +USE spark_catalog.default +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP FUNCTION default.frozen_fn +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP FUNCTION default.frozen_tvf +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP FUNCTION sql_path_fn_a.f_ctx +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE sql_path_fn_a.frozen_t +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE sql_path_fn_b.frozen_t +-- !query 
schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA sql_path_fn_a +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA sql_path_fn_b +-- !query schema +struct<> +-- !query output + + + +-- !query +SET spark.sql.defaultPath = system.session, system.builtin +-- !query schema +struct +-- !query output +spark.sql.defaultPath system.session, system.builtin + + +-- !query +SET PATH = system.builtin, system.session +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() AS explicit_set_path_wins_over_conf +-- !query schema +struct +-- !query output +system.builtin,system.session + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +RESET spark.sql.defaultPath +-- !query schema +struct<> +-- !query output + + + +-- !query +SET spark.sql.defaultPath = system.session, system.builtin, current_schema +-- !query schema +struct +-- !query output +spark.sql.defaultPath system.session, system.builtin, current_schema + + +-- !query +USE spark_catalog.default +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() AS default_path_expands_to_conf +-- !query schema +struct +-- !query output +system.session,system.builtin,spark_catalog.default + + +-- !query +RESET spark.sql.defaultPath +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +SET spark.sql.defaultPath = this is not a path +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkIllegalArgumentException +{ + "errorClass" : "INVALID_CONF_VALUE.REQUIREMENT", + "sqlState" : "22022", + "messageParameters" : { + "confName" : "spark.sql.defaultPath", + "confRequirement" : "The value must be empty or a comma-separated SET PATH element list (same grammar as SET PATH, except PATH is not 
allowed).", + "confValue" : "this is not a path" + } +} + + +-- !query +SET spark.sql.defaultPath = PATH, system.builtin +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkIllegalArgumentException +{ + "errorClass" : "INVALID_CONF_VALUE.REQUIREMENT", + "sqlState" : "22022", + "messageParameters" : { + "confName" : "spark.sql.defaultPath", + "confRequirement" : "The value must be empty or a comma-separated SET PATH element list (same grammar as SET PATH, except PATH is not allowed).", + "confValue" : "PATH, system.builtin" + } +} + + +-- !query +SET spark.sql.path.enabled = false +-- !query schema +struct +-- !query output +spark.sql.path.enabled false + + +-- !query +SELECT current_path() IS NOT NULL AS has_path +-- !query schema +struct +-- !query output +true + + +-- !query +SET PATH = spark_catalog.default +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED", + "sqlState" : "0A000", + "messageParameters" : { + "config" : "spark.sql.path.enabled" + } +} diff --git a/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out index de8d6743fc761..3357f2e526305 100644 --- a/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out @@ -561,6 +561,60 @@ struct 1 +-- !query +SET VARIABLE title = 'Dropped struct variable -- field access vs qualified name' +-- !query schema +struct<> +-- !query output + + + +-- !query +DECLARE OR REPLACE VARIABLE session STRUCT = NAMED_STRUCT('a', 1) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT session.a +-- !query schema +struct +-- !query output +1 + + +-- !query +DROP TEMPORARY VARIABLE session +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT session.a +-- !query schema +struct<> 
+-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`session`.`a`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 16, + "fragment" : "session.a" + } ] +} + + -- !query SET VARIABLE title = 'Test qualifiers - fail' -- !query schema @@ -579,9 +633,16 @@ org.apache.spark.sql.AnalysisException "errorClass" : "UNRESOLVED_VARIABLE", "sqlState" : "42883", "messageParameters" : { - "searchPath" : "`system`.`session`", + "searchPath" : "[`system`.`session`]", "variableName" : "`builtin`.`var1`" - } + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 29, + "stopIndex" : 40, + "fragment" : "builtin.var1" + } ] } @@ -595,9 +656,16 @@ org.apache.spark.sql.AnalysisException "errorClass" : "UNRESOLVED_VARIABLE", "sqlState" : "42883", "messageParameters" : { - "searchPath" : "`system`.`session`", + "searchPath" : "[`system`.`session`]", "variableName" : "`system`.`sesion`.`var1`" - } + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 29, + "stopIndex" : 46, + "fragment" : "system.sesion.var1" + } ] } @@ -611,9 +679,16 @@ org.apache.spark.sql.AnalysisException "errorClass" : "UNRESOLVED_VARIABLE", "sqlState" : "42883", "messageParameters" : { - "searchPath" : "`system`.`session`", + "searchPath" : "[`system`.`session`]", "variableName" : "`sys`.`session`.`var1`" - } + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 29, + "stopIndex" : 44, + "fragment" : "sys.session.var1" + } ] } @@ -723,9 +798,16 @@ org.apache.spark.sql.AnalysisException "errorClass" : "UNRESOLVED_VARIABLE", "sqlState" : "42883", "messageParameters" : { - "searchPath" : "`SYSTEM`.`SESSION`", + "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]", "variableName" : 
"`ses`.`var1`" - } + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 14, + "stopIndex" : 21, + "fragment" : "ses.var1" + } ] } @@ -739,9 +821,16 @@ org.apache.spark.sql.AnalysisException "errorClass" : "UNRESOLVED_VARIABLE", "sqlState" : "42883", "messageParameters" : { - "searchPath" : "`SYSTEM`.`SESSION`", + "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]", "variableName" : "`builtn`.`session`.`var1`" - } + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 14, + "stopIndex" : 32, + "fragment" : "builtn.session.var1" + } ] } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala index d33a65fb73c86..238b52ab7cd93 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala @@ -17,8 +17,12 @@ package org.apache.spark.sql +import org.apache.spark.SparkIllegalArgumentException +import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException +import org.apache.spark.sql.connector.catalog.InMemoryCatalog import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{IntegerType, LongType} /** * Tests for SET PATH command and session path management. 
@@ -211,7 +215,7 @@ class SetPathSuite extends SharedSparkSession { }, condition = "DUPLICATE_SQL_PATH_ENTRY", sqlState = Some("42732"), - parameters = Map("pathEntry" -> "spark_catalog.default")) + parameters = Map("pathEntry" -> "current_schema")) } } @@ -231,16 +235,17 @@ class SetPathSuite extends SharedSparkSession { } } - test("PATH enabled: duplicate after expanding CURRENT_SCHEMA") { + test("PATH enabled: literal + CURRENT_SCHEMA collision is tolerated (USE-state dependent)") { + // SET PATH only rejects static duplicates (literal-vs-literal, current_schema repeated). + // A literal that happens to match the live current_schema is not flagged: a later + // `USE SCHEMA` may make them diverge, and at lookup the first match wins anyway. + // `system.builtin` is included so `current_path()` itself remains resolvable. withPathEnabled { sql("USE spark_catalog.default") - checkError( - exception = intercept[AnalysisException] { - sql("SET PATH = spark_catalog.default, current_schema") - }, - condition = "DUPLICATE_SQL_PATH_ENTRY", - sqlState = Some("42732"), - parameters = Map("pathEntry" -> "spark_catalog.default")) + sql("SET PATH = spark_catalog.default, current_schema, system.builtin") + val entries = pathEntries(currentPath()) + assert(entries === Seq("spark_catalog.default", "spark_catalog.default", "system.builtin"), + s"Expected literal + resolved CURRENT_SCHEMA preserved; got: $entries") } } @@ -253,7 +258,7 @@ class SetPathSuite extends SharedSparkSession { }, condition = "DUPLICATE_SQL_PATH_ENTRY", sqlState = Some("42732"), - parameters = Map("pathEntry" -> "spark_catalog.default")) + parameters = Map("pathEntry" -> "current_schema")) } } @@ -372,22 +377,31 @@ class SetPathSuite extends SharedSparkSession { } } - test("PATH enabled: unqualified session variable lookup follows PATH") { + test("PATH enabled: unqualified SET VAR follows PATH; DDL on variables ignores PATH") { withPathEnabled { sql("DECLARE VARIABLE system.session.path_var_gate = 7") try { 
sql("SET PATH = spark_catalog.default") checkError( exception = intercept[AnalysisException] { - sql("DROP TEMPORARY VARIABLE path_var_gate") + sql("SET VAR path_var_gate = 8") }, condition = "UNRESOLVED_VARIABLE", sqlState = "42883", parameters = Map( "variableName" -> "`path_var_gate`", - "searchPath" -> "`SYSTEM`.`SESSION`")) + "searchPath" -> "[`spark_catalog`.`default`]"), + context = ExpectedContext("path_var_gate", 8, 20)) + sql("SET VAR system.session.path_var_gate = 9") + checkAnswer(sql("SELECT system.session.path_var_gate"), Row(9)) + + sql("DROP TEMPORARY VARIABLE path_var_gate") + + sql("DECLARE VARIABLE system.session.path_var_gate = 7") sql("SET PATH = spark_catalog.default, system.session") + sql("SET VAR path_var_gate = 11") + checkAnswer(sql("SELECT path_var_gate"), Row(11)) sql("DROP TEMPORARY VARIABLE path_var_gate") } finally { sql("DROP TEMPORARY VARIABLE IF EXISTS system.session.path_var_gate") @@ -395,6 +409,70 @@ class SetPathSuite extends SharedSparkSession { } } + test("PATH enabled: unqualified FETCH ... INTO follows PATH") { + withSQLConf( + SQLConf.PATH_ENABLED.key -> "true", + SQLConf.SQL_SCRIPTING_CURSOR_ENABLED.key -> "true") { + sql("DECLARE OR REPLACE VARIABLE path_fetch_target INT") + try { + // Sanity: FETCH INTO works under the default path (system.session is on it). + val ok = sql( + """ + |BEGIN + | DECLARE cur CURSOR FOR SELECT 42 AS val; + | OPEN cur; + | FETCH cur INTO path_fetch_target; + | CLOSE cur; + |END; + |""".stripMargin) + checkAnswer(ok, Seq.empty[Row]) + checkAnswer(sql("SELECT path_fetch_target"), Row(42)) + + // Set PATH to exclude system.session: unqualified FETCH INTO target now fails + // with the actual SQL path rendered as a bracketed list. 
+ sql("SET PATH = spark_catalog.default") + checkError( + exception = intercept[AnalysisException] { + sql( + """ + |BEGIN + | DECLARE cur CURSOR FOR SELECT 99 AS val; + | OPEN cur; + | FETCH cur INTO path_fetch_target; + | CLOSE cur; + |END; + |""".stripMargin) + }, + condition = "UNRESOLVED_VARIABLE", + sqlState = "42883", + parameters = Map( + "variableName" -> "`path_fetch_target`", + "searchPath" -> "[`spark_catalog`.`default`]"), + context = ExpectedContext("path_fetch_target", -1, -1)) + } finally { + sql("SET PATH = DEFAULT_PATH") + sql("DROP TEMPORARY VARIABLE IF EXISTS path_fetch_target") + } + } + } + + test("PATH enabled: DECLARE / SET VAR / DROP cycle under non-default PATH") { + withPathEnabled { + sql("CREATE SCHEMA IF NOT EXISTS path_var_cycle") + try { + sql("SET PATH = spark_catalog.path_var_cycle, system.session") + sql("DECLARE OR REPLACE VARIABLE cycle_var = 1") + sql("SET VAR system.session.cycle_var = 2") + sql("SET VAR cycle_var = 3") + checkAnswer(sql("SELECT cycle_var"), Row(3)) + sql("DROP TEMPORARY VARIABLE cycle_var") + } finally { + sql("DROP TEMPORARY VARIABLE IF EXISTS system.session.cycle_var") + sql("DROP SCHEMA IF EXISTS path_var_cycle") + } + } + } + test("PATH enabled: current_path does not accept arguments") { withPathEnabled { // Ensure built-in function lookup succeeds so this assertion targets arg-count semantics. @@ -428,11 +506,110 @@ class SetPathSuite extends SharedSparkSession { } } - // TODO: cloneSession() constructs a new CatalogManager per forked session and - // explicitly copies only the stored session path via copySessionPathFrom. - // Other CatalogManager state propagation (current catalog/namespace, registered - // catalogs) on clone is currently incidental -- audit and pin down the intended - // semantics in a follow-up. + // --- cloneSession() propagation matrix -------------------------------------- + // The cloned session is built via `BaseSessionStateBuilder` from a parent + // `SessionState`. 
Per-component hand-offs on clone: + // - `SessionCatalog.copyStateTo` copies `currentDb` and `tempViews`, + // - `CatalogManager.copySessionPathFrom` copies the stored `_sessionPath`, + // - `functionRegistry.clone()` and `tableFunctionRegistry.clone()` copy + // temporary functions. + // What is NOT propagated: + // - the temp variable registry (new `TempVariableManager` per session), + // - the `CatalogManager` current-catalog / current-namespace (re-read from + // conf defaults in the child), + // - the registered v2 `catalogs` map (lazy-loaded per session). + // The tests below pin this observed behavior so any future change has to + // update the assertions. + + test("cloneSession: stored SET PATH propagates to the child session") { + withPathEnabled { + sql("SET PATH = spark_catalog.default, system.builtin") + try { + val child = spark.cloneSession() + val entries = pathEntries( + child.sql("SELECT current_path()").collect().head.getString(0)) + assert(entries === Seq("spark_catalog.default", "system.builtin"), + s"Cloned session should inherit stored SET PATH; got: $entries") + } finally { + sql("SET PATH = DEFAULT_PATH") + } + } + } + + test("cloneSession: USE SCHEMA on the parent propagates to the child") { + sql("CREATE SCHEMA IF NOT EXISTS path_clone_use") + try { + sql("USE spark_catalog.path_clone_use") + val child = spark.cloneSession() + val childDb = child.sql("SELECT current_database()").head().getString(0) + assert(childDb == "path_clone_use", + s"Cloned session should inherit the parent's current schema; got: $childDb") + } finally { + sql("USE spark_catalog.default") + sql("DROP SCHEMA IF EXISTS path_clone_use") + } + } + + test("cloneSession: temp views on the parent propagate to the child") { + sql("CREATE TEMPORARY VIEW path_clone_view AS SELECT 1 AS c") + try { + val child = spark.cloneSession() + checkAnswer(child.sql("SELECT c FROM path_clone_view"), Row(1)) + } finally { + sql("DROP VIEW IF EXISTS path_clone_view") + } + } + + 
test("cloneSession: temp functions on the parent propagate to the child (cloned " + + "functionRegistry)") { + sql("CREATE TEMPORARY FUNCTION path_clone_fn() RETURNS INT RETURN 42") + try { + val child = spark.cloneSession() + checkAnswer(child.sql("SELECT path_clone_fn()"), Row(42)) + // Snapshot semantics: dropping in the parent must not affect the already-cloned child. + sql("DROP TEMPORARY FUNCTION path_clone_fn") + checkAnswer(child.sql("SELECT path_clone_fn()"), Row(42)) + } finally { + sql("DROP TEMPORARY FUNCTION IF EXISTS path_clone_fn") + } + } + + test("cloneSession: temp variables on the parent are NOT propagated to the child") { + sql("DECLARE OR REPLACE VARIABLE path_clone_var INT DEFAULT 7") + try { + val child = spark.cloneSession() + val e = intercept[AnalysisException] { + child.sql("SELECT path_clone_var").collect() + } + // Either UNRESOLVED_VARIABLE or UNRESOLVED_COLUMN; both confirm the variable + // did not survive the clone. + assert( + e.getCondition == "UNRESOLVED_VARIABLE" || + e.getCondition.startsWith("UNRESOLVED_COLUMN"), + s"Temp variables should NOT propagate to the clone; got: ${e.getCondition}") + } finally { + sql("DROP TEMPORARY VARIABLE IF EXISTS path_clone_var") + } + } + + test("cloneSession: child SET PATH does not leak back to the parent") { + withPathEnabled { + sql("SET PATH = spark_catalog.default, system.builtin") + try { + val child = spark.cloneSession() + child.sql("SET PATH = system.session, system.builtin") + val parentEntries = pathEntries(currentPath()) + assert(parentEntries === Seq("spark_catalog.default", "system.builtin"), + s"Child SET PATH must not affect the parent; parent got: $parentEntries") + val childEntries = pathEntries( + child.sql("SELECT current_path()").collect().head.getString(0)) + assert(childEntries === Seq("system.session", "system.builtin"), + s"Child SET PATH should be visible only in the child; child got: $childEntries") + } finally { + sql("SET PATH = DEFAULT_PATH") + } + } + } // --- 
Resolution tests: verify SET PATH affects actual table/function lookup --- @@ -495,4 +672,523 @@ class SetPathSuite extends SharedSparkSession { } } } + + // --- spark.sql.defaultPath (SQLConf.DEFAULT_PATH) --- + // The conf carries the SET PATH grammar; sessionPathEntries falls back to it lazily + // when no `SET PATH` has been issued, mirroring how `currentCatalog` falls back to + // [[SQLConf.DEFAULT_CATALOG]]. + + test("DEFAULT_PATH conf: lazy fallback when no SET PATH issued") { + withSQLConf( + SQLConf.PATH_ENABLED.key -> "true", + SQLConf.DEFAULT_PATH.key -> "spark_catalog.default, system.builtin") { + val catalogManager = spark.sessionState.catalogManager + val priorSessionPath = catalogManager.storedSessionPathEntries + catalogManager.clearSessionPath() + try { + val entries = pathEntries(currentPath()) + assert(entries == Seq("spark_catalog.default", "system.builtin"), + s"Expected DEFAULT_PATH conf to drive current_path(); got: $entries") + assert(catalogManager.storedSessionPathEntries.isEmpty, + "DEFAULT_PATH lookup must not write to the in-memory stored session path") + } finally { + catalogManager.clearSessionPath() + priorSessionPath.foreach(catalogManager.setSessionPath) + } + } + } + + test("DEFAULT_PATH conf: explicit SET PATH overrides the conf") { + withSQLConf( + SQLConf.PATH_ENABLED.key -> "true", + SQLConf.DEFAULT_PATH.key -> "system.builtin, system.session") { + val catalogManager = spark.sessionState.catalogManager + val priorSessionPath = catalogManager.storedSessionPathEntries + try { + sql("SET PATH = system.session, system.builtin") + val entries = pathEntries(currentPath()) + assert(entries == Seq("system.session", "system.builtin"), + s"Expected SET PATH to win over DEFAULT_PATH conf; got: $entries") + } finally { + catalogManager.clearSessionPath() + priorSessionPath.foreach(catalogManager.setSessionPath) + } + } + } + + test("DEFAULT_PATH conf: SET PATH = DEFAULT_PATH expands to the conf value") { + withSQLConf( + 
SQLConf.PATH_ENABLED.key -> "true", + SQLConf.DEFAULT_PATH.key -> "system.session, system.builtin, current_schema") { + val catalogManager = spark.sessionState.catalogManager + val priorSessionPath = catalogManager.storedSessionPathEntries + try { + sql("SET PATH = DEFAULT_PATH") + val entries = pathEntries(currentPath()) + assert(entries.head.contains("system.session"), + s"DEFAULT_PATH expansion should follow conf order (session first); got: $entries") + assert(catalogManager.storedSessionPathEntries.isDefined, + "After SET PATH the in-memory stored session path should be populated") + } finally { + catalogManager.clearSessionPath() + priorSessionPath.foreach(catalogManager.setSessionPath) + } + } + } + + test("DEFAULT_PATH conf: cycle break -- inner DEFAULT_PATH falls back to builtin order") { + withSQLConf( + SQLConf.PATH_ENABLED.key -> "true", + SQLConf.DEFAULT_PATH.key -> "DEFAULT_PATH", + // Pin order conf to "first" so the spark-builtin default ordering is observable. + SQLConf.SESSION_FUNCTION_RESOLUTION_ORDER.key -> "first") { + val catalogManager = spark.sessionState.catalogManager + val priorSessionPath = catalogManager.storedSessionPathEntries + catalogManager.clearSessionPath() + try { + val entries = pathEntries(currentPath()) + assert(entries.head.contains("system.session"), + s"Inner DEFAULT_PATH should resolve to builtin order seeded by the order conf " + + s"('first' -> session leading); got: $entries") + } finally { + catalogManager.clearSessionPath() + priorSessionPath.foreach(catalogManager.setSessionPath) + } + } + } + + test("DEFAULT_PATH conf: invalid value rejected on SET spark.sql.defaultPath") { + withPathEnabled { + val e = intercept[SparkIllegalArgumentException] { + sql("SET spark.sql.defaultPath = this is not a path") + } + assert(e.getCondition.startsWith("INVALID_CONF_VALUE"), e.getMessage) + } + } + + test("DEFAULT_PATH conf: PATH keyword is rejected on SET spark.sql.defaultPath") { + withPathEnabled { + val e = 
intercept[SparkIllegalArgumentException] { + sql("SET spark.sql.defaultPath = PATH, system.builtin") + } + assert(e.getCondition.startsWith("INVALID_CONF_VALUE"), e.getMessage) + } + } + + test("DEFAULT_PATH conf: PATH disabled returns no fallback") { + withSQLConf( + SQLConf.PATH_ENABLED.key -> "false", + SQLConf.DEFAULT_PATH.key -> "system.session, system.builtin") { + val catalogManager = spark.sessionState.catalogManager + assert(catalogManager.sessionPathEntries.isEmpty, + "DEFAULT_PATH conf must not take effect when PATH is disabled") + } + } + + // --- Path-driven security check (built on the lazy DEFAULT_PATH fallback) --- + // The "block temp function shadowing builtin" check is now driven by the live PATH, so + // changes via SET PATH or DEFAULT_PATH take effect even when the legacy order conf is + // left at its default. + + test("path-driven security check: SET PATH putting session before builtin blocks temp " + + "function with a builtin name") { + withPathEnabled { + val catalogManager = spark.sessionState.catalogManager + val priorSessionPath = catalogManager.storedSessionPathEntries + try { + // Default `sessionFunctionResolutionOrder` is "second" (builtin first), but SET PATH + // overrides that to put session first. The security check must reflect the live path. 
+ sql("SET PATH = system.session, system.builtin") + val e = intercept[AnalysisException] { + sql("CREATE TEMPORARY FUNCTION count() RETURNS INT RETURN 1") + } + assert(e.getCondition == "ROUTINE_ALREADY_EXISTS", e.getMessage) + } finally { + sql("DROP TEMPORARY FUNCTION IF EXISTS session.count") + catalogManager.clearSessionPath() + priorSessionPath.foreach(catalogManager.setSessionPath) + } + } + } + + test("path-driven security check: DEFAULT_PATH conf putting session before builtin " + + "blocks temp function with a builtin name (no SET PATH issued)") { + withSQLConf( + SQLConf.PATH_ENABLED.key -> "true", + SQLConf.DEFAULT_PATH.key -> "system.session, system.builtin") { + val catalogManager = spark.sessionState.catalogManager + val priorSessionPath = catalogManager.storedSessionPathEntries + catalogManager.clearSessionPath() + try { + // Order conf is left at its default ("second"). The path-driven gate must read + // DEFAULT_PATH and fire the security check for unqualified temp/builtin collisions. 
+        val e = intercept[AnalysisException] {
+          sql("CREATE TEMPORARY FUNCTION count() RETURNS INT RETURN 1")
+        }
+        assert(e.getCondition == "ROUTINE_ALREADY_EXISTS", e.getMessage)
+      } finally {
+        sql("DROP TEMPORARY FUNCTION IF EXISTS session.count")
+        catalogManager.clearSessionPath()
+        priorSessionPath.foreach(catalogManager.setSessionPath)
+      }
+    }
+  }
+
+  test("PATH enabled: SET PATH with only user schemas does not implicitly resolve builtins") {
+    // With a path that lists only a user schema, an unqualified builtin (`abs`) must fail
+    // with UNRESOLVED_ROUTINE instead of being resolved implicitly.
+    withPathEnabled {
+      sql("CREATE SCHEMA IF NOT EXISTS only_user_on_path")
+      try {
+        sql("SET PATH = spark_catalog.only_user_on_path")
+        val e = intercept[AnalysisException] {
+          sql("SELECT abs(-1)").collect()
+        }
+        assert(e.getCondition == "UNRESOLVED_ROUTINE", e.getMessage)
+      } finally {
+        // Restore the path before cleanup, as the other SET PATH tests in this suite do, so
+        // a path without `system.builtin` is not left behind on the shared session.
+        // NOTE(review): redundant if `withPathEnabled` already resets the path -- confirm.
+        sql("SET PATH = DEFAULT_PATH")
+        sql("DROP SCHEMA IF EXISTS only_user_on_path")
+      }
+    }
+  }
+
+  test("PATH enabled: explicit SET PATH with system.session AFTER a user catalog still " +
+    "reaches temp functions") {
+    // Explicit paths are honored as written: placing `system.session` after a user catalog
+    // is the user's authorization for unqualified temp functions to resolve. Contrast with
+    // the implicit (no SET PATH, no DEFAULT_PATH) form, which preserves the security property
+    // of the seeded default path.
+    withPathEnabled {
+      sql("CREATE SCHEMA IF NOT EXISTS path_interleaved_user")
+      try {
+        sql("CREATE TEMPORARY FUNCTION path_interleaved_temp() RETURNS INT RETURN 7")
+        try {
+          sql("SET PATH = system.builtin, spark_catalog.path_interleaved_user, system.session")
+          checkAnswer(sql("SELECT path_interleaved_temp()"), Row(7))
+        } finally {
+          // Undo the SET PATH first so the custom path does not outlive this test.
+          sql("SET PATH = DEFAULT_PATH")
+          sql("DROP TEMPORARY FUNCTION IF EXISTS path_interleaved_temp")
+        }
+      } finally {
+        sql("DROP SCHEMA IF EXISTS path_interleaved_user")
+      }
+    }
+  }
+
+  test("PATH enabled: SET PATH with user schema before system.builtin still resolves builtins") {
+    // Exercises systemFunctionKindsFromPath with a user-catalog entry preceding
+    // system.builtin: the helper flat-scans the path, so Builtin still appears
+    // in the kinds list and unqualified `abs` resolves.
+    withPathEnabled {
+      sql("CREATE SCHEMA IF NOT EXISTS path_user_before_builtin")
+      try {
+        sql("SET PATH = spark_catalog.path_user_before_builtin, system.builtin")
+        // `abs` is a builtin; if Builtin did not appear in the kinds list,
+        // unqualified `abs(-1)` would fail with UNRESOLVED_ROUTINE.
+        checkAnswer(sql("SELECT abs(-1)"), Row(1))
+      } finally {
+        // Undo the SET PATH first so the custom path does not outlive this test.
+        sql("SET PATH = DEFAULT_PATH")
+        sql("DROP SCHEMA IF EXISTS path_user_before_builtin")
+      }
+    }
+  }
+
+  test("path-driven COUNT(*) rewrite gate: temp count shadowing builtin under SET PATH " +
+    "(session-first) suppresses the * -> 1 rewrite") {
+    // `Analyzer.matchesFunctionName` consults
+    // `FunctionResolution.isSessionBeforeBuiltinInPath` to decide whether COUNT(*) is the
+    // builtin (eligible for the COUNT(*) -> COUNT(1) shortcut) or a user-defined override.
+    // Default `sessionFunctionResolutionOrder` is "second", so creating a temp count while
+    // the default PATH is in effect passes the security check. Once SET PATH puts
+    // `system.session` before `system.builtin`, the rewrite must be suppressed and the
+    // star expansion must reach the temp `count`.
+ withPathEnabled { + sql("CREATE TEMPORARY FUNCTION count(x INT) RETURNS INT RETURN x + 100") + try { + // PATH still has builtin first: count(*) rewrites to count(1), which resolves to + // the builtin count and returns the row count of the input (1). + checkAnswer(sql("SELECT count(*) FROM VALUES (1) AS t(a)"), Row(1)) + + // Put session before builtin via SET PATH. The rewrite gate now reports + // `isSessionBeforeBuiltinInPath = true` AND a temp count exists, so the + // analyzer must NOT collapse `count(*)` to `count(1)`. The `*` then expands + // against the table's single column to `count(a)`, which resolves through + // the temp under the live path: 1 + 100 = 101. + sql("SET PATH = system.session, system.builtin") + checkAnswer(sql("SELECT count(*) FROM VALUES (1) AS t(a)"), Row(101)) + } finally { + sql("SET PATH = DEFAULT_PATH") + sql("DROP TEMPORARY FUNCTION IF EXISTS count") + } + } + } + + test("path-driven COUNT(*) rewrite gate: rewrite still applies for unrelated builtins") { + // The gate fires ONLY when a temp function with the same unqualified + // name as the builtin exists. A temp with a different name must not affect the + // COUNT(*) -> COUNT(1) shortcut even when session is searched before builtin. + withPathEnabled { + sql("CREATE TEMPORARY FUNCTION my_helper(x INT) RETURNS INT RETURN x + 1") + try { + sql("SET PATH = system.session, system.builtin") + // No temp `count` exists; the rewrite still fires and the builtin row counter + // returns the row count of the input (3). 
+ checkAnswer(sql("SELECT count(*) FROM VALUES (1), (2), (3) AS t(a)"), Row(3)) + } finally { + sql("SET PATH = DEFAULT_PATH") + sql("DROP TEMPORARY FUNCTION IF EXISTS my_helper") + } + } + } + + test("path-driven COUNT(*) rewrite gate: single-pass resolver suppresses the rewrite " + + "under SET PATH (session-first)") { + // The single-pass resolver mirrors the fixed-point gate via + // `FunctionResolverUtils.isUnqualifiedCountShadowedByTemp`, which is wired into + // `isNonDistinctCount` and consulted by `handleStarInArguments`. + // + // Setup (`CREATE TEMPORARY FUNCTION`, `SET PATH`) and execution (Dataset collect via + // checkAnswer, which inserts a `DeserializeToObject` node the single-pass analyzer + // does not yet support) are run under the fixed-point analyzer; only the actual + // count(*) analysis is run under the single-pass analyzer, and we assert against the + // analyzed plan's output schema. The builtin count returns BIGINT (rewrite applied); + // the temp count(INT) returns INT (rewrite suppressed and the star expansion routes + // through the temp), so the schema's first-field dataType tells us which branch fired. + withPathEnabled { + sql("CREATE TEMPORARY FUNCTION count(x INT) RETURNS INT RETURN x + 100") + try { + val countStarSql = "SELECT count(*) FROM VALUES (1) AS t(a)" + + // PATH builtin-first: the single-pass gate reports + // `isUnqualifiedCountShadowedByTemp = false`, the shortcut fires, and the analyzed + // output is the BIGINT builtin count. + withSQLConf(SQLConf.ANALYZER_SINGLE_PASS_RESOLVER_ENABLED.key -> "true") { + val tpe = spark.sql(countStarSql).queryExecution.analyzed.schema.head.dataType + assert(tpe == LongType, + s"Expected BIGINT (builtin count rewrite); got: $tpe") + } + + sql("SET PATH = system.session, system.builtin") + + // PATH session-first: the gate reports true, the rewrite is suppressed, the star + // expands against `a`, and the temp count(INT) wins; analyzed output is INT. 
+ withSQLConf(SQLConf.ANALYZER_SINGLE_PASS_RESOLVER_ENABLED.key -> "true") { + val tpe = spark.sql(countStarSql).queryExecution.analyzed.schema.head.dataType + assert(tpe == IntegerType, + s"Expected INT (temp count; rewrite suppressed); got: $tpe") + } + } finally { + sql("SET PATH = DEFAULT_PATH") + sql("DROP TEMPORARY FUNCTION IF EXISTS count") + } + } + } + + test("SPARK-56939: concurrent USE SCHEMA / USE CATALOG and unqualified function lookups " + + "do not deadlock") { + // Regression for SPARK-56939. Prior to the fix, [[CatalogManager.setCurrentNamespace]] + // (driven by `USE SCHEMA`) and [[CatalogManager.setCurrentCatalog]] (driven by + // `USE CATALOG`) both held the manager's intrinsic lock while calling into + // [[SessionCatalog.setCurrentDatabase*]] (which takes the catalog's intrinsic lock), + // while concurrent unqualified function resolution acquired the catalog's intrinsic lock + // and then reached back into the manager via + // [[CatalogManager.sqlResolutionPathEntries]]. That lock-order inversion deadlocked the + // session whenever a `USE`-style command raced with any unqualified function reference. + // + // The hazard is independent of [[SQLConf.PATH_ENABLED]] and the resolution-order setting, + // so this test exercises the default configuration. Both `setCurrentNamespace` and + // `setCurrentCatalog` were rewritten with the same split-lock pattern, so the test + // exercises both arms symmetrically: one thread toggles `USE SCHEMA`, another toggles + // `USE CATALOG` between the session catalog and a registered v2 catalog. 
+ val v2Catalog = "spark_56939_testcat" + spark.conf.set(s"spark.sql.catalog.$v2Catalog", classOf[InMemoryCatalog].getName) + sql("CREATE SCHEMA IF NOT EXISTS spark_56939_s1") + sql("CREATE SCHEMA IF NOT EXISTS spark_56939_s2") + try { + val budget = 200 + val iterations = new java.util.concurrent.atomic.AtomicInteger(0) + val barrier = new java.util.concurrent.CyclicBarrier(3) + val errors = new java.util.concurrent.ConcurrentLinkedQueue[Throwable]() + + val useSchemaThread = new Thread(() => { + try { + barrier.await() + var i = 0 + while (i < budget && errors.isEmpty) { + try { + sql(if ((i % 2) == 0) "USE SCHEMA spark_56939_s1" else "USE SCHEMA spark_56939_s2") + } catch { + // A concurrent `USE` from `useCatalogThread` may switch the current catalog + // to the v2 testcat, where these schemas don't exist; the resulting + // SCHEMA_NOT_FOUND is an expected interleaving and is unrelated to the + // deadlock this test guards against. + case _: NoSuchNamespaceException => () + } + i += 1 + } + } catch { + case t: Throwable => errors.add(t) + } + }, "SPARK-56939-use-schema") + + val useCatalogThread = new Thread(() => { + try { + barrier.await() + var i = 0 + while (i < budget && errors.isEmpty) { + // Toggle between the session catalog and a v2 catalog so each iteration + // exercises `setCurrentCatalog` -- the arm that previously held the manager + // lock across `v1SessionCatalog.setCurrentDatabase(default)`. The grammar + // accepts `USE identifierReference`; a single identifier resolves to a + // catalog when one is registered under that name. 
+ sql(if ((i % 2) == 0) s"USE $v2Catalog" else "USE spark_catalog") + i += 1 + } + } catch { + case t: Throwable => errors.add(t) + } + }, "SPARK-56939-use-catalog") + + val lookupThread = new Thread(() => { + try { + barrier.await() + var i = 0 + while (i < budget && errors.isEmpty) { + // Unqualified `count(*)` exercises the kinds-order provider that resolves + // against the live PATH via [[CatalogManager]] -- the side of the cycle + // that previously acquired the catalog lock first and then the manager lock. + val n = sql("SELECT count(*) FROM VALUES (1), (2), (3) AS t(a)") + .head().getLong(0) + assert(n == 3L, s"unexpected count: $n at iteration $i") + iterations.incrementAndGet() + i += 1 + } + } catch { + case t: Throwable => errors.add(t) + } + }, "SPARK-56939-lookup") + + useSchemaThread.start() + useCatalogThread.start() + lookupThread.start() + + // Generous join: 30s is plenty for 200 cheap queries per thread and gives a + // clear failure signal if the implementation regresses into a deadlock. 
+      val joinMillis = 30000L
+      useSchemaThread.join(joinMillis)
+      useCatalogThread.join(joinMillis)
+      lookupThread.join(joinMillis)
+
+      assert(!useSchemaThread.isAlive,
+        "USE SCHEMA thread did not finish; lock-order inversion between SessionCatalog and " +
+          "CatalogManager likely regressed (SPARK-56939).")
+      assert(!useCatalogThread.isAlive,
+        "USE CATALOG thread did not finish; lock-order inversion between SessionCatalog and " +
+          "CatalogManager likely regressed (SPARK-56939).")
+      assert(!lookupThread.isAlive,
+        "Lookup thread did not finish; lock-order inversion between SessionCatalog and " +
+          "CatalogManager likely regressed (SPARK-56939).")
+      assert(errors.isEmpty,
+        s"Concurrent lookups raised unexpected errors: ${errors.toArray.mkString("; ")}")
+      assert(iterations.get() > 0,
+        "Lookup thread never completed a query; suspect contention or deadlock.")
+    } finally {
+      sql("USE spark_catalog")
+      sql("USE SCHEMA default")
+      sql("DROP SCHEMA IF EXISTS spark_56939_s1 CASCADE")
+      sql("DROP SCHEMA IF EXISTS spark_56939_s2 CASCADE")
+      spark.conf.unset(s"spark.sql.catalog.$v2Catalog")
+    }
+  }
+
+  test("PATH enabled: concurrent SET PATH and unqualified lookups do not deadlock") {
+    // SessionCatalog.lookupBuiltinOrTempFunction is intentionally NOT
+    // synchronized on SessionCatalog because the path-driven kinds provider acquires
+    // CatalogManager.synchronized, and another thread holding that lock can call back
+    // into SessionCatalog (e.g. via setCurrentNamespace). This test hammers both sides
+    // concurrently: one thread flips SET PATH while another performs unqualified
+    // function lookups that go through the kinds provider. Within the budget we should
+    // observe no deadlock and no spurious analysis failures.
+    withPathEnabled {
+      val budget = 200
+      val iterations = new java.util.concurrent.atomic.AtomicInteger(0)
+      // Both workers wait on this barrier first, so their loops start together.
+      val barrier = new java.util.concurrent.CyclicBarrier(2)
+      // Failures raised inside either worker; a non-empty queue also stops both loops early.
+      val errors = new java.util.concurrent.ConcurrentLinkedQueue[Throwable]()
+
+      val setterThread = new Thread(() => {
+        try {
+          barrier.await()
+          var i = 0
+          while (i < budget && errors.isEmpty) {
+            if ((i % 2) == 0) {
+              sql("SET PATH = spark_catalog.default, system.builtin")
+            } else {
+              sql("SET PATH = system.builtin, system.session, spark_catalog.default")
+            }
+            i += 1
+          }
+        } catch {
+          case t: Throwable => errors.add(t)
+        }
+      }, "SetPathSuite-setter")
+
+      val lookupThread = new Thread(() => {
+        try {
+          barrier.await()
+          var i = 0
+          while (i < budget && errors.isEmpty) {
+            // Forces unqualified function resolution against the live PATH and triggers
+            // the session-kinds provider on the catalog-manager side.
+            val n = sql("SELECT count(*) FROM VALUES (1), (2), (3) AS t(a)")
+              .head().getLong(0)
+            assert(n == 3L, s"unexpected count: $n at iteration $i")
+            iterations.incrementAndGet()
+            i += 1
+          }
+        } catch {
+          case t: Throwable => errors.add(t)
+        }
+      }, "SetPathSuite-lookup")
+
+      // Daemon workers cannot keep the test JVM alive if a lock-order regression wedges
+      // them; the join timeouts below still turn that into an assertion failure.
+      setterThread.setDaemon(true)
+      lookupThread.setDaemon(true)
+
+      try {
+        setterThread.start()
+        lookupThread.start()
+
+        // Generous join: 30s is plenty for 200 cheap queries on either side and gives a
+        // clear failure signal if the implementation regresses into a deadlock.
+        val joinMillis = 30000L
+        setterThread.join(joinMillis)
+        lookupThread.join(joinMillis)
+
+        assert(!setterThread.isAlive,
+          "SET PATH thread did not finish; potential deadlock between SessionCatalog and " +
+            "CatalogManager synchronized blocks.")
+        assert(!lookupThread.isAlive,
+          "Lookup thread did not finish; potential deadlock between SessionCatalog and " +
+            "CatalogManager synchronized blocks.")
+        assert(errors.isEmpty,
+          s"Concurrent lookups raised unexpected errors: ${errors.toArray.mkString("; ")}")
+        assert(iterations.get() > 0,
+          "Lookup thread never completed a query; suspect contention or deadlock.")
+      } finally {
+        // Restore the session PATH even when an assertion above fails, in case
+        // `withPathEnabled` does not reset it. Skip the reset while a worker is still
+        // alive: it may be stuck holding catalog locks, and running SQL here could hang.
+        if (!setterThread.isAlive && !lookupThread.isAlive) {
+          sql("SET PATH = DEFAULT_PATH")
+        }
+      }
+    }
+  }
+
+  test("DEFAULT_PATH conf: duplicate entries are tolerated (first-match resolution)") {
+    // Lookup uses first-match resolution, so redundant entries on DEFAULT_PATH are dead code
+    // rather than an error. (Contrast with SET PATH, which still rejects static duplicates as
+    // a user-input typo guard.) This avoids a UX cliff where a USE SCHEMA could later wedge
+    // every unqualified function lookup with DUPLICATE_SQL_PATH_ENTRY.
+    withSQLConf(
+      SQLConf.PATH_ENABLED.key -> "true",
+      SQLConf.DEFAULT_PATH.key -> "system.builtin, system.builtin") {
+      val catalogManager = spark.sessionState.catalogManager
+      val priorSessionPath = catalogManager.storedSessionPathEntries
+      catalogManager.clearSessionPath()
+      try {
+        val entries = pathEntries(currentPath())
+        assert(entries == Seq("system.builtin", "system.builtin"),
+          s"DEFAULT_PATH duplicates should pass through to current_path(); got: $entries")
+        // Sanity: unqualified resolution still works (the second `system.builtin` is dead).
+        checkAnswer(sql("SELECT abs(-1)"), Row(1))
+      } finally {
+        catalogManager.clearSessionPath()
+        priorSessionPath.foreach(catalogManager.setSessionPath)
+      }
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala
new file mode 100644
index 0000000000000..9e365c720266a
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector
+
+import java.util.Collections
+
+import org.apache.spark.sql.{AnalysisException, Row}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.connector.catalog.{Identifier, InMemoryCatalog, SupportsNamespaces}
+import org.apache.spark.sql.connector.catalog.functions.{ScalarFunction, UnboundFunction}
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.{DataType, IntegerType, StringType}
+
+/**
+ * End-to-end coverage of [[SQLConf.PATH_ENABLED]] resolution through non-session V2 catalogs.
+ *
+ * Other path tests live in `SetPathSuite` (session catalog) and `ProcedureSuite`
+ * (procedures via CALL). This suite specifically exercises:
+ *  - unqualified table resolution across two V2 catalogs in SET PATH,
+ *  - first-match ordering when both catalogs hold the same name,
+ *  - unqualified V2 function resolution across two V2 catalogs in SET PATH,
+ *  - the negative case where the unqualified name only lives in a catalog
+ *    that is NOT on the path.
+ */
+class SqlPathV2CatalogSuite extends SharedSparkSession {
+
+  private val emptyProps: java.util.Map[String, String] = Collections.emptyMap()
+
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+    spark.conf.set("spark.sql.catalog.pathcat", classOf[InMemoryCatalog].getName)
+    spark.conf.set("spark.sql.catalog.pathcat2", classOf[InMemoryCatalog].getName)
+  }
+
+  override def afterAll(): Unit = {
+    try {
+      spark.sessionState.catalogManager.reset()
+      spark.sessionState.conf.unsetConf("spark.sql.catalog.pathcat")
+      spark.sessionState.conf.unsetConf("spark.sql.catalog.pathcat2")
+    } finally {
+      super.afterAll()
+    }
+  }
+
+  /** Returns the registered V2 catalog `name` as an [[InMemoryCatalog]]. */
+  private def v2Catalog(name: String): InMemoryCatalog =
+    spark.sessionState.catalogManager.catalog(name).asInstanceOf[InMemoryCatalog]
+
+  /** Creates the single-level namespace `ns` in the V2 catalog `catalog`. */
+  private def createV2Namespace(catalog: String, ns: String): Unit = {
+    v2Catalog(catalog).asInstanceOf[SupportsNamespaces]
+      .createNamespace(Array(ns), emptyProps)
+  }
+
+  /** Registers `fn` as function `name` under namespace `ns` of the V2 catalog `catalog`. */
+  private def addV2Function(
+      catalog: String,
+      ns: String,
+      name: String,
+      fn: UnboundFunction): Unit = {
+    v2Catalog(catalog).createFunction(Identifier.of(Array(ns), name), fn)
+  }
+
+  test("V2 catalogs on SET PATH: unqualified table follows first match") {
+    withSQLConf(SQLConf.PATH_ENABLED.key -> "true") {
+      // pathcat and pathcat2 each have a namespace `ns` and a table `path_v2_t` with
+      // different contents, so we can tell which catalog supplied the row.
+      // Fixtures are created inside `try` so that `finally` still drops them if a setup
+      // statement fails part-way through.
+      try {
+        createV2Namespace("pathcat", "ns")
+        createV2Namespace("pathcat2", "ns")
+        sql("CREATE TABLE pathcat.ns.path_v2_t (id INT) USING foo")
+        sql("INSERT INTO pathcat.ns.path_v2_t VALUES (10)")
+        sql("CREATE TABLE pathcat2.ns.path_v2_t (id INT) USING foo")
+        sql("INSERT INTO pathcat2.ns.path_v2_t VALUES (20)")
+
+        sql("SET PATH = pathcat.ns, pathcat2.ns, system.builtin")
+        checkAnswer(sql("SELECT id FROM path_v2_t"), Row(10))
+
+        sql("SET PATH = pathcat2.ns, pathcat.ns, system.builtin")
+        checkAnswer(sql("SELECT id FROM path_v2_t"), Row(20))
+      } finally {
+        sql("SET PATH = DEFAULT_PATH")
+        sql("DROP TABLE IF EXISTS pathcat.ns.path_v2_t")
+        sql("DROP TABLE IF EXISTS pathcat2.ns.path_v2_t")
+      }
+    }
+  }
+
+  test("V2 catalogs on SET PATH: unqualified table only in a non-path catalog is not found") {
+    withSQLConf(SQLConf.PATH_ENABLED.key -> "true") {
+      try {
+        // Setup stays inside `try` so `finally` also cleans up after a partial setup.
+        createV2Namespace("pathcat", "ns_only_here")
+        sql("CREATE TABLE pathcat.ns_only_here.hidden_t (id INT) USING foo")
+        // Path does not include pathcat.ns_only_here; bare `hidden_t` must not resolve.
+        // NOTE(review): `pathcat2.ns` is not created in this test; presumably SET PATH does
+        // not require the namespace to exist -- confirm, or create it here.
+        sql("SET PATH = pathcat2.ns, system.builtin")
+        val e = intercept[AnalysisException] {
+          sql("SELECT id FROM hidden_t").collect()
+        }
+        assert(e.getCondition == "TABLE_OR_VIEW_NOT_FOUND" ||
+          e.getMessage.contains("TABLE_OR_VIEW_NOT_FOUND"),
+          s"Expected TABLE_OR_VIEW_NOT_FOUND; got: ${e.getCondition}: ${e.getMessage}")
+      } finally {
+        sql("SET PATH = DEFAULT_PATH")
+        sql("DROP TABLE IF EXISTS pathcat.ns_only_here.hidden_t")
+      }
+    }
+  }
+
+  test("V2 catalogs on SET PATH: unqualified function follows first match") {
+    withSQLConf(SQLConf.PATH_ENABLED.key -> "true") {
+      // Two V2 catalogs each register a `strlen` function under the same name but with
+      // distinguishable return values: pathcat returns the true length, pathcat2 returns
+      // the length times 100. The result distinguishes which catalog supplied the
+      // function for the same argument, so swapping the path order must change the row.
+      try {
+        // Setup stays inside `try` so `finally` also cleans up after a partial setup.
+        createV2Namespace("pathcat", "fns")
+        createV2Namespace("pathcat2", "fns")
+        addV2Function("pathcat", "fns", "strlen", StrLen(StrLenDefault))
+        addV2Function("pathcat2", "fns", "strlen", StrLen(StrLenTimes100))
+        sql("SET PATH = pathcat.fns, pathcat2.fns, system.builtin")
+        checkAnswer(sql("SELECT strlen('abc')"), Row(3))
+
+        sql("SET PATH = pathcat2.fns, pathcat.fns, system.builtin")
+        checkAnswer(sql("SELECT strlen('abc')"), Row(300))
+      } finally {
+        sql("SET PATH = DEFAULT_PATH")
+        v2Catalog("pathcat").clearFunctions()
+        v2Catalog("pathcat2").clearFunctions()
+      }
+    }
+  }
+}
+
+/**
+ * A small distinguishable companion to `StrLenDefault` (in `DataSourceV2FunctionSuite.scala`):
+ * returns `s.length * 100` so V2-function resolution tests across catalogs can verify which
+ * catalog supplied the function from the result row alone.
+ */
+case object StrLenTimes100 extends ScalarFunction[Int] {
+  override def inputTypes(): Array[DataType] = Array(StringType)
+  override def resultType(): DataType = IntegerType
+  override def name(): String = "strlen_times_100"
+  override def produceResult(input: InternalRow): Int = input.getString(0).length * 100
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
index f6ace55849d26..3fb54d7c43d58 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
@@ -1453,6 +1453,106 @@ abstract class SQLViewSuite extends QueryTest {
     }
   }
 
+  test("SPARK-56853: stored view path is ignored when PATH is disabled at read time") {
+    // A view created with PATH enabled persists two things in metadata: the frozen
+    // resolution path AND the creator session's current catalog+namespace at CREATE
+    // VIEW time (the view's `viewCatalogAndNamespace` property). If the reader's
+    // session has `spark.sql.path.enabled=false`, the pinned entries are intentionally
+    // dropped (`CatalogManager.resolutionPathEntriesForAnalysis`); the view body's
+    // unqualified references fall back to that captured catalog+namespace, which is
+    // the creator's USE state at CREATE time -- NOT the schema the view physically
+    // lives in (the two coincide below only because the test runs
+    // `USE spark_catalog.compat_view_b` before CREATE VIEW). Verify both directions:
+    //  - fully-qualified bodies keep working (qualification doesn't depend on PATH),
+    //  - unqualified bodies that relied on the frozen path now resolve via the
+    //    captured viewCatalogAndNamespace.
+    withDatabase("compat_view_a", "compat_view_b") {
+      sql("CREATE DATABASE compat_view_a")
+      sql("CREATE DATABASE compat_view_b")
+      withTable(
+        "compat_view_a.compat_t",
+        "compat_view_b.compat_t") {
+        sql("CREATE TABLE compat_view_a.compat_t USING parquet AS SELECT 1 AS id")
+        sql("CREATE TABLE compat_view_b.compat_t USING parquet AS SELECT 2 AS id")
+        withView(
+          "compat_view_b.v_unq_path",
+          "compat_view_b.v_fq_path") {
+          // Create both views with USE compat_view_b in effect so the stored
+          // viewCatalogAndNamespace points at compat_view_b, then SET PATH=a so the
+          // frozen path pins compat_view_a.
+          withSQLConf(PATH_ENABLED.key -> "true") {
+            try {
+              sql("USE spark_catalog.compat_view_b")
+              sql("SET PATH = spark_catalog.compat_view_a, system.builtin")
+              sql(
+                """
+                  |CREATE VIEW compat_view_b.v_unq_path AS
+                  |SELECT id FROM compat_t
+                  |""".stripMargin)
+              sql(
+                """
+                  |CREATE VIEW compat_view_b.v_fq_path AS
+                  |SELECT id FROM spark_catalog.compat_view_a.compat_t
+                  |""".stripMargin)
+            } finally {
+              sql("SET PATH = DEFAULT_PATH")
+              sql("USE spark_catalog.default")
+            }
+          }
+
+          // Now read with PATH disabled. The fully-qualified view body is independent of
+          // PATH and must keep returning rows from compat_view_a. The unqualified-body view
+          // drops its frozen-path pin and falls back to viewCatalogAndNamespace
+          // (compat_view_b), so unqualified `compat_t` resolves to compat_view_b.compat_t.
+          withSQLConf(PATH_ENABLED.key -> "false") {
+            checkAnswer(sql("SELECT id FROM compat_view_b.v_fq_path"), Row(1))
+            checkAnswer(sql("SELECT id FROM compat_view_b.v_unq_path"), Row(2))
+          }
+        }
+      }
+    }
+  }
+
+  test("SPARK-56853: stored view path with no fallback target fails clearly when PATH is off") {
+    // Same shape as the previous test, but the captured `viewCatalogAndNamespace`
+    // (the creator's USE state at CREATE VIEW time -- set here via
+    // `USE spark_catalog.compat_home_only`) does NOT contain the unqualified name.
+    // Under PATH disabled the analyzer cannot fall back anywhere, so the lookup
+    // must raise TABLE_OR_VIEW_NOT_FOUND against that captured catalog+namespace.
+    withDatabase("compat_home_only", "compat_referenced") {
+      sql("CREATE DATABASE compat_home_only")
+      sql("CREATE DATABASE compat_referenced")
+      withTable("compat_referenced.only_here") {
+        sql("CREATE TABLE compat_referenced.only_here USING parquet AS SELECT 7 AS id")
+        withView("compat_home_only.v_unq_home") {
+          withSQLConf(PATH_ENABLED.key -> "true") {
+            try {
+              sql("USE spark_catalog.compat_home_only")
+              sql("SET PATH = spark_catalog.compat_referenced, system.builtin")
+              sql(
+                """
+                  |CREATE VIEW compat_home_only.v_unq_home AS
+                  |SELECT id FROM only_here
+                  |""".stripMargin)
+            } finally {
+              sql("SET PATH = DEFAULT_PATH")
+              sql("USE spark_catalog.default")
+            }
+          }
+
+          withSQLConf(PATH_ENABLED.key -> "false") {
+            val e = intercept[AnalysisException] {
+              sql("SELECT id FROM compat_home_only.v_unq_home").collect()
+            }
+            assert(e.getCondition == "TABLE_OR_VIEW_NOT_FOUND" ||
+              e.getMessage.contains("TABLE_OR_VIEW_NOT_FOUND"),
+              s"Expected TABLE_OR_VIEW_NOT_FOUND; got: ${e.getCondition}: ${e.getMessage}")
+          }
+        }
+      }
+    }
+  }
+
   // Regression guard: frozen resolution path must not leak into
CURRENT_SCHEMA/CURRENT_PATH. test("SPARK-56639: current_schema/current_path in persisted view use invoker context") { withSQLConf(PATH_ENABLED.key -> "true") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlignAssignmentsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlignAssignmentsSuiteBase.scala index 1a6dc178b6e51..c88ebb0d69ee7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlignAssignmentsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlignAssignmentsSuiteBase.scala @@ -195,6 +195,9 @@ abstract class AlignAssignmentsSuiteBase extends AnalysisTest { .thenReturn(defaultPath) when(manager.sqlResolutionPathEntries(any[String], any[Seq[String]])) .thenReturn(defaultPath) + when(manager.resolutionPathEntriesForAnalysis( + any[Option[Seq[Seq[String]]]], any[Seq[String]])) + .thenReturn(defaultPath) manager } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 907aa895a562b..cd917a817f7f0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -243,6 +243,9 @@ class PlanResolutionSuite extends SharedSparkSession with AnalysisTest { .thenReturn(defaultPath) when(manager.sqlResolutionPathEntries(any[String], any[Seq[String]])) .thenReturn(defaultPath) + when(manager.resolutionPathEntriesForAnalysis( + any[Option[Seq[Seq[String]]]], any[Seq[String]])) + .thenReturn(defaultPath) manager } @@ -269,6 +272,9 @@ class PlanResolutionSuite extends SharedSparkSession with AnalysisTest { .thenReturn(defaultPath2) when(manager.sqlResolutionPathEntries(any[String], any[Seq[String]])) .thenReturn(defaultPath2) + when(manager.resolutionPathEntriesForAnalysis( + 
any[Option[Seq[Seq[String]]]], any[Seq[String]]))
+      .thenReturn(defaultPath2)
     manager
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewSchemaBindingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewSchemaBindingSuite.scala
index 39e6e708403aa..6b11748565291 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewSchemaBindingSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewSchemaBindingSuite.scala
@@ -17,7 +17,57 @@
 
 package org.apache.spark.sql.execution.command.v1
 
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.connector.catalog.CatalogManager
 import org.apache.spark.sql.execution.command
+import org.apache.spark.sql.internal.SQLConf
 
 class AlterViewSchemaBindingSuite
-  extends command.AlterViewSchemaBindingSuiteBase with ViewCommandSuiteBase
+  extends command.AlterViewSchemaBindingSuiteBase with ViewCommandSuiteBase {
+
+  test("ALTER VIEW ... WITH SCHEMA preserves the frozen SQL path") {
+    // ALTER VIEW ... WITH SCHEMA is documented to use
+    // `generateViewProperties(captureNewPath = false)`: the path used to resolve the view
+    // body stays frozen at its create-time value instead of following the caller's current
+    // PATH. The view is therefore created under one PATH and altered under another, and the
+    // persisted resolution path must be identical before and after the ALTER.
+    withSQLConf(SQLConf.PATH_ENABLED.key -> "true") {
+      val viewName = "v_path_preserved_on_alter"
+      val view = s"$catalog.$namespace.$viewName"
+      val viewIdent = TableIdentifier(viewName, Some(namespace))
+
+      // Reads the resolution path currently persisted in the view's metadata and fails the
+      // test with `whenMissing` if none is stored.
+      def storedPath(whenMissing: String) =
+        spark.sessionState.catalog
+          .getTableMetadata(viewIdent)
+          .viewStoredResolutionPath
+          .getOrElse(fail(whenMissing))
+
+      sql(s"CREATE SCHEMA IF NOT EXISTS $catalog.alter_view_path_a")
+      try {
+        // Create the view while alter_view_path_a is on the PATH.
+        sql(s"SET PATH = $catalog.alter_view_path_a, system.builtin")
+        sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+        val pathAtCreate = storedPath("Expected the view to persist a frozen SQL path")
+        val entriesAtCreate = CatalogManager.deserializePathEntries(pathAtCreate)
+          .getOrElse(fail(s"Expected a valid serialized path, got: $pathAtCreate"))
+        assert(entriesAtCreate.contains(Seq(catalog, "alter_view_path_a")),
+          s"Frozen path should include alter_view_path_a; got: $entriesAtCreate")
+
+        // Alter the view under a different live PATH; the stored path must not follow it.
+        sql(s"SET PATH = $catalog.default, system.builtin")
+        sql(s"ALTER VIEW $view WITH SCHEMA EVOLUTION")
+
+        val pathPostAlter = storedPath("Frozen SQL path was dropped by ALTER VIEW WITH SCHEMA")
+        assert(pathPostAlter == pathAtCreate,
+          s"ALTER VIEW WITH SCHEMA must preserve the frozen path. " +
+            s"Before: $pathAtCreate; after: $pathPostAlter")
+      } finally {
+        sql("SET PATH = DEFAULT_PATH")
+        sql(s"DROP VIEW IF EXISTS $view")
+        sql(s"DROP SCHEMA IF EXISTS $catalog.alter_view_path_a")
+      }
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionSuite.scala
index 9a691d4430ef9..9e9991774992c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionSuite.scala
@@ -3691,7 +3691,9 @@ class SqlScriptingExecutionSuite extends SharedSparkSession {
         sqlState = "42883",
         parameters = Map(
           "variableName" -> toSQLId("LOCALVAR"),
-          "searchPath" -> toSQLId("SYSTEM.SESSION"))
+          "searchPath" ->
+            "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]"),
+        context = ExpectedContext("LOCALVAR", 54, 61)
       )
     }