[SPARK-51310][SQL] Resolve the type of default string producing expressions #50053

@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.catalyst.analysis
 
-import org.apache.spark.sql.catalyst.expressions.{Cast, Expression, Literal}
+import org.apache.spark.sql.catalyst.expressions.{Cast, DefaultStringProducingExpression, Expression, Literal}
 import org.apache.spark.sql.catalyst.plans.logical.{AddColumns, AlterColumns, AlterColumnSpec, AlterTableCommand, AlterViewAs, ColumnDefinition, CreateTable, CreateView, LogicalPlan, QualifiedColType, ReplaceColumns, V2CreateTablePlan}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.connector.catalog.TableCatalog
@@ -100,11 +100,13 @@ object ResolveDDLCommandStringTypes extends Rule[LogicalPlan] {
    * new type instead of the default string type.
    */
   private def transformPlan(plan: LogicalPlan, newType: StringType): LogicalPlan = {
-    plan resolveExpressionsUp { expression =>
+    val transformedPlan = plan resolveExpressionsUp { expression =>
       transformExpression
         .andThen(_.apply(newType))
         .applyOrElse(expression, identity[Expression])
     }
+
+    castDefaultStringExpressions(transformedPlan, newType)
   }
 
   /**
@@ -121,6 +123,30 @@ object ResolveDDLCommandStringTypes extends Rule[LogicalPlan] {
       newType => Literal(value, replaceDefaultStringType(dt, newType))
   }
 
+  /**
+   * Casts [[DefaultStringProducingExpression]] in the plan to the `newType`.
+   */
+  private def castDefaultStringExpressions(plan: LogicalPlan, newType: StringType): LogicalPlan = {
+    if (newType == StringType) return plan
+
+    def inner(ex: Expression): Expression = ex match {
+      // Skip if we already added a cast in the previous pass.
+      case cast @ Cast(e: DefaultStringProducingExpression, dt, _, _) if newType == dt =>
+        cast.copy(child = e.withNewChildren(e.children.map(inner)))
+
+      // Add cast on top of [[DefaultStringProducingExpression]].
+      case e: DefaultStringProducingExpression =>
+        Cast(e.withNewChildren(e.children.map(inner)), newType)
+
+      case other =>
+        other.withNewChildren(other.children.map(inner))
+    }
+
+    plan resolveOperators { operator =>
+      operator.mapExpressions(inner)
+    }
+  }
+
   private def hasDefaultStringType(dataType: DataType): Boolean =
     dataType.existsRecursively(isDefaultStringType)
 
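Note on the cast-skipping guard above: analyzer rules can be applied to the same plan more than once, so `castDefaultStringExpressions` must not stack a second Cast on top of one it already added, which is exactly what the first case of `inner` checks. Below is a minimal REPL-style sketch of that pattern on toy types, not Catalyst; `ToyExpr`, `DefaultStrExpr`, `ToyCast`, `Func` and `addCasts` are all hypothetical names used only for illustration.

sealed trait ToyExpr { def children: Seq[ToyExpr] }
case class DefaultStrExpr(children: Seq[ToyExpr] = Nil) extends ToyExpr
case class ToyCast(child: ToyExpr, targetType: String) extends ToyExpr {
  def children: Seq[ToyExpr] = Seq(child)
}
case class Func(name: String, children: Seq[ToyExpr]) extends ToyExpr

def addCasts(e: ToyExpr, newType: String): ToyExpr = e match {
  // Already cast to the target type by a previous pass: recurse, but do not re-wrap.
  case ToyCast(d: DefaultStrExpr, t) if t == newType =>
    ToyCast(DefaultStrExpr(d.children.map(addCasts(_, newType))), t)
  // Wrap a default-string-producing node in a cast to the new type.
  case d: DefaultStrExpr =>
    ToyCast(DefaultStrExpr(d.children.map(addCasts(_, newType))), newType)
  // Everything else: only rewrite the children.
  case ToyCast(c, t) => ToyCast(addCasts(c, newType), t)
  case Func(n, cs) => Func(n, cs.map(addCasts(_, newType)))
}

// Applying the rewrite twice yields the same tree, mirroring the idempotence
// that the first case of `inner` provides under repeated rule applications.
val once = addCasts(Func("substring", Seq(DefaultStrExpr())), "UNICODE")
assert(addCasts(once, "UNICODE") == once)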
@@ -25,6 +25,13 @@ import org.apache.spark.sql.types.StringType
 
 abstract class DefaultCollationTestSuite extends QueryTest with SharedSparkSession {
 
+  val defaultStringProducingExpressions: Seq[String] = Seq(
+    "current_timezone()", "current_database()", "md5('Spark' collate unicode)",
+    "soundex('Spark' collate unicode)", "url_encode('https://spark.apache.org' collate unicode)",
+    "url_decode('https%3A%2F%2Fspark.apache.org')", "uuid()", "chr(65)", "collation('UNICODE')",
+    "version()", "space(5)", "randstr(5, 123)"
+  )
+
   def dataSource: String = "parquet"
   def testTable: String = "test_tbl"
   def testView: String = "test_view"
@@ -329,6 +336,58 @@ class DefaultCollationTestSuiteV1 extends DefaultCollationTestSuite {
       }
     }
   }
+
+  test("view has utf8 binary collation by default") {
+    withView(testTable) {
+      sql(s"CREATE VIEW $testTable AS SELECT current_database() AS db")
+      assertTableColumnCollation(testTable, "db", "UTF8_BINARY")
+    }
+  }
+
+  test("default string producing expressions in view definition") {
+    val viewDefaultCollation = Seq(
+      "UTF8_BINARY", "UNICODE"
+    )
+
+    viewDefaultCollation.foreach { collation =>
+      withView(testTable) {
+
+        val columns = defaultStringProducingExpressions.zipWithIndex.map {
+          case (expr, index) => s"$expr AS c${index + 1}"
+        }.mkString(", ")
+
+        sql(
+          s"""
+             |CREATE view $testTable
+             |DEFAULT COLLATION $collation
+             |AS SELECT $columns
+             |""".stripMargin)
+
+        (1 to defaultStringProducingExpressions.length).foreach { index =>
+          assertTableColumnCollation(testTable, s"c$index", collation)
+        }
+      }
+    }
+  }
+
+  test("default string producing expressions in view definition - nested in expr tree") {
+    withView(testTable) {
+      sql(
+        s"""
+           |CREATE view $testTable
+           |DEFAULT COLLATION UNICODE AS SELECT
+           |SUBSTRING(current_database(), 1, 1) AS c1,
+           |SUBSTRING(SUBSTRING(current_database(), 1, 2), 1, 1) AS c2,
+           |SUBSTRING(current_database()::STRING, 1, 1) AS c3,
+           |SUBSTRING(CAST(current_database() AS STRING COLLATE UTF8_BINARY), 1, 1) AS c4
+           |""".stripMargin)
+
+      assertTableColumnCollation(testTable, "c1", "UNICODE")
+      assertTableColumnCollation(testTable, "c2", "UNICODE")
+      assertTableColumnCollation(testTable, "c3", "UNICODE")
+      assertTableColumnCollation(testTable, "c4", "UTF8_BINARY")
+    }
+  }
 }
 
 class DefaultCollationTestSuiteV2 extends DefaultCollationTestSuite with DatasourceV2SQLBase {
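For reference, the user-visible behaviour the new tests pin down, as a rough spark-shell sketch (assumes a build that includes this change; the exact `toDDL` rendering mentioned in the comment is illustrative, and `v_unicode` is just a placeholder view name).

// A column produced by a default-string-producing expression picks up the
// view's default collation instead of staying UTF8_BINARY.
spark.sql("CREATE VIEW v_unicode DEFAULT COLLATION UNICODE AS SELECT current_database() AS db")

// Expected to show a collated string column, e.g. "db STRING COLLATE UNICODE".
println(spark.table("v_unicode").schema.toDDL)

spark.sql("DROP VIEW v_unicode")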