diff --git a/spark/src/main/scala/org/apache/comet/serde/strings.scala b/spark/src/main/scala/org/apache/comet/serde/strings.scala index c4abe8ad4e..48717ef1d1 100644 --- a/spark/src/main/scala/org/apache/comet/serde/strings.scala +++ b/spark/src/main/scala/org/apache/comet/serde/strings.scala @@ -274,7 +274,10 @@ object CometRight extends CometExpressionSerde[Right] { } } -object CometConcat extends CometScalarFunction[Concat]("concat") with CometTypeShim { +object CometConcat + extends CometScalarFunction[Concat]("concat") + with CometTypeShim + with CodegenDispatchFallback { private val unsupportedReason = "CONCAT supports only string input parameters" // Spark 4.0 widens Concat to accept collated strings and preserves the collation in the merged diff --git a/spark/src/test/resources/sql-tests/expressions/string/collation.sql b/spark/src/test/resources/sql-tests/expressions/string/collation.sql index 95abecbd1e..89dd95b9e7 100644 --- a/spark/src/test/resources/sql-tests/expressions/string/collation.sql +++ b/spark/src/test/resources/sql-tests/expressions/string/collation.sql @@ -32,9 +32,11 @@ SELECT collation('hello' COLLATE UTF8_BINARY) query SELECT collation(CAST(NULL AS STRING)) --- concat preserves a non-default collation in its result type, but Comet's native concat produces --- UTF8_BINARY, so it is Incompatible and falls back to Spark by default. -query expect_fallback(concat does not support non-UTF8_BINARY collations) +-- concat preserves a non-default collation in its result type, which Comet's native concat does +-- not, so concat is Incompatible. It is enrolled in the JVM codegen dispatcher, which runs Spark's +-- own doGenCode inside the Comet pipeline, so a collated concat runs in Comet without falling back +-- and matches Spark. 
+query SELECT concat('Hello' COLLATE UTF8_LCASE, 'World' COLLATE UTF8_LCASE) -- reverse is enrolled in the JVM codegen dispatcher, which runs Spark's own doGenCode inside the @@ -42,9 +44,9 @@ SELECT concat('Hello' COLLATE UTF8_LCASE, 'World' COLLATE UTF8_LCASE) query SELECT reverse('Hello' COLLATE UTF8_LCASE) --- A standard ICU collation (UNICODE_CI) still falls back for concat, confirming the gate covers --- any non-UTF8_BINARY collation rather than just UTF8_LCASE. -query expect_fallback(concat does not support non-UTF8_BINARY collations) +-- A standard ICU collation (UNICODE_CI) also dispatches and matches Spark, confirming the path +-- covers any non-UTF8_BINARY collation rather than just UTF8_LCASE. +query SELECT concat('Hello' COLLATE UNICODE_CI, 'World' COLLATE UNICODE_CI) query