From caccc6d1edcd4a7a4de68b02d6da73be639effc2 Mon Sep 17 00:00:00 2001 From: Bhargava Vadlamani Date: Mon, 27 Apr 2026 10:18:14 -0700 Subject: [PATCH 1/3] fix_incorrect_compatiblity_msg_comet_sum --- spark/src/main/scala/org/apache/comet/serde/aggregates.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/aggregates.scala b/spark/src/main/scala/org/apache/comet/serde/aggregates.scala index 6889fc02d9..35b08c47a1 100644 --- a/spark/src/main/scala/org/apache/comet/serde/aggregates.scala +++ b/spark/src/main/scala/org/apache/comet/serde/aggregates.scala @@ -204,9 +204,7 @@ object CometAverage extends CometAggregateExpressionSerde[Average] { } object CometSum extends CometAggregateExpressionSerde[Sum] { - - override def getIncompatibleReasons(): Seq[String] = Seq("Falls back to Spark in ANSI mode.") - + override def convert( aggExpr: AggregateExpression, sum: Sum, From 54a83cb58e87103fa42c0e8354d00223638e95b7 Mon Sep 17 00:00:00 2001 From: Bhargava Vadlamani Date: Mon, 27 Apr 2026 23:33:56 -0700 Subject: [PATCH 2/3] fix_incorrect_compatiblity_msg_comet_sum --- .../org/apache/comet/serde/aggregates.scala | 2 +- .../scala/org/apache/comet/serde/strings.scala | 16 +++++++++++++++- .../scala/org/apache/comet/serde/unixtime.scala | 14 +++++++++++--- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/aggregates.scala b/spark/src/main/scala/org/apache/comet/serde/aggregates.scala index 35b08c47a1..944f067fe7 100644 --- a/spark/src/main/scala/org/apache/comet/serde/aggregates.scala +++ b/spark/src/main/scala/org/apache/comet/serde/aggregates.scala @@ -204,7 +204,7 @@ object CometAverage extends CometAggregateExpressionSerde[Average] { } object CometSum extends CometAggregateExpressionSerde[Sum] { - + override def convert( aggExpr: AggregateExpression, sum: Sum, diff --git a/spark/src/main/scala/org/apache/comet/serde/strings.scala b/spark/src/main/scala/org/apache/comet/serde/strings.scala index 968fe8cd69..7f0530f0cc 100644 --- a/spark/src/main/scala/org/apache/comet/serde/strings.scala +++ b/spark/src/main/scala/org/apache/comet/serde/strings.scala @@ -33,7 +33,7 @@ import org.apache.comet.serde.QueryPlanSerde.{createBinaryExpr, exprToProtoInter object CometStringRepeat extends CometExpressionSerde[StringRepeat] { - override def getCompatibleNotes(): Seq[String] = Seq( + override def getIncompatibleReasons(): Seq[String] = Seq( "A negative argument for the number of times to repeat throws an exception" + " instead of returning an empty string as Spark does") @@ -42,6 +42,14 @@ object CometStringRepeat extends CometExpressionSerde[StringRepeat] { inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { val children = expr.children + + children(1) match { + case Literal(count, _) if isNegativeNumber(count) => + withInfo(expr, "Negative repeat count is not supported") + return None + case _ => + } + val leftCast = Cast(children(0), StringType) val rightCast = Cast(children(1), LongType) val leftExpr = exprToProtoInternal(leftCast, inputs, binding) @@ -49,6 +57,12 @@ object CometStringRepeat extends CometExpressionSerde[StringRepeat] { val optExpr = scalarFunctionExprToProto("repeat", leftExpr, rightExpr) optExprWithInfo(optExpr, expr, leftCast, rightCast) } + + private def isNegativeNumber(value: Any): Boolean = value match { + case i: Int => i < 0 + case l: Long => l < 0 + case _ => false + } } class CometCaseConversionBase[T <: Expression](function: String) diff --git a/spark/src/main/scala/org/apache/comet/serde/unixtime.scala b/spark/src/main/scala/org/apache/comet/serde/unixtime.scala index e5eeb5b848..6b3a707e5d 100644 --- a/spark/src/main/scala/org/apache/comet/serde/unixtime.scala +++ b/spark/src/main/scala/org/apache/comet/serde/unixtime.scala @@ -29,12 +29,20 @@ import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithIn // https://github.com/apache/datafusion/issues/16594 object CometFromUnixTime extends CometExpressionSerde[FromUnixTime] { + override def getUnsupportedReasons(): Seq[String] = Seq( + "Only supports the default datetime format pattern `yyyy-MM-dd HH:mm:ss`") + override def getIncompatibleReasons(): Seq[String] = Seq( - "Only supports the default datetime format pattern `yyyy-MM-dd HH:mm:ss`." + - " DataFusion's valid timestamp range differs from Spark" + + "DataFusion's valid timestamp range differs from Spark" + " (https://github.com/apache/datafusion/issues/16594)") - override def getSupportLevel(expr: FromUnixTime): SupportLevel = Incompatible(None) + override def getSupportLevel(expr: FromUnixTime): SupportLevel = { + if (expr.format == Literal(TimestampFormatter.defaultPattern)) { + Incompatible(None) + } else { + Unsupported(None) + } + } override def convert( expr: FromUnixTime, From 2fdb835a34c453009b04a126458ab02f402baba3 Mon Sep 17 00:00:00 2001 From: Bhargava Vadlamani Date: Wed, 29 Apr 2026 00:43:01 -0700 Subject: [PATCH 3/3] fix_incorrect_compatiblity_msg_comet_sum --- .../org/apache/comet/serde/unixtime.scala | 38 +++++++++++-------- .../expressions/datetime/from_unix_time.sql | 4 +- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/unixtime.scala b/spark/src/main/scala/org/apache/comet/serde/unixtime.scala index 6b3a707e5d..e5e62d8fd9 100644 --- a/spark/src/main/scala/org/apache/comet/serde/unixtime.scala +++ b/spark/src/main/scala/org/apache/comet/serde/unixtime.scala @@ -37,10 +37,17 @@ object CometFromUnixTime extends CometExpressionSerde[FromUnixTime] { " (https://github.com/apache/datafusion/issues/16594)") override def getSupportLevel(expr: FromUnixTime): SupportLevel = { - if (expr.format == Literal(TimestampFormatter.defaultPattern)) { - Incompatible(None) - } else { - Unsupported(None) + expr.format match { + case Literal(fmt, _) => + val formatStr = fmt.toString + val defaultPattern = TimestampFormatter.defaultPattern + if (formatStr == defaultPattern) { + Incompatible(None) + } else { + Unsupported(Some(s"Datetime pattern format: $formatStr is unsupported")) + } + case _ => + Unsupported(Some("Datetime pattern format is unsupported")) } } @@ -56,17 +63,18 @@ object CometFromUnixTime extends CometExpressionSerde[FromUnixTime] { val formatExpr = exprToProtoInternal(Literal("%Y-%m-%d %H:%M:%S"), inputs, binding) val timeZone = exprToProtoInternal(Literal(expr.timeZoneId.orNull), inputs, binding) - if (expr.format != Literal(TimestampFormatter.defaultPattern)) { - withInfo(expr, "Datetime pattern format is unsupported") - None - } else if (secExpr.isDefined && formatExpr.isDefined) { - val timestampExpr = - scalarFunctionExprToProto("from_unixtime", Seq(secExpr, timeZone): _*) - val optExpr = scalarFunctionExprToProto("to_char", Seq(timestampExpr, formatExpr): _*) - optExprWithInfo(optExpr, expr, expr.sec, expr.format) - } else { - withInfo(expr, expr.sec, expr.format) - None + expr.format match { + case Literal(fmt, _) if fmt.toString != TimestampFormatter.defaultPattern => + withInfo(expr, "Datetime pattern format is unsupported") + None + case _ if secExpr.isDefined && formatExpr.isDefined => + val timestampExpr = + scalarFunctionExprToProto("from_unixtime", Seq(secExpr, timeZone): _*) + val optExpr = scalarFunctionExprToProto("to_char", Seq(timestampExpr, formatExpr): _*) + optExprWithInfo(optExpr, expr, expr.sec, expr.format) + case _ => + withInfo(expr, expr.sec, expr.format) + None } } } diff --git a/spark/src/test/resources/sql-tests/expressions/datetime/from_unix_time.sql b/spark/src/test/resources/sql-tests/expressions/datetime/from_unix_time.sql index a7b0960570..47c76d67aa 100644 --- a/spark/src/test/resources/sql-tests/expressions/datetime/from_unix_time.sql +++ b/spark/src/test/resources/sql-tests/expressions/datetime/from_unix_time.sql @@ -24,12 +24,12 @@ INSERT INTO test_from_unix_time VALUES (0), (1718451045), (-1), (NULL), (2147483 query expect_fallback(not fully compatible with Spark) SELECT from_unixtime(t) FROM test_from_unix_time -query expect_fallback(not fully compatible with Spark) +query expect_fallback(Datetime pattern format: yyyy-MM-dd is unsupported) SELECT from_unixtime(t, 'yyyy-MM-dd') FROM test_from_unix_time -- literal arguments query expect_fallback(not fully compatible with Spark) SELECT from_unixtime(0) -query expect_fallback(not fully compatible with Spark) +query expect_fallback(Datetime pattern format: yyyy-MM-dd is unsupported) SELECT from_unixtime(1718451045, 'yyyy-MM-dd')