diff --git a/docs/source/user-guide/latest/expressions.md b/docs/source/user-guide/latest/expressions.md index 3842148a43..df40933454 100644 --- a/docs/source/user-guide/latest/expressions.md +++ b/docs/source/user-guide/latest/expressions.md @@ -94,32 +94,33 @@ of expressions that be disabled. ## Date/Time Functions -| Expression | SQL | -| -------------- | ---------------------------- | -| DateAdd | `date_add` | -| DateDiff | `datediff` | -| DateFormat | `date_format` | -| DateSub | `date_sub` | -| DatePart | `date_part(field, source)` | -| Days | `days` | -| Extract | `extract(field FROM source)` | -| FromUnixTime | `from_unixtime` | -| Hour | `hour` | -| LastDay | `last_day` | -| Minute | `minute` | -| Second | `second` | -| TruncDate | `trunc` | -| TruncTimestamp | `date_trunc` | -| UnixDate | `unix_date` | -| UnixTimestamp | `unix_timestamp` | -| Year | `year` | -| Month | `month` | -| DayOfMonth | `day`/`dayofmonth` | -| DayOfWeek | `dayofweek` | -| WeekDay | `weekday` | -| DayOfYear | `dayofyear` | -| WeekOfYear | `weekofyear` | -| Quarter | `quarter` | +| Expression | SQL | +| -------------------------- | ---------------------------- | +| DateAdd | `date_add` | +| DateDiff | `datediff` | +| DateFormat | `date_format` | +| DateSub | `date_sub` | +| DatePart | `date_part(field, source)` | +| Days | `days` | +| Extract | `extract(field FROM source)` | +| FromUnixTime | `from_unixtime` | +| Hour | `hour` | +| LastDay | `last_day` | +| Minute | `minute` | +| Second | `second` | +| TruncDate | `trunc` | +| TruncTimestamp | `date_trunc` | +| UnixDate | `unix_date` | +| UnixTimestamp | `unix_timestamp` | +| Year | `year` | +| Month | `month` | +| DayOfMonth | `day`/`dayofmonth` | +| DayOfWeek | `dayofweek` | +| WeekDay | `weekday` | +| DayOfYear | `dayofyear` | +| WeekOfYear | `weekofyear` | +| Quarter | `quarter` | +| PreciseTimestampConversion | `window_time` | ## Math Expressions diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 3fc4adb623..6df5de1767 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -236,7 +236,8 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { classOf[WeekDay] -> CometWeekDay, classOf[DayOfYear] -> CometDayOfYear, classOf[WeekOfYear] -> CometWeekOfYear, - classOf[Quarter] -> CometQuarter) + classOf[Quarter] -> CometQuarter, + classOf[PreciseTimestampConversion] -> CometPreciseTimestampConversion) private val conversionExpressions: Map[Class[_ <: Expression], CometExpressionSerde[_]] = Map( classOf[Cast] -> CometCast) diff --git a/spark/src/main/scala/org/apache/comet/serde/datetime.scala b/spark/src/main/scala/org/apache/comet/serde/datetime.scala index cb3be75717..875c2cb06d 100644 --- a/spark/src/main/scala/org/apache/comet/serde/datetime.scala +++ b/spark/src/main/scala/org/apache/comet/serde/datetime.scala @@ -21,7 +21,7 @@ package org.apache.comet.serde import java.util.Locale -import org.apache.spark.sql.catalyst.expressions.{Attribute, DateAdd, DateDiff, DateFormatClass, DateFromUnixDate, DateSub, DayOfMonth, DayOfWeek, DayOfYear, Days, GetDateField, Hour, Hours, LastDay, Literal, MakeDate, Minute, Month, NextDay, Quarter, Second, SecondsToTimestamp, TruncDate, TruncTimestamp, UnixDate, UnixTimestamp, WeekDay, WeekOfYear, Year} +import org.apache.spark.sql.catalyst.expressions.{Attribute, DateAdd, DateDiff, DateFormatClass, DateFromUnixDate, DateSub, DayOfMonth, DayOfWeek, DayOfYear, Days, GetDateField, Hour, Hours, LastDay, Literal, MakeDate, Minute, Month, NextDay, PreciseTimestampConversion, Quarter, Second, SecondsToTimestamp, TruncDate, TruncTimestamp, UnixDate, UnixTimestamp, WeekDay, WeekOfYear, Year} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DateType, DoubleType, FloatType, IntegerType, LongType, StringType, TimestampNTZType, TimestampType} import org.apache.spark.unsafe.types.UTF8String @@ -635,6 +635,28 @@ object CometDateFormat extends CometExpressionSerde[DateFormatClass] { } } +object CometPreciseTimestampConversion extends CometExpressionSerde[PreciseTimestampConversion] { + override def getUnsupportedReasons(): Seq[String] = + Seq("Only conversions between TimestampType and LongType are supported") + + override def getSupportLevel(expr: PreciseTimestampConversion): SupportLevel = { + (expr.fromType, expr.toType) match { + case (TimestampType, LongType) | (LongType, TimestampType) => + Compatible() + case _ => + Unsupported(Some(s"PreciseTimestampConversion from ${expr.fromType} to ${expr.toType}")) + } + } + + override def convert( + expr: PreciseTimestampConversion, + inputs: Seq[Attribute], + binding: Boolean): Option[ExprOuterClass.Expr] = { + // Both types are i64 micros in Arrow, so no conversion is needed. + exprToProtoInternal(expr.child, inputs, binding) + } +} + /** * Converts a timestamp to the number of hours since Unix epoch (1970-01-01 00:00:00 UTC). This is * a V2 partition transform expression. diff --git a/spark/src/test/resources/sql-tests/expressions/datetime/window_time.sql b/spark/src/test/resources/sql-tests/expressions/datetime/window_time.sql new file mode 100644 index 0000000000..f524cc836c --- /dev/null +++ b/spark/src/test/resources/sql-tests/expressions/datetime/window_time.sql @@ -0,0 +1,33 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + + +statement +CREATE TABLE test_window_time(time timestamp, value int) USING parquet + +statement +INSERT INTO test_window_time VALUES (timestamp('2023-01-01 12:00:00'), 1), (timestamp('2023-01-01 12:05:00'), 2), (timestamp('2023-01-01 12:15:00'), 3), (NULL, 4) + +-- spark_answer_only: window() is expanded by Spark with unsupported KnownNullable + +-- basic window_time with tumbling window +query spark_answer_only +SELECT max(window_time(window)), sum(value) FROM (SELECT window(time, '10 minutes') AS window, value FROM test_window_time) GROUP BY window + +-- window_time with sliding window +query spark_answer_only +SELECT max(window_time(window)), count(value) FROM (SELECT window(time, '10 minutes', '5 minutes') AS window, value FROM test_window_time) GROUP BY window