// Reproduction: compares the Spark and Comet answers for a query that combines
// FIRST/LAST with COUNT(DISTINCT) over a view defined with ORDER BY.
// NOTE(review): the test name says "falls back", yet the check used is
// `checkSparkAnswerAndOperator` -- confirm which assertion is intended here.
test("partialMerge - FIRST/LAST with distinct aggregates falls back") {
  val rowCount = 10000
  val createViewSql = "CREATE TEMP VIEW v AS SELECT _1, _2 FROM tbl ORDER BY _1"
  val aggregateSql =
    "SELECT _2, FIRST(_1), LAST(_1), COUNT(DISTINCT _1)" +
      " FROM v GROUP BY _2 ORDER BY 1, 2, 3, 4"

  for {
    groupCount <- Seq(100)
    rowsPerBatch <- Seq(128)
  } {
    withSQLConf(
      SQLConf.COALESCE_PARTITIONS_ENABLED.key -> "true",
      CometConf.COMET_BATCH_SIZE.key -> rowsPerBatch.toString) {
      // `Random.nextInt()` may be negative and `%` keeps the sign of the dividend,
      // so group keys span -(groupCount - 1) to (groupCount - 1), not 0 until groupCount.
      val rows = for (i <- 0 until rowCount) yield (i, Random.nextInt() % groupCount)
      // NOTE(review): the meaning of the trailing `false` flag is not visible in this
      // snippet -- confirm against the signature of `withParquetTable`.
      withParquetTable(rows, "tbl", false) {
        withView("v") {
          sql(createViewSql)
          checkSparkAnswerAndOperator(aggregateSql)
        }
      }
    }
  }
}
== Results ==
!== Spark Answer - 199 == == Comet Answer - 199 ==
struct<_2:int,first(_1):int,last(_1):int,count(DISTINCT _1):bigint> struct<_2:int,first(_1):int,last(_1):int,count(DISTINCT _1):bigint>
![-99,169,9603,61] [-99,169,9804,61]
![-98,307,9492,48] [-98,307,9870,48]
![-97,153,9197,47] [-97,153,9175,47]
![-96,571,9486,56] [-96,571,9762,56]
![-95,243,9720,50] [-95,243,9642,50]
[-94,234,9747,45] [-94,234,9747,45]
![-93,637,9020,45] [-93,637,9515,45]
[-92,13,9754,51] [-92,13,9754,51]
![-91,75,9643,47] [-91,75,9872,47]
![-90,106,9971,50] [-90,106,9911,50]
[-89,323,9937,49] [-89,323,9937,49]
![-88,49,9577,43] [-88,49,9756,43]
![-87,134,9960,45] [-87,134,9998,45]
![-86,12,9180,47] [-86,12,9607,47]
![-85,220,9565,58] [-85,220,9533,58]
![-84,815,9734,48] [-84,815,9919,48]
Describe the bug
Found in #4003
Steps to reproduce
Expected behavior
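Comet should return the same results as Spark for this query. In the rows shown above, only the LAST(_1) column differs between the two answers.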
Additional context
No response