From 39236415f3af6d85f375685fb6aeff6c216290a4 Mon Sep 17 00:00:00 2001 From: Junjia Ding Date: Thu, 19 Mar 2026 10:58:30 -0700 Subject: [PATCH 1/2] fix: move timezone string creation before startTimestamp to avoid nesting assertion FlatBuffers' `create(string:)` calls `notNested()` which asserts `!isNested`. In the `.timestamp` case of `toFBType()`, the timezone string was created inside the `startTimestamp`/`endTimestamp` table context, which has `isNested = true`. This causes a runtime assertion failure when writing any schema with `Timestamp` that has a timezone (e.g. `timestamp[us, UTC]`). The fix moves `fbb.create(string:)` before `startTimestamp()`, which is the standard FlatBuffers pattern: all child objects (strings, vectors, tables) must be created before starting their parent table. Made-with: Cursor --- Sources/Arrow/ArrowWriterHelper.swift | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Sources/Arrow/ArrowWriterHelper.swift b/Sources/Arrow/ArrowWriterHelper.swift index 7ecb3ab..4a95499 100644 --- a/Sources/Arrow/ArrowWriterHelper.swift +++ b/Sources/Arrow/ArrowWriterHelper.swift @@ -107,6 +107,7 @@ func toFBType( // swiftlint:disable:this cyclomatic_complexity function_body_len return .failure(.invalid("Unable to case to Time64")) case .timestamp: if let timestampType = arrowType as? ArrowTypeTimestamp { + let timezoneOffset = timestampType.timezone.map { fbb.create(string: $0) } let startOffset = org_apache_arrow_flatbuf_Timestamp.startTimestamp(&fbb) let fbUnit: org_apache_arrow_flatbuf_TimeUnit @@ -122,9 +123,8 @@ func toFBType( // swiftlint:disable:this cyclomatic_complexity function_body_len } org_apache_arrow_flatbuf_Timestamp.add(unit: fbUnit, &fbb) - if let timezone = timestampType.timezone { - let timezoneOffset = fbb.create(string: timezone) - org_apache_arrow_flatbuf_Timestamp.add(timezone: timezoneOffset, &fbb) + if let offset = timezoneOffset { + org_apache_arrow_flatbuf_Timestamp.add(timezone: offset, &fbb) } return .success(org_apache_arrow_flatbuf_Timestamp.endTimestamp(&fbb, start: startOffset)) From 67298bdb4b085e595a604b7dd9f2f18d2d36cf12 Mon Sep 17 00:00:00 2001 From: Junjia Ding Date: Fri, 20 Mar 2026 13:24:28 -0700 Subject: [PATCH 2/2] added tests to make sure timestamp with timezone will not crash --- Tests/ArrowTests/IPCTests.swift | 56 +++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/Tests/ArrowTests/IPCTests.swift b/Tests/ArrowTests/IPCTests.swift index 8496d50..58f973e 100644 --- a/Tests/ArrowTests/IPCTests.swift +++ b/Tests/ArrowTests/IPCTests.swift @@ -615,5 +615,61 @@ final class IPCFileReaderTests: XCTestCase { // swiftlint:disable:this type_body throw error } } + + func makeTimestampWithTimezoneDataset() throws -> (ArrowSchema, RecordBatch) { + let schema = ArrowSchema.Builder() + .addField("ts_utc", type: ArrowTypeTimestamp(.microseconds, timezone: "UTC"), isNullable: true) + .finish() + + let tsBuilder = try ArrowArrayBuilders.loadTimestampArrayBuilder(.microseconds, timezone: "UTC") + tsBuilder.append(1609459200000000) // 2021-01-01 00:00:00.000000 UTC + tsBuilder.append(nil) + tsBuilder.append(1609545600000000) // 2021-01-02 00:00:00.000000 UTC + + let tsHolder = ArrowArrayHolderImpl(try tsBuilder.finish()) + let result = RecordBatch.Builder() + .addColumn("ts_utc", arrowArray: tsHolder) + .finish() + switch result { + case .success(let recordBatch): + return (schema, recordBatch) + case .failure(let error): + throw error + } + } + + func testTimestampWithTimezoneInMemoryToFromStream() throws { + let dataset = try makeTimestampWithTimezoneDataset() + let writerInfo = ArrowWriter.Info(.recordbatch, schema: dataset.0, batches: [dataset.1]) + let arrowWriter = ArrowWriter() + switch arrowWriter.writeStreaming(writerInfo) { + case .success(let writeData): + let arrowReader = ArrowReader() + switch arrowReader.readStreaming(writeData) { + case .success(let result): + XCTAssertNotNil(result.schema) + let schema = result.schema! + XCTAssertEqual(schema.fields.count, 1) + XCTAssertEqual(schema.fields[0].name, "ts_utc") + XCTAssertEqual(schema.fields[0].type.info, ArrowType.ArrowTimestamp) + let tsType = schema.fields[0].type as? ArrowTypeTimestamp + XCTAssertNotNil(tsType) + XCTAssertEqual(tsType!.unit, .microseconds) + XCTAssertEqual(tsType!.timezone, "UTC") + XCTAssertEqual(result.batches.count, 1) + let recordBatch = result.batches[0] + XCTAssertEqual(recordBatch.length, 3) + let columns = recordBatch.columns + XCTAssertEqual(columns[0].nullCount, 1) + let tsVal = + "\((columns[0].array as! AsString).asString(0))" // swiftlint:disable:this force_cast + XCTAssertEqual(tsVal, "2021-01-01 00:00:00.000") + case .failure(let error): + throw error + } + case .failure(let error): + throw error + } + } } // swiftlint:disable:this file_length