diff --git a/CMakeLists.txt b/CMakeLists.txt index 76587cb8..2ee10db2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,6 +71,11 @@ option( "Build the C/C++ tests with -fsanitize=address,undefined." OFF) +option( + QUESTDB_ENABLE_ARROW + "Build with Apache Arrow C Data Interface exports. Opt-in: pulls arrow-rs." + OFF) + # Build static and dynamic lib written in Rust by invoking `cargo`. # Imports `questdb_client` target. add_subdirectory(corrosion) @@ -81,6 +86,13 @@ endif() if(QUESTDB_ENABLE_INSECURE_SKIP_VERIFY) list(APPEND QUESTDB_CARGO_FEATURES insecure-skip-verify) endif() +if(QUESTDB_TESTS_AND_EXAMPLES AND NOT QUESTDB_ENABLE_ARROW) + message(STATUS "QUESTDB_TESTS_AND_EXAMPLES=ON: enabling QUESTDB_ENABLE_ARROW") + set(QUESTDB_ENABLE_ARROW ON) +endif() +if(QUESTDB_ENABLE_ARROW) + list(APPEND QUESTDB_CARGO_FEATURES arrow) +endif() if(QUESTDB_CARGO_FEATURES) corrosion_import_crate( MANIFEST_PATH questdb-rs-ffi/Cargo.toml @@ -94,6 +106,9 @@ endif() target_include_directories( questdb_client INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include) +if(QUESTDB_ENABLE_ARROW) + target_compile_definitions(questdb_client INTERFACE QUESTDB_CLIENT_ENABLE_ARROW) +endif() if(WIN32) set_target_properties( questdb_client-shared @@ -280,6 +295,26 @@ if (QUESTDB_TESTS_AND_EXAMPLES) compile_example( line_reader_c_example_columns examples/line_reader_c_example_columns.c) + compile_example( + line_reader_c_example_arrow + examples/line_reader_c_example_arrow.c) + + find_package(Arrow QUIET) + if(Arrow_FOUND) + compile_example( + line_sender_cpp_example_arrow + examples/line_sender_cpp_example_arrow.cpp) + target_link_libraries( + line_sender_cpp_example_arrow Arrow::arrow_shared) + compile_example( + line_reader_cpp_example_arrow + examples/line_reader_cpp_example_arrow.cpp) + target_link_libraries( + line_reader_cpp_example_arrow Arrow::arrow_shared) + else() + message(STATUS + "arrow-cpp not found; skipping line_{sender,reader}_cpp_example_arrow.") + endif() # Include Rust tests as 
part of the tests run add_test( @@ -358,6 +393,20 @@ if (QUESTDB_TESTS_AND_EXAMPLES) cpp_test/qwp_mock_server.cpp cpp_test/test_line_reader_mock.cpp) + # Apache Arrow C Data Interface tests. The QUESTDB_TESTS_AND_EXAMPLES + # check above sets QUESTDB_ENABLE_ARROW=ON (with a STATUS message) when + # tests are enabled, so these always build alongside the rest of the suite. + compile_test( + test_arrow_c + cpp_test/test_arrow_c.c) + compile_test( + test_arrow_egress + cpp_test/qwp_mock_server.cpp + cpp_test/test_arrow_egress.cpp) + compile_test( + test_arrow_ingress + cpp_test/test_arrow_ingress.cpp) + # System testing Python3 script. # This will download the latest QuestDB instance from Github, # thus will also require a Java 11 installation to run the tests. diff --git a/ci/compile.yaml b/ci/compile.yaml index 735aca07..1205011f 100644 --- a/ci/compile.yaml +++ b/ci/compile.yaml @@ -1,4 +1,12 @@ steps: + - bash: | + df -h / + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc \ + /opt/hostedtoolcache/CodeQL /usr/local/share/boost || true + sudo docker image prune --all --force >/dev/null 2>&1 || true + df -h / + condition: eq(variables['imageName'], 'ubuntu-latest') + displayName: "Free disk space (Microsoft-hosted ubuntu)" - script: | rustup update $(toolchain) rustup default $(toolchain) @@ -6,14 +14,12 @@ steps: displayName: "Update and set Rust toolchain" - script: | brew install numpy + python3 -m pip install --break-system-packages pyarrow polars condition: eq(variables['imageName'], 'macos-latest') - displayName: "Install numpy via brew on macOS" + displayName: "Install numpy + pyarrow + polars on macOS" - script: | python -m pip install --upgrade pip - pip install numpy - # hetzner-incus provisions numpy via apt (python3-numpy) before this - # template runs because Ubuntu 24.04+ enforces PEP 668 and rejects - # pip into the system interpreter. 
+ pip install numpy pyarrow polars condition: | and( ne(variables['imageName'], 'macos-latest'), diff --git a/ci/run_all_tests.py b/ci/run_all_tests.py index 5076e94f..f1c0a4a1 100644 --- a/ci/run_all_tests.py +++ b/ci/run_all_tests.py @@ -37,6 +37,9 @@ def main(): 'test_line_reader_mock', 'line_reader_c_smoke', 'test_line_reader', # live-broker; skips per-test when no broker reachable + 'test_arrow_c', + 'test_arrow_egress', + 'test_arrow_ingress', ] test_paths = [ (d, find_binary(d, name, exe_suffix)) @@ -64,7 +67,14 @@ def main(): '--', '--nocapture', cwd='questdb-rs') run_cmd('cargo', 'test', '--features=almost-all-features', '--', '--nocapture', cwd='questdb-rs') + run_cmd('cargo', 'test', + '--features=almost-all-features,arrow,polars', + '--', '--nocapture', cwd='questdb-rs') + run_cmd('cargo', 'test', '--no-default-features', + '--features=ring-crypto,tls-webpki-certs,sync-sender-qwp-ws,sync-reader-ws,arrow', + '--', '--nocapture', cwd='questdb-rs') run_cmd('cargo', 'test', cwd='questdb-rs-ffi') + run_cmd('cargo', 'test', '--features=arrow', cwd='questdb-rs-ffi') for _, path in test_paths: run_cmd(str(path)) run_cmd('python3', str(system_test_path), 'run', '--versions', qdb_v, '-v') diff --git a/ci/run_fuzz_pipeline.yaml b/ci/run_fuzz_pipeline.yaml index e667bc0a..4948a332 100644 --- a/ci/run_fuzz_pipeline.yaml +++ b/ci/run_fuzz_pipeline.yaml @@ -137,7 +137,8 @@ stages: - bash: | set -eux sudo apt-get update - sudo apt-get install -y --no-install-recommends cmake python3-numpy + sudo apt-get install -y --no-install-recommends cmake python3-numpy python3-pip + sudo python3 -m pip install --break-system-packages pyarrow polars # Image-provided JDK paths (see provision.sh's # `apt-get install -y openjdk-17-jdk openjdk-25-jdk maven`). 
JAVA_PATH_17="/usr/lib/jvm/java-17-openjdk-amd64" @@ -200,6 +201,13 @@ stages: - script: | python3 system_test/test.py run --repo ./questdb TestQwpWsFuzz -v displayName: "TestQwpWsFuzz" + - script: | + python3 system_test/test.py run --repo ./questdb \ + TestArrowEgressFuzz TestArrowIngressFuzz \ + TestArrowIngressExtraTypes TestArrowIngressUnsupportedTypes \ + TestArrowRoundTripFuzz TestArrowAlignment \ + TestArrowPolarsFuzz TestArrowPolarsPerDtype -v + displayName: "TestArrowFuzz" - task: ArchiveFiles@2 displayName: "Compress QuestDB server log on failure" condition: failed() @@ -277,6 +285,8 @@ stages: pool: vmImage: "ubuntu-latest" timeoutInMinutes: 30 + variables: + imageName: ubuntu-latest steps: - checkout: self fetchDepth: 1 diff --git a/ci/run_tests_pipeline.yaml b/ci/run_tests_pipeline.yaml index 8d921133..5f1a49fd 100644 --- a/ci/run_tests_pipeline.yaml +++ b/ci/run_tests_pipeline.yaml @@ -43,7 +43,7 @@ stages: pool: name: $(poolName) vmImage: $(imageName) - timeoutInMinutes: 60 + timeoutInMinutes: 90 steps: - checkout: self fetchDepth: 1 @@ -181,7 +181,7 @@ stages: # debian-installed packages because the wheel RECORD file is # missing). --break-system-packages overrides PEP 668. 
sudo apt-get install -y --no-install-recommends cmake python3-pip - sudo python3 -m pip install --break-system-packages 'numpy>=2' + sudo python3 -m pip install --break-system-packages 'numpy>=2' pyarrow polars JAVA_PATH_17="/usr/lib/jvm/java-17-openjdk-amd64" JAVA_PATH_25="/usr/lib/jvm/java-25-openjdk-amd64" for p in "$JAVA_PATH_17" "$JAVA_PATH_25"; do @@ -360,7 +360,8 @@ stages: - bash: | set -eux sudo apt-get update - sudo apt-get install -y --no-install-recommends cmake python3-numpy + sudo apt-get install -y --no-install-recommends cmake python3-numpy python3-pip + sudo python3 -m pip install --break-system-packages pyarrow polars JAVA_PATH_17="/usr/lib/jvm/java-17-openjdk-amd64" JAVA_PATH_25="/usr/lib/jvm/java-25-openjdk-amd64" for p in "$JAVA_PATH_17" "$JAVA_PATH_25"; do @@ -414,6 +415,13 @@ stages: - script: | python3 system_test/test.py run --repo ./questdb TestQwpWsFuzz -v displayName: "TestQwpWsFuzz" + - script: | + python3 system_test/test.py run --repo ./questdb \ + TestArrowEgressFuzz TestArrowIngressFuzz \ + TestArrowIngressExtraTypes TestArrowIngressUnsupportedTypes \ + TestArrowRoundTripFuzz TestArrowAlignment \ + TestArrowPolarsFuzz TestArrowPolarsPerDtype -v + displayName: "TestArrowWsFuzz" - task: ArchiveFiles@2 displayName: "Compress QuestDB server log on failure" condition: failed() @@ -444,6 +452,8 @@ stages: pool: vmImage: "ubuntu-latest" timeoutInMinutes: 30 + variables: + imageName: ubuntu-latest steps: - checkout: self fetchDepth: 1 diff --git a/cpp_test/test_arrow_c.c b/cpp_test/test_arrow_c.c new file mode 100644 index 00000000..97c9f7b7 --- /dev/null +++ b/cpp_test/test_arrow_c.c @@ -0,0 +1,530 @@ +#include +#include + +#include +#include +#include +#include +#include + +/* --------------------------------------------------------------------------- + * Test harness. 
+ * ------------------------------------------------------------------------- */ + +static int errors = 0; +static int tests = 0; + +#define TEST(name) static void name(void) + +#define CHECK(cond, msg) \ + do \ + { \ + bool check_pass_ = (cond); \ + if (!check_pass_) \ + { \ + fprintf(stderr, "FAIL [%s:%d]: %s\n", __FILE__, __LINE__, msg); \ + errors++; \ + } \ + } while (0) + +#define RUN(name) \ + do \ + { \ + int before = errors; \ + name(); \ + tests++; \ + if (errors == before) \ + { \ + fprintf(stderr, "PASS: %s\n", #name); \ + } \ + else \ + { \ + fprintf(stderr, "FAILED TEST: %s (%d new errors)\n", \ + #name, errors - before); \ + } \ + } while (0) + +struct PrivBytes +{ + void* values_buffer; + const void* buffers[3]; +}; + +static void release_array_with_priv(struct ArrowArray* arr) +{ + if (arr == NULL || arr->private_data == NULL) + return; + struct PrivBytes* pd = (struct PrivBytes*)arr->private_data; + free(pd->values_buffer); + free(pd); + arr->release = NULL; + arr->private_data = NULL; +} + +static void release_schema_noop(struct ArrowSchema* sch) +{ + if (sch == NULL) + return; + sch->release = NULL; +} + +static void build_primitive( + int64_t row_count, + size_t elem_size, + const void* values_bytes, + const char* format, + const char* name, + struct ArrowArray* out_arr, + struct ArrowSchema* out_sch) +{ + struct PrivBytes* pd = (struct PrivBytes*)calloc(1, sizeof(*pd)); + pd->values_buffer = malloc((size_t)row_count * elem_size); + memcpy(pd->values_buffer, values_bytes, (size_t)row_count * elem_size); + pd->buffers[0] = NULL; /* No validity bitmap. 
*/ + pd->buffers[1] = pd->values_buffer; + pd->buffers[2] = NULL; + + memset(out_arr, 0, sizeof(*out_arr)); + out_arr->length = row_count; + out_arr->null_count = 0; + out_arr->offset = 0; + out_arr->n_buffers = 2; + out_arr->n_children = 0; + out_arr->buffers = pd->buffers; + out_arr->release = release_array_with_priv; + out_arr->private_data = pd; + + memset(out_sch, 0, sizeof(*out_sch)); + out_sch->format = format; + out_sch->name = name; + out_sch->flags = ARROW_FLAG_NULLABLE; + out_sch->release = release_schema_noop; +} + +static void build_bool_bitpacked( + int64_t row_count, + const bool* values, + const char* name, + struct ArrowArray* out_arr, + struct ArrowSchema* out_sch) +{ + size_t n_bytes = ((size_t)row_count + 7) / 8; + struct PrivBytes* pd = (struct PrivBytes*)calloc(1, sizeof(*pd)); + pd->values_buffer = calloc(1, n_bytes); + uint8_t* packed = (uint8_t*)pd->values_buffer; + for (int64_t i = 0; i < row_count; ++i) + if (values[i]) + packed[i / 8] |= (uint8_t)(1u << (i % 8)); + pd->buffers[0] = NULL; + pd->buffers[1] = pd->values_buffer; + pd->buffers[2] = NULL; + + memset(out_arr, 0, sizeof(*out_arr)); + out_arr->length = row_count; + out_arr->null_count = 0; + out_arr->offset = 0; + out_arr->n_buffers = 2; + out_arr->n_children = 0; + out_arr->buffers = pd->buffers; + out_arr->release = release_array_with_priv; + out_arr->private_data = pd; + + memset(out_sch, 0, sizeof(*out_sch)); + out_sch->format = "b"; + out_sch->name = name; + out_sch->flags = ARROW_FLAG_NULLABLE; + out_sch->release = release_schema_noop; +} + +static line_sender_table_name make_table(const char* name) +{ + line_sender_error* err = NULL; + line_sender_table_name tbl; + line_sender_table_name_init(&tbl, strlen(name), name, &err); + if (err) + line_sender_error_free(err); + return tbl; +} + +static line_sender_buffer* fresh_qwp_buffer(void) +{ + return line_sender_buffer_new_qwp_ws(); +} + +TEST(test_tristate_egress_enum_values) +{ + CHECK(line_reader_arrow_batch_ok == 0, "ok = 
0"); + CHECK(line_reader_arrow_batch_end == 1, "end = 1"); + CHECK(line_reader_arrow_batch_error == 2, "error = 2"); +} + +TEST(test_appended_reader_error_codes_have_distinct_values) +{ + CHECK( + line_reader_error_schema_drift != line_reader_error_no_schema && + line_reader_error_no_schema != line_reader_error_arrow_export && + line_reader_error_arrow_export != line_reader_error_schema_drift, + "schema_drift / no_schema / arrow_export distinct"); + CHECK(line_reader_error_schema_drift > line_reader_error_failover_would_duplicate, + "schema_drift appended (not renumbered)"); +} + +TEST(test_appended_sender_error_codes_exist) +{ + CHECK(line_sender_error_arrow_unsupported_column_kind != + line_sender_error_arrow_ingest, + "sender error codes distinct"); +} + +TEST(test_egress_null_cursor_returns_error_tristate) +{ + struct ArrowArray arr; + struct ArrowSchema sch; + line_reader_error* err = NULL; + line_reader_arrow_batch_result rc = + line_reader_cursor_next_arrow_batch(NULL, &arr, &sch, &err); + CHECK(rc == line_reader_arrow_batch_error, "NULL cursor → error"); + CHECK(err != NULL, "err_out populated"); + if (err) + line_reader_error_free(err); +} + +TEST(test_egress_null_out_array_returns_error_tristate) +{ + struct ArrowSchema sch; + line_reader_error* err = NULL; + /* Even with a non-NULL cursor the contract is: out_array/out_schema + * must be non-NULL. We pass NULL cursor too here — the implementation + * is allowed to short-circuit on the first NULL it sees. 
*/ + line_reader_arrow_batch_result rc = + line_reader_cursor_next_arrow_batch(NULL, NULL, &sch, &err); + CHECK(rc == line_reader_arrow_batch_error, "NULL out_array → error"); + if (err) + line_reader_error_free(err); +} + +TEST(test_ingress_null_buffer_returns_false) +{ + struct ArrowArray arr; + struct ArrowSchema sch; + memset(&arr, 0, sizeof(arr)); + memset(&sch, 0, sizeof(sch)); + line_sender_error* err = NULL; + line_sender_table_name tbl = make_table("t"); + bool ok = line_sender_buffer_append_arrow(NULL, tbl, &arr, &sch, &err); + CHECK(!ok, "NULL buffer → false"); + CHECK(err != NULL, "err_out populated"); + if (err) + line_sender_error_free(err); +} + +TEST(test_ingress_null_array_returns_false) +{ + line_sender_buffer* buf = fresh_qwp_buffer(); + struct ArrowSchema sch; + memset(&sch, 0, sizeof(sch)); + line_sender_error* err = NULL; + bool ok = + line_sender_buffer_append_arrow(buf, make_table("t"), NULL, &sch, &err); + CHECK(!ok, "NULL array → false"); + CHECK(err != NULL, "err_out populated"); + if (err) + line_sender_error_free(err); + line_sender_buffer_free(buf); +} + +TEST(test_ingress_null_schema_returns_false) +{ + line_sender_buffer* buf = fresh_qwp_buffer(); + struct ArrowArray arr; + memset(&arr, 0, sizeof(arr)); + line_sender_error* err = NULL; + bool ok = + line_sender_buffer_append_arrow(buf, make_table("t"), &arr, NULL, &err); + CHECK(!ok, "NULL schema → false"); + CHECK(err != NULL, "err_out populated"); + if (err) + line_sender_error_free(err); + line_sender_buffer_free(buf); +} + +TEST(test_ingress_at_column_null_buffer_returns_false) +{ + struct ArrowArray arr; + struct ArrowSchema sch; + memset(&arr, 0, sizeof(arr)); + memset(&sch, 0, sizeof(sch)); + line_sender_error* err = NULL; + line_sender_column_name ts_col; + bool name_ok = + line_sender_column_name_init(&ts_col, strlen("ts"), "ts", &err); + CHECK(name_ok, "column name init"); + bool ok = line_sender_buffer_append_arrow_at_column( + NULL, make_table("t"), &arr, &sch, ts_col, 
&err); + CHECK(!ok, "NULL buffer → false"); + CHECK(err != NULL, "err_out populated"); + if (err) + line_sender_error_free(err); +} + +TEST(test_ingress_at_column_null_array_returns_false) +{ + line_sender_buffer* buf = fresh_qwp_buffer(); + struct ArrowSchema sch; + memset(&sch, 0, sizeof(sch)); + line_sender_error* err = NULL; + line_sender_column_name ts_col; + bool name_ok = + line_sender_column_name_init(&ts_col, strlen("ts"), "ts", &err); + CHECK(name_ok, "column name init"); + bool ok = line_sender_buffer_append_arrow_at_column( + buf, make_table("t"), NULL, &sch, ts_col, &err); + CHECK(!ok, "NULL array → false"); + CHECK(err != NULL, "err_out populated"); + if (err) + line_sender_error_free(err); + line_sender_buffer_free(buf); +} + +TEST(test_ingress_at_column_null_schema_returns_false) +{ + line_sender_buffer* buf = fresh_qwp_buffer(); + struct ArrowArray arr; + memset(&arr, 0, sizeof(arr)); + line_sender_error* err = NULL; + line_sender_column_name ts_col; + bool name_ok = + line_sender_column_name_init(&ts_col, strlen("ts"), "ts", &err); + CHECK(name_ok, "column name init"); + bool ok = line_sender_buffer_append_arrow_at_column( + buf, make_table("t"), &arr, NULL, ts_col, &err); + CHECK(!ok, "NULL schema → false"); + CHECK(err != NULL, "err_out populated"); + if (err) + line_sender_error_free(err); + line_sender_buffer_free(buf); +} + +static void run_append_strict_ok( + line_sender_buffer* buf, + line_sender_table_name tbl, + struct ArrowArray* arr, + struct ArrowSchema* sch, + const char* label) +{ + line_sender_error* err = NULL; + bool ok = line_sender_buffer_append_arrow(buf, tbl, arr, sch, &err); + if (!ok) + { + if (err) + { + size_t msg_len = 0; + const char* msg = line_sender_error_msg(err, &msg_len); + fprintf(stderr, "STRICT %s: %.*s\n", label, (int)msg_len, msg); + line_sender_error_free(err); + } + CHECK(ok, label); + if (arr->release) + arr->release(arr); + } + if (sch->release) + sch->release(sch); +} + +TEST(test_ingress_boolean_column) 
+{ + bool values[10] = { + true, false, true, false, true, false, true, false, true, false}; + struct ArrowArray arr; + struct ArrowSchema sch; + build_bool_bitpacked(10, values, "flag", &arr, &sch); + line_sender_buffer* buf = fresh_qwp_buffer(); + run_append_strict_ok( + buf, make_table("bool_t"), &arr, &sch, "bit-packed boolean strict ok"); + line_sender_buffer_free(buf); +} + +TEST(test_ingress_int8_int16_int32_int64_columns) +{ + /* Int8 */ + { + int8_t values[3] = {-1, 0, 127}; + struct ArrowArray arr; + struct ArrowSchema sch; + build_primitive(3, sizeof(int8_t), values, "c", "byte_col", &arr, &sch); + line_sender_buffer* buf = fresh_qwp_buffer(); + run_append_strict_ok( + buf, make_table("i8_t"), &arr, &sch, "int8 strict ok"); + line_sender_buffer_free(buf); + } + /* Int16 */ + { + int16_t values[3] = {-1234, 0, 31000}; + struct ArrowArray arr; + struct ArrowSchema sch; + build_primitive( + 3, sizeof(int16_t), values, "s", "short_col", &arr, &sch); + line_sender_buffer* buf = fresh_qwp_buffer(); + run_append_strict_ok( + buf, make_table("i16_t"), &arr, &sch, "int16 strict ok"); + line_sender_buffer_free(buf); + } + /* Int32 */ + { + int32_t values[3] = {-1, 0, 0x7FFFFFFF}; + struct ArrowArray arr; + struct ArrowSchema sch; + build_primitive(3, sizeof(int32_t), values, "i", "int_col", &arr, &sch); + line_sender_buffer* buf = fresh_qwp_buffer(); + run_append_strict_ok( + buf, make_table("i32_t"), &arr, &sch, "int32 strict ok"); + line_sender_buffer_free(buf); + } + /* Int64 */ + { + int64_t values[3] = {100, 200, 300}; + struct ArrowArray arr; + struct ArrowSchema sch; + build_primitive( + 3, sizeof(int64_t), values, "l", "long_col", &arr, &sch); + line_sender_buffer* buf = fresh_qwp_buffer(); + run_append_strict_ok( + buf, make_table("i64_t"), &arr, &sch, "int64 strict ok"); + line_sender_buffer_free(buf); + } +} + +TEST(test_ingress_float32_float64_columns) +{ + /* Float32 */ + { + float values[3] = {1.5f, -2.5f, 3.14f}; + struct ArrowArray arr; + struct 
ArrowSchema sch; + build_primitive(3, sizeof(float), values, "f", "f32_col", &arr, &sch); + line_sender_buffer* buf = fresh_qwp_buffer(); + run_append_strict_ok( + buf, make_table("f32_t"), &arr, &sch, "float32 strict ok"); + line_sender_buffer_free(buf); + } + /* Float64 */ + { + double values[3] = {1.5, -2.5, 3.14159}; + struct ArrowArray arr; + struct ArrowSchema sch; + build_primitive(3, sizeof(double), values, "g", "f64_col", &arr, &sch); + line_sender_buffer* buf = fresh_qwp_buffer(); + run_append_strict_ok( + buf, make_table("f64_t"), &arr, &sch, "float64 strict ok"); + line_sender_buffer_free(buf); + } +} + +TEST(test_ingress_timestamp_microseconds) +{ + /* Apache Arrow Timestamp(µs) format: "tsu:" or "tsu:UTC". */ + int64_t values[2] = {1700000000000000LL, 1700000000000001LL}; + struct ArrowArray arr; + struct ArrowSchema sch; + build_primitive(2, sizeof(int64_t), values, "tsu:UTC", "ts", &arr, &sch); + line_sender_buffer* buf = fresh_qwp_buffer(); + run_append_strict_ok( + buf, make_table("ts_t"), &arr, &sch, "timestamp(µs) strict ok"); + line_sender_buffer_free(buf); +} + +TEST(test_ingress_default_and_at_column_dispatch) +{ + int64_t values[2] = {10, 20}; + + /* Default append: server stamps each row on arrival. */ + { + struct ArrowArray arr; + struct ArrowSchema sch; + build_primitive(2, sizeof(int64_t), values, "l", "v", &arr, &sch); + line_sender_buffer* buf = fresh_qwp_buffer(); + line_sender_error* err = NULL; + bool ok = line_sender_buffer_append_arrow( + buf, make_table("dts_default"), &arr, &sch, &err); + if (!ok) + { + CHECK(err != NULL, "err_out populated on failure"); + if (err) + line_sender_error_free(err); + if (arr.release) + arr.release(&arr); + } + if (sch.release) + sch.release(&sch); + line_sender_buffer_free(buf); + } + + /* at_column variant: a missing ts column must be rejected as arrow_ingest. 
*/ + { + struct ArrowArray arr; + struct ArrowSchema sch; + build_primitive(2, sizeof(int64_t), values, "l", "v", &arr, &sch); + line_sender_buffer* buf = fresh_qwp_buffer(); + line_sender_error* err = NULL; + line_sender_column_name ts_col; + bool name_ok = + line_sender_column_name_init(&ts_col, strlen("missing"), "missing", &err); + CHECK(name_ok, "column name init"); + bool ok = line_sender_buffer_append_arrow_at_column( + buf, make_table("dts_at_col"), &arr, &sch, ts_col, &err); + CHECK(!ok, "missing ts column → false"); + if (err) + { + CHECK(line_sender_error_get_code(err) == line_sender_error_arrow_ingest, + "missing ts column → arrow_ingest"); + line_sender_error_free(err); + } + if (arr.release) + arr.release(&arr); + if (sch.release) + sch.release(&sch); + line_sender_buffer_free(buf); + } +} + +TEST(test_error_codes_survive_ffi_boundary) +{ + /* Triggering a real `arrow_unsupported_column_kind` from C alone + * would require constructing a complex unsupported type. Instead we + * verify the integer values are visible from C — the actual flow is + * exercised in the C++ ingress tests. 
*/ + int sender_code = (int)line_sender_error_arrow_unsupported_column_kind; + int ingest_code = (int)line_sender_error_arrow_ingest; + int drift_code = (int)line_reader_error_schema_drift; + int no_schema_code = (int)line_reader_error_no_schema; + int export_code = (int)line_reader_error_arrow_export; + CHECK(sender_code != ingest_code, "sender codes distinct"); + CHECK(drift_code != no_schema_code, "reader codes distinct"); + CHECK(no_schema_code != export_code, "reader codes distinct"); +} + +int main(void) +{ + RUN(test_tristate_egress_enum_values); + RUN(test_appended_reader_error_codes_have_distinct_values); + RUN(test_appended_sender_error_codes_exist); + RUN(test_egress_null_cursor_returns_error_tristate); + RUN(test_egress_null_out_array_returns_error_tristate); + RUN(test_ingress_null_buffer_returns_false); + RUN(test_ingress_null_array_returns_false); + RUN(test_ingress_null_schema_returns_false); + RUN(test_ingress_at_column_null_buffer_returns_false); + RUN(test_ingress_at_column_null_array_returns_false); + RUN(test_ingress_at_column_null_schema_returns_false); + RUN(test_ingress_boolean_column); + RUN(test_ingress_int8_int16_int32_int64_columns); + RUN(test_ingress_float32_float64_columns); + RUN(test_ingress_timestamp_microseconds); + RUN(test_ingress_default_and_at_column_dispatch); + RUN(test_error_codes_survive_ffi_boundary); + + fprintf(stderr, + "\ntest_arrow_c: ran %d tests, %d failure(s)\n", + tests, errors); + return errors == 0 ? 0 : 1; +} diff --git a/cpp_test/test_arrow_egress.cpp b/cpp_test/test_arrow_egress.cpp new file mode 100644 index 00000000..32cf1a88 --- /dev/null +++ b/cpp_test/test_arrow_egress.cpp @@ -0,0 +1,649 @@ +// Mock-server-driven exhaustive tests for the Arrow C Data Interface +// egress export. Drives `line_reader_cursor_next_arrow_batch` against +// `qwp_mock_server` (the same in-process WebSocket+QWP1 mock used by +// `test_line_reader_mock.cpp`) so every assertion runs without a live +// QuestDB instance. 
+ +#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN +#include "doctest.h" + +#include "qwp_mock_server.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace qm = qwp_mock; +namespace egress = questdb::egress; +namespace ingress = questdb::ingress; + +namespace +{ + +template +std::vector pack_le(const std::vector& vs) +{ + std::vector out; + out.reserve(vs.size() * sizeof(T)); + for (T v : vs) + { + const uint8_t* p = reinterpret_cast(&v); + out.insert(out.end(), p, p + sizeof(T)); + } + return out; +} + +// `reader + cursor` pair against an in-process mock. Move-only; both +// members RAII-release through their C++ wrappers. +struct ReaderHandles +{ + egress::reader reader; + egress::cursor cursor; +}; + +ReaderHandles open_cursor(const qm::MockServer& srv, const char* sql) +{ + const std::string conf = "ws::addr=" + srv.addr() + ";"; + egress::reader r{ingress::utf8_view{conf.data(), conf.size()}}; + auto c = r.execute(ingress::utf8_view{sql, std::strlen(sql)}); + return {std::move(r), std::move(c)}; +} + +// Shallow sanity check: the root array/schema and each of their direct +// children must have a release callback set (grandchildren are not visited). +void assert_release_chain_present(ArrowArray* a, ArrowSchema* s) +{ + REQUIRE(static_cast(a->release)); + REQUIRE(static_cast(s->release)); + for (int64_t i = 0; i < a->n_children; ++i) + { + REQUIRE(a->children[i] != nullptr); + REQUIRE(static_cast(a->children[i]->release)); + } + for (int64_t i = 0; i < s->n_children; ++i) + { + REQUIRE(s->children[i] != nullptr); + REQUIRE(static_cast(s->children[i]->release)); + } +} + +void release_pair(ArrowArray* a, ArrowSchema* s) +{ + if (a->release) + a->release(a); + if (s->release) + s->release(s); +} + +} // namespace + +// --------------------------------------------------------------------------- +// Smoke — handshake + empty result drives tristate to `_end` cleanly. 
+// --------------------------------------------------------------------------- + +TEST_CASE("arrow egress: empty stream returns _end without touching out_*") +{ + qm::Script s = { + qm::ActionSendServerInfo{qm::ROLE_PRIMARY, "tc", "n1"}, + qm::ActionAwaitQueryRequest{}, + qm::ActionSendResultEnd{}, + }; + qm::MockServer srv({s}); + auto h = open_cursor(srv, "select 1 from t"); + + // `next_arrow_batch` snapshots schema eagerly. With ZERO batches the + // adapter must EITHER: + // - throw `line_reader_error_no_schema` (when QWP protocol path + // reaches `as_arrow_reader` with no first batch), OR + // - return `nullopt` directly (when the inner pump terminates + // first). + try + { + auto b = h.cursor.next_arrow_batch(); + CHECK(!b.has_value()); + } + catch (const egress::line_reader_error&) + { + // _error path acceptable per the doc. + } +} + +// --------------------------------------------------------------------------- +// Single batch — Long column. Walk ArrowArray and ArrowSchema field-by-field +// and verify the release-callback chain. +// --------------------------------------------------------------------------- + +TEST_CASE("arrow egress: single Long batch — struct layout + release order") +{ + qm::ColumnSpec col_v{ + "v", qm::COL_LONG, + qm::fixed_column_bytes(3, pack_le({10, 20, 30}))}; + + qm::Script s = { + qm::ActionSendServerInfo{}, + qm::ActionAwaitQueryRequest{}, + qm::ActionSendBuilt{[col_v](int64_t rid) { + return qm::result_batch_frame(rid, 0, 1, 3, {col_v}); + }}, + qm::ActionSendResultEnd{}, + }; + qm::MockServer srv({s}); + auto h = open_cursor(srv, "select v from t"); + + auto _b = h.cursor.next_arrow_batch(); + REQUIRE(_b.has_value()); + auto& arr = _b->array; + auto& sch = _b->schema; + + // The egress export wraps the RecordBatch as a StructArray, so the + // outer ArrowArray represents the struct with N children. 
+ CHECK(arr.length == 3); + CHECK(arr.n_children == 1); + REQUIRE(arr.children != nullptr); + REQUIRE(arr.children[0] != nullptr); + CHECK(arr.children[0]->length == 3); + CHECK(arr.children[0]->n_buffers == 2); // validity + values + + REQUIRE(sch.format != nullptr); + CHECK(std::string(sch.format) == "+s"); // struct format code + CHECK(sch.n_children == 1); + REQUIRE(sch.children != nullptr); + REQUIRE(sch.children[0] != nullptr); + CHECK(std::string(sch.children[0]->format) == "l"); // Int64 + + assert_release_chain_present(&arr, &sch); + + // Subsequent call returns _end. + CHECK(!h.cursor.next_arrow_batch().has_value()); + + release_pair(&arr, &sch); +} + +// --------------------------------------------------------------------------- +// Per-kind coverage — drive a batch with every primitive kind in one +// schema and verify each child's format code. +// --------------------------------------------------------------------------- + +TEST_CASE("arrow egress: mixed kinds — Bool / Byte / Short / Int / Long / Float / Double") +{ + std::vector bool_body; + bool_body.push_back(0x00); + bool_body.push_back(0b00000010); // row0=false, row1=true + + qm::ColumnSpec c_bool{"b", qm::COL_BOOLEAN, std::move(bool_body)}; + qm::ColumnSpec c_byte{ + "by", qm::COL_BYTE, qm::fixed_column_bytes(2, pack_le({-1, 1}))}; + qm::ColumnSpec c_short{ + "sh", qm::COL_SHORT, qm::fixed_column_bytes(2, pack_le({-2, 2}))}; + qm::ColumnSpec c_int{ + "in", qm::COL_INT, qm::fixed_column_bytes(2, pack_le({-3, 3}))}; + qm::ColumnSpec c_long{ + "lo", qm::COL_LONG, qm::fixed_column_bytes(2, pack_le({-4, 4}))}; + qm::ColumnSpec c_f32{ + "f3", qm::COL_FLOAT, qm::fixed_column_bytes(2, pack_le({1.5f, -2.5f}))}; + qm::ColumnSpec c_f64{ + "f6", qm::COL_DOUBLE, qm::fixed_column_bytes(2, pack_le({1.5, -2.5}))}; + + auto cols = std::vector{ + c_bool, c_byte, c_short, c_int, c_long, c_f32, c_f64}; + + qm::Script s = { + qm::ActionSendServerInfo{}, + qm::ActionAwaitQueryRequest{}, + 
qm::ActionSendBuilt{[cols](int64_t rid) { + return qm::result_batch_frame(rid, 0, 1, 2, cols); + }}, + qm::ActionSendResultEnd{}, + }; + qm::MockServer srv({s}); + auto h = open_cursor(srv, "select * from t"); + + auto _b = h.cursor.next_arrow_batch(); + REQUIRE(_b.has_value()); + auto& arr = _b->array; + auto& sch = _b->schema; + + CHECK(arr.length == 2); + CHECK(arr.n_children == 7); + CHECK(sch.n_children == 7); + + const char* expected_formats[] = {"b", "c", "s", "i", "l", "f", "g"}; + for (int i = 0; i < 7; ++i) + { + REQUIRE(sch.children[i] != nullptr); + CHECK(std::string(sch.children[i]->format) == expected_formats[i]); + CHECK(arr.children[i]->length == 2); + } + + release_pair(&arr, &sch); +} + +TEST_CASE("arrow egress: TIMESTAMP / TIMESTAMP_NS / DATE — timezone-carrying format codes") +{ + qm::ColumnSpec c_ts{ + "ts", qm::COL_TIMESTAMP, + qm::fixed_column_bytes(2, pack_le({1700000000000000LL, 1700000000000001LL}))}; + qm::ColumnSpec c_ts_ns{ + "tn", qm::COL_TIMESTAMP_NANOS, + qm::fixed_column_bytes(2, pack_le({1700000000000000000LL, 1700000000000000001LL}))}; + qm::ColumnSpec c_date{ + "dt", qm::COL_DATE, + qm::fixed_column_bytes(2, pack_le({1700000000000LL, 1700000000001LL}))}; + + qm::Script s = { + qm::ActionSendServerInfo{}, + qm::ActionAwaitQueryRequest{}, + qm::ActionSendBuilt{[=](int64_t rid) { + return qm::result_batch_frame(rid, 0, 1, 2, {c_ts, c_ts_ns, c_date}); + }}, + qm::ActionSendResultEnd{}, + }; + qm::MockServer srv({s}); + auto h = open_cursor(srv, "select * from t"); + + auto _b = h.cursor.next_arrow_batch(); + REQUIRE(_b.has_value()); + auto& arr = _b->array; + auto& sch = _b->schema; + + CHECK(sch.n_children == 3); + REQUIRE(sch.children[0]->format != nullptr); + REQUIRE(sch.children[1]->format != nullptr); + REQUIRE(sch.children[2]->format != nullptr); + // Apache Arrow timestamp format codes: tsu:UTC / tsn:UTC / tsm:UTC. 
+ CHECK(std::string(sch.children[0]->format).find("tsu") == 0); + CHECK(std::string(sch.children[1]->format).find("tsn") == 0); + CHECK(std::string(sch.children[2]->format).find("tsm") == 0); + + release_pair(&arr, &sch); +} + +TEST_CASE("arrow egress: VARCHAR + BINARY — variable-length format codes") +{ + qm::ColumnSpec c_v{ + "v", qm::COL_VARCHAR, + qm::varlen_column_bytes({{'a'}, {}, {'b', 'c'}})}; + qm::ColumnSpec c_b{ + "b", qm::COL_BINARY, + qm::varlen_column_bytes({{0x01}, {}, {0xFF, 0x00}})}; + + qm::Script s = { + qm::ActionSendServerInfo{}, + qm::ActionAwaitQueryRequest{}, + qm::ActionSendBuilt{[=](int64_t rid) { + return qm::result_batch_frame(rid, 0, 1, 3, {c_v, c_b}); + }}, + qm::ActionSendResultEnd{}, + }; + qm::MockServer srv({s}); + auto h = open_cursor(srv, "select * from t"); + + auto _b = h.cursor.next_arrow_batch(); + REQUIRE(_b.has_value()); + auto& arr = _b->array; + auto& sch = _b->schema; + + CHECK(sch.n_children == 2); + CHECK(std::string(sch.children[0]->format) == "u"); // Utf8 + CHECK(std::string(sch.children[1]->format) == "z"); // Binary + + // VARCHAR / BINARY arrays have 3 buffers: validity, offsets, values. 
+ CHECK(arr.children[0]->n_buffers == 3); + CHECK(arr.children[1]->n_buffers == 3); + + release_pair(&arr, &sch); +} + +TEST_CASE("arrow egress: UUID — FixedSizeBinary(16) with arrow.uuid extension metadata") +{ + std::vector raw; + for (int i = 0; i < 32; ++i) + raw.push_back(static_cast(i)); + qm::ColumnSpec c_uuid{"id", qm::COL_UUID, qm::fixed_column_bytes(2, raw)}; + + qm::Script s = { + qm::ActionSendServerInfo{}, + qm::ActionAwaitQueryRequest{}, + qm::ActionSendBuilt{[=](int64_t rid) { + return qm::result_batch_frame(rid, 0, 1, 2, {c_uuid}); + }}, + qm::ActionSendResultEnd{}, + }; + qm::MockServer srv({s}); + auto h = open_cursor(srv, "select id from t"); + + auto _b = h.cursor.next_arrow_batch(); + REQUIRE(_b.has_value()); + auto& arr = _b->array; + auto& sch = _b->schema; + + REQUIRE(sch.children[0]->format != nullptr); + CHECK(std::string(sch.children[0]->format) == "w:16"); // FixedSizeBinary(16) + + // Metadata is encoded as a length-prefixed byte buffer in the spec. We + // don't decode it here exhaustively — but it MUST be non-NULL because + // the egress side stamps `ARROW:extension:name=arrow.uuid` on UUID + // fields. 
+ CHECK(sch.children[0]->metadata != nullptr); + + release_pair(&arr, &sch); +} + +TEST_CASE("arrow egress: LONG256 — FixedSizeBinary(32)") +{ + std::vector raw(64, 0xAA); + qm::ColumnSpec c_l256{"l", qm::COL_LONG256, qm::fixed_column_bytes(2, raw)}; + + qm::Script s = { + qm::ActionSendServerInfo{}, + qm::ActionAwaitQueryRequest{}, + qm::ActionSendBuilt{[=](int64_t rid) { + return qm::result_batch_frame(rid, 0, 1, 2, {c_l256}); + }}, + qm::ActionSendResultEnd{}, + }; + qm::MockServer srv({s}); + auto h = open_cursor(srv, "select l from t"); + + auto _b = h.cursor.next_arrow_batch(); + REQUIRE(_b.has_value()); + auto& arr = _b->array; + auto& sch = _b->schema; + CHECK(std::string(sch.children[0]->format) == "w:32"); + + release_pair(&arr, &sch); +} + +TEST_CASE("arrow egress: SYMBOL — Dictionary(UInt32, Utf8) with questdb.symbol metadata") +{ + qm::ColumnSpec c_sym{ + "sym", qm::COL_SYMBOL, + qm::symbol_column_bytes({0u, 1u, 0u})}; + + qm::Script s = { + qm::ActionSendServerInfo{}, + qm::ActionAwaitQueryRequest{}, + qm::ActionSendBuilt{[=](int64_t rid) { + return qm::result_batch_frame_with_dict( + rid, 0, 1, 3, {c_sym}, + /*dict_delta_start=*/0, + {"alpha", "beta"}); + }}, + qm::ActionSendResultEnd{}, + }; + qm::MockServer srv({s}); + auto h = open_cursor(srv, "select sym from t"); + + auto _b = h.cursor.next_arrow_batch(); + REQUIRE(_b.has_value()); + auto& arr = _b->array; + auto& sch = _b->schema; + + REQUIRE(sch.children[0]->format != nullptr); + // Dictionary-encoded — Arrow encodes the keys' format ("I" for UInt32) + // and exposes the values dictionary via .dictionary. 
+ REQUIRE(sch.children[0]->dictionary != nullptr); + REQUIRE(arr.children[0]->dictionary != nullptr); + CHECK(std::string(sch.children[0]->dictionary->format) == "u"); // Utf8 + + release_pair(&arr, &sch); +} + +TEST_CASE("arrow egress: DECIMAL64 / DECIMAL128 / DECIMAL256 — decimal format codes") +{ + qm::ColumnSpec c_d64{"d64", qm::COL_DECIMAL64, + qm::decimal64_column_bytes({12345, 6789}, 2)}; + + std::vector> dec128_values(2); + qm::ColumnSpec c_d128{"d128", qm::COL_DECIMAL128, + qm::decimal128_column_bytes(dec128_values, 5)}; + + std::vector> dec256_values(2); + qm::ColumnSpec c_d256{"d256", qm::COL_DECIMAL256, + qm::decimal256_column_bytes(dec256_values, 7)}; + + qm::Script s = { + qm::ActionSendServerInfo{}, + qm::ActionAwaitQueryRequest{}, + qm::ActionSendBuilt{[=](int64_t rid) { + return qm::result_batch_frame(rid, 0, 1, 2, {c_d64, c_d128, c_d256}); + }}, + qm::ActionSendResultEnd{}, + }; + qm::MockServer srv({s}); + auto h = open_cursor(srv, "select * from t"); + + auto _b = h.cursor.next_arrow_batch(); + REQUIRE(_b.has_value()); + auto& arr = _b->array; + auto& sch = _b->schema; + + // Arrow decimal format: "d:precision,scale" or "d:precision,scale,bitwidth". 
+ REQUIRE(sch.children[0]->format != nullptr); + REQUIRE(sch.children[1]->format != nullptr); + REQUIRE(sch.children[2]->format != nullptr); + CHECK(std::string(sch.children[0]->format).rfind("d:", 0) == 0); + CHECK(std::string(sch.children[1]->format).rfind("d:", 0) == 0); + CHECK(std::string(sch.children[2]->format).rfind("d:", 0) == 0); + + release_pair(&arr, &sch); +} + +TEST_CASE("arrow egress: DOUBLE_ARRAY — nested List(Float64)") +{ + std::vector> rows = { + qm::ArrayRow{{3}, pack_le({1.0, 2.0, 3.0})}, + qm::ArrayRow{{2}, pack_le({10.0, 20.0})}, + }; + qm::ColumnSpec c_arr{"a", qm::COL_DOUBLE_ARRAY, + qm::array_column_bytes(rows)}; + + qm::Script s = { + qm::ActionSendServerInfo{}, + qm::ActionAwaitQueryRequest{}, + qm::ActionSendBuilt{[=](int64_t rid) { + return qm::result_batch_frame(rid, 0, 1, 2, {c_arr}); + }}, + qm::ActionSendResultEnd{}, + }; + qm::MockServer srv({s}); + auto h = open_cursor(srv, "select a from t"); + + auto _b = h.cursor.next_arrow_batch(); + REQUIRE(_b.has_value()); + auto& arr = _b->array; + auto& sch = _b->schema; + + // List(Float64) — format "+l" with a single child of format "g". + REQUIRE(sch.children[0]->format != nullptr); + CHECK(std::string(sch.children[0]->format) == "+l"); + REQUIRE(sch.children[0]->n_children == 1); + REQUIRE(sch.children[0]->children[0] != nullptr); + CHECK(std::string(sch.children[0]->children[0]->format) == "g"); + + release_pair(&arr, &sch); +} + +// --------------------------------------------------------------------------- +// Tristate contract — on _end / _error the out_array / out_schema MUST +// stay untouched. 
+// --------------------------------------------------------------------------- + +TEST_CASE("arrow egress: stream exhaustion — second call returns nullopt") +{ + qm::ColumnSpec c{"v", qm::COL_LONG, + qm::fixed_column_bytes(1, pack_le({42}))}; + qm::Script s = { + qm::ActionSendServerInfo{}, + qm::ActionAwaitQueryRequest{}, + qm::ActionSendBuilt{[=](int64_t rid) { + return qm::result_batch_frame(rid, 0, 1, 1, {c}); + }}, + qm::ActionSendResultEnd{}, + }; + qm::MockServer srv({s}); + auto h = open_cursor(srv, "select v from t"); + + auto first = h.cursor.next_arrow_batch(); + REQUIRE(first.has_value()); + release_pair(&first->array, &first->schema); + + CHECK(!h.cursor.next_arrow_batch().has_value()); +} + +TEST_CASE("arrow egress: schema drift — dtype change between batches throws schema_drift") +{ + qm::ColumnSpec b1_col{ + "v", qm::COL_LONG, + qm::fixed_column_bytes(2, pack_le({10, 20}))}; + qm::ColumnSpec b2_col{ + "v", qm::COL_INT, + qm::fixed_column_bytes(2, pack_le({30, 40}))}; + qm::Script s = { + qm::ActionSendServerInfo{}, + qm::ActionAwaitQueryRequest{}, + qm::ActionSendBuilt{[b1_col](int64_t rid) { + return qm::result_batch_frame(rid, 0, 1, 2, {b1_col}); + }}, + qm::ActionSendBuilt{[b2_col](int64_t rid) { + return qm::result_batch_frame(rid, 1, 2, 2, {b2_col}); + }}, + qm::ActionSendResultEnd{}, + }; + qm::MockServer srv({s}); + auto h = open_cursor(srv, "select v from t"); + + auto first = h.cursor.next_arrow_batch(); + REQUIRE(first.has_value()); + CHECK(first->array.length == 2); + CHECK(std::string(first->schema.children[0]->format) == "l"); + release_pair(&first->array, &first->schema); + + try + { + (void)h.cursor.next_arrow_batch(); + FAIL("expected schema_drift on second batch with changed dtype"); + } + catch (const egress::line_reader_error& e) + { + CHECK(e.code() == egress::error_code::schema_drift); + } +} + +TEST_CASE("arrow egress: schema drift — column rename between batches throws schema_drift") +{ + qm::ColumnSpec b1_col{ + "v", 
qm::COL_LONG, + qm::fixed_column_bytes(1, pack_le({1}))}; + qm::ColumnSpec b2_col{ + "w", qm::COL_LONG, + qm::fixed_column_bytes(1, pack_le({2}))}; + qm::Script s = { + qm::ActionSendServerInfo{}, + qm::ActionAwaitQueryRequest{}, + qm::ActionSendBuilt{[b1_col](int64_t rid) { + return qm::result_batch_frame(rid, 0, 1, 1, {b1_col}); + }}, + qm::ActionSendBuilt{[b2_col](int64_t rid) { + return qm::result_batch_frame(rid, 1, 2, 1, {b2_col}); + }}, + qm::ActionSendResultEnd{}, + }; + qm::MockServer srv({s}); + auto h = open_cursor(srv, "select v from t"); + + auto first = h.cursor.next_arrow_batch(); + REQUIRE(first.has_value()); + release_pair(&first->array, &first->schema); + + try + { + (void)h.cursor.next_arrow_batch(); + FAIL("expected schema_drift on column rename"); + } + catch (const egress::line_reader_error& e) + { + CHECK(e.code() == egress::error_code::schema_drift); + } +} + +TEST_CASE("arrow egress: schema drift — column count change throws schema_drift") +{ + qm::ColumnSpec b1_v{ + "v", qm::COL_LONG, + qm::fixed_column_bytes(1, pack_le({1}))}; + qm::ColumnSpec b2_v{ + "v", qm::COL_LONG, + qm::fixed_column_bytes(1, pack_le({2}))}; + qm::ColumnSpec b2_extra{ + "extra", qm::COL_INT, + qm::fixed_column_bytes(1, pack_le({3}))}; + qm::Script s = { + qm::ActionSendServerInfo{}, + qm::ActionAwaitQueryRequest{}, + qm::ActionSendBuilt{[b1_v](int64_t rid) { + return qm::result_batch_frame(rid, 0, 1, 1, {b1_v}); + }}, + qm::ActionSendBuilt{[b2_v, b2_extra](int64_t rid) { + return qm::result_batch_frame(rid, 1, 2, 1, {b2_v, b2_extra}); + }}, + qm::ActionSendResultEnd{}, + }; + qm::MockServer srv({s}); + auto h = open_cursor(srv, "select * from t"); + + auto first = h.cursor.next_arrow_batch(); + REQUIRE(first.has_value()); + release_pair(&first->array, &first->schema); + + try + { + (void)h.cursor.next_arrow_batch(); + FAIL("expected schema_drift on column count change"); + } + catch (const egress::line_reader_error& e) + { + CHECK(e.code() == 
egress::error_code::schema_drift); + } +} + +TEST_CASE("arrow egress: schema drift — same schema across batches does NOT drift") +{ + qm::ColumnSpec b_col{ + "v", qm::COL_LONG, + qm::fixed_column_bytes(2, pack_le({10, 20}))}; + qm::Script s = { + qm::ActionSendServerInfo{}, + qm::ActionAwaitQueryRequest{}, + qm::ActionSendBuilt{[b_col](int64_t rid) { + return qm::result_batch_frame(rid, 0, 1, 2, {b_col}); + }}, + qm::ActionSendBuilt{[b_col](int64_t rid) { + return qm::result_batch_frame(rid, 1, 2, 2, {b_col}); + }}, + qm::ActionSendResultEnd{}, + }; + qm::MockServer srv({s}); + auto h = open_cursor(srv, "select v from t"); + + auto first = h.cursor.next_arrow_batch(); + REQUIRE(first.has_value()); + release_pair(&first->array, &first->schema); + + auto second = h.cursor.next_arrow_batch(); + REQUIRE(second.has_value()); + CHECK(second->array.length == 2); + release_pair(&second->array, &second->schema); + + CHECK(!h.cursor.next_arrow_batch().has_value()); +} + +// Tristate / NULL-pointer contract tests for the C ABI live in +// `test_arrow_c.c`. The C++ wrapper returns `std::optional` +// directly, so those cases are unrepresentable at the call site. diff --git a/cpp_test/test_arrow_ingress.cpp b/cpp_test/test_arrow_ingress.cpp new file mode 100644 index 00000000..0be693dc --- /dev/null +++ b/cpp_test/test_arrow_ingress.cpp @@ -0,0 +1,450 @@ +// Exhaustive tests for the Arrow C Data Interface ingress export +// (`line_sender_buffer_append_arrow`). The buffer-level path is +// network-free — we construct ArrowArray / ArrowSchema in-process and +// validate Buffer accumulation via `line_sender_buffer_size` and the +// new error codes (`arrow_unsupported_column_kind` / +// `arrow_ingest`). + +#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN +#include "doctest.h" + +#include + +#include +#include +#include +#include +#include + +namespace +{ + +// Owner for heap allocations referenced by a hand-built ArrowArray. 
We +// register `release_owner` as the array's release callback; arrow-rs's +// `from_ffi` calls it when the imported ArrayData is dropped (consumed +// by `append_arrow`). +struct Owner +{ + std::vector>> buffers_storage; + std::vector buffer_ptrs; + std::vector> children_storage; + std::vector children_ptrs; +}; + +void release_owner(ArrowArray* arr) +{ + if (!arr || !arr->private_data) + return; + delete static_cast(arr->private_data); + arr->release = nullptr; + arr->private_data = nullptr; +} + +void schema_release_noop(ArrowSchema* sch) +{ + if (sch) + sch->release = nullptr; +} + +// Materialize an owner-backed ArrowArray. `validity` is optional; if +// absent the validity buffer slot is NULL and `null_count = 0`. +ArrowArray make_array( + int64_t length, + int64_t null_count, + std::vector>> buffers) +{ + auto owner = std::make_unique(); + owner->buffers_storage = std::move(buffers); + for (auto& buf : owner->buffers_storage) + { + owner->buffer_ptrs.push_back(buf ? buf->data() : nullptr); + } + + ArrowArray arr; + std::memset(&arr, 0, sizeof(arr)); + arr.length = length; + arr.null_count = null_count; + arr.n_buffers = static_cast(owner->buffer_ptrs.size()); + arr.buffers = owner->buffer_ptrs.data(); + arr.release = release_owner; + arr.private_data = owner.release(); + return arr; +} + +ArrowSchema make_schema(const char* format, const char* name) +{ + ArrowSchema sch; + std::memset(&sch, 0, sizeof(sch)); + sch.format = format; + sch.name = name; + sch.flags = ARROW_FLAG_NULLABLE; + sch.release = schema_release_noop; + return sch; +} + +template +std::shared_ptr> pack_le(const std::vector& vs) +{ + auto out = std::make_shared>(); + out->reserve(vs.size() * sizeof(T)); + for (T v : vs) + { + const uint8_t* p = reinterpret_cast(&v); + out->insert(out->end(), p, p + sizeof(T)); + } + return out; +} + +namespace qdb = questdb::ingress; + +void append_ok( + qdb::line_sender_buffer& buf, + qdb::table_name_view tbl, + ArrowArray& arr, + ArrowSchema& sch) +{ + 
const size_t size_before = buf.size(); + const size_t row_count_before = buf.row_count(); + try + { + buf.append_arrow(tbl, arr, sch); + } + catch (const qdb::line_sender_error& e) + { + FAIL("append_arrow threw: " << e.what()); + } + if (sch.release) + sch.release(&sch); + CHECK(buf.size() > size_before); + CHECK(buf.row_count() > row_count_before); +} + +void append_expect_error( + qdb::line_sender_buffer& buf, + qdb::table_name_view tbl, + ArrowArray& arr, + ArrowSchema& sch, + qdb::line_sender_error_code expected_code) +{ + bool thrown = false; + try + { + buf.append_arrow(tbl, arr, sch); + } + catch (const qdb::line_sender_error& e) + { + thrown = true; + CHECK(e.code() == expected_code); + } + REQUIRE(thrown); + if (arr.release) + arr.release(&arr); + if (sch.release) + sch.release(&sch); +} + +} // namespace + +// NULL-pointer / contract tests for the C ABI live in `test_arrow_c.c`. +// The C++ wrapper takes references and validated views, so equivalents +// here would be untestable at compile time. + +// --------------------------------------------------------------------------- +// Primitive type dispatch — each Arrow format code routes to the right +// QuestDB column setter. +// --------------------------------------------------------------------------- + +TEST_CASE("arrow ingress: Boolean column") +{ + auto buf = qdb::line_sender_buffer::qwp_ws(); + // Boolean values are bit-packed in Arrow C ABI: 1 byte per 8 rows. 
+ auto values = std::make_shared>(std::vector{0b00000101}); + auto arr = make_array(3, 0, {nullptr, values}); + auto sch = make_schema("b", "flag"); + append_ok(buf, "t_bool", arr, sch); +} + +TEST_CASE("arrow ingress: Int8 / Int16 / Int32 / Int64 columns") +{ + { + auto buf = qdb::line_sender_buffer::qwp_ws(); + auto col = pack_le({-1, 0, 127}); + auto arr = make_array(3, 0, {nullptr, col}); + auto sch = make_schema("c", "by"); + append_ok(buf, "t_i8", arr, sch); + } + { + auto buf = qdb::line_sender_buffer::qwp_ws(); + auto col = pack_le({-1234, 0, 31000}); + auto arr = make_array(3, 0, {nullptr, col}); + auto sch = make_schema("s", "sh"); + append_ok(buf, "t_i16", arr, sch); + } + { + auto buf = qdb::line_sender_buffer::qwp_ws(); + auto col = pack_le({-1, 0, 0x7FFFFFFF}); + auto arr = make_array(3, 0, {nullptr, col}); + auto sch = make_schema("i", "in"); + append_ok(buf, "t_i32", arr, sch); + } + { + auto buf = qdb::line_sender_buffer::qwp_ws(); + auto col = pack_le({-1, 0, 0x7FFFFFFF'FFFFFFFFLL}); + auto arr = make_array(3, 0, {nullptr, col}); + auto sch = make_schema("l", "lo"); + append_ok(buf, "t_i64", arr, sch); + } +} + +TEST_CASE("arrow ingress: Float32 / Float64 columns") +{ + { + auto buf = qdb::line_sender_buffer::qwp_ws(); + auto col = pack_le({1.5f, -2.5f, 3.14f}); + auto arr = make_array(3, 0, {nullptr, col}); + auto sch = make_schema("f", "f3"); + append_ok(buf, "t_f32", arr, sch); + } + { + auto buf = qdb::line_sender_buffer::qwp_ws(); + auto col = pack_le({1.5, -2.5, 3.14159}); + auto arr = make_array(3, 0, {nullptr, col}); + auto sch = make_schema("g", "f6"); + append_ok(buf, "t_f64", arr, sch); + } +} + +TEST_CASE("arrow ingress: UInt16 + questdb.column_type=char routes to column_char") +{ + auto buf = qdb::line_sender_buffer::qwp_ws(); + auto col = pack_le({0x41, 0x42, 0x43}); + auto arr = make_array(3, 0, {nullptr, col}); + auto sch = make_schema("S", "c"); // Arrow "S" = UInt16 + // Build an Arrow-spec metadata blob with one key/value: + // 
{key: "questdb.column_type", value: "char"}. + // Arrow spec layout: i32 n_keys, then per pair: i32 key_len, key bytes, i32 val_len, val bytes. + // We use a static buffer that outlives the call. + static const char md[] = + "\x01\x00\x00\x00" // n=1 + "\x13\x00\x00\x00" + "questdb.column_type" + "\x04\x00\x00\x00" + "char"; + sch.metadata = md; + append_ok(buf, "t_char", arr, sch); +} + +TEST_CASE("arrow ingress: UInt32 + questdb.column_type=ipv4 routes to column_ipv4") +{ + auto buf = qdb::line_sender_buffer::qwp_ws(); + auto col = pack_le({0x0A000001u, 0xC0A80001u}); + auto arr = make_array(2, 0, {nullptr, col}); + auto sch = make_schema("I", "ip"); + static const char md[] = + "\x01\x00\x00\x00" + "\x13\x00\x00\x00questdb.column_type" + "\x04\x00\x00\x00ipv4"; + sch.metadata = md; + append_ok(buf, "t_ipv4", arr, sch); +} + +TEST_CASE("arrow ingress: Utf8 / Binary / LargeUtf8 / LargeBinary") +{ + auto build_utf8 = []() { + auto offsets = std::make_shared>(); + for (int32_t off : {0, 5, 5, 7}) + { + const uint8_t* p = reinterpret_cast(&off); + offsets->insert(offsets->end(), p, p + 4); + } + auto data = std::make_shared>( + std::vector{'h', 'e', 'l', 'l', 'o', 'y', 'o'}); + return std::make_pair(offsets, data); + }; + + { + auto buf = qdb::line_sender_buffer::qwp_ws(); + auto pair = build_utf8(); + auto arr = make_array(3, 0, {nullptr, pair.first, pair.second}); + auto sch = make_schema("u", "name"); + append_ok(buf, "t_utf8", arr, sch); + } + { + auto buf = qdb::line_sender_buffer::qwp_ws(); + auto pair = build_utf8(); + auto arr = make_array(3, 0, {nullptr, pair.first, pair.second}); + auto sch = make_schema("z", "blob"); + append_ok(buf, "t_binary", arr, sch); + } +} + +TEST_CASE("arrow ingress: FixedSizeBinary(16) + arrow.uuid extension → column_uuid") +{ + auto buf = qdb::line_sender_buffer::qwp_ws(); + auto data = std::make_shared>(); + for (int i = 0; i < 32; ++i) + data->push_back(static_cast(i)); + auto arr = make_array(2, 0, {nullptr, data}); + auto sch 
= make_schema("w:16", "id"); + static const char md[] = + "\x01\x00\x00\x00" + "\x14\x00\x00\x00" + "ARROW:extension:name" + "\x0A\x00\x00\x00" + "arrow.uuid"; + sch.metadata = md; + append_ok(buf, "t_uuid", arr, sch); +} + +TEST_CASE("arrow ingress: FixedSizeBinary(16) without UUID metadata → ArrowUnsupportedColumnKind") +{ + auto buf = qdb::line_sender_buffer::qwp_ws(); + auto data = std::make_shared>(std::vector(16, 0)); + auto arr = make_array(1, 0, {nullptr, data}); + auto sch = make_schema("w:16", "id"); + append_expect_error( + buf, + "t_unsup", + arr, + sch, + qdb::line_sender_error_code::arrow_unsupported_column_kind); +} + +TEST_CASE("arrow ingress: FixedSizeBinary(32) → column_long256") +{ + auto buf = qdb::line_sender_buffer::qwp_ws(); + auto data = std::make_shared>(std::vector(64, 0xAB)); + auto arr = make_array(2, 0, {nullptr, data}); + auto sch = make_schema("w:32", "l256"); + append_ok(buf, "t_l256", arr, sch); +} + +TEST_CASE("arrow ingress: Timestamp(µs) / Timestamp(ns) / Timestamp(ms)") +{ + auto build_ts_col = [](const char* fmt, int64_t v0, int64_t v1) { + auto buf = qdb::line_sender_buffer::qwp_ws(); + auto col = pack_le({v0, v1}); + auto arr = make_array(2, 0, {nullptr, col}); + auto sch = make_schema(fmt, "ts"); + append_ok(buf, "t_ts", arr, sch); + }; + build_ts_col("tsu:UTC", 1700000000000000LL, 1700000000000001LL); + build_ts_col("tsn:UTC", 1700000000000000000LL, 1700000000000000001LL); + build_ts_col("tsm:UTC", 1700000000000LL, 1700000000001LL); +} + +// --------------------------------------------------------------------------- +// Designated-timestamp dispatch. +// --------------------------------------------------------------------------- + +TEST_CASE("arrow ingress: DTS=Column picks per-row ts from the named ts column") +{ + auto buf = qdb::line_sender_buffer::qwp_ws(); + + // Two columns: ts (Timestamp µs UTC) + v (Int64). 
+ auto ts_col = pack_le({1700000000000000LL, 1700000000000001LL}); + auto v_col = pack_le({10, 20}); + + auto ts_arr = std::make_unique(make_array(2, 0, {nullptr, ts_col})); + auto v_arr = std::make_unique(make_array(2, 0, {nullptr, v_col})); + + auto ts_sch = std::make_unique(make_schema("tsu:UTC", "ts")); + auto v_sch = std::make_unique(make_schema("l", "v")); + + // Build the outer struct. + Owner* outer_owner = new Owner; + outer_owner->children_storage.push_back(std::move(ts_arr)); + outer_owner->children_storage.push_back(std::move(v_arr)); + outer_owner->children_ptrs.push_back(outer_owner->children_storage[0].get()); + outer_owner->children_ptrs.push_back(outer_owner->children_storage[1].get()); + + ArrowArray outer_arr; + std::memset(&outer_arr, 0, sizeof(outer_arr)); + outer_arr.length = 2; + outer_arr.n_buffers = 1; // struct has 1 buffer: the validity bitmap + outer_arr.n_children = 2; + outer_arr.children = outer_owner->children_ptrs.data(); + outer_arr.release = release_owner; + outer_arr.private_data = outer_owner; + static const void* outer_buf_slot[1] = {nullptr}; + outer_arr.buffers = outer_buf_slot; + + ArrowSchema outer_sch; + std::memset(&outer_sch, 0, sizeof(outer_sch)); + outer_sch.format = "+s"; + outer_sch.n_children = 2; + static ArrowSchema* child_schema_ptrs[2]; + child_schema_ptrs[0] = ts_sch.get(); + child_schema_ptrs[1] = v_sch.get(); + outer_sch.children = child_schema_ptrs; + outer_sch.release = schema_release_noop; + + try + { + buf.append_arrow( + "t_dts_col", outer_arr, outer_sch, qdb::column_name_view{"ts"}); + } + catch (const qdb::line_sender_error& e) + { + FAIL("DTS=Column failed: " << e.what()); + } + ts_sch->release = nullptr; + v_sch->release = nullptr; +} + +TEST_CASE("arrow ingress: default append omits per-row timestamp (server stamps)") +{ + auto buf = qdb::line_sender_buffer::qwp_ws(); + auto col = pack_le({10, 20}); + auto arr = make_array(2, 0, {nullptr, col}); + auto sch = make_schema("l", "v"); + append_ok(buf, 
"t_dts_default", arr, sch); +} + +// --------------------------------------------------------------------------- +// Decimal dispatch — verifies wire-through to column_dec64 / dec128 / dec. +// --------------------------------------------------------------------------- + +TEST_CASE("arrow ingress: Decimal64 / Decimal128 / Decimal256") +{ + // Decimal64 (i64 mantissa, scale=2). + // Format must carry explicit ",64" — Arrow C Data Interface defaults + // `"d:p,s"` (no bitwidth) to Decimal128, not Decimal64. + { + auto buf = qdb::line_sender_buffer::qwp_ws(); + auto col = pack_le({12345, 67890}); + auto arr = make_array(2, 0, {nullptr, col}); + auto sch = make_schema("d:18,2,64", "d64"); + append_ok(buf, "t_d64", arr, sch); + } + // Decimal128 (i128 mantissa, scale=3). + { + auto buf = qdb::line_sender_buffer::qwp_ws(); + auto data = std::make_shared>(std::vector(32, 0)); + auto arr = make_array(2, 0, {nullptr, data}); + auto sch = make_schema("d:38,3", "d128"); + append_ok(buf, "t_d128", arr, sch); + } + // Decimal256 (i256 mantissa, scale=5). 
+ { + auto buf = qdb::line_sender_buffer::qwp_ws(); + auto data = std::make_shared>(std::vector(64, 0)); + auto arr = make_array(2, 0, {nullptr, data}); + auto sch = make_schema("d:76,5,256", "d256"); + append_ok(buf, "t_d256", arr, sch); + } +} + +TEST_CASE("arrow ingress: Int32 + questdb.geohash_bits routes to column_geohash") +{ + auto buf = qdb::line_sender_buffer::qwp_ws(); + auto col = pack_le({0x1FFFF, 0x10000}); + auto arr = make_array(2, 0, {nullptr, col}); + auto sch = make_schema("i", "g"); + static const char md[] = + "\x01\x00\x00\x00" + "\x14\x00\x00\x00" "questdb.geohash_bits" + "\x02\x00\x00\x00" "20"; + sch.metadata = md; + append_ok(buf, "t_geo", arr, sch); +} diff --git a/examples/line_reader_c_example_arrow.c b/examples/line_reader_c_example_arrow.c new file mode 100644 index 00000000..1684a141 --- /dev/null +++ b/examples/line_reader_c_example_arrow.c @@ -0,0 +1,103 @@ +#include +#include +#include +#include + +static void print_batch(const struct ArrowArray* arr, const struct ArrowSchema* sch) +{ + for (int64_t c = 0; c < sch->n_children; ++c) + { + if (c != 0) + printf("\t"); + printf("%s", sch->children[c]->name ? 
sch->children[c]->name : ""); + } + printf("\n"); + + for (int64_t r = 0; r < arr->length; ++r) + { + for (int64_t c = 0; c < arr->n_children; ++c) + { + const struct ArrowArray* col = arr->children[c]; + const char* fmt = sch->children[c]->format; + if (c != 0) + printf("\t"); + + if (strcmp(fmt, "l") == 0 || strcmp(fmt, "i") == 0) + { + int64_t v; + if (fmt[0] == 'l') + v = ((const int64_t*)col->buffers[1])[r + col->offset]; + else + v = ((const int32_t*)col->buffers[1])[r + col->offset]; + printf("%" PRId64, v); + } + else if (strcmp(fmt, "g") == 0 || strcmp(fmt, "f") == 0) + { + double v; + if (fmt[0] == 'g') + v = ((const double*)col->buffers[1])[r + col->offset]; + else + v = ((const float*)col->buffers[1])[r + col->offset]; + printf("%g", v); + } + else + { + printf("(format=%s)", fmt); + } + } + printf("\n"); + } +} + +int main(int argc, const char* argv[]) +{ + (void)argc; + (void)argv; + + line_reader_error* err = NULL; + line_reader* reader = NULL; + line_reader_cursor* cursor = NULL; + + line_sender_utf8 conf = QDB_UTF8_LITERAL("ws::addr=localhost:9000;"); + reader = line_reader_from_conf(conf, &err); + if (!reader) + goto on_error; + + line_sender_utf8 sql = QDB_UTF8_LITERAL( + "SELECT x AS n, x * 1.5 AS d FROM long_sequence(5)"); + cursor = line_reader_execute(reader, sql, &err); + if (!cursor) + goto on_error; + + for (;;) + { + struct ArrowArray arr; + struct ArrowSchema sch; + line_reader_arrow_batch_result rc = + line_reader_cursor_next_arrow_batch(cursor, &arr, &sch, &err); + if (rc == line_reader_arrow_batch_end) + break; + if (rc == line_reader_arrow_batch_error) + goto on_error; + + print_batch(&arr, &sch); + + if (arr.release) + arr.release(&arr); + if (sch.release) + sch.release(&sch); + } + + line_reader_cursor_free(cursor); + line_reader_close(reader); + return 0; + +on_error:; + size_t err_len = 0; + const char* err_msg = line_reader_error_msg(err, &err_len); + fprintf(stderr, "Error: %.*s\n", (int)err_len, err_msg); + 
line_reader_error_free(err); + line_reader_cursor_free(cursor); + line_reader_close(reader); + return 1; +} diff --git a/examples/line_reader_cpp_example_arrow.cpp b/examples/line_reader_cpp_example_arrow.cpp new file mode 100644 index 00000000..95d4e6d9 --- /dev/null +++ b/examples/line_reader_cpp_example_arrow.cpp @@ -0,0 +1,67 @@ +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace { + +namespace egress = questdb::egress; +namespace ingress = questdb::ingress; + +bool example() +{ + try + { + egress::reader reader{ingress::utf8_view{"ws::addr=localhost:9000;"}}; + auto cursor = reader.execute(ingress::utf8_view{ + "SELECT x AS n, x * 1.5 AS d FROM long_sequence(5)"}); + + while (auto batch = cursor.next_arrow_batch()) + { + // `arrow::ImportRecordBatch` consumes the release callbacks on + // success; both `batch->array.release` and + // `batch->schema.release` are zeroed by Arrow afterwards. + auto rb_res = + arrow::ImportRecordBatch(&batch->array, &batch->schema); + if (!rb_res.ok()) + { + std::fprintf( + stderr, "ImportRecordBatch: %s\n", + rb_res.status().ToString().c_str()); + if (batch->array.release) + batch->array.release(&batch->array); + if (batch->schema.release) + batch->schema.release(&batch->schema); + return false; + } + const auto& rb = *rb_res; + std::cout << rb->schema()->ToString() << "\n"; + auto pp = arrow::PrettyPrint(*rb, {}, &std::cout); + (void)pp; + std::cout << "\n"; + } + return true; + } + catch (const egress::line_reader_error& e) + { + std::fprintf(stderr, "Error: %s\n", e.what()); + return false; + } +} + +} // namespace + +int main(int argc, const char* argv[]) +{ + (void)argc; + (void)argv; + return example() ? 
0 : 1; +} diff --git a/examples/line_sender_cpp_example_arrow.cpp b/examples/line_sender_cpp_example_arrow.cpp new file mode 100644 index 00000000..032858ff --- /dev/null +++ b/examples/line_sender_cpp_example_arrow.cpp @@ -0,0 +1,81 @@ +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace { + +namespace qdb = questdb::ingress; + +std::shared_ptr build_batch() +{ + auto pool = arrow::default_memory_pool(); + arrow::TimestampBuilder ts_b( + arrow::timestamp(arrow::TimeUnit::MICRO, "UTC"), pool); + arrow::DoubleBuilder price_b(pool); + + constexpr int64_t base = 1700000000000000LL; + ts_b.AppendValues({base, base + 1, base + 2}).ok(); + price_b.AppendValues({2615.54, 2615.55, 2615.50}).ok(); + + std::shared_ptr ts_arr, price_arr; + ts_b.Finish(&ts_arr).ok(); + price_b.Finish(&price_arr).ok(); + + auto schema = arrow::schema( + {arrow::field("ts", ts_arr->type()), + arrow::field("price", arrow::float64())}); + return arrow::RecordBatch::Make(schema, ts_arr->length(), {ts_arr, price_arr}); +} + +bool example(const std::string& host, const std::string& port) +{ + try + { + const std::string conf_str = "qwpws::addr=" + host + ":" + port + ";"; + auto sender = qdb::line_sender::from_conf(conf_str); + auto buffer = sender.new_buffer(); + + auto batch = build_batch(); + ArrowArray c_arr{}; + ArrowSchema c_sch{}; + auto st = arrow::ExportRecordBatch(*batch, &c_arr, &c_sch); + if (!st.ok()) + { + std::fprintf(stderr, "ExportRecordBatch: %s\n", st.ToString().c_str()); + return false; + } + + // Designated timestamp pulled from the "ts" column. `c_arr` is + // consumed by the call; `c_sch` is borrowed (we release it). 
+ buffer.append_arrow( + "cpp_arrow_trades", c_arr, c_sch, qdb::column_name_view{"ts"}); + if (c_sch.release) + c_sch.release(&c_sch); + + sender.flush(buffer); + return true; + } + catch (const qdb::line_sender_error& e) + { + std::fprintf(stderr, "Error: %s\n", e.what()); + return false; + } +} + +} // namespace + +int main(int argc, const char* argv[]) +{ + const std::string host = (argc >= 2) ? argv[1] : "localhost"; + const std::string port = (argc >= 3) ? argv[2] : "9000"; + return example(host, port) ? 0 : 1; +} diff --git a/include/questdb/egress/line_reader.h b/include/questdb/egress/line_reader.h index a58eecdd..87dffd34 100644 --- a/include/questdb/egress/line_reader.h +++ b/include/questdb/egress/line_reader.h @@ -35,7 +35,7 @@ extern "C" { /* Reuse `line_sender_utf8` for validated UTF-8 strings, and the `QUESTDB_CLIENT_API` / `QUESTDB_CLIENT_DYN_LIB` linkage macros. */ -#include "../ingress/line_sender.h" +#include /////////// Thread safety. // @@ -193,6 +193,23 @@ typedef enum line_reader_error_code * connect failover (before any batch is yielded) is unaffected * and remains transparent. */ line_reader_error_failover_would_duplicate = 21, + /** Streaming Arrow adapter saw a mid-stream schema change. The + * cursor remains usable; its pinned schema snapshot is cleared + * by this error, so the next + * `line_reader_cursor_next_arrow_batch` call snapshots the new + * schema and resumes streaming. The batch that triggered the + * drift is discarded — re-issue the query if you need it. Only + * emitted when the `arrow` feature is enabled. */ + line_reader_error_schema_drift = 22, + /** `line_reader_cursor_next_arrow_batch` was called on a stream + * that terminated before any batch was produced — no schema to + * snapshot. Only emitted when the `arrow` feature is enabled. */ + line_reader_error_no_schema = 23, + /** Arrow C Data Interface export failed (arrow-rs rejected the + * produced `ArrayData`'s invariants). 
Indicates a client bug — + * not user-recoverable. Only emitted when the `arrow` feature + * is enabled. */ + line_reader_error_arrow_export = 24, } line_reader_error_code; /** @@ -1748,6 +1765,46 @@ static inline bool line_reader_column_data_get_symbol( return true; } +#ifdef QUESTDB_CLIENT_ENABLE_ARROW + +/** + * Tri-state return for `line_reader_cursor_next_arrow_batch`. + */ +typedef enum line_reader_arrow_batch_result +{ + /** A batch was decoded and `out_array` / `out_schema` are populated. */ + line_reader_arrow_batch_ok = 0, + /** End of stream; `out_*` are unchanged and no error was produced. */ + line_reader_arrow_batch_end = 1, + /** Decode failed; `out_*` are unchanged and `*err_out` is populated. */ + line_reader_arrow_batch_error = 2, +} line_reader_arrow_batch_result; + +/** + * Advance the cursor by one RESULT_BATCH and export it as an Arrow + * C Data Interface array + schema. `out_array` / `out_schema` must be + * caller-allocated and must not hold a live (unreleased) Arrow structure + * on each call: pass either zero-initialised memory or storage whose + * previous `release` callback has already been invoked. The + * implementation overwrites the slots without inspecting + * their prior contents, so a non-released previous result would leak its + * buffers. On `_ok` the slots are filled in place and the caller owns + * the new release callback contract. On `_end` / `_error` they are left + * untouched. + * + * Mid-stream schema drift (the underlying QuestDB table altered between + * batches) surfaces as `line_reader_error_schema_drift` (= 22) on the + * call that detects it; the cursor's pinned schema snapshot is then + * cleared so the next call snapshots the new schema and resumes. The + * batch that triggered the drift is discarded. 
+ */ +QUESTDB_CLIENT_API +line_reader_arrow_batch_result line_reader_cursor_next_arrow_batch( + line_reader_cursor* cursor, + struct ArrowArray* out_array, + struct ArrowSchema* out_schema, + line_reader_error** err_out); +#endif /* QUESTDB_CLIENT_ENABLE_ARROW */ + #ifdef __cplusplus } #endif diff --git a/include/questdb/egress/line_reader.hpp b/include/questdb/egress/line_reader.hpp index 3260c17f..99b0273e 100644 --- a/include/questdb/egress/line_reader.hpp +++ b/include/questdb/egress/line_reader.hpp @@ -96,6 +96,21 @@ enum class error_code : int server_limit_exceeded = ::line_reader_error_server_limit_exceeded, cancelled = ::line_reader_error_cancelled, failover_would_duplicate = ::line_reader_error_failover_would_duplicate, + + /** Streaming Arrow adapter observed a mid-stream schema change. The + * cursor is still usable; re-call `next_arrow_batch` after dropping + * any partial state to snapshot the new schema. Only raised with + * the `arrow` feature enabled. */ + schema_drift = ::line_reader_error_schema_drift, + /** `next_arrow_batch` was called on a stream that terminated before + * any batch was produced — no schema to snapshot. Only raised with + * the `arrow` feature enabled. */ + no_schema = ::line_reader_error_no_schema, + /** Arrow C Data Interface export failed (arrow-rs rejected the + * produced `ArrayData`'s invariants). Indicates a client bug — + * not user-recoverable. Only raised with the `arrow` feature + * enabled. */ + arrow_export = ::line_reader_error_arrow_export, }; /** @@ -2447,6 +2462,107 @@ class cursor return egress::batch{p}; } +#ifdef QUESTDB_CLIENT_ENABLE_ARROW + /** + * Result of `next_arrow_batch`. Aggregate of the two Apache Arrow + * C Data Interface structs the C entry point fills in. + * + * Ownership: the caller of `next_arrow_batch` owns the `array` and + * `schema` returned here. 
The `arrow_batch` destructor releases whichever of the two is still + * live, so no manual cleanup is required. The caller may instead: + * - Invoke `array.release(&array)` and `schema.release(&schema)` + * directly (only while the respective callback is non-NULL), or + * - Transfer ownership to an Arrow consumer such as + * `arrow::ImportRecordBatch(&array, &schema)`, which zeros the + * release callbacks on success so the destructor's guarded + * release becomes a no-op. + */ + struct arrow_batch + { + ::ArrowArray array; + ::ArrowSchema schema; + + arrow_batch() noexcept : array{}, schema{} {} + arrow_batch(const arrow_batch&) = delete; + arrow_batch& operator=(const arrow_batch&) = delete; + + arrow_batch(arrow_batch&& other) noexcept + : array(other.array), schema(other.schema) + { + // Zero the source so its destructor skips release() and so + // any post-move access (`other.array.length`, `.buffers[0]`, + // children, etc.) reads zeros instead of pointers that now + // alias destination-owned memory. + std::memset(&other.array, 0, sizeof(other.array)); + std::memset(&other.schema, 0, sizeof(other.schema)); + } + + arrow_batch& operator=(arrow_batch&& other) noexcept + { + if (this != &other) + { + release_in_place(); + array = other.array; + schema = other.schema; + std::memset(&other.array, 0, sizeof(other.array)); + std::memset(&other.schema, 0, sizeof(other.schema)); + } + return *this; + } + + ~arrow_batch() noexcept { release_in_place(); } + + private: + void release_in_place() noexcept + { + if (array.release) + { + array.release(&array); + array.release = nullptr; + } + if (schema.release) + { + schema.release(&schema); + schema.release = nullptr; + } + } + }; + + /** + * Advance to the next batch and export it via the Apache Arrow + * C Data Interface. + * + * @return `std::nullopt` when the stream terminates normally + * (no further batches). + * @return An owned `arrow_batch` on success. See the struct's + * documentation for release responsibilities. 
+ * @throws line_reader_error on transport / protocol failure or any + * Arrow-specific error (`schema_drift`, `no_schema`, + * `arrow_export`). + * + * Unlike `next_batch`, the returned `arrow_batch` is NOT invalidated + * by subsequent cursor operations — it owns its release callbacks + * and is independent of the cursor lifetime. + */ + std::optional next_arrow_batch() + { + ensure_impl(); + ::line_reader_error* c_err{nullptr}; + arrow_batch out{}; + const auto rc = ::line_reader_cursor_next_arrow_batch( + _impl, &out.array, &out.schema, &c_err); + switch (rc) + { + case ::line_reader_arrow_batch_ok: + return out; + case ::line_reader_arrow_batch_end: + return std::nullopt; + case ::line_reader_arrow_batch_error: + default: + throw line_reader_error::from_c(c_err); + } + } +#endif /* QUESTDB_CLIENT_ENABLE_ARROW */ + // ---- Introspection ----------------------------------------------------- /** @throws line_reader_error if this cursor has been moved from. */ diff --git a/include/questdb/ingress/line_sender.h b/include/questdb/ingress/line_sender.h index 3658f855..c44b083f 100644 --- a/include/questdb/ingress/line_sender.h +++ b/include/questdb/ingress/line_sender.h @@ -79,53 +79,66 @@ extern "C" { /** An error that occurred when using the line sender. */ typedef struct line_sender_error line_sender_error; -/** Category of error. */ +/** Category of error. + * + * Append-only: reordering or inserting in the middle breaks ABI. */ typedef enum line_sender_error_code { /** The host, port, or interface was incorrect. */ - line_sender_error_could_not_resolve_addr, + line_sender_error_could_not_resolve_addr = 0, /** Called methods in the wrong order. E.g. `symbol` after `column`. */ - line_sender_error_invalid_api_call, + line_sender_error_invalid_api_call = 1, /** A network error connecting or flushing data out. */ - line_sender_error_socket_error, + line_sender_error_socket_error = 2, /** The string or symbol field is not encoded in valid UTF-8. 
*/ - line_sender_error_invalid_utf8, + line_sender_error_invalid_utf8 = 3, /** The table name or column name contains bad characters. */ - line_sender_error_invalid_name, + line_sender_error_invalid_name = 4, /** The supplied timestamp is invalid. */ - line_sender_error_invalid_timestamp, + line_sender_error_invalid_timestamp = 5, /** Error during the authentication process. */ - line_sender_error_auth_error, + line_sender_error_auth_error = 6, /** Error during TLS handshake. */ - line_sender_error_tls_error, + line_sender_error_tls_error = 7, /** The server does not support ILP over HTTP. */ - line_sender_error_http_not_supported, + line_sender_error_http_not_supported = 8, /** Error sent back from the server during flush. */ - line_sender_error_server_flush_error, + line_sender_error_server_flush_error = 9, /** Bad configuration. */ - line_sender_error_config_error, + line_sender_error_config_error = 10, /** There was an error serializing an array. */ - line_sender_error_array_error, + line_sender_error_array_error = 11, /** Line sender protocol version error. */ - line_sender_error_protocol_version_error, + line_sender_error_protocol_version_error = 12, /** The supplied decimal is invalid. */ - line_sender_error_invalid_decimal, + line_sender_error_invalid_decimal = 13, /** QWP/WebSocket server rejection or terminal protocol violation. */ - line_sender_error_server_rejection, + line_sender_error_server_rejection = 14, + + /** Arrow column whose kind cannot be persisted (e.g. + * `FixedSizeBinary(16)` without `arrow.uuid` extension metadata; + * `ARRAY(LONG, N-D)` is egress-only; nested-list leaf must be + * `Float64`). `arrow` feature only. */ + line_sender_error_arrow_unsupported_column_kind = 15, + + /** RecordBatch failed client-side structural validation + * (column count, name encoding, C Data Interface contract). + * `arrow` feature only. */ + line_sender_error_arrow_ingest = 16, } line_sender_error_code; /** The protocol used to connect with. 
*/ @@ -428,6 +441,14 @@ QUESTDB_CLIENT_API line_sender_buffer* line_sender_buffer_new_qwp_with_max_name_len( size_t max_name_len); +/** + * Construct a QWP/WebSocket columnar `line_sender_buffer` with a 127-byte + * name length limit. This is the buffer kind required by + * `line_sender_buffer_append_arrow`. + */ +QUESTDB_CLIENT_API +line_sender_buffer* line_sender_buffer_new_qwp_ws(void); + /** Release the `line_sender_buffer` object. */ QUESTDB_CLIENT_API void line_sender_buffer_free(line_sender_buffer* buffer); @@ -1975,6 +1996,91 @@ int64_t line_sender_now_nanos(void); QUESTDB_CLIENT_API int64_t line_sender_now_micros(void); +#ifdef QUESTDB_CLIENT_ENABLE_ARROW +/* Apache Arrow C Data Interface (feature: arrow). + * https://arrow.apache.org/docs/format/CDataInterface.html */ + +#ifndef ARROW_C_DATA_INTERFACE +# define ARROW_C_DATA_INTERFACE + +# define ARROW_FLAG_DICTIONARY_ORDERED 1 +# define ARROW_FLAG_NULLABLE 2 +# define ARROW_FLAG_MAP_KEYS_SORTED 4 + +struct ArrowSchema +{ + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + void (*release)(struct ArrowSchema*); + void* private_data; +}; + +struct ArrowArray +{ + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + void (*release)(struct ArrowArray*); + void* private_data; +}; + +#endif /* ARROW_C_DATA_INTERFACE */ + +/** + * Append every row of a `RecordBatch` (Arrow C Data Interface) to `buffer`. + * The per-row designated timestamp is not sent — the server stamps each row + * on arrival (same semantics as `line_sender_buffer_at_now`). + * + * `array` may be either: + * - A Struct array (one child per column, the standard RecordBatch shape), or + * - A non-Struct (single-column) array whose `schema->name` becomes the + * column name. 
+ * + * Ownership: `array` is consumed once input validation passes + * (non-NULL pointers, schema depth within bounds) — `array->release` + * is cleared and the imported buffers are dropped on every subsequent + * return path. If validation fails first (NULL or over-deep schema), + * `array->release` is left untouched. `schema` is always borrowed. + * + * Server-side type-mismatch surfaces from the next `line_sender_flush`. + */ +QUESTDB_CLIENT_API +bool line_sender_buffer_append_arrow( + line_sender_buffer* buffer, + line_sender_table_name table, + struct ArrowArray* array, + const struct ArrowSchema* schema, + line_sender_error** err_out); + +/** + * Append every row of a `RecordBatch`, sourcing the per-row designated + * timestamp from a named `Timestamp(_)` column inside the batch. + * + * Same ownership and shape contract as `line_sender_buffer_append_arrow`. + * `ts_column` must be initialised via `line_sender_column_name_init` and + * name a `Timestamp(Microsecond | Nanosecond | Millisecond, _)` column + * with no null rows. + */ +QUESTDB_CLIENT_API +bool line_sender_buffer_append_arrow_at_column( + line_sender_buffer* buffer, + line_sender_table_name table, + struct ArrowArray* array, + const struct ArrowSchema* schema, + line_sender_column_name ts_column, + line_sender_error** err_out); +#endif /* QUESTDB_CLIENT_ENABLE_ARROW */ + #ifdef __cplusplus } #endif diff --git a/include/questdb/ingress/line_sender.hpp b/include/questdb/ingress/line_sender.hpp index 7bc3fd15..c321d20c 100644 --- a/include/questdb/ingress/line_sender.hpp +++ b/include/questdb/ingress/line_sender.hpp @@ -98,7 +98,41 @@ class line_sender_buffer protocol_version::v1, init_buf_size, max_name_len, - true}; + _backend_kind::qwp_udp}; + } + + /** + * Construct a standalone QWP/WebSocket columnar buffer. Required + * by `append_arrow`; also accepts the row-by-row `table` / + * `symbol` / `column` / `at` API. 
+ * + * For protocol-neutral construction tied to a sender instance, + * prefer `line_sender::new_buffer()`. + * + * @param init_buf_size Hint passed to `line_sender_buffer_reserve` + * for the initial capacity of the underlying + * column storage. + * @throws line_sender_error if the initial reserve fails. + */ + static line_sender_buffer qwp_ws(size_t init_buf_size = 64 * 1024) + { + auto* raw_buffer = ::line_sender_buffer_new_qwp_ws(); + try + { + line_sender_error::wrapped_call( + ::line_sender_buffer_reserve, raw_buffer, init_buf_size); + } + catch (...) + { + ::line_sender_buffer_free(raw_buffer); + throw; + } + return line_sender_buffer{ + raw_buffer, + protocol_version::v1, + init_buf_size, + 127, + _backend_kind::qwp_ws}; } line_sender_buffer(const line_sender_buffer& other) @@ -110,7 +144,7 @@ class line_sender_buffer , _protocol_version{other._protocol_version} , _init_buf_size{other._init_buf_size} , _max_name_len{other._max_name_len} - , _is_qwp{other._is_qwp} + , _backend{other._backend} { } @@ -120,7 +154,7 @@ class line_sender_buffer , _protocol_version{other._protocol_version} , _init_buf_size{other._init_buf_size} , _max_name_len{other._max_name_len} - , _is_qwp{other._is_qwp} + , _backend{other._backend} { other._impl = nullptr; @@ -142,7 +176,7 @@ class line_sender_buffer _init_buf_size = other._init_buf_size; _max_name_len = other._max_name_len; _protocol_version = other._protocol_version; - _is_qwp = other._is_qwp; + _backend = other._backend; } return *this; } @@ -156,7 +190,7 @@ class line_sender_buffer _init_buf_size = other._init_buf_size; _max_name_len = other._max_name_len; _protocol_version = other._protocol_version; - _is_qwp = other._is_qwp; + _backend = other._backend; other._impl = nullptr; } return *this; @@ -1117,6 +1151,58 @@ class line_sender_buffer line_sender_error::wrapped_call(::line_sender_buffer_at_now, _impl); } +#ifdef QUESTDB_CLIENT_ENABLE_ARROW + /** + * Append every row of an Apache Arrow `RecordBatch` to the buffer. 
+ * Per-row timestamp is not sent; the server stamps each row on + * arrival (same semantics as `at_now()`). + * + * Requires a QWP/WebSocket buffer. `schema` is borrowed. + * `array` is consumed once control reaches the underlying C call; + * if `may_init()` throws first (e.g. lazy buffer reserve fails), + * `array` is left untouched and the caller retains ownership. + * `array` may be a Struct top-level array or a non-Struct + * single-column array. + * + * @throws line_sender_error on validation or classification failure. + */ + void append_arrow( + table_name_view table, + ::ArrowArray& array, + const ::ArrowSchema& schema) + { + may_init(); + line_sender_error::wrapped_call( + ::line_sender_buffer_append_arrow, + _impl, + table._impl, + &array, + &schema); + } + + /** + * Append an Arrow `RecordBatch`, sourcing the per-row designated + * timestamp from a named column inside the batch. The column must + * be `Timestamp(Microsecond | Nanosecond | Millisecond, _)` with + * no null rows. 
+ */ + void append_arrow( + table_name_view table, + ::ArrowArray& array, + const ::ArrowSchema& schema, + column_name_view ts_column) + { + may_init(); + line_sender_error::wrapped_call( + ::line_sender_buffer_append_arrow_at_column, + _impl, + table._impl, + &array, + &schema, + ts_column._impl); + } +#endif /* QUESTDB_CLIENT_ENABLE_ARROW */ + void check_can_flush() const { if (!_impl) @@ -1137,17 +1223,24 @@ class line_sender_buffer } private: + enum class _backend_kind + { + ilp, + qwp_udp, + qwp_ws + }; + line_sender_buffer( ::line_sender_buffer* impl, protocol_version version, size_t init_buf_size, size_t max_name_len, - bool is_qwp = false) noexcept + _backend_kind backend = _backend_kind::ilp) noexcept : _impl{impl} , _protocol_version{version} , _init_buf_size{init_buf_size} , _max_name_len{max_name_len} - , _is_qwp{is_qwp} + , _backend{backend} { } @@ -1156,17 +1249,21 @@ class line_sender_buffer if (!_impl) { ::line_sender_buffer* tmp = nullptr; - if (_is_qwp) + switch (_backend) { + case _backend_kind::qwp_ws: + tmp = ::line_sender_buffer_new_qwp_ws(); + break; + case _backend_kind::qwp_udp: tmp = ::line_sender_buffer_new_qwp_with_max_name_len( _max_name_len); - } - else - { + break; + case _backend_kind::ilp: tmp = ::line_sender_buffer_with_max_name_len( static_cast<::line_sender_protocol_version>( static_cast(_protocol_version)), _max_name_len); + break; } try { @@ -1186,7 +1283,7 @@ class line_sender_buffer protocol_version _protocol_version; size_t _init_buf_size; size_t _max_name_len; - bool _is_qwp{false}; + _backend_kind _backend{_backend_kind::ilp}; friend class line_sender; }; @@ -1801,9 +1898,13 @@ class line_sender auto version = this->protocol_version(); auto max_name_len = ::line_sender_get_max_name_len(_impl); auto sender_protocol = this->protocol(); - bool is_qwp = sender_protocol == protocol::qwpudp || + auto backend = line_sender_buffer::_backend_kind::ilp; + if (sender_protocol == protocol::qwpudp) + backend = 
line_sender_buffer::_backend_kind::qwp_udp; + else if ( sender_protocol == protocol::qwpws || - sender_protocol == protocol::qwpwss; + sender_protocol == protocol::qwpwss) + backend = line_sender_buffer::_backend_kind::qwp_ws; auto* raw_buffer = ::line_sender_buffer_new_for_sender(_impl); try { @@ -1816,11 +1917,7 @@ class line_sender throw; } return line_sender_buffer{ - raw_buffer, - version, - init_buf_size, - max_name_len, - is_qwp}; + raw_buffer, version, init_buf_size, max_name_len, backend}; } /** diff --git a/include/questdb/ingress/line_sender_core.hpp b/include/questdb/ingress/line_sender_core.hpp index 85c166b2..b22627d2 100644 --- a/include/questdb/ingress/line_sender_core.hpp +++ b/include/questdb/ingress/line_sender_core.hpp @@ -96,6 +96,16 @@ enum class line_sender_error_code /** QWP/WebSocket server rejection or terminal protocol violation. */ server_rejection, + + /** `line_sender_buffer::append_arrow` was passed a column whose Arrow + * type / metadata combination has no QuestDB ingress mapping. + * Only raised with the `arrow` feature enabled. */ + arrow_unsupported_column_kind, + + /** `line_sender_buffer::append_arrow` rejected a `RecordBatch` at the + * contract layer (invalid format, structural error against the Arrow + * C Data Interface). Only raised with the `arrow` feature enabled. */ + arrow_ingest, }; /** The protocol used to connect with. 
*/ diff --git a/questdb-rs-ffi/Cargo.lock b/questdb-rs-ffi/Cargo.lock index a241b3e5..08ac217e 100644 --- a/questdb-rs-ffi/Cargo.lock +++ b/questdb-rs-ffi/Cargo.lock @@ -13,12 +13,215 @@ dependencies = [ "cpufeatures 0.2.17", ] +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "const-random", + "getrandom 0.3.3", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "aligned-vec" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc890384c8602f339876ded803c97ad529f3842aba97f6392b3dba0dd171769b" +dependencies = [ + "equator", +] + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anyhow" version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "arrow" +version = "58.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "378530e55cd479eda3c14eb345310799717e6f76d0c332041e8487022166b471" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "58.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a0ab212d2c1886e802f51c5212d78ebbcbb0bec980fff9dadc1eb8d45cd0b738" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "num-traits", +] + +[[package]] +name = "arrow-array" +version = "58.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfd33d3e92f207444098c75b42de99d329562be0cf686b307b097cc52b4e999e" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.17.1", + "num-complex", + "num-integer", + "num-traits", +] + +[[package]] +name = "arrow-buffer" +version = "58.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c6cd424c2693bcdbc150d843dc9d4d137dd2de4782ce6df491ad11a3a0416c0" +dependencies = [ + "bytes", + "half", + "num-bigint", + "num-traits", +] + +[[package]] +name = "arrow-cast" +version = "58.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c5aefb56a2c02e9e2b30746241058b85f8983f0fcff2ba0c6d09006e1cded7f" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-ord", + "arrow-schema", + "arrow-select", + "atoi", + "base64", + "chrono", + "half", + "lexical-core", + "num-traits", + "ryu", +] + +[[package]] +name = "arrow-data" +version = "58.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c88210023a2bfee1896af366309a3028fc3bcbd6515fa29a7990ee1baa08ee0" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num-integer", + "num-traits", +] + +[[package]] +name = "arrow-ord" +version = "58.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bffd8fd2579286a5d63bac898159873e5094a79009940bcb42bbfce4f19f1d0" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", +] + +[[package]] +name = "arrow-row" +version = "58.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bab5994731204603c73ba69267616c50f80780774c6bb0476f1f830625115e0c" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", +] + +[[package]] +name = "arrow-schema" +version = "58.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f633dbfdf39c039ada1bf9e34c694816eb71fbb7dc78f613993b7245e078a1ed" +dependencies = [ + "bitflags", +] + +[[package]] +name = "arrow-select" +version = "58.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cd065c54172ac787cf3f2f8d4107e0d3fdc26edba76fdf4f4cc170258942222" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num-traits", +] + +[[package]] +name = "arrow-string" +version = "58.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29dd7cda3ab9692f43a2e4acc444d760cc17b12bb6d8232ddf64e9bab7c06b42" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num-traits", + "regex", + "regex-syntax", +] + [[package]] name = "asn1-rs" version = "0.5.2" @@ -96,6 +299,15 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + [[package]] name = "autocfg" version = "1.5.0" @@ -138,6 +350,12 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bumpalo" +version = "3.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" + [[package]] name = "bytes" version = "1.11.1" @@ -182,6 +400,17 @@ dependencies = [ "rand_core 0.10.1", ] +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "num-traits", + "windows-link", +] + [[package]] name = "cipher" version = "0.4.4" @@ -210,6 +439,26 @@ version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom 0.2.16", + "once_cell", + "tiny-keccak", +] + [[package]] name = "core-foundation" version = "0.10.1" @@ -253,6 +502,12 @@ dependencies = [ "rustc_version", ] +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "crypto-common" version = "0.1.7" @@ -359,6 +614,26 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "equator" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4711b213838dfee0117e3be6ac926007d7f433d7bbe33595975d4190cb07e6fc" +dependencies = [ + "equator-macro", +] + +[[package]] +name = "equator-macro" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -389,6 +664,30 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "slab", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -436,6 +735,18 @@ dependencies = [ "wasip3", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", + "zerocopy", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -489,6 +800,30 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = 
"id-arena" version = "2.3.0" @@ -550,6 +885,18 @@ dependencies = [ "libc", ] +[[package]] +name = "js-sys" +version = "0.3.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "142bc4740e452c1e57ade0cbc129f139c9093e354346f0872ef985f4f5cf5f11" +dependencies = [ + "cfg-if", + "futures-util", + "once_cell", + "wasm-bindgen", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -562,12 +909,75 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" +[[package]] +name = "lexical-core" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56" +dependencies = [ + "lexical-parse-integer", + "lexical-util", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34" +dependencies = [ + "lexical-util", +] + +[[package]] +name = "lexical-util" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17" + +[[package]] +name = "lexical-write-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361" +dependencies = [ + "lexical-util", + "lexical-write-integer", +] + +[[package]] +name = "lexical-write-integer" +version = 
"1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df" +dependencies = [ + "lexical-util", +] + [[package]] name = "libc" version = "0.2.176" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + [[package]] name = "log" version = "0.4.28" @@ -615,6 +1025,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + [[package]] name = "num-conv" version = "0.2.1" @@ -637,6 +1056,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -708,6 +1128,12 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + [[package]] name = "pkcs12" version = "0.1.0" @@ -797,6 +1223,12 @@ dependencies = [ name = "questdb-rs" version = "7.0.0" dependencies = [ + "aligned-vec", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", "base64ct", "bytes", "crc32c", @@ -829,6 +1261,8 @@ dependencies = [ name = "questdb-rs-ffi" version = "7.0.0" dependencies = [ + "arrow", + "arrow-array", "libc", "questdb-confstr-ffi", 
"questdb-rs", @@ -910,6 +1344,35 @@ dependencies = [ "cipher", ] +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + [[package]] name = "ring" version = "0.17.14" @@ -989,6 +1452,12 @@ dependencies = [ "untrusted", ] +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + [[package]] name = "ryu" version = "1.0.20" @@ -1124,6 +1593,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + [[package]] name = "slugify" version = "0.1.0" @@ -1275,6 +1750,15 @@ dependencies = [ "time-core", ] +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "typenum" version = "1.20.0" @@ -1375,6 +1859,51 @@ dependencies = [ "wit-bindgen 0.51.0", 
] +[[package]] +name = "wasm-bindgen" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed04576f974d2b2fba0f38c51dbc5518011e38c36bf1143164be765528fd409" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "916151b09da36bd82f6615cbf3a419e2f0ba23a03c6160e8e92eb6bd4aa1dec6" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "299047362ccbfce148b67ab7e73349f77748e00c8296f9542adfad2ad82c5c5e" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn 2.0.106", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a929b2c61f11ba3e9bc35b50c1f25cb38e0e892c0c231ae2b8cf78d5dad4437" +dependencies = [ + "unicode-ident", +] + [[package]] name = "wasm-encoder" version = "0.244.0" @@ -1418,12 +1947,65 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "windows-core" +version = "0.62.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6844ee5416b285084d3d3fffd743b925a6c9385455f64f6d4fa3031c4c2749a9" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "windows-link" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" +[[package]] +name = "windows-result" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7084dcc306f89883455a206237404d3eaf961e5bd7e0f312f7c91f57eb44167f" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7218c655a553b0bed4426cf54b20d7ba363ef543b52d515b3e48d7fd55318dda" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-sys" version = "0.52.0" diff --git a/questdb-rs-ffi/Cargo.toml b/questdb-rs-ffi/Cargo.toml index 4503a8e2..662ce63e 100644 --- a/questdb-rs-ffi/Cargo.toml +++ b/questdb-rs-ffi/Cargo.toml @@ -11,6 +11,8 @@ crate-type = ["cdylib", "staticlib"] [dependencies] libc = "0.2" questdb-confstr-ffi = { version = "0.1.1", optional = true } +arrow = { version = "58", optional = true, default-features = false, features = ["ffi"] } +arrow-array = { version = "58", optional = true, default-features = false } [dependencies.questdb-rs] path = "../questdb-rs" @@ -40,6 +42,19 @@ confstr-ffi = ["dep:questdb-confstr-ffi"] # dependency. The in-tree CMake build enables it via # `corrosion_import_crate(FEATURES sync-reader-ws ...)`. sync-reader-ws = ["questdb-rs/sync-reader-ws", "questdb-rs/compression-zstd"] + +# Apache Arrow integration (egress + ingress over QWP/WS). Adds the +# `line_reader_cursor_next_arrow_batch` and +# `line_sender_buffer_append_arrow` C exports plus the Arrow +# C Data Interface struct declarations. See +# `doc/QUESTDB_ARROW_INTEGRATION_DESIGN.md`. 
+arrow = [ + "sync-reader-ws", + "questdb-rs/arrow", + "questdb-rs/sync-sender-qwp-ws", + "dep:arrow", + "dep:arrow-array", +] # Compile in support for the `tls_verify=unsafe_off` connect-string knob. # Off by default: a shipped C ABI binary should not silently allow # downstream callers to disable certificate verification. Distributions diff --git a/questdb-rs-ffi/src/egress.rs b/questdb-rs-ffi/src/egress.rs index 7dc43efa..e068e71d 100644 --- a/questdb-rs-ffi/src/egress.rs +++ b/questdb-rs-ffi/src/egress.rs @@ -118,6 +118,19 @@ pub enum line_reader_error_code { /// `line_reader_query_on_failover_reset` to opt in to replays, or /// re-execute the query from scratch. line_reader_error_failover_would_duplicate = 21, + /// Streaming Arrow adapter saw a mid-stream schema change. The cursor + /// is still usable; re-wrap with `line_reader_cursor_next_arrow_batch` + /// after dropping any partial state to snapshot the new schema. Only + /// emitted with the `arrow` feature enabled. + line_reader_error_schema_drift = 22, + /// `line_reader_cursor_next_arrow_batch` was called on a stream that + /// terminated before any batch was produced — no schema to snapshot. + /// Only emitted with the `arrow` feature enabled. + line_reader_error_no_schema = 23, + /// Arrow C Data Interface export failed (arrow-rs rejected the + /// produced `ArrayData`'s invariants). Indicates a client bug — not + /// user-recoverable. Only emitted with the `arrow` feature enabled. 
+ line_reader_error_arrow_export = 24, } impl From for line_reader_error_code { @@ -144,6 +157,9 @@ impl From for line_reader_error_code { ErrorCode::ServerLimitExceeded => line_reader_error_server_limit_exceeded, ErrorCode::Cancelled => line_reader_error_cancelled, ErrorCode::FailoverWouldDuplicate => line_reader_error_failover_would_duplicate, + ErrorCode::SchemaDrift => line_reader_error_schema_drift, + ErrorCode::NoSchema => line_reader_error_no_schema, + ErrorCode::ArrowExport => line_reader_error_arrow_export, // ErrorCode is `#[non_exhaustive]`. Any future variant added // upstream that the C ABI hasn't been taught about falls // back to ProtocolError so callers see *something* rather @@ -1941,6 +1957,8 @@ pub unsafe extern "C" fn line_reader_query_execute( Box::into_raw(Box::new(line_reader_cursor { cursor: ManuallyDrop::new(cursor_static), current_batch: None, + #[cfg(feature = "arrow")] + arrow_schema_pin: None, reader, })) } @@ -2018,6 +2036,8 @@ pub unsafe extern "C" fn line_reader_execute( Box::into_raw(Box::new(line_reader_cursor { cursor: ManuallyDrop::new(cursor_static), current_batch: None, + #[cfg(feature = "arrow")] + arrow_schema_pin: None, reader, })) } @@ -2433,6 +2453,9 @@ pub struct line_reader_cursor { /// for the same reason as `cursor`. See the struct-level safety note — /// this field MUST be `None` whenever `&mut self.cursor` is exposed. current_batch: Option>, + /// Pins the first Arrow batch's schema for mid-stream drift detection. + #[cfg(feature = "arrow")] + arrow_schema_pin: Option, /// Backpointer to the originating reader, used to clear its `active` /// flag on `_cursor_free`. Always non-NULL for a valid cursor. reader: *mut line_reader, @@ -2444,7 +2467,25 @@ impl line_reader_cursor { /// "no-`current_batch`-while-`&mut cursor`" invariant documented on /// `line_reader_cursor`. Mutating cursor ops MUST go through here /// instead of taking `&mut self.cursor` directly. 
+ /// + /// Also clears any Arrow schema pin — switching back from the raw + /// `BatchView` path to `_next_arrow_batch` should re-snapshot the + /// schema, not compare against a stale one from before the detour. fn cursor_for_mut(&mut self) -> &mut Cursor<'static> { + self.current_batch = None; + debug_assert!(self.current_batch.is_none()); + #[cfg(feature = "arrow")] + { + self.arrow_schema_pin = None; + } + &mut self.cursor + } + + /// Like `cursor_for_mut` but preserves any Arrow schema pin. For + /// auxiliary cursor ops (`cancel`, `add_credit`) that do not advance + /// the stream and therefore must not lose the drift-detection + /// snapshot established by a prior `_next_arrow_batch`. + fn cursor_for_aux(&mut self) -> &mut Cursor<'static> { self.current_batch = None; debug_assert!(self.current_batch.is_none()); &mut self.cursor @@ -2868,13 +2909,10 @@ pub unsafe extern "C" fn line_reader_cursor_cancel( ); return false; } - // Routes through `cursor_for_mut` to maintain the BatchView / - // &mut Cursor exclusion invariant — see line_reader_cursor docs. - // `cancel()` runs the drain loop which can panic (decoder paths). - // The `catch_unwind` + abort below is a no-op in shipped builds - // under `panic = abort` and active in test builds; see - // `panic_guard` docstring. - let inner = (*cursor).cursor_for_mut(); + // `cursor_for_aux` keeps the Arrow schema pin intact — `cancel` + // is a terminal op so the pin is about to be irrelevant, but + // sharing the helper with `add_credit` keeps the contract uniform. + let inner = (*cursor).cursor_for_aux(); let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| inner.cancel())); let res = match result { Ok(r) => r, @@ -2907,11 +2945,10 @@ pub unsafe extern "C" fn line_reader_cursor_add_credit( ); return false; } - // Routes through `cursor_for_mut` — see line_reader_cursor docs. 
- // The `catch_unwind` + abort below is a no-op in shipped builds - // under `panic = abort` and active in test builds; see - // `panic_guard` docstring. - let inner = (*cursor).cursor_for_mut(); + // `cursor_for_aux` keeps the Arrow schema pin intact across this + // flow-control call; otherwise a subsequent `_next_arrow_batch` + // would lose its drift snapshot. + let inner = (*cursor).cursor_for_aux(); let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { inner.add_credit(additional_bytes) })); @@ -3674,6 +3711,9 @@ mod tests { ErrorCode::ServerLimitExceeded, ErrorCode::Cancelled, ErrorCode::FailoverWouldDuplicate, + ErrorCode::SchemaDrift, + ErrorCode::NoSchema, + ErrorCode::ArrowExport, ]; for code in codes { let c: line_reader_error_code = code.into(); @@ -3687,6 +3727,24 @@ mod tests { } } + #[test] + fn line_reader_error_code_arrow_discriminants_are_abi_stable() { + // Pin numeric values for the Arrow-related variants exposed to C/FFI + // consumers. Append-only past the existing tail at 21. + assert_eq!( + line_reader_error_code::line_reader_error_schema_drift as u32, + 22 + ); + assert_eq!( + line_reader_error_code::line_reader_error_no_schema as u32, + 23 + ); + assert_eq!( + line_reader_error_code::line_reader_error_arrow_export as u32, + 24 + ); + } + #[test] fn column_kind_round_trips_for_every_variant() { let pairs = [ @@ -3896,3 +3954,93 @@ mod tests { // is a no-op when the C callback slot is empty. 
} } + +#[cfg(feature = "arrow")] +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum line_reader_arrow_batch_result { + line_reader_arrow_batch_ok = 0, + line_reader_arrow_batch_end = 1, + line_reader_arrow_batch_error = 2, +} + +#[cfg(feature = "arrow")] +#[unsafe(no_mangle)] +pub unsafe extern "C" fn line_reader_cursor_next_arrow_batch( + cursor: *mut line_reader_cursor, + out_array: *mut arrow::ffi::FFI_ArrowArray, + out_schema: *mut arrow::ffi::FFI_ArrowSchema, + err_out: *mut *mut line_reader_error, +) -> line_reader_arrow_batch_result { + use arrow_array::{Array, StructArray}; + unsafe { + if cursor.is_null() { + set_reader_err( + err_out, + ErrorCode::InvalidApiCall, + "line_reader_cursor_next_arrow_batch: cursor is NULL", + ); + return line_reader_arrow_batch_result::line_reader_arrow_batch_error; + } + if out_array.is_null() || out_schema.is_null() { + set_reader_err( + err_out, + ErrorCode::InvalidApiCall, + "line_reader_cursor_next_arrow_batch: out_array or out_schema is NULL", + ); + return line_reader_arrow_batch_result::line_reader_arrow_batch_error; + } + enum NextArrow { + Ok( + arrow::ffi::FFI_ArrowArray, + arrow::ffi::FFI_ArrowSchema, + arrow::datatypes::SchemaRef, + ), + End, + Err(Error, Option), + } + let c = &mut *cursor; + let pinned = c.arrow_schema_pin.clone(); + let inner: &mut Cursor<'static> = c.cursor_for_mut(); + let outcome = panic_guard(|| -> NextArrow { + let rb = match inner.next_arrow_batch_inner(pinned.as_ref()) { + Ok(Some(rb)) => rb, + Ok(None) => return NextArrow::End, + Err(e) => return NextArrow::Err(e, None), + }; + let schema_ref = rb.schema(); + let struct_array: StructArray = rb.into(); + let array_data = struct_array.into_data(); + match arrow::ffi::to_ffi(&array_data) { + Ok((ffi_array, ffi_schema)) => NextArrow::Ok(ffi_array, ffi_schema, schema_ref), + Err(e) => NextArrow::Err( + Error::new(ErrorCode::ArrowExport, e.to_string()), + Some(schema_ref), + ), + } + }); + match outcome { + 
NextArrow::Ok(ffi_array, ffi_schema, schema_ref) => { + c.arrow_schema_pin = Some(schema_ref); + std::ptr::write(out_array, ffi_array); + std::ptr::write(out_schema, ffi_schema); + line_reader_arrow_batch_result::line_reader_arrow_batch_ok + } + NextArrow::End => line_reader_arrow_batch_result::line_reader_arrow_batch_end, + NextArrow::Err(e, pin_to_restore) => { + match pin_to_restore { + Some(pin) => { + c.arrow_schema_pin = Some(pin); + } + None => { + if e.code() != ErrorCode::SchemaDrift { + c.arrow_schema_pin = pinned; + } + } + } + write_err_box(err_out, e); + line_reader_arrow_batch_result::line_reader_arrow_batch_error + } + } + } +} diff --git a/questdb-rs-ffi/src/lib.rs b/questdb-rs-ffi/src/lib.rs index 4cf0f6f0..88c3c715 100644 --- a/questdb-rs-ffi/src/lib.rs +++ b/questdb-rs-ffi/src/lib.rs @@ -264,6 +264,17 @@ pub enum line_sender_error_code { /// QWP/WebSocket server rejection or terminal protocol violation. line_sender_error_server_rejection, + + /// `line_sender_buffer_append_arrow` was passed a column whose + /// Arrow / QuestDB kind cannot be persisted to a QuestDB table. + /// Only emitted with the `arrow` feature enabled. + line_sender_error_arrow_unsupported_column_kind, + + /// `line_sender_buffer_append_arrow` rejected a `RecordBatch` at + /// client-side structural validation (column count, name encoding, + /// FFI struct contract). Only emitted with the `arrow` feature + /// enabled. 
+ line_sender_error_arrow_ingest, } impl From for line_sender_error_code { @@ -296,6 +307,10 @@ impl From for line_sender_error_code { line_sender_error_code::line_sender_error_protocol_version_error } ErrorCode::InvalidDecimal => line_sender_error_code::line_sender_error_invalid_decimal, + ErrorCode::ArrowUnsupportedColumnKind => { + line_sender_error_code::line_sender_error_arrow_unsupported_column_kind + } + ErrorCode::ArrowIngest => line_sender_error_code::line_sender_error_arrow_ingest, } } } @@ -920,6 +935,18 @@ pub unsafe extern "C" fn line_sender_buffer_new_qwp() -> *mut line_sender_buffer })) } +/// Construct a QWP/WebSocket columnar `line_sender_buffer` with the +/// default 127-byte name length limit. Required by +/// `line_sender_buffer_append_arrow*`. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn line_sender_buffer_new_qwp_ws() -> *mut line_sender_buffer { + let buffer = Buffer::new_qwp_ws(); + Box::into_raw(Box::new(line_sender_buffer { + buffer, + empty_peek_buf_is_null: true, + })) +} + /// Construct a QWP/UDP `line_sender_buffer` with a custom maximum length for /// table and column names. /// @@ -3604,6 +3631,437 @@ pub unsafe fn _build_system_hack(err: *mut questdb_conf_str_parse_err) { } } +// Crate is `panic = "abort"`; `catch_unwind` would be a no-op in +// shipped builds and harms `cargo test` diagnostics. Validation +// happens up-front in `arrow_append_impl`. + +/// Append every row of an Apache Arrow `RecordBatch` (Arrow C Data +/// Interface) to `buffer`. The per-row designated timestamp is not +/// sent — the server stamps each row on arrival. +/// +/// `array` may be either a Struct array (one child per column, the +/// standard RecordBatch shape) or a non-Struct single-column array +/// whose `schema->name` becomes the column name. +/// +/// Ownership: see the corresponding declaration in +/// `include/questdb/ingress/line_sender.h`. 
+#[cfg(feature = "arrow")] +#[unsafe(no_mangle)] +pub unsafe extern "C" fn line_sender_buffer_append_arrow( + buffer: *mut line_sender_buffer, + table: line_sender_table_name, + array: *mut arrow::ffi::FFI_ArrowArray, + schema: *const arrow::ffi::FFI_ArrowSchema, + err_out: *mut *mut line_sender_error, +) -> bool { + unsafe { arrow_append_impl(buffer, table, array, schema, None, err_out) } +} + +/// Variant of `line_sender_buffer_append_arrow` that sources each +/// row's designated timestamp from a named `Timestamp(_)` column +/// inside the batch. The column must be `Timestamp(Microsecond | +/// Nanosecond | Millisecond, _)` with no null rows. Same ownership +/// contract as `line_sender_buffer_append_arrow`. +#[cfg(feature = "arrow")] +#[unsafe(no_mangle)] +pub unsafe extern "C" fn line_sender_buffer_append_arrow_at_column( + buffer: *mut line_sender_buffer, + table: line_sender_table_name, + array: *mut arrow::ffi::FFI_ArrowArray, + schema: *const arrow::ffi::FFI_ArrowSchema, + ts_column: line_sender_column_name, + err_out: *mut *mut line_sender_error, +) -> bool { + unsafe { arrow_append_impl(buffer, table, array, schema, Some(ts_column), err_out) } +} + +// Bounds for the pre-walk that protects `arrow::ffi::from_ffi` against +// adversarial FFI input. Three independent caps: +// * `MAX_ARROW_SCHEMA_DEPTH` bounds recursion depth (children + dictionary +// chain). arrow-rs unrolls both onto the host stack; without this cap +// a deep schema would stack-overflow inside `from_ffi`. +// * `MAX_ARROW_SCHEMA_CHILDREN_PER_NODE` bounds breadth per node. +// * `MAX_ARROW_SCHEMA_TOTAL_NODES` bounds the whole tree (depth × breadth +// would otherwise be combinatorial under shared children / cyclic DAGs). 
+#[cfg(feature = "arrow")] +const MAX_ARROW_SCHEMA_DEPTH: usize = 64; +#[cfg(feature = "arrow")] +const MAX_ARROW_SCHEMA_CHILDREN_PER_NODE: i64 = 65_536; +#[cfg(feature = "arrow")] +const MAX_ARROW_SCHEMA_TOTAL_NODES: usize = 4_096; +// Mirrors `MAX_ARROW_INGEST_ROWS` in `questdb-rs::ingress::arrow`. +// `arrow::ffi::from_ffi` reads `(*a).length` as i64 and casts to +// usize before the inner crate gets to check the row cap, so a +// negative or `i64::MAX` length must be rejected here. +#[cfg(feature = "arrow")] +const MAX_ARROW_ARRAY_LENGTH: i64 = 16 * 1024 * 1024; + +#[cfg(feature = "arrow")] +fn arrow_ingest_err(msg: impl Into) -> Error { + Error::new(ErrorCode::ArrowIngest, msg.into()) +} + +// Format strings the Arrow C Data Interface accepts; trusted on a cheap +// prefix match. We do NOT enforce the full grammar — arrow-rs's own +// `DataType::try_from` does the structural parse and returns an Err on +// unknown variants. We only reject the inputs that would panic inside +// `FFI_ArrowSchema::format()` (NULL pointer / non-UTF-8) before reaching +// the parser. 
+#[cfg(feature = "arrow")] +unsafe fn validate_format_str(s: *const arrow::ffi::FFI_ArrowSchema) -> questdb::Result<()> { + unsafe { + let p = (*s).format; + if p.is_null() { + return Err(arrow_ingest_err("Arrow schema format pointer is NULL")); + } + let cstr = std::ffi::CStr::from_ptr(p); + cstr.to_str() + .map_err(|_| arrow_ingest_err("Arrow schema format string is not UTF-8"))?; + Ok(()) + } +} + +#[cfg(feature = "arrow")] +unsafe fn try_reserve_one(v: &mut Vec) -> questdb::Result<()> { + v.try_reserve(1) + .map_err(|_| arrow_ingest_err("Arrow schema pre-walk: reservation failed")) +} + +#[cfg(feature = "arrow")] +unsafe fn validate_arrow_schema_depth( + schema: *const arrow::ffi::FFI_ArrowSchema, +) -> questdb::Result<()> { + unsafe { + let mut stack: Vec<(*const arrow::ffi::FFI_ArrowSchema, usize)> = Vec::new(); + let mut visited: std::collections::HashSet<*const arrow::ffi::FFI_ArrowSchema> = + std::collections::HashSet::new(); + let mut total: usize = 0; + try_reserve_one(&mut stack)?; + stack.push((schema, 0)); + while let Some((s, depth)) = stack.pop() { + if !visited.insert(s) { + return Err(arrow_ingest_err( + "Arrow schema contains a cycle (revisited node)", + )); + } + total += 1; + if total > MAX_ARROW_SCHEMA_TOTAL_NODES { + return Err(arrow_ingest_err(format!( + "Arrow schema total node count exceeds {}", + MAX_ARROW_SCHEMA_TOTAL_NODES + ))); + } + if depth > MAX_ARROW_SCHEMA_DEPTH { + return Err(arrow_ingest_err(format!( + "Arrow schema nesting depth exceeds {}", + MAX_ARROW_SCHEMA_DEPTH + ))); + } + validate_format_str(s)?; + let n = (*s).n_children; + if n < 0 { + return Err(arrow_ingest_err(format!( + "Arrow schema n_children {} is negative", + n + ))); + } + if n > MAX_ARROW_SCHEMA_CHILDREN_PER_NODE { + return Err(arrow_ingest_err(format!( + "Arrow schema n_children {} exceeds per-node cap {}", + n, MAX_ARROW_SCHEMA_CHILDREN_PER_NODE + ))); + } + let dict = (*s).dictionary; + if !dict.is_null() { + try_reserve_one(&mut stack)?; + 
stack.push((dict as *const _, depth + 1)); + } + if n == 0 { + continue; + } + let children = (*s).children; + if children.is_null() { + return Err(arrow_ingest_err( + "Arrow schema declares children but pointer is NULL", + )); + } + for i in 0..n as usize { + let child = *children.add(i); + if child.is_null() { + return Err(arrow_ingest_err("Arrow schema child pointer is NULL")); + } + try_reserve_one(&mut stack)?; + stack.push((child as *const _, depth + 1)); + } + } + Ok(()) + } +} + +// Cross-walk schema + array in lockstep. arrow-rs's `from_ffi` asserts on +// mismatches between the two trees (`n_children` agreement for Struct / +// Union, `n_buffers` consistency, etc.); under `panic = "abort"` that +// assert aborts the host. We pre-check everything we can. +#[cfg(feature = "arrow")] +unsafe fn validate_arrow_array_depth( + array: *const arrow::ffi::FFI_ArrowArray, + schema: *const arrow::ffi::FFI_ArrowSchema, +) -> questdb::Result<()> { + unsafe { + let mut stack: Vec<( + *const arrow::ffi::FFI_ArrowArray, + *const arrow::ffi::FFI_ArrowSchema, + usize, + )> = Vec::new(); + let mut visited: std::collections::HashSet<*const arrow::ffi::FFI_ArrowArray> = + std::collections::HashSet::new(); + let mut total: usize = 0; + try_reserve_one(&mut stack)?; + stack.push((array, schema, 0)); + while let Some((a, s, depth)) = stack.pop() { + if !visited.insert(a) { + return Err(arrow_ingest_err( + "Arrow array contains a cycle (revisited node)", + )); + } + total += 1; + if total > MAX_ARROW_SCHEMA_TOTAL_NODES { + return Err(arrow_ingest_err(format!( + "Arrow array total node count exceeds {}", + MAX_ARROW_SCHEMA_TOTAL_NODES + ))); + } + if depth > MAX_ARROW_SCHEMA_DEPTH { + return Err(arrow_ingest_err(format!( + "Arrow array nesting depth exceeds {}", + MAX_ARROW_SCHEMA_DEPTH + ))); + } + let length = (*a).length; + let offset = (*a).offset; + if length < 0 { + return Err(arrow_ingest_err(format!( + "Arrow array length {} is negative", + length + ))); + } + if offset < 0 
{ + return Err(arrow_ingest_err(format!( + "Arrow array offset {} is negative", + offset + ))); + } + if length > MAX_ARROW_ARRAY_LENGTH { + return Err(arrow_ingest_err(format!( + "Arrow array length {} exceeds {}", + length, MAX_ARROW_ARRAY_LENGTH + ))); + } + if offset > MAX_ARROW_ARRAY_LENGTH { + return Err(arrow_ingest_err(format!( + "Arrow array offset {} exceeds {}", + offset, MAX_ARROW_ARRAY_LENGTH + ))); + } + let na = (*a).n_children; + let ns = (*s).n_children; + if na < 0 { + return Err(arrow_ingest_err(format!( + "Arrow array n_children {} is negative", + na + ))); + } + if na != ns { + return Err(arrow_ingest_err(format!( + "Arrow array n_children {} disagrees with schema n_children {}", + na, ns + ))); + } + if na > MAX_ARROW_SCHEMA_CHILDREN_PER_NODE { + return Err(arrow_ingest_err(format!( + "Arrow array n_children {} exceeds per-node cap {}", + na, MAX_ARROW_SCHEMA_CHILDREN_PER_NODE + ))); + } + if (*a).n_buffers < 0 { + return Err(arrow_ingest_err(format!( + "Arrow array n_buffers {} is negative", + (*a).n_buffers + ))); + } + let dict_a = (*a).dictionary; + let dict_s = (*s).dictionary; + match (dict_a.is_null(), dict_s.is_null()) { + (true, true) => {} + (false, false) => { + try_reserve_one(&mut stack)?; + stack.push((dict_a as *const _, dict_s as *const _, depth + 1)); + } + _ => { + return Err(arrow_ingest_err( + "Arrow array / schema disagree on dictionary presence", + )); + } + } + if na == 0 { + continue; + } + let a_children = (*a).children; + let s_children = (*s).children; + if a_children.is_null() || s_children.is_null() { + return Err(arrow_ingest_err( + "Arrow array or schema declares children but pointer is NULL", + )); + } + for i in 0..na as usize { + let child_a = *a_children.add(i); + let child_s = *s_children.add(i); + if child_a.is_null() || child_s.is_null() { + return Err(arrow_ingest_err( + "Arrow array or schema child pointer is NULL", + )); + } + try_reserve_one(&mut stack)?; + stack.push((child_a as *const _, child_s as 
*const _, depth + 1)); + } + } + Ok(()) + } +} + +#[cfg(feature = "arrow")] +unsafe fn arrow_append_impl( + buffer: *mut line_sender_buffer, + table: line_sender_table_name, + array: *mut arrow::ffi::FFI_ArrowArray, + schema: *const arrow::ffi::FFI_ArrowSchema, + ts_column: Option, + err_out: *mut *mut line_sender_error, +) -> bool { + use arrow::datatypes::{DataType, Field, Schema}; + use arrow_array::{ArrayRef, RecordBatch, StructArray, make_array}; + use std::sync::Arc; + unsafe { + if buffer.is_null() || array.is_null() || schema.is_null() { + arrow_err_to_c_box( + err_out, + ErrorCode::InvalidApiCall, + "line_sender_buffer_append_arrow: NULL buffer / array / schema".to_string(), + ); + return false; + } + // Bound depth, breadth and total node count on both trees BEFORE + // consuming the array, so a rejection leaves caller-owned + // `array->release` intact. Walks include the dictionary chain + // (which `arrow::ffi::from_ffi` recurses through) and cross-checks + // array/schema `n_children` agreement to fend off the asserts + // inside arrow-rs that would otherwise abort under `panic = "abort"`. + if let Err(e) = validate_arrow_schema_depth(schema) { + arrow_err_to_c_box(err_out, e.code(), e.msg().to_string()); + return false; + } + if let Err(e) = validate_arrow_array_depth(array, schema) { + arrow_err_to_c_box(err_out, e.code(), e.msg().to_string()); + return false; + } + // Move out + null caller's release; every return path now + // drops `imported_array` exactly once. + let imported_array = std::ptr::read(array); + (*array).release = None; + let inner = unwrap_buffer_mut(buffer); + let array_data = match arrow::ffi::from_ffi(imported_array, &*schema) { + Ok(d) => d, + Err(e) => { + arrow_err_to_c_box( + err_out, + ErrorCode::ArrowIngest, + format!("from_ffi failed: {}", e), + ); + return false; + } + }; + // `from_ffi` uses `new_unchecked`; this is the trust boundary. + // A skipped bound here aborts the host under `panic = "abort"`. 
+ if let Err(e) = array_data.validate_full() { + arrow_err_to_c_box( + err_out, + ErrorCode::ArrowIngest, + format!("Arrow array validation failed: {}", e), + ); + return false; + } + let rb = if matches!(array_data.data_type(), DataType::Struct(_)) { + if array_data.nulls().is_some_and(|n| n.null_count() > 0) { + arrow_err_to_c_box( + err_out, + ErrorCode::ArrowIngest, + "top-level Struct array must have no null rows for RecordBatch ingest" + .to_string(), + ); + return false; + } + let struct_arr = StructArray::from(array_data); + let rb_schema = Arc::new(Schema::new(struct_arr.fields().clone())); + let columns: Vec = struct_arr.columns().to_vec(); + match RecordBatch::try_new(rb_schema, columns) { + Ok(rb) => rb, + Err(e) => { + arrow_err_to_c_box( + err_out, + ErrorCode::ArrowIngest, + format!("RecordBatch::try_new failed: {}", e), + ); + return false; + } + } + } else { + let field = match Field::try_from(&*schema) { + Ok(f) => f, + Err(e) => { + arrow_err_to_c_box( + err_out, + ErrorCode::ArrowIngest, + format!("schema conversion failed: {}", e), + ); + return false; + } + }; + let arr_ref: ArrayRef = make_array(array_data); + let rb_schema = Arc::new(Schema::new(vec![field])); + match RecordBatch::try_new(rb_schema, vec![arr_ref]) { + Ok(rb) => rb, + Err(e) => { + arrow_err_to_c_box( + err_out, + ErrorCode::ArrowIngest, + format!("RecordBatch::try_new failed: {}", e), + ); + return false; + } + } + }; + let result = match ts_column { + Some(ts) => inner.append_arrow_at_column(table.as_name(), &rb, ts.as_name()), + None => inner.append_arrow(table.as_name(), &rb), + }; + bubble_err_to_c!(err_out, result); + true + } +} + +#[cfg(feature = "arrow")] +fn arrow_err_to_c_box(err_out: *mut *mut line_sender_error, code: ErrorCode, msg: String) { + unsafe { + if err_out.is_null() { + return; + } + *err_out = Box::into_raw(Box::new(line_sender_error { + error: Error::new(code, msg), + qwp_ws_error: None, + })); + } +} + #[cfg(test)] mod tests { use super::*; @@ 
-3650,6 +4108,9 @@ mod tests { (line_sender_error_invalid_decimal, 13), // New since 6.1.0 — must remain at the tail. (line_sender_error_server_rejection, 14), + // New since 7.0.0 — arrow feature. Append-only. + (line_sender_error_arrow_unsupported_column_kind, 15), + (line_sender_error_arrow_ingest, 16), ]; for (variant, want) in expected { assert_eq!( @@ -4271,4 +4732,276 @@ mod tests { line_sender_error_free(raw); } } + + #[cfg(feature = "arrow")] + mod arrow_validator_tests { + use super::super::*; + use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema}; + use std::ffi::CString; + + // Build a chain of FFI_ArrowSchemas via the `dictionary` pointer + // of length `depth`. Each parent owns one child via a leaked + // `Box` so the test can free the chain manually + // at teardown. The chain reuses the inner `format = "i"` Int32 + // tag — that's all `validate_arrow_schema_depth` reads. + unsafe fn build_dict_chain(depth: usize) -> *mut FFI_ArrowSchema { + let format = CString::new("i").unwrap(); + let mut head: *mut FFI_ArrowSchema = std::ptr::null_mut(); + for _ in 0..depth { + let layout = std::alloc::Layout::new::(); + let raw = unsafe { std::alloc::alloc_zeroed(layout) } as *mut FFI_ArrowSchema; + unsafe { + (*raw).format = format.as_ptr(); + (*raw).dictionary = head; + } + head = raw; + } + std::mem::forget(format); + head + } + + unsafe fn drop_dict_chain(mut node: *mut FFI_ArrowSchema) { + while !node.is_null() { + let next = unsafe { (*node).dictionary }; + let layout = std::alloc::Layout::new::(); + unsafe { std::alloc::dealloc(node as *mut u8, layout) }; + node = next; + } + } + + #[test] + fn schema_dictionary_chain_at_depth_cap_succeeds() { + unsafe { + let head = build_dict_chain(MAX_ARROW_SCHEMA_DEPTH); + let res = validate_arrow_schema_depth(head); + drop_dict_chain(head); + assert!(res.is_ok(), "depth = cap should be accepted: {:?}", res); + } + } + + #[test] + fn schema_dictionary_chain_above_depth_cap_rejected() { + unsafe { + let head = 
build_dict_chain(MAX_ARROW_SCHEMA_DEPTH + 2); + let res = validate_arrow_schema_depth(head); + drop_dict_chain(head); + let err = res.unwrap_err(); + assert!( + err.msg().contains("depth"), + "expected depth-cap error, got: {}", + err.msg() + ); + } + } + + #[test] + fn schema_null_format_rejected() { + unsafe { + let layout = std::alloc::Layout::new::(); + let raw = std::alloc::alloc_zeroed(layout) as *mut FFI_ArrowSchema; + let res = validate_arrow_schema_depth(raw); + std::alloc::dealloc(raw as *mut u8, layout); + let err = res.unwrap_err(); + assert!( + err.msg().contains("format"), + "expected format-NULL error, got: {}", + err.msg() + ); + } + } + + #[test] + fn schema_negative_n_children_rejected() { + unsafe { + let format = CString::new("i").unwrap(); + let layout = std::alloc::Layout::new::(); + let raw = std::alloc::alloc_zeroed(layout) as *mut FFI_ArrowSchema; + (*raw).format = format.as_ptr(); + (*raw).n_children = -1; + let res = validate_arrow_schema_depth(raw); + std::alloc::dealloc(raw as *mut u8, layout); + let err = res.unwrap_err(); + assert!( + err.msg().contains("negative"), + "expected negative-n_children error, got: {}", + err.msg() + ); + } + } + + #[test] + fn schema_breadth_above_cap_rejected() { + unsafe { + let format = CString::new("i").unwrap(); + let layout = std::alloc::Layout::new::(); + let raw = std::alloc::alloc_zeroed(layout) as *mut FFI_ArrowSchema; + (*raw).format = format.as_ptr(); + (*raw).n_children = MAX_ARROW_SCHEMA_CHILDREN_PER_NODE + 1; + let res = validate_arrow_schema_depth(raw); + std::alloc::dealloc(raw as *mut u8, layout); + let err = res.unwrap_err(); + assert!( + err.msg().contains("n_children"), + "expected n_children-cap error, got: {}", + err.msg() + ); + } + } + + #[test] + fn array_n_buffers_negative_rejected() { + unsafe { + let format = CString::new("i").unwrap(); + let s_layout = std::alloc::Layout::new::(); + let s_raw = std::alloc::alloc_zeroed(s_layout) as *mut FFI_ArrowSchema; + (*s_raw).format = 
format.as_ptr(); + let a_layout = std::alloc::Layout::new::(); + let a_raw = std::alloc::alloc_zeroed(a_layout) as *mut FFI_ArrowArray; + (*a_raw).n_buffers = -1; + let res = validate_arrow_array_depth(a_raw, s_raw); + std::alloc::dealloc(s_raw as *mut u8, s_layout); + std::alloc::dealloc(a_raw as *mut u8, a_layout); + let err = res.unwrap_err(); + assert!( + err.msg().contains("n_buffers"), + "expected n_buffers-negative error, got: {}", + err.msg() + ); + } + } + + #[test] + fn array_schema_n_children_mismatch_rejected() { + unsafe { + let format = CString::new("+s").unwrap(); + let s_layout = std::alloc::Layout::new::(); + let s_raw = std::alloc::alloc_zeroed(s_layout) as *mut FFI_ArrowSchema; + (*s_raw).format = format.as_ptr(); + (*s_raw).n_children = 0; + let a_layout = std::alloc::Layout::new::(); + let a_raw = std::alloc::alloc_zeroed(a_layout) as *mut FFI_ArrowArray; + (*a_raw).n_children = 5; + let res = validate_arrow_array_depth(a_raw, s_raw); + std::alloc::dealloc(s_raw as *mut u8, s_layout); + std::alloc::dealloc(a_raw as *mut u8, a_layout); + let err = res.unwrap_err(); + assert!( + err.msg().contains("disagrees"), + "expected n_children-disagreement error, got: {}", + err.msg() + ); + } + } + + #[test] + fn schema_self_dictionary_cycle_rejected() { + unsafe { + let format = CString::new("i").unwrap(); + let layout = std::alloc::Layout::new::(); + let raw = std::alloc::alloc_zeroed(layout) as *mut FFI_ArrowSchema; + (*raw).format = format.as_ptr(); + (*raw).dictionary = raw; + let res = validate_arrow_schema_depth(raw); + (*raw).dictionary = std::ptr::null_mut(); + std::alloc::dealloc(raw as *mut u8, layout); + let err = res.unwrap_err(); + assert!( + err.msg().contains("cycle"), + "expected cycle error, got: {}", + err.msg() + ); + } + } + + #[test] + fn array_self_dictionary_cycle_rejected() { + unsafe { + let format = CString::new("i").unwrap(); + let s_layout = std::alloc::Layout::new::(); + let s_raw = std::alloc::alloc_zeroed(s_layout) as *mut 
FFI_ArrowSchema; + (*s_raw).format = format.as_ptr(); + (*s_raw).dictionary = s_raw; + let a_layout = std::alloc::Layout::new::(); + let a_raw = std::alloc::alloc_zeroed(a_layout) as *mut FFI_ArrowArray; + (*a_raw).dictionary = a_raw; + let res = validate_arrow_array_depth(a_raw, s_raw); + (*s_raw).dictionary = std::ptr::null_mut(); + (*a_raw).dictionary = std::ptr::null_mut(); + std::alloc::dealloc(s_raw as *mut u8, s_layout); + std::alloc::dealloc(a_raw as *mut u8, a_layout); + let err = res.unwrap_err(); + assert!( + err.msg().contains("cycle"), + "expected cycle error, got: {}", + err.msg() + ); + } + } + + #[test] + fn array_negative_length_rejected() { + unsafe { + let format = CString::new("i").unwrap(); + let s_layout = std::alloc::Layout::new::(); + let s_raw = std::alloc::alloc_zeroed(s_layout) as *mut FFI_ArrowSchema; + (*s_raw).format = format.as_ptr(); + let a_layout = std::alloc::Layout::new::(); + let a_raw = std::alloc::alloc_zeroed(a_layout) as *mut FFI_ArrowArray; + (*a_raw).length = -1; + let res = validate_arrow_array_depth(a_raw, s_raw); + std::alloc::dealloc(s_raw as *mut u8, s_layout); + std::alloc::dealloc(a_raw as *mut u8, a_layout); + let err = res.unwrap_err(); + assert!( + err.msg().contains("length"), + "expected negative-length error, got: {}", + err.msg() + ); + } + } + + #[test] + fn array_negative_offset_rejected() { + unsafe { + let format = CString::new("i").unwrap(); + let s_layout = std::alloc::Layout::new::(); + let s_raw = std::alloc::alloc_zeroed(s_layout) as *mut FFI_ArrowSchema; + (*s_raw).format = format.as_ptr(); + let a_layout = std::alloc::Layout::new::(); + let a_raw = std::alloc::alloc_zeroed(a_layout) as *mut FFI_ArrowArray; + (*a_raw).offset = -1; + let res = validate_arrow_array_depth(a_raw, s_raw); + std::alloc::dealloc(s_raw as *mut u8, s_layout); + std::alloc::dealloc(a_raw as *mut u8, a_layout); + let err = res.unwrap_err(); + assert!( + err.msg().contains("offset"), + "expected negative-offset error, got: {}", 
+ err.msg() + ); + } + } + + #[test] + fn array_length_above_cap_rejected() { + unsafe { + let format = CString::new("i").unwrap(); + let s_layout = std::alloc::Layout::new::(); + let s_raw = std::alloc::alloc_zeroed(s_layout) as *mut FFI_ArrowSchema; + (*s_raw).format = format.as_ptr(); + let a_layout = std::alloc::Layout::new::(); + let a_raw = std::alloc::alloc_zeroed(a_layout) as *mut FFI_ArrowArray; + (*a_raw).length = MAX_ARROW_ARRAY_LENGTH + 1; + let res = validate_arrow_array_depth(a_raw, s_raw); + std::alloc::dealloc(s_raw as *mut u8, s_layout); + std::alloc::dealloc(a_raw as *mut u8, a_layout); + let err = res.unwrap_err(); + assert!( + err.msg().contains("length"), + "expected length-cap error, got: {}", + err.msg() + ); + } + } + } } diff --git a/questdb-rs/Cargo.toml b/questdb-rs/Cargo.toml index 70aac7a2..84139135 100644 --- a/questdb-rs/Cargo.toml +++ b/questdb-rs/Cargo.toml @@ -11,7 +11,7 @@ categories = ["database"] authors = ["Adam Cimarosti "] [package.metadata.docs.rs] -features = ["almost-all-features"] +features = ["almost-all-features", "arrow", "polars"] [lib] name = "questdb" @@ -64,6 +64,22 @@ p12-keystore = { version = "0.2", optional = true } zstd = { version = "0.13", optional = true } +# Apache Arrow integration. `ffi` feature enables Arrow C Data Interface +# export. Pinned to a single major to match DataFusion's current major; +# bump deliberately per release notes. +arrow = { version = "58", optional = true, default-features = false, features = ["ffi"] } +arrow-array = { version = "58", optional = true, default-features = false } +arrow-schema = { version = "58", optional = true, default-features = false } +arrow-buffer = { version = "58", optional = true, default-features = false } +arrow-data = { version = "58", optional = true, default-features = false } +# 64-byte aligned allocations for build-pass Arrow buffers (validity, +# BOOLEAN bit-pack, ARRAY offsets, SYMBOL union dict). 
+aligned-vec = { version = "0.6", optional = true } +# Polars bridge via the Arrow C Data Interface. Tighter pin than arrow +# because polars 0.x churns the ffi surface across minors. +polars = { version = "0.53", optional = true, default-features = false, features = ["dtype-categorical"] } +polars-arrow = { version = "0.53", optional = true, default-features = false, features = ["compute"] } + [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.60", features = [ "Win32_Foundation", @@ -79,6 +95,9 @@ slugify = "0.1.0" indoc = "2" [dev-dependencies] +# Pulled in transitively by `arrow-array`; named explicitly here so unit +# tests under `ingress::arrow::tests` can build `Float16Array` payloads. +half = "2" socket2 = "0.6.1" mio = { version = "1", features = ["os-poll", "net"] } chrono = "0.4.31" @@ -172,6 +191,24 @@ sync-reader-ws = ["_egress", "_keystore-roots"] ## Decompression for `FLAG_ZSTD` `RESULT_BATCH` payloads. compression-zstd = ["_egress", "dep:zstd"] +## Arrow integration: streaming Cursor → RecordBatchReader (egress) and +## RecordBatch → Buffer (ingress). Both directions ride QWP/WS. +## See `doc/QUESTDB_ARROW_INTEGRATION_DESIGN.md`. +arrow = [ + "sync-reader-ws", + "_sender-qwp-ws", + "dep:arrow", + "dep:arrow-array", + "dep:arrow-schema", + "dep:arrow-buffer", + "dep:arrow-data", + "dep:aligned-vec", + "dep:bytes", +] + +## Polars sub-feature. ~30 lines of wrappers on top of `arrow`. +polars = ["arrow", "sync-reader-ws", "dep:polars", "dep:polars-arrow"] + ## Run integration tests against a real QuestDB server launched from the ## `questdb/` submodule. Requires JDK 25 + Maven and a built jar at ## `../questdb/core/target/questdb-*-SNAPSHOT.jar`. @@ -196,6 +233,9 @@ _keystore-roots = ["dep:jks", "dep:p12-keystore"] ## thus compiling with `--all-features` will not work. ## Instead use `--features almost-all-features`. ## This is useful for quickly running `cargo test` or `cargo clippy`. 
+## +## Excludes `arrow` / `polars`: those are opt-in. CI runs them separately +## via `cargo test --features almost-all-features,arrow,polars`. almost-all-features = [ "sync-sender", "sync-reader-ws", @@ -255,6 +295,10 @@ required-features = ["sync-reader-ws"] name = "qwp_ws_unified_sfa_bench" required-features = ["sync-sender-qwp-ws"] +[[example]] +name = "polars" +required-features = ["polars", "sync-sender-qwp-ws"] + # Decoder microbenchmark anchoring the perf claims from commits # `8ec0a85` (zero-copy decode) and `1163d43` (tighter SYMBOL/VARCHAR # decode hot paths). Run with: diff --git a/questdb-rs/examples/polars.rs b/questdb-rs/examples/polars.rs new file mode 100644 index 00000000..d5deacbd --- /dev/null +++ b/questdb-rs/examples/polars.rs @@ -0,0 +1,99 @@ +//! End-to-end polars × QuestDB demo: ingest a `DataFrame` over QWP/WS, +//! then read it back via the egress `Reader` directly into a polars +//! `DataFrame`. +//! +//! Run against a local QuestDB with QWP/WS enabled: +//! +//! ```bash +//! cargo run --example polars --features polars,sync-sender-qwp-ws +//! 
``` + +use std::error::Error; +use std::num::NonZeroUsize; + +use polars::prelude::{DataFrame, IntoColumn, NamedFrom, PlSmallStr, Series}; +use questdb::{ + egress::Reader, + ingress::{Sender, TableName, polars::dataframe_to_batches}, +}; + +const TABLE: &str = "trades_polars_demo"; + +fn build_df() -> DataFrame { + let symbol = Series::new( + PlSmallStr::from("symbol"), + &["ETH-USD", "BTC-USD", "ETH-USD", "BTC-USD"], + ); + let price = Series::new( + PlSmallStr::from("price"), + &[2615.54, 65432.10, 2616.00, 65440.55], + ); + let amount = Series::new( + PlSmallStr::from("amount"), + &[0.00044, 0.0012, 0.00050, 0.0008], + ); + DataFrame::new( + 4, + vec![ + symbol.into_column(), + price.into_column(), + amount.into_column(), + ], + ) + .unwrap() +} + +fn ingest(host: &str, port: &str, df: &DataFrame) -> Result<(), Box> { + let mut sender = Sender::from_conf(format!("qwpws::addr={host}:{port};"))?; + let mut buffer = sender.new_buffer(); + let table = TableName::new(TABLE)?; + let max_rows = NonZeroUsize::new(10_000); + for rb in dataframe_to_batches(df, max_rows) { + let rb = rb?; + buffer.append_arrow(table, &rb)?; + sender.flush(&mut buffer)?; + } + Ok(()) +} + +fn read_back(host: &str, port: &str) -> Result> { + let mut reader = Reader::from_conf(format!("ws::addr={host}:{port};"))?; + let mut cursor = reader + .prepare(format!("SELECT symbol, price, amount FROM {TABLE}")) + .execute()?; + Ok(cursor.fetch_all_polars()?) 
+} + +fn main() -> Result<(), Box> { + let host = std::env::args() + .nth(1) + .unwrap_or_else(|| "127.0.0.1".to_string()); + let port = std::env::args() + .nth(2) + .unwrap_or_else(|| "9000".to_string()); + + let df = build_df(); + println!("==== INGEST ===="); + println!("table: {TABLE}"); + println!("shape: {:?} (rows × cols)", df.shape()); + println!("schema: {:?}", df.schema()); + println!("{df}"); + + ingest(&host, &port, &df)?; + println!( + "✓ flushed {} rows over QWP/WS to {host}:{port}\n", + df.height() + ); + + println!("==== READ-BACK ===="); + let back = read_back(&host, &port)?; + println!("shape: {:?} (rows × cols)", back.shape()); + println!("schema: {:?}", back.schema()); + println!("n_chunks per column:"); + for col in back.columns() { + println!(" {:>8} → {} chunk(s)", col.name(), col.n_chunks()); + } + println!("{back}"); + + Ok(()) +} diff --git a/questdb-rs/src/egress/arrow/convert.rs b/questdb-rs/src/egress/arrow/convert.rs new file mode 100644 index 00000000..b7dd5783 --- /dev/null +++ b/questdb-rs/src/egress/arrow/convert.rs @@ -0,0 +1,808 @@ +/******************************************************************************* + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2025 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ******************************************************************************/ + +//! `DecodedBatch` → `arrow_array::RecordBatch` conversion. + +use std::collections::HashMap; +use std::sync::Arc; + +use aligned_vec::{AVec, ConstAlign}; +use arrow_array::{ + Array, ArrayRef, BinaryArray, BooleanArray, Decimal64Array, Decimal128Array, Decimal256Array, + DictionaryArray, FixedSizeBinaryArray, Int8Array, Int16Array, Int32Array, Int64Array, + ListArray, RecordBatch, StringArray, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, +}; +use arrow_buffer::{Buffer, NullBuffer}; +use arrow_data::ArrayDataBuilder; +use arrow_schema::{ArrowError, DataType, Field, Schema as ArrowSchema, TimeUnit}; +use bytes::Bytes; + +use crate::egress::arrow::schema::to_arrow_export; +use crate::egress::column_kind::ColumnKind; +use crate::egress::decoder::{ArrayBuffers, ColumnBuffer, DecodedBatch, DecodedColumn}; +use crate::egress::error::{Error, Result, fmt}; +use crate::egress::schema::Schema; +use crate::egress::symbol_dict::SymbolDict; + +type ABytes = AVec>; + +// `Bytes::from_owner` requires the owner to be `Send + Sync + 'static`. +// arrow-rs's RecordBatch can be dropped on any thread (Python consumers +// release on a worker pool), so the AVec we hand it must satisfy these +// bounds. A future aligned-vec release that adds a !Send field would +// silently break the FFI export path — this static check fails to +// compile if that happens. +const _: fn() = || { + fn assert_send_sync_static() {} + assert_send_sync_static::(); +}; + +/// Working buffers reused across SYMBOL columns in one batch. Reuses the +/// remap HashMap allocation per `batch_to_record_batch` call so a wide +/// batch with N SYMBOL columns does not pay N independent `HashMap::new()` +/// costs. The hasher is `std::collections::hash_map::RandomState` — +/// changing to a u32-tuned hasher is a follow-up. 
+#[derive(Default)] +struct SymbolBuildScratch { + remap: HashMap, +} + +pub(crate) fn batch_to_record_batch( + schema_ref: Arc, + egress_schema: &Schema, + batch: DecodedBatch, + dict: &SymbolDict, +) -> Result { + let DecodedBatch { + row_count, columns, .. + } = batch; + if columns.len() != schema_ref.fields().len() { + return Err(fmt!( + ProtocolError, + "schema/batch column count mismatch: schema={} batch={}", + schema_ref.fields().len(), + columns.len() + )); + } + let mut arrays: Vec = Vec::with_capacity(columns.len()); + let mut sym_scratch = SymbolBuildScratch::default(); + for (idx, decoded) in columns.into_iter().enumerate() { + let field = schema_ref.field(idx); + let kind = egress_schema + .column(idx) + .map(|c| c.kind) + .ok_or_else(|| fmt!(InvalidApiCall, "egress schema missing column {}", idx))?; + arrays.push(column_to_array( + field, + kind, + decoded, + row_count, + dict, + &mut sym_scratch, + )?); + } + RecordBatch::try_new(schema_ref, arrays).map_err(|e| to_arrow_export(e.to_string())) +} + +fn column_to_array( + field: &Field, + kind: ColumnKind, + decoded: DecodedColumn, + row_count: usize, + dict: &SymbolDict, + sym_scratch: &mut SymbolBuildScratch, +) -> Result { + Ok(match (kind, decoded) { + (ColumnKind::Boolean, DecodedColumn::Boolean(buf)) => { + boolean_array(buf, row_count).map(|a| Arc::new(a) as ArrayRef)? + } + (ColumnKind::Byte, DecodedColumn::Byte(buf)) => { + primitive_array(buf, row_count, DataType::Int8)? + } + (ColumnKind::Short, DecodedColumn::Short(buf)) => { + primitive_array(buf, row_count, DataType::Int16)? + } + (ColumnKind::Int, DecodedColumn::Int(buf)) => { + primitive_array(buf, row_count, DataType::Int32)? + } + (ColumnKind::Long, DecodedColumn::Long(buf)) => { + primitive_array(buf, row_count, DataType::Int64)? + } + (ColumnKind::Float, DecodedColumn::Float(buf)) => { + primitive_array(buf, row_count, DataType::Float32)? 
+ } + (ColumnKind::Double, DecodedColumn::Double(buf)) => { + primitive_array(buf, row_count, DataType::Float64)? + } + (ColumnKind::Char, DecodedColumn::Char(buf)) => { + primitive_array(buf, row_count, DataType::UInt16)? + } + (ColumnKind::Ipv4, DecodedColumn::Ipv4(buf)) => { + primitive_array(buf, row_count, DataType::UInt32)? + } + (ColumnKind::Timestamp, DecodedColumn::Timestamp(buf)) => { + timestamp_array(buf, row_count, TimeUnit::Microsecond)? + } + (ColumnKind::TimestampNanos, DecodedColumn::TimestampNanos(buf)) => { + timestamp_array(buf, row_count, TimeUnit::Nanosecond)? + } + (ColumnKind::Date, DecodedColumn::Date(buf)) => { + timestamp_array(buf, row_count, TimeUnit::Millisecond)? + } + (ColumnKind::Uuid, DecodedColumn::Uuid(buf)) => fixed_bytes_array(buf, row_count, 16)?, + (ColumnKind::Long256, DecodedColumn::Long256(buf)) => { + fixed_bytes_array(buf, row_count, 32)? + } + (ColumnKind::Decimal64, DecodedColumn::Decimal64 { buffer, scale }) => { + decimal_array(buffer, row_count, DataType::Decimal64(18, scale))? + } + (ColumnKind::Decimal128, DecodedColumn::Decimal128 { buffer, scale }) => { + decimal_array(buffer, row_count, DataType::Decimal128(38, scale))? + } + (ColumnKind::Decimal256, DecodedColumn::Decimal256 { buffer, scale }) => { + decimal_array(buffer, row_count, DataType::Decimal256(76, scale))? 
+ } + ( + ColumnKind::Varchar, + DecodedColumn::Varchar { + offsets, + data, + validity, + }, + ) => varlen_string_array(field, offsets, data, validity, row_count)?, + ( + ColumnKind::Binary, + DecodedColumn::Binary { + offsets, + data, + validity, + }, + ) => varlen_binary_array(field, offsets, data, validity, row_count)?, + ( + ColumnKind::Geohash, + DecodedColumn::Geohash { + buffer, + byte_width, + precision_bits, + }, + ) => geohash_array(buffer, byte_width, precision_bits, row_count)?, + ( + ColumnKind::Symbol, + DecodedColumn::Symbol { + codes, + validity, + local_dict, + }, + ) => { + let active = local_dict.as_ref().unwrap_or(dict); + symbol_array(codes, validity, active, row_count, sym_scratch)? + } + (ColumnKind::DoubleArray, DecodedColumn::DoubleArray(b)) => { + array_column_to_arrow(field, b, row_count, ArrayLeaf::Float64)? + } + (ColumnKind::LongArray, DecodedColumn::LongArray(b)) => { + array_column_to_arrow(field, b, row_count, ArrayLeaf::Int64)? + } + (kind, decoded) => { + return Err(fmt!( + ProtocolError, + "kind/decoded mismatch: kind={:?} variant={:?}", + kind, + decoded + )); + } + }) +} + +fn primitive_array(buf: ColumnBuffer, row_count: usize, dtype: DataType) -> Result { + let nulls = buffer_null_buffer(&buf.validity, row_count)?; + let values = buffer_to_arrow(&buf.values); + let data = ArrayDataBuilder::new(dtype) + .len(row_count) + .add_buffer(values) + .nulls(nulls) + .align_buffers(true) + .build() + .map_err(|e| to_arrow_export(e.to_string()))?; + Ok(arrow_array::make_array(data)) +} + +fn decimal_array(buf: ColumnBuffer, row_count: usize, dtype: DataType) -> Result { + let nulls = buffer_null_buffer(&buf.validity, row_count)?; + let values = buffer_to_arrow(&buf.values); + let data = ArrayDataBuilder::new(dtype.clone()) + .len(row_count) + .add_buffer(values) + .nulls(nulls) + .align_buffers(true) + .build() + .map_err(|e| to_arrow_export(e.to_string()))?; + Ok(match dtype { + DataType::Decimal64(_, _) => 
Arc::new(Decimal64Array::from(data)) as ArrayRef, + DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef, + DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef, + _ => unreachable!(), + }) +} + +fn timestamp_array(buf: ColumnBuffer, row_count: usize, unit: TimeUnit) -> Result { + let nulls = buffer_null_buffer(&buf.validity, row_count)?; + let values = buffer_to_arrow(&buf.values); + let dtype = DataType::Timestamp(unit, Some(Arc::from("UTC"))); + let data = ArrayDataBuilder::new(dtype) + .len(row_count) + .add_buffer(values) + .nulls(nulls) + .align_buffers(true) + .build() + .map_err(|e| to_arrow_export(e.to_string()))?; + let arr: ArrayRef = match unit { + TimeUnit::Microsecond => Arc::new(TimestampMicrosecondArray::from(data)), + TimeUnit::Nanosecond => Arc::new(TimestampNanosecondArray::from(data)), + TimeUnit::Millisecond => Arc::new(TimestampMillisecondArray::from(data)), + other => { + return Err(fmt!( + ProtocolError, + "unsupported timestamp TimeUnit on egress: {:?}", + other + )); + } + }; + Ok(arr) +} + +fn fixed_bytes_array(buf: ColumnBuffer, row_count: usize, n: i32) -> Result { + let nulls = buffer_null_buffer(&buf.validity, row_count)?; + let values = buffer_to_arrow(&buf.values); + let data = ArrayDataBuilder::new(DataType::FixedSizeBinary(n)) + .len(row_count) + .add_buffer(values) + .nulls(nulls) + .align_buffers(true) + .build() + .map_err(|e| to_arrow_export(e.to_string()))?; + Ok(Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef) +} + +fn varlen_string_array( + _field: &Field, + offsets: Vec, + data: Bytes, + validity: Option, + row_count: usize, +) -> Result { + let nulls = bytes_null_buffer(&validity, row_count)?; + let off = offsets_i32(&offsets)?; + let data = ArrayDataBuilder::new(DataType::Utf8) + .len(row_count) + .add_buffer(Buffer::from(bytes_from_avec(off))) + .add_buffer(bytes_to_arrow(data)) + .nulls(nulls) + .align_buffers(true) + .build() + .map_err(|e| 
to_arrow_export(e.to_string()))?; + Ok(Arc::new(StringArray::from(data)) as ArrayRef) +} + +fn varlen_binary_array( + _field: &Field, + offsets: Vec, + data: Bytes, + validity: Option, + row_count: usize, +) -> Result { + let nulls = bytes_null_buffer(&validity, row_count)?; + let off = offsets_i32(&offsets)?; + let data = ArrayDataBuilder::new(DataType::Binary) + .len(row_count) + .add_buffer(Buffer::from(bytes_from_avec(off))) + .add_buffer(bytes_to_arrow(data)) + .nulls(nulls) + .align_buffers(true) + .build() + .map_err(|e| to_arrow_export(e.to_string()))?; + Ok(Arc::new(BinaryArray::from(data)) as ArrayRef) +} + +fn boolean_array(buf: ColumnBuffer, row_count: usize) -> Result { + let nulls = buffer_null_buffer(&buf.validity, row_count)?; + if buf.values.len() < row_count { + return Err(fmt!( + ProtocolError, + "boolean wire payload truncated: have {} bytes, need {}", + buf.values.len(), + row_count + )); + } + let mut packed = ABytes::with_capacity(64, row_count.div_ceil(8)); + packed.resize(row_count.div_ceil(8), 0); + for (i, &b) in buf.values.iter().take(row_count).enumerate() { + if b != 0 { + packed[i >> 3] |= 1u8 << (i & 7); + } + } + let buf = Buffer::from(bytes_from_avec(packed)); + let data = ArrayDataBuilder::new(DataType::Boolean) + .len(row_count) + .add_buffer(buf) + .nulls(nulls) + .build() + .map_err(|e| to_arrow_export(e.to_string()))?; + Ok(BooleanArray::from(data)) +} + +fn geohash_array( + buf: ColumnBuffer, + byte_width: u8, + precision_bits: u8, + row_count: usize, +) -> Result { + let nulls = buffer_null_buffer(&buf.validity, row_count)?; + let (dtype, target_width) = match precision_bits { + 1..=7 => (DataType::Int8, 1usize), + 8..=15 => (DataType::Int16, 2), + 16..=31 => (DataType::Int32, 4), + 32..=60 => (DataType::Int64, 8), + other => { + return Err(fmt!( + ProtocolError, + "geohash precision_bits {} not in 1..=60", + other + )); + } + }; + let bw = byte_width as usize; + let required = row_count + .checked_mul(bw) + .ok_or_else(|| 
fmt!(ProtocolError, "geohash payload size overflows usize"))?; + if buf.values.len() < required { + return Err(fmt!( + ProtocolError, + "geohash wire payload truncated: have {} bytes, need row_count={} * byte_width={} = {}", + buf.values.len(), + row_count, + bw, + required + )); + } + let values_buf = if bw == target_width { + buffer_to_arrow(&buf.values) + } else if bw < target_width { + widen_zero_extend(&buf.values, bw, target_width, row_count) + } else { + return Err(fmt!( + ProtocolError, + "geohash wire byte_width {} exceeds Arrow target width {} for precision_bits {}", + byte_width, + target_width, + precision_bits + )); + }; + let data = ArrayDataBuilder::new(dtype.clone()) + .len(row_count) + .add_buffer(values_buf) + .nulls(nulls) + .align_buffers(true) + .build() + .map_err(|e| to_arrow_export(e.to_string()))?; + Ok(match dtype { + DataType::Int8 => Arc::new(Int8Array::from(data)) as ArrayRef, + DataType::Int16 => Arc::new(Int16Array::from(data)) as ArrayRef, + DataType::Int32 => Arc::new(Int32Array::from(data)) as ArrayRef, + DataType::Int64 => Arc::new(Int64Array::from(data)) as ArrayRef, + _ => unreachable!(), + }) +} + +fn widen_zero_extend(src: &Bytes, src_width: usize, dst_width: usize, row_count: usize) -> Buffer { + let mut out = ABytes::with_capacity(64, row_count * dst_width); + out.resize(row_count * dst_width, 0); + for r in 0..row_count { + let s = r * src_width; + let d = r * dst_width; + out[d..d + src_width].copy_from_slice(&src[s..s + src_width]); + } + Buffer::from(bytes_from_avec(out)) +} + +fn symbol_array( + codes: Vec, + validity: Option, + dict: &SymbolDict, + row_count: usize, + scratch: &mut SymbolBuildScratch, +) -> Result { + let nulls = bytes_null_buffer(&validity, row_count)?; + scratch.remap.clear(); + if scratch.remap.capacity() < codes.len().min(64) { + scratch + .remap + .reserve(codes.len().min(64) - scratch.remap.capacity()); + } + let remap = &mut scratch.remap; + let mut union_offsets: Vec = 
Vec::with_capacity(codes.len().min(64) + 1); + union_offsets.push(0); + let mut union_bytes: ABytes = ABytes::new(64); + let mut dense = ABytes::with_capacity(64, codes.len() * 4); + dense.resize(codes.len() * 4, 0); + + fn resolve( + code: u32, + remap: &mut HashMap, + union_offsets: &mut Vec, + union_bytes: &mut ABytes, + dict: &SymbolDict, + ) -> Result { + if let Some(&dense_code) = remap.get(&code) { + return Ok(dense_code); + } + let s = dict + .get(code) + .ok_or_else(|| fmt!(ProtocolError, "symbol code {} not in dict", code))?; + union_bytes.extend_from_slice(s.as_bytes()); + let next_off = union_bytes.len() as i32; + union_offsets.push(next_off); + let assigned = (union_offsets.len() - 2) as u32; + remap.insert(code, assigned); + Ok(assigned) + } + + match nulls.as_ref() { + None => { + for (row, &code) in codes.iter().enumerate() { + let dense_code = resolve( + code, + &mut *remap, + &mut union_offsets, + &mut union_bytes, + dict, + )?; + let base = row * 4; + dense[base..base + 4].copy_from_slice(&dense_code.to_le_bytes()); + } + } + Some(n) => { + for row in n.valid_indices() { + let code = codes[row]; + let dense_code = resolve( + code, + &mut *remap, + &mut union_offsets, + &mut union_bytes, + dict, + )?; + let base = row * 4; + dense[base..base + 4].copy_from_slice(&dense_code.to_le_bytes()); + } + } + } + + let mut union_offsets_avec = ABytes::with_capacity(64, union_offsets.len() * 4); + for off in &union_offsets { + union_offsets_avec.extend_from_slice(&off.to_le_bytes()); + } + let values_data = ArrayDataBuilder::new(DataType::Utf8) + .len(union_offsets.len() - 1) + .add_buffer(Buffer::from(bytes_from_avec(union_offsets_avec))) + .add_buffer(Buffer::from(bytes_from_avec(union_bytes))) + .build() + .map_err(|e| to_arrow_export(e.to_string()))?; + let values = arrow_array::StringArray::from(values_data); + let keys_buf = Buffer::from(bytes_from_avec(dense)); + let dict_data = ArrayDataBuilder::new(DataType::Dictionary( + Box::new(DataType::UInt32), 
+ Box::new(DataType::Utf8), + )) + .len(row_count) + .add_buffer(keys_buf) + .add_child_data(values.into_data()) + .nulls(nulls) + .build() + .map_err(|e| to_arrow_export(e.to_string()))?; + Ok( + Arc::new(DictionaryArray::::from( + dict_data, + )) as ArrayRef, + ) +} + +#[derive(Clone, Copy)] +enum ArrayLeaf { + Float64, + Int64, +} + +fn array_column_to_arrow( + field: &Field, + b: ArrayBuffers, + row_count: usize, + leaf: ArrayLeaf, +) -> Result { + let ArrayBuffers { + data_offsets, + data, + shapes, + shape_offsets, + validity, + } = b; + let nulls = bytes_null_buffer(&validity, row_count)?; + let leaf_dtype = match leaf { + ArrayLeaf::Float64 => DataType::Float64, + ArrayLeaf::Int64 => DataType::Int64, + }; + let elem_size = 8usize; + if !data.len().is_multiple_of(elem_size) { + return Err(to_arrow_export(format!( + "ARRAY wire data length {} not a multiple of element size {}", + data.len(), + elem_size + ))); + } + let total_elements = data.len() / elem_size; + if let Some(&last_off) = data_offsets.last() + && last_off as usize != data.len() + { + return Err(to_arrow_export(format!( + "ARRAY data_offsets tail {} disagrees with data length {}", + last_off, + data.len() + ))); + } + let ndim = ndim_from_field(field)?; + let leaf_buf = bytes_to_arrow(data); + let leaf_data = ArrayDataBuilder::new(leaf_dtype) + .len(total_elements) + .add_buffer(leaf_buf) + .align_buffers(true) + .build() + .map_err(|e| to_arrow_export(e.to_string()))?; + let leaf_array: ArrayRef = match leaf { + ArrayLeaf::Float64 => Arc::new(arrow_array::Float64Array::from(leaf_data)), + ArrayLeaf::Int64 => Arc::new(arrow_array::Int64Array::from(leaf_data)), + }; + let per_level_counts = compute_per_level_counts(&shapes, &shape_offsets, ndim, row_count)?; + nest_lists(field, leaf_array, per_level_counts, nulls, ndim) +} + +fn ndim_from_field(field: &Field) -> Result { + fn depth(dt: &DataType, acc: usize) -> usize { + match dt { + DataType::List(inner) | DataType::LargeList(inner) => 
depth(inner.data_type(), acc + 1), + _ => acc, + } + } + let d = depth(field.data_type(), 0); + if d == 0 { + return Err(fmt!( + InvalidApiCall, + "expected nested list field, got {:?}", + field.data_type() + )); + } + Ok(d) +} + +/// Expands each row's shape into per-nesting-level list lengths. +/// +/// `levels[0]` receives one entry per row (the outermost dimension). +/// `levels[k]` for `k >= 1` receives one entry per list that exists at +/// depth `k`, i.e. `dim[0] * ... * dim[k-1]` entries per row, each equal +/// to `dim[k]`. A row whose shape span is empty contributes a single +/// zero-length entry at level 0 and nothing deeper, so inner entries stay +/// aligned with the outer offsets built from `levels[0]`. +/// +/// Returns a `ProtocolError` when `shape_offsets` is too short for +/// `row_count`, a row's shape range falls outside `shapes`, a non-empty +/// span differs from `ndim`, or the running dimension product overflows +/// `u32`. +fn compute_per_level_counts( + shapes: &[u32], + shape_offsets: &[u32], + ndim: usize, + row_count: usize, +) -> Result>> { + let mut levels: Vec> = vec![Vec::new(); ndim]; + for row in 0..row_count { + let lo = *shape_offsets + .get(row) + .ok_or_else(|| fmt!(ProtocolError, "shape_offsets missing row {}", row))? + as usize; + let hi = *shape_offsets.get(row + 1).ok_or_else(|| { + fmt!( + ProtocolError, + "shape_offsets missing row {} terminator", + row + ) + })? as usize; + if hi < lo || hi > shapes.len() { + return Err(fmt!( + ProtocolError, + "row {} shape range [{}, {}) out of shapes len {}", + row, + lo, + hi, + shapes.len() + )); + } + let span = hi - lo; + if span == 0 { + // An empty shape span is a zero-length outer list: it owns no + // inner lists, so only level 0 gets an entry. Pushing a zero at + // deeper levels would add a spurious inner list and shift every + // later row's inner lists by one slot. + if let Some(outer) = levels.first_mut() { + outer.push(0); + } + continue; + } + if span != ndim { + return Err(fmt!( + ProtocolError, + "row {} has shape len {} expected ndim {}", + row, + span, + ndim + )); + } + let row_shape = &shapes[lo..hi]; + // Number of lists present at the current level for this row: + // the product of all outer dimensions seen so far. + let mut group_count: u32 = 1; + for (level, &dim) in row_shape.iter().enumerate() { + if level == 0 { + levels[0].push(dim); + } else { + for _ in 0..group_count { + levels[level].push(dim); + } + } + group_count = group_count.checked_mul(dim).ok_or_else(|| { + fmt!( + ProtocolError, + "row {} shape product overflows u32 at level {}", + row, + level + ) + })?; + } + } + Ok(levels) +} + +fn nest_lists( + field: &Field, + leaf: ArrayRef, + per_level_counts: Vec>, + outer_nulls: Option, + ndim: usize, +) -> Result { + let mut current = leaf; + let mut current_dtype = leaf_dtype_at_depth(field.data_type(), ndim); + for level in (1..ndim).rev() { + let counts = &per_level_counts[level]; + let offsets = counts_to_offsets_i32(counts)?; + let next_field = Arc::new(Field::new("item", current_dtype, true)); + let dtype = 
DataType::List(next_field); + let data = ArrayDataBuilder::new(dtype.clone()) + .len(counts.len()) + .add_buffer(Buffer::from(bytes_from_avec(offsets))) + .add_child_data(current.to_data()) + .build() + .map_err(|e| to_arrow_export(e.to_string()))?; + current = Arc::new(ListArray::from(data)) as ArrayRef; + current_dtype = dtype; + } + let counts0 = &per_level_counts[0]; + let outer_offsets = counts_to_offsets_i32(counts0)?; + let outer_field = Arc::new(Field::new("item", current_dtype, true)); + let outer_dtype = DataType::List(outer_field); + let data = ArrayDataBuilder::new(outer_dtype) + .len(counts0.len()) + .add_buffer(Buffer::from(bytes_from_avec(outer_offsets))) + .add_child_data(current.to_data()) + .nulls(outer_nulls) + .build() + .map_err(|e| to_arrow_export(e.to_string()))?; + Ok(Arc::new(ListArray::from(data)) as ArrayRef) +} + +fn leaf_dtype_at_depth(dt: &DataType, depth: usize) -> DataType { + if depth == 0 { + return dt.clone(); + } + match dt { + DataType::List(inner) | DataType::LargeList(inner) => { + leaf_dtype_at_depth(inner.data_type(), depth - 1) + } + _ => dt.clone(), + } +} + +/// Returns Err on overflow. Per the server-side per-batch wire cap +/// (`MAX_BATCH_WIRE_BYTES = MAX_ZSTD_DECOMPRESSED = 64 MiB`) and +/// `MAX_ARRAY_ELEMENTS_PER_ROW = 16M`, the cumulative element count for +/// any List level in a single batch is bounded by ~8M, far below +/// i32::MAX. The error path is defensive. 
fn counts_to_offsets_i32(counts: &[u32]) -> Result<ABytes> {
    // One leading zero offset plus one running total per count, 4 bytes each.
    let mut out = ABytes::with_capacity(64, (counts.len() + 1) * 4);
    let mut running: i32 = 0;
    out.extend_from_slice(&running.to_le_bytes());
    for &c in counts {
        // Range-check the count before adding it. A plain `c as i32` wraps
        // values above i32::MAX to a negative number, which `checked_add`
        // accepts, so the offsets would silently go backwards instead of
        // producing the overflow error.
        running = i32::try_from(c)
            .ok()
            .and_then(|step| running.checked_add(step))
            .ok_or_else(|| fmt!(ProtocolError, "List offset overflows i32"))?;
        out.extend_from_slice(&running.to_le_bytes());
    }
    Ok(out)
}

/// Re-encodes `u32` offsets as little-endian `i32` bytes, rejecting any
/// offset above `i32::MAX` with a `ProtocolError`.
fn offsets_i32(offsets: &[u32]) -> Result<ABytes> {
    let mut out = ABytes::with_capacity(64, offsets.len() * 4);
    for &o in offsets {
        if o > i32::MAX as u32 {
            return Err(fmt!(ProtocolError, "varlen offset {} exceeds i32::MAX", o));
        }
        out.extend_from_slice(&(o as i32).to_le_bytes());
    }
    Ok(out)
}

/// Wraps a clone of `b` in an Arrow [`Buffer`].
fn buffer_to_arrow(b: &Bytes) -> Buffer {
    Buffer::from(b.clone())
}

/// Wraps `b` in an Arrow [`Buffer`], taking ownership of it.
fn bytes_to_arrow(b: Bytes) -> Buffer {
    Buffer::from(b)
}

/// Hands an `ABytes` allocation to `Bytes` as its owner.
fn bytes_from_avec(v: ABytes) -> Bytes {
    Bytes::from_owner(v)
}

/// Thin alias for [`bytes_null_buffer`].
fn buffer_null_buffer(validity: &Option<Bytes>, row_count: usize) -> Result<Option<NullBuffer>> {
    bytes_null_buffer(validity, row_count)
}

/// Builds an Arrow [`NullBuffer`] from an optional wire validity bitmap.
///
/// Returns `Ok(None)` when there is no bitmap, and a `ProtocolError` when the
/// bitmap is shorter than `row_count` bits. Every byte is bit-inverted on the
/// way in.
fn bytes_null_buffer(validity: &Option<Bytes>, row_count: usize) -> Result<Option<NullBuffer>> {
    let bytes = match validity {
        None => return Ok(None),
        Some(b) => b,
    };
    let needed = row_count.div_ceil(8);
    if bytes.len() < needed {
        return Err(fmt!(
            ProtocolError,
            "validity bitmap is {} bytes but row_count={} needs at least {}",
            bytes.len(),
            row_count,
            needed
        ));
    }
    let mut inverted = ABytes::with_capacity(64, needed);
    inverted.extend_from_slice(&bytes[..needed]);
    for b in inverted.iter_mut() {
        *b = !*b;
    }
    // Mask post-inversion trailing bits — pads were 0, would flip to 1
    // (=valid) and pollute downstream raw-bitmap hashers/copiers.
+ let trailing_bits = row_count % 8; + if trailing_bits != 0 + && let Some(last) = inverted.last_mut() + { + *last &= (1u8 << trailing_bits) - 1; + } + Ok(Some(NullBuffer::new(arrow_buffer::BooleanBuffer::new( + Buffer::from(bytes_from_avec(inverted)), + 0, + row_count, + )))) +} + +/// Boxes a QuestDB [`Error`] as an [`ArrowError::ExternalError`]. +/// Recover via [`try_downcast_questdb`](super::reader::try_downcast_questdb). +pub fn external_arrow_error(e: Error) -> ArrowError { + ArrowError::ExternalError(Box::new(e)) +} diff --git a/questdb-rs/src/egress/arrow/mod.rs b/questdb-rs/src/egress/arrow/mod.rs new file mode 100644 index 00000000..5d6f92f2 --- /dev/null +++ b/questdb-rs/src/egress/arrow/mod.rs @@ -0,0 +1,48 @@ +//! Apache Arrow egress adapter. See `doc/QUESTDB_ARROW_INTEGRATION_DESIGN.md`. + +pub(crate) mod convert; +#[cfg(feature = "polars")] +pub mod polars; +pub(crate) mod reader; +pub(crate) mod schema; + +#[cfg(test)] +mod tests; + +pub use convert::external_arrow_error; +#[cfg(feature = "polars")] +pub use polars::CursorPolarsIter; +pub use reader::{CursorRecordBatchReader, has_tentative_array, try_downcast_questdb}; + +pub(crate) use convert::batch_to_record_batch; +pub(crate) use schema::{batch_arrow_schema, schemas_equal}; + +/// Field-metadata keys this client writes into the `Arc` of +/// every column it emits via the Arrow egress adapter, plus the +/// standard Arrow extension-name key. Read by `classify` on ingress +/// and by mid-stream drift detection (`schemas_equal`). +pub mod metadata { + /// Carries the QuestDB native column type when the Arrow type + /// alone is ambiguous (e.g. `Int8` → `byte`, `UInt16` → `char`). + pub const COLUMN_TYPE: &str = "questdb.column_type"; + /// `"true"` on the field that is the table's designated timestamp. + /// Informational only — not load-bearing for drift detection. + pub const DESIGNATED_TIMESTAMP: &str = "questdb.designated_timestamp"; + /// `"asc"` / `"desc"`. Informational only. 
+ pub const DESIGNATED_TIMESTAMP_ORDER: &str = "questdb.designated_timestamp_order"; + /// Geohash precision in bits (1..=60). Required when the QuestDB + /// native column kind is `geohash*`. + pub const GEOHASH_BITS: &str = "questdb.geohash_bits"; + /// Marks a UTF-8 / dictionary column as the QuestDB `SYMBOL` kind. + pub const SYMBOL: &str = "questdb.symbol"; + /// Native ARRAY dimensionality. + pub const ARRAY_DIM: &str = "questdb.array_dim"; + /// `"true"` when `ARRAY_DIM` is a placeholder from an empty batch; + /// drift detection accepts any opposite ndim until firmed up. + pub const ARRAY_DIM_TENTATIVE: &str = "questdb.array_dim_tentative"; + /// Standard Apache Arrow extension-name field-metadata key. + pub const ARROW_EXTENSION_NAME: &str = "ARROW:extension:name"; + /// Value used in [`ARROW_EXTENSION_NAME`] to mark a + /// `FixedSizeBinary(16)` column as the canonical Arrow UUID. + pub const EXT_ARROW_UUID: &str = "arrow.uuid"; +} diff --git a/questdb-rs/src/egress/arrow/polars.rs b/questdb-rs/src/egress/arrow/polars.rs new file mode 100644 index 00000000..50188473 --- /dev/null +++ b/questdb-rs/src/egress/arrow/polars.rs @@ -0,0 +1,276 @@ +//! Polars sub-feature: `RecordBatch ↔ DataFrame` via Arrow C Data Interface. + +use arrow_array::{Array, RecordBatch}; +use arrow_schema::SchemaRef; +use polars::frame::DataFrame; +use polars::prelude::{Column, IntoColumn, PlSmallStr, Series}; + +use crate::egress::Cursor; +use crate::egress::arrow::has_tentative_array; +use crate::egress::error::{Error, ErrorCode, Result, fmt}; + +// FFI cross-crate helpers in `crate::ingress::polars`. + +impl Cursor<'_> { + /// Decode one batch as a Polars [`DataFrame`]. `Ok(None)` on + /// stream end. + /// + /// This is the low-level per-batch entry point and does **not** + /// detect mid-stream Arrow schema drift; if a later batch's + /// schema differs from earlier ones the resulting DataFrames will + /// simply disagree on columns. 
Use + /// [`Cursor::iter_polars`](crate::egress::Cursor::iter_polars) + /// for a drift-checked iterator, or + /// [`Cursor::fetch_all_polars`] / [`Cursor::as_arrow_reader`] + /// for higher-level adapters that pin the schema on first batch. + pub fn next_polars(&mut self) -> Result> { + match self.next_arrow_batch_inner(None)? { + None => Ok(None), + Some(rb) => Ok(Some(record_batch_to_dataframe(rb)?)), + } + } + + /// Eagerly drain into one chunked Polars [`DataFrame`]. A stream + /// that yields a schema but no batches becomes an empty DataFrame; + /// only a stream without a schema (e.g. cancelled pre-prelude) + /// errors as `NoSchema`. Drift detection is inherited from + /// [`Cursor::iter_polars`]. + pub fn fetch_all_polars(&mut self) -> Result { + let mut iter = self.iter_polars()?; + let mut acc: Option = None; + for item in iter.by_ref() { + let df = item?; + acc = Some(match acc { + None => df, + Some(mut prev) => { + if prev.height() == 0 && prev.schema() != df.schema() { + df + } else { + prev.vstack_mut_owned(df) + .map_err(|e| fmt!(ArrowExport, "polars vstack failed: {}", e))?; + prev + } + } + }); + } + let schema = iter.schema(); + match acc { + Some(df) => Ok(df), + None => record_batch_to_dataframe(RecordBatch::new_empty(schema)), + } + } +} + +/// Drift-checked iterator yielding Polars [`DataFrame`]s, one per +/// QWP batch. Built by [`Cursor::iter_polars`]. Snapshots the first +/// batch's Arrow schema at construction and poisons (terminates) on +/// mid-stream schema drift. 
+pub struct CursorPolarsIter<'r, 'c> { + cursor: &'c mut Cursor<'r>, + schema: SchemaRef, + pending: Option, + poisoned: bool, +} + +impl<'r, 'c> CursorPolarsIter<'r, 'c> { + pub(crate) fn new(cursor: &'c mut Cursor<'r>) -> Result { + let first = cursor.next_arrow_batch_inner(None)?.ok_or_else(|| { + Error::new( + ErrorCode::NoSchema, + "no batch produced; nothing to snapshot", + ) + })?; + let schema = first.schema(); + Ok(Self { + cursor, + schema, + pending: Some(first), + poisoned: false, + }) + } + + /// First batch's schema. Upgrades on tentative→firm ndim + /// (see [`has_tentative_array`]). + pub fn schema(&self) -> SchemaRef { + self.schema.clone() + } +} + +impl Iterator for CursorPolarsIter<'_, '_> { + type Item = Result; + + fn next(&mut self) -> Option { + if self.poisoned { + return None; + } + let rb = if let Some(rb) = self.pending.take() { + rb + } else { + match self.cursor.next_arrow_batch_inner(Some(&self.schema)) { + Ok(Some(rb)) => { + if has_tentative_array(&self.schema) && rb.schema() != self.schema { + self.poisoned = true; + return Some(Err(Error::new( + ErrorCode::SchemaDrift, + "tentative→firm ndim upgrade mid-stream; the \ + iterator pins the first batch's schema. Use \ + Cursor::next_polars to handle drift explicitly", + ))); + } + rb + } + Ok(None) => { + self.poisoned = true; + return None; + } + Err(e) => { + self.poisoned = true; + return Some(Err(e)); + } + } + }; + let df = record_batch_to_dataframe(rb); + if df.is_err() { + self.poisoned = true; + } + Some(df) + } +} + +/// [`RecordBatch`] → Polars [`DataFrame`] via Arrow C Data Interface. +/// Zero-copy for primitive/string/binary. [`ErrorCode::ArrowExport`] on +/// handoff failure. 
+pub fn record_batch_to_dataframe(rb: RecordBatch) -> Result { + let schema = rb.schema(); + let row_count = rb.num_rows(); + let mut columns: Vec = Vec::with_capacity(rb.num_columns()); + for (col, field) in rb.columns().iter().zip(schema.fields().iter()) { + let array_data = col.to_data(); + let (rs_array, rs_schema) = arrow::ffi::to_ffi(&array_data).map_err(|e| { + fmt!( + ArrowExport, + "to_ffi failed for column '{}': {}", + field.name(), + e + ) + })?; + let pa_schema = unsafe { crate::ingress::polars::rs_schema_into_pa(rs_schema) }; + let pa_array = unsafe { crate::ingress::polars::rs_array_into_pa(rs_array) }; + let pa_field = + unsafe { polars_arrow::ffi::import_field_from_c(&pa_schema) }.map_err(|e| { + fmt!( + ArrowExport, + "import_field_from_c('{}'): {}", + field.name(), + e + ) + })?; + let pa_array_box = + unsafe { polars_arrow::ffi::import_array_from_c(pa_array, pa_field.dtype) }.map_err( + |e| { + fmt!( + ArrowExport, + "import_array_from_c('{}'): {}", + field.name(), + e + ) + }, + )?; + let name: PlSmallStr = field.name().as_str().into(); + let series = Series::from_arrow(name, pa_array_box) + .map_err(|e| fmt!(ArrowExport, "Series::from_arrow('{}'): {}", field.name(), e))?; + columns.push(series.into_column()); + } + DataFrame::new(row_count, columns) + .map_err(|e| fmt!(ArrowExport, "DataFrame::new failed: {}", e)) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::Arc; + + use arrow_array::builder::{Float64Builder, Int64Builder, StringBuilder}; + use arrow_array::{ArrayRef, RecordBatch}; + use arrow_schema::{DataType, Field, Schema as ArrowSchema}; + + fn rb_mixed() -> RecordBatch { + let mut ii = Int64Builder::new(); + ii.append_value(1); + ii.append_value(2); + ii.append_value(3); + let mut ff = Float64Builder::new(); + ff.append_value(1.5); + ff.append_value(2.5); + ff.append_value(3.5); + let mut ss = StringBuilder::new(); + ss.append_value("a"); + ss.append_value("b"); + ss.append_value("c"); + let schema = 
Arc::new(ArrowSchema::new(vec![ + Field::new("i", DataType::Int64, false), + Field::new("f", DataType::Float64, false), + Field::new("s", DataType::Utf8, false), + ])); + RecordBatch::try_new( + schema, + vec![ + Arc::new(ii.finish()) as ArrayRef, + Arc::new(ff.finish()) as ArrayRef, + Arc::new(ss.finish()) as ArrayRef, + ], + ) + .unwrap() + } + + #[test] + fn record_batch_to_dataframe_preserves_column_count_and_height() { + let rb = rb_mixed(); + let df = record_batch_to_dataframe(rb).unwrap(); + assert_eq!(df.width(), 3); + assert_eq!(df.height(), 3); + let cols = df.columns(); + assert_eq!(cols[0].name().as_str(), "i"); + assert_eq!(cols[1].name().as_str(), "f"); + assert_eq!(cols[2].name().as_str(), "s"); + } + + #[test] + fn record_batch_to_dataframe_preserves_int_values() { + let rb = rb_mixed(); + let df = record_batch_to_dataframe(rb).unwrap(); + let col = &df.columns()[0]; + let series = col.as_materialized_series(); + let i64s = series.i64().unwrap(); + assert_eq!(i64s.get(0), Some(1)); + assert_eq!(i64s.get(1), Some(2)); + assert_eq!(i64s.get(2), Some(3)); + } + + #[test] + fn record_batch_to_dataframe_preserves_string_values() { + let rb = rb_mixed(); + let df = record_batch_to_dataframe(rb).unwrap(); + let col = &df.columns()[2]; + let series = col.as_materialized_series(); + let s = series.str().unwrap(); + assert_eq!(s.get(0), Some("a")); + assert_eq!(s.get(1), Some("b")); + assert_eq!(s.get(2), Some("c")); + } + + #[test] + fn record_batch_to_dataframe_zero_rows_succeeds() { + let schema = Arc::new(ArrowSchema::new(vec![Field::new( + "v", + DataType::Int64, + false, + )])); + let mut ii = Int64Builder::new(); + let arr: ArrayRef = Arc::new(ii.finish()); + let rb = RecordBatch::try_new(schema, vec![arr]).unwrap(); + let df = record_batch_to_dataframe(rb).unwrap(); + assert_eq!(df.height(), 0); + assert_eq!(df.width(), 1); + } +} diff --git a/questdb-rs/src/egress/arrow/reader.rs b/questdb-rs/src/egress/arrow/reader.rs new file mode 100644 index 
00000000..c2f7ced1 --- /dev/null +++ b/questdb-rs/src/egress/arrow/reader.rs @@ -0,0 +1,128 @@ +/******************************************************************************* + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2025 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +//! Streaming `RecordBatchReader` adapter over a [`Cursor`]. + +use arrow_array::{RecordBatch, RecordBatchReader}; +use arrow_schema::{ArrowError, SchemaRef}; + +use crate::egress::Cursor; +use crate::egress::arrow::convert::external_arrow_error; +use crate::egress::error::{Error, ErrorCode}; + +/// Adapter implementing [`arrow_array::RecordBatchReader`] over a +/// [`Cursor`]. Snapshots the first batch's Arrow schema at construction +/// and poisons on mid-stream schema drift. Failover semantics inherit +/// from [`Cursor::next_batch`](crate::egress::Cursor::next_batch). 
+pub struct CursorRecordBatchReader<'r, 'c> { + cursor: &'c mut Cursor<'r>, + schema: SchemaRef, + pending: Option, + poisoned: bool, +} + +impl<'r, 'c> CursorRecordBatchReader<'r, 'c> { + pub(crate) fn new(cursor: &'c mut Cursor<'r>) -> Result { + let first = cursor.next_arrow_batch_inner(None)?.ok_or_else(|| { + Error::new( + ErrorCode::NoSchema, + "no batch produced; nothing to snapshot", + ) + })?; + let schema = first.schema(); + Ok(Self { + cursor, + schema, + pending: Some(first), + poisoned: false, + }) + } + + /// Snapshotted schema. Same as the [`RecordBatchReader::schema`] + /// trait method, exposed for callers without the trait imported. + pub fn schema(&self) -> SchemaRef { + self.schema.clone() + } +} + +impl Iterator for CursorRecordBatchReader<'_, '_> { + type Item = Result; + + fn next(&mut self) -> Option { + if self.poisoned { + return None; + } + if let Some(rb) = self.pending.take() { + return Some(Ok(rb)); + } + match self.cursor.next_arrow_batch_inner(Some(&self.schema)) { + Ok(Some(rb)) => { + if has_tentative_array(&self.schema) && rb.schema() != self.schema { + self.poisoned = true; + return Some(Err(external_arrow_error(Error::new( + ErrorCode::SchemaDrift, + "tentative→firm ndim upgrade is not representable in \ + RecordBatchReader (schema must be stable for the \ + reader's lifetime); use Cursor::next_arrow_batch \ + to handle drift explicitly", + )))); + } + Some(Ok(rb)) + } + Ok(None) => { + self.poisoned = true; + None + } + Err(e) => { + self.poisoned = true; + Some(Err(external_arrow_error(e))) + } + } + } +} + +/// True if any field carries [`metadata::ARRAY_DIM_TENTATIVE`](crate::egress::arrow::metadata::ARRAY_DIM_TENTATIVE). +/// Gates the tentative→firm ndim mid-stream upgrade. 
+pub fn has_tentative_array(schema: &SchemaRef) -> bool { + schema.fields().iter().any(|f| { + f.metadata() + .get(crate::egress::arrow::metadata::ARRAY_DIM_TENTATIVE) + .is_some_and(|v| v == "true") + }) +} + +impl RecordBatchReader for CursorRecordBatchReader<'_, '_> { + fn schema(&self) -> SchemaRef { + self.schema.clone() + } +} + +/// Downcast an [`ArrowError`] produced by this adapter to the +/// underlying [`Error`]. Returns `None` for foreign Arrow errors. +pub fn try_downcast_questdb(err: &ArrowError) -> Option<&Error> { + match err { + ArrowError::ExternalError(boxed) => boxed.downcast_ref::(), + _ => None, + } +} diff --git a/questdb-rs/src/egress/arrow/schema.rs b/questdb-rs/src/egress/arrow/schema.rs new file mode 100644 index 00000000..feb16490 --- /dev/null +++ b/questdb-rs/src/egress/arrow/schema.rs @@ -0,0 +1,254 @@ +/******************************************************************************* + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2025 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +//! Arrow schema construction from `Schema` + first `DecodedBatch`. 
+ +use std::collections::HashMap; +use std::sync::Arc; + +use arrow_schema::{DataType, Field, Schema as ArrowSchema, TimeUnit}; + +use crate::egress::arrow::metadata::*; +use crate::egress::column_kind::ColumnKind; +use crate::egress::decoder::{DecodedBatch, DecodedColumn}; +use crate::egress::error::{Error, ErrorCode, Result, fmt}; +use crate::egress::schema::Schema; + +pub(crate) fn batch_arrow_schema(schema: &Schema, batch: &DecodedBatch) -> Result { + if schema.len() != batch.columns.len() { + return Err(fmt!( + ProtocolError, + "schema/batch column count mismatch: schema={} batch={}", + schema.len(), + batch.columns.len() + )); + } + let mut fields = Vec::with_capacity(schema.len()); + for (idx, col) in schema.columns().iter().enumerate() { + let decoded = &batch.columns[idx]; + fields.push(arrow_field(&col.name, col.kind, decoded)?); + } + Ok(ArrowSchema::new(fields)) +} + +pub(crate) fn schemas_equal(a: &ArrowSchema, b: &ArrowSchema) -> bool { + if a.fields().len() != b.fields().len() { + return false; + } + for (fa, fb) in a.fields().iter().zip(b.fields().iter()) { + if fa.name() != fb.name() || fa.is_nullable() != fb.is_nullable() { + return false; + } + let tentative_a = is_tentative_array(fa); + let tentative_b = is_tentative_array(fb); + if !tentative_a && !tentative_b && fa.data_type() != fb.data_type() { + return false; + } + for key in [COLUMN_TYPE, GEOHASH_BITS, SYMBOL, ARROW_EXTENSION_NAME] { + if fa.metadata().get(key) != fb.metadata().get(key) { + return false; + } + } + if !tentative_a + && !tentative_b + && fa.metadata().get(ARRAY_DIM) != fb.metadata().get(ARRAY_DIM) + { + return false; + } + } + true +} + +fn is_tentative_array(f: &Field) -> bool { + f.metadata() + .get(ARRAY_DIM_TENTATIVE) + .is_some_and(|v| v == "true") +} + +fn arrow_field(name: &str, kind: ColumnKind, decoded: &DecodedColumn) -> Result { + let (dtype, mut md) = match (kind, decoded) { + (ColumnKind::Boolean, _) => (DataType::Boolean, md_for(kind)), + (ColumnKind::Byte, _) 
=> (DataType::Int8, md_for(kind)), + (ColumnKind::Short, _) => (DataType::Int16, md_for(kind)), + (ColumnKind::Int, _) => (DataType::Int32, md_for(kind)), + (ColumnKind::Long, _) => (DataType::Int64, md_for(kind)), + (ColumnKind::Float, _) => (DataType::Float32, md_for(kind)), + (ColumnKind::Double, _) => (DataType::Float64, md_for(kind)), + (ColumnKind::Char, _) => (DataType::UInt16, md_for(kind)), + (ColumnKind::Ipv4, _) => (DataType::UInt32, md_for(kind)), + (ColumnKind::Timestamp, _) => ( + DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from("UTC"))), + md_for(kind), + ), + (ColumnKind::TimestampNanos, _) => ( + DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("UTC"))), + md_for(kind), + ), + (ColumnKind::Date, _) => ( + DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from("UTC"))), + md_for(kind), + ), + (ColumnKind::Uuid, _) => { + let mut m = md_for(kind); + m.insert(ARROW_EXTENSION_NAME.into(), EXT_ARROW_UUID.into()); + (DataType::FixedSizeBinary(16), m) + } + (ColumnKind::Long256, _) => (DataType::FixedSizeBinary(32), md_for(kind)), + (ColumnKind::Symbol, _) => { + let mut m = md_for(kind); + m.insert(SYMBOL.into(), "true".into()); + ( + DataType::Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)), + m, + ) + } + (ColumnKind::Varchar, DecodedColumn::Varchar { .. }) => (DataType::Utf8, md_for(kind)), + (ColumnKind::Binary, DecodedColumn::Binary { .. }) => (DataType::Binary, md_for(kind)), + ( + ColumnKind::Geohash, + DecodedColumn::Geohash { + buffer: _, + byte_width: _, + precision_bits, + }, + ) => { + let dtype = geohash_dtype_for_precision(*precision_bits).ok_or_else(|| { + fmt!( + ProtocolError, + "geohash precision_bits {} not in 1..=60 for column '{}'", + precision_bits, + name + ) + })?; + let mut m = md_for(kind); + m.insert(GEOHASH_BITS.into(), precision_bits.to_string()); + (dtype, m) + } + (ColumnKind::Decimal64, DecodedColumn::Decimal64 { scale, .. 
}) => { + (DataType::Decimal64(18, *scale), md_for(kind)) + } + (ColumnKind::Decimal128, DecodedColumn::Decimal128 { scale, .. }) => { + (DataType::Decimal128(38, *scale), md_for(kind)) + } + (ColumnKind::Decimal256, DecodedColumn::Decimal256 { scale, .. }) => { + (DataType::Decimal256(76, *scale), md_for(kind)) + } + (ColumnKind::DoubleArray, DecodedColumn::DoubleArray(buf)) => build_array_field( + name, + kind, + DataType::Float64, + &buf.shapes, + &buf.shape_offsets, + )?, + (ColumnKind::LongArray, DecodedColumn::LongArray(buf)) => { + build_array_field(name, kind, DataType::Int64, &buf.shapes, &buf.shape_offsets)? + } + (other, _) => { + return Err(fmt!( + ProtocolError, + "arrow_field: column '{}' kind {:?} does not match decoded column variant", + name, + other + )); + } + }; + md.insert(COLUMN_TYPE.into(), kind.name().into()); + Ok(Field::new(name, dtype, true).with_metadata(md)) +} + +fn md_for(_kind: ColumnKind) -> HashMap { + HashMap::new() +} + +fn geohash_dtype_for_precision(precision_bits: u8) -> Option { + Some(match precision_bits { + 1..=7 => DataType::Int8, + 8..=15 => DataType::Int16, + 16..=31 => DataType::Int32, + 32..=60 => DataType::Int64, + _ => return None, + }) +} + +fn build_array_field( + name: &str, + kind: ColumnKind, + leaf: DataType, + shapes: &[u32], + shape_offsets: &[u32], +) -> Result<(DataType, HashMap)> { + let (ndim, tentative) = match ndim_from_shapes(shapes, shape_offsets)? 
{ + Some(n) => (n, false), + None => (1, true), + }; + if ndim == 0 { + return Err(fmt!( + ProtocolError, + "array column '{}' has ndim=0; QuestDB ARRAY is always at least 1-D", + name + )); + } + let mut dtype = leaf; + for _ in 0..ndim { + dtype = DataType::List(Arc::new(Field::new("item", dtype, true))); + } + let mut md = md_for(kind); + md.insert(ARRAY_DIM.into(), ndim.to_string()); + if tentative { + md.insert(ARRAY_DIM_TENTATIVE.into(), "true".into()); + } + Ok((dtype, md)) +} + +fn ndim_from_shapes(shapes: &[u32], shape_offsets: &[u32]) -> Result> { + if shape_offsets.len() < 2 { + return Ok(None); + } + for w in shape_offsets.windows(2) { + let dims = w[1].checked_sub(w[0]).ok_or_else(|| { + fmt!( + ProtocolError, + "shape_offsets not monotonic: {} < {}", + w[1], + w[0] + ) + })? as usize; + if dims > 0 { + if dims > shapes.len() { + return Err(fmt!( + ProtocolError, + "shape_offsets points past shapes buffer (dim_count={}, shapes.len()={})", + dims, + shapes.len() + )); + } + return Ok(Some(dims)); + } + } + Ok(None) +} + +pub(crate) fn to_arrow_export(msg: impl Into) -> Error { + Error::new(ErrorCode::ArrowExport, msg.into()) +} diff --git a/questdb-rs/src/egress/arrow/tests.rs b/questdb-rs/src/egress/arrow/tests.rs new file mode 100644 index 00000000..eda86325 --- /dev/null +++ b/questdb-rs/src/egress/arrow/tests.rs @@ -0,0 +1,888 @@ +use std::sync::Arc; + +use arrow_array::Array; +use arrow_schema::{DataType, TimeUnit}; +use bytes::Bytes; + +use super::*; +use crate::egress::column_kind::ColumnKind; +use crate::egress::decoder::{ArrayBuffers, ColumnBuffer, DecodedBatch, DecodedColumn}; +use crate::egress::schema::{Schema, SchemaColumn}; +use crate::egress::symbol_dict::SymbolDict; + +fn buf(values: Vec, validity: Option>) -> ColumnBuffer { + ColumnBuffer { + values: Bytes::from(values), + validity: validity.map(Bytes::from), + } +} + +fn schema_of(cols: &[(&str, ColumnKind)]) -> Schema { + Schema::from_columns( + cols.iter() + .map(|(n, k)| 
SchemaColumn { + name: (*n).into(), + kind: *k, + }) + .collect(), + ) +} + +fn decoded_of(row_count: usize, columns: Vec) -> DecodedBatch { + DecodedBatch { + request_id: 1, + batch_seq: 0, + schema_id: 7, + row_count, + columns, + flags: 0, + } +} + +#[test] +fn long_column_roundtrip() { + let mut values = Vec::with_capacity(24); + for v in [1i64, -2, 0x0102_0304_0506_0708] { + values.extend_from_slice(&v.to_le_bytes()); + } + let s = schema_of(&[("v", ColumnKind::Long)]); + let b = decoded_of(3, vec![DecodedColumn::Long(buf(values, None))]); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + assert_eq!(arrow_schema.field(0).data_type(), &DataType::Int64); + let rb = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); + assert_eq!(rb.num_rows(), 3); + let col = rb + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(col.value(0), 1); + assert_eq!(col.value(1), -2); + assert_eq!(col.value(2), 0x0102_0304_0506_0708); +} + +#[test] +fn validity_inversion_runs_on_export() { + let mut values = Vec::with_capacity(32); + for v in [10i64, 20, 30, 40] { + values.extend_from_slice(&v.to_le_bytes()); + } + let qwp_bitmap = vec![0b0000_0010u8]; + let s = schema_of(&[("v", ColumnKind::Long)]); + let b = decoded_of(4, vec![DecodedColumn::Long(buf(values, Some(qwp_bitmap)))]); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + let rb = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); + let col = rb + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert!(col.is_valid(0)); + assert!(col.is_null(1)); + assert!(col.is_valid(2)); + assert!(col.is_valid(3)); +} + +#[test] +fn boolean_bit_packs_on_export() { + let values = vec![0u8, 1, 0, 1, 1]; + let s = schema_of(&[("b", ColumnKind::Boolean)]); + let b = decoded_of(5, vec![DecodedColumn::Boolean(buf(values, None))]); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + 
assert_eq!(arrow_schema.field(0).data_type(), &DataType::Boolean); + let rb = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); + let col = rb + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert!(!col.value(0)); + assert!(col.value(1)); + assert!(!col.value(2)); + assert!(col.value(3)); + assert!(col.value(4)); +} + +#[test] +fn timestamp_micros_carries_timezone() { + let mut values = Vec::with_capacity(16); + for v in [1_700_000_000_000_000i64, 1_700_000_000_001_000] { + values.extend_from_slice(&v.to_le_bytes()); + } + let s = schema_of(&[("ts", ColumnKind::Timestamp)]); + let b = decoded_of(2, vec![DecodedColumn::Timestamp(buf(values, None))]); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + match arrow_schema.field(0).data_type() { + DataType::Timestamp(TimeUnit::Microsecond, tz) => { + assert_eq!(tz.as_deref(), Some("UTC")); + } + other => panic!("expected Timestamp(µs, UTC), got {:?}", other), + } + let _ = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); +} + +#[test] +fn varchar_zero_copy_path_under_2gb() { + let strings = ["hi", "", "yo"]; + let mut data = Vec::new(); + let mut offsets: Vec = vec![0]; + for s in &strings { + data.extend_from_slice(s.as_bytes()); + offsets.push(data.len() as u32); + } + let s = schema_of(&[("v", ColumnKind::Varchar)]); + let b = decoded_of( + 3, + vec![DecodedColumn::Varchar { + offsets, + data: Bytes::from(data), + validity: None, + }], + ); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + assert_eq!(arrow_schema.field(0).data_type(), &DataType::Utf8); + let rb = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); + let col = rb + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(col.value(0), "hi"); + assert_eq!(col.value(1), ""); + assert_eq!(col.value(2), "yo"); +} + +#[test] +fn binary_zero_copy_path_under_2gb() { + let blobs: &[&[u8]] = &[&[1, 2, 3], &[], &[0xFF, 0x00]]; + 
let mut data = Vec::new(); + let mut offsets: Vec = vec![0]; + for b in blobs { + data.extend_from_slice(b); + offsets.push(data.len() as u32); + } + let s = schema_of(&[("b", ColumnKind::Binary)]); + let batch = decoded_of( + 3, + vec![DecodedColumn::Binary { + offsets, + data: Bytes::from(data), + validity: None, + }], + ); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &batch).unwrap()); + assert_eq!(arrow_schema.field(0).data_type(), &DataType::Binary); + let rb = batch_to_record_batch(arrow_schema, &s, batch, &SymbolDict::new()).unwrap(); + let col = rb + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(col.value(0), &[1, 2, 3]); + assert_eq!(col.value(1), &[] as &[u8]); + assert_eq!(col.value(2), &[0xFF, 0x00]); +} + +#[test] +fn uuid_field_carries_arrow_uuid_extension() { + let raw: Vec = (0..32u8).collect(); + let s = schema_of(&[("id", ColumnKind::Uuid)]); + let b = decoded_of(2, vec![DecodedColumn::Uuid(buf(raw, None))]); + let arrow_schema = batch_arrow_schema(&s, &b).unwrap(); + let field = arrow_schema.field(0); + assert_eq!(field.data_type(), &DataType::FixedSizeBinary(16)); + assert_eq!( + field + .metadata() + .get(metadata::ARROW_EXTENSION_NAME) + .map(String::as_str), + Some("arrow.uuid") + ); + assert_eq!( + field + .metadata() + .get(metadata::COLUMN_TYPE) + .map(String::as_str), + Some("uuid") + ); +} + +#[test] +fn symbol_built_with_union_dict_per_batch() { + let mut dict = SymbolDict::new(); + dict.apply_delta( + 0, + [b"AAPL".as_slice(), b"MSFT".as_slice(), b"GOOG".as_slice()], + ) + .unwrap(); + let codes: Vec = vec![0, 2, 0, 1]; + let s = schema_of(&[("sym", ColumnKind::Symbol)]); + let b = decoded_of( + 4, + vec![DecodedColumn::Symbol { + codes, + validity: None, + local_dict: None, + }], + ); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + match arrow_schema.field(0).data_type() { + DataType::Dictionary(k, v) => { + assert_eq!(**k, DataType::UInt32); + assert_eq!(**v, DataType::Utf8); + } + 
other => panic!("expected Dictionary(UInt32, Utf8), got {:?}", other), + } + let rb = batch_to_record_batch(arrow_schema, &s, b, &dict).unwrap(); + let dict_arr = rb + .column(0) + .as_any() + .downcast_ref::>() + .unwrap(); + let values = dict_arr + .values() + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(values.len(), 3); + let mut decoded: Vec = (0..dict_arr.len()) + .map(|r| { + let key = dict_arr.keys().value(r); + values.value(key as usize).to_string() + }) + .collect(); + decoded.sort_by_key(|s| match s.as_str() { + "AAPL" => 0, + "GOOG" => 1, + "MSFT" => 2, + _ => 99, + }); + decoded.dedup(); + let names: Vec<&str> = decoded.iter().map(String::as_str).collect(); + assert!(names.contains(&"AAPL")); + assert!(names.contains(&"GOOG")); + assert!(names.contains(&"MSFT")); +} + +#[test] +fn geohash_widens_to_target_arrow_width() { + let raw = vec![0xABu8, 0xCD, 0x12, 0x34]; + let s = schema_of(&[("g", ColumnKind::Geohash)]); + let b = decoded_of( + 4, + vec![DecodedColumn::Geohash { + buffer: buf(raw, None), + byte_width: 1, + precision_bits: 6, + }], + ); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + assert_eq!(arrow_schema.field(0).data_type(), &DataType::Int8); + assert_eq!( + arrow_schema + .field(0) + .metadata() + .get(metadata::GEOHASH_BITS) + .map(String::as_str), + Some("6") + ); + let _ = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); +} + +#[test] +fn array_2d_double_builds_nested_list() { + let mut data = Vec::new(); + for v in [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0] { + data.extend_from_slice(&v.to_le_bytes()); + } + let buffers = ArrayBuffers { + data_offsets: vec![0, 48, 64], + data: Bytes::from(data), + shapes: vec![2, 3, 1, 2], + shape_offsets: vec![0, 2, 4], + validity: None, + }; + let s = schema_of(&[("a", ColumnKind::DoubleArray)]); + let b = decoded_of(2, vec![DecodedColumn::DoubleArray(buffers)]); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + let dt = 
arrow_schema.field(0).data_type(); + match dt { + DataType::List(outer) => match outer.data_type() { + DataType::List(inner) => assert_eq!(inner.data_type(), &DataType::Float64), + other => panic!("expected inner List(Float64), got {:?}", other), + }, + other => panic!("expected nested List, got {:?}", other), + } + let _ = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); +} + +#[test] +fn schemas_equal_ignores_nullability_when_metadata_matches() { + let a = batch_arrow_schema( + &schema_of(&[("v", ColumnKind::Long)]), + &decoded_of(0, vec![DecodedColumn::Long(buf(Vec::new(), None))]), + ) + .unwrap(); + let b = batch_arrow_schema( + &schema_of(&[("v", ColumnKind::Long)]), + &decoded_of(0, vec![DecodedColumn::Long(buf(Vec::new(), None))]), + ) + .unwrap(); + assert!(schemas_equal(&a, &b)); +} + +fn le_bytes_of(values: &[T]) -> Vec +where + T: Copy + AsLeBytes, +{ + let mut out = Vec::with_capacity(std::mem::size_of_val(values)); + for v in values { + out.extend_from_slice(&v.as_le_slice()); + } + out +} + +trait AsLeBytes: Copy { + fn as_le_slice(self) -> Vec; +} + +macro_rules! 
impl_as_le { + ($t:ty) => { + impl AsLeBytes for $t { + fn as_le_slice(self) -> Vec { + self.to_le_bytes().to_vec() + } + } + }; +} +impl_as_le!(i8); +impl_as_le!(i16); +impl_as_le!(i32); +impl_as_le!(i64); +impl_as_le!(u16); +impl_as_le!(u32); +impl_as_le!(f32); +impl_as_le!(f64); + +#[test] +fn byte_column_passes_through_int8() { + let raw = le_bytes_of(&[1i8, -1, 127, -128]); + let s = schema_of(&[("b", ColumnKind::Byte)]); + let b = decoded_of(4, vec![DecodedColumn::Byte(buf(raw, None))]); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + assert_eq!(arrow_schema.field(0).data_type(), &DataType::Int8); + let rb = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); + let col = rb + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(col.values(), &[1i8, -1, 127, -128]); +} + +#[test] +fn short_column_passes_through_int16() { + let raw = le_bytes_of(&[1i16, -1, i16::MAX, i16::MIN]); + let s = schema_of(&[("s", ColumnKind::Short)]); + let b = decoded_of(4, vec![DecodedColumn::Short(buf(raw, None))]); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + assert_eq!(arrow_schema.field(0).data_type(), &DataType::Int16); + let _ = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); +} + +#[test] +fn int_column_passes_through_int32() { + let raw = le_bytes_of(&[1i32, -1, i32::MAX]); + let s = schema_of(&[("i", ColumnKind::Int)]); + let b = decoded_of(3, vec![DecodedColumn::Int(buf(raw, None))]); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + assert_eq!(arrow_schema.field(0).data_type(), &DataType::Int32); + let _ = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); +} + +#[test] +fn float_column_passes_through_float32() { + let raw = le_bytes_of(&[1.5f32, -2.5, std::f32::consts::PI]); + let s = schema_of(&[("f", ColumnKind::Float)]); + let b = decoded_of(3, vec![DecodedColumn::Float(buf(raw, None))]); + let arrow_schema = 
Arc::new(batch_arrow_schema(&s, &b).unwrap()); + assert_eq!(arrow_schema.field(0).data_type(), &DataType::Float32); + let _ = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); +} + +#[test] +fn double_column_passes_through_float64() { + let raw = le_bytes_of(&[1.5f64, -2.5, std::f64::consts::PI]); + let s = schema_of(&[("d", ColumnKind::Double)]); + let b = decoded_of(3, vec![DecodedColumn::Double(buf(raw, None))]); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + assert_eq!(arrow_schema.field(0).data_type(), &DataType::Float64); + let _ = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); +} + +#[test] +fn date_column_is_timestamp_millis_utc() { + let raw = le_bytes_of(&[1_700_000_000_000i64, 1_700_000_001_000]); + let s = schema_of(&[("d", ColumnKind::Date)]); + let b = decoded_of(2, vec![DecodedColumn::Date(buf(raw, None))]); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + match arrow_schema.field(0).data_type() { + DataType::Timestamp(TimeUnit::Millisecond, tz) => { + assert_eq!(tz.as_deref(), Some("UTC")); + } + other => panic!("expected Timestamp(ms, UTC), got {:?}", other), + } + let _ = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); +} + +#[test] +fn timestamp_nanos_is_timestamp_nanosecond_utc() { + let raw = le_bytes_of(&[1_700_000_000_000_000_000i64, 1_700_000_000_000_000_001]); + let s = schema_of(&[("ts", ColumnKind::TimestampNanos)]); + let b = decoded_of(2, vec![DecodedColumn::TimestampNanos(buf(raw, None))]); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + match arrow_schema.field(0).data_type() { + DataType::Timestamp(TimeUnit::Nanosecond, tz) => { + assert_eq!(tz.as_deref(), Some("UTC")); + } + other => panic!("expected Timestamp(ns, UTC), got {:?}", other), + } + let _ = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); +} + +#[test] +fn char_column_is_uint16_with_metadata() { + let raw = 
le_bytes_of(&[0x41u16, 0x42, 0x43]); + let s = schema_of(&[("c", ColumnKind::Char)]); + let b = decoded_of(3, vec![DecodedColumn::Char(buf(raw, None))]); + let arrow_schema = batch_arrow_schema(&s, &b).unwrap(); + assert_eq!(arrow_schema.field(0).data_type(), &DataType::UInt16); + assert_eq!( + arrow_schema + .field(0) + .metadata() + .get(metadata::COLUMN_TYPE) + .map(String::as_str), + Some("char") + ); +} + +#[test] +fn ipv4_column_is_uint32_with_metadata() { + let raw = le_bytes_of(&[0x0100_007Fu32, 0x0101_A8C0]); + let s = schema_of(&[("ip", ColumnKind::Ipv4)]); + let b = decoded_of(2, vec![DecodedColumn::Ipv4(buf(raw, None))]); + let arrow_schema = batch_arrow_schema(&s, &b).unwrap(); + assert_eq!(arrow_schema.field(0).data_type(), &DataType::UInt32); + assert_eq!( + arrow_schema + .field(0) + .metadata() + .get(metadata::COLUMN_TYPE) + .map(String::as_str), + Some("ipv4") + ); +} + +#[test] +fn long256_is_fixed_size_binary_32() { + let raw: Vec = (0..64u8).collect(); + let s = schema_of(&[("l", ColumnKind::Long256)]); + let b = decoded_of(2, vec![DecodedColumn::Long256(buf(raw, None))]); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + assert_eq!( + arrow_schema.field(0).data_type(), + &DataType::FixedSizeBinary(32) + ); + let _ = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); +} + +#[test] +fn decimal64_carries_precision_and_scale() { + let raw = le_bytes_of(&[12345i64, 6789]); + let s = schema_of(&[("d", ColumnKind::Decimal64)]); + let b = decoded_of( + 2, + vec![DecodedColumn::Decimal64 { + buffer: buf(raw, None), + scale: 3, + }], + ); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + match arrow_schema.field(0).data_type() { + DataType::Decimal64(precision, scale) => { + assert_eq!(*precision, 18); + assert_eq!(*scale, 3); + } + other => panic!("expected Decimal64(_, _), got {:?}", other), + } + let _ = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); +} + 
+#[test] +fn decimal128_carries_precision_and_scale() { + let raw = bytes::Bytes::from(vec![0u8; 32]); + let s = schema_of(&[("d", ColumnKind::Decimal128)]); + let b = decoded_of( + 2, + vec![DecodedColumn::Decimal128 { + buffer: ColumnBuffer { + values: raw, + validity: None, + }, + scale: 5, + }], + ); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + match arrow_schema.field(0).data_type() { + DataType::Decimal128(precision, scale) => { + assert_eq!(*precision, 38); + assert_eq!(*scale, 5); + } + other => panic!("expected Decimal128(_, _), got {:?}", other), + } + let _ = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); +} + +#[test] +fn decimal256_carries_precision_and_scale() { + let raw = bytes::Bytes::from(vec![0u8; 64]); + let s = schema_of(&[("d", ColumnKind::Decimal256)]); + let b = decoded_of( + 2, + vec![DecodedColumn::Decimal256 { + buffer: ColumnBuffer { + values: raw, + validity: None, + }, + scale: 7, + }], + ); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + match arrow_schema.field(0).data_type() { + DataType::Decimal256(precision, scale) => { + assert_eq!(*precision, 76); + assert_eq!(*scale, 7); + } + other => panic!("expected Decimal256(_, _), got {:?}", other), + } + let _ = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); +} + +#[test] +fn long_array_builds_nested_list_int64() { + let mut data = Vec::new(); + for v in [10i64, 20, 30, 40, 50, 60] { + data.extend_from_slice(&v.to_le_bytes()); + } + let buffers = crate::egress::decoder::ArrayBuffers { + data_offsets: vec![0, 24, 48], + data: bytes::Bytes::from(data), + shapes: vec![3, 3], + shape_offsets: vec![0, 1, 2], + validity: None, + }; + let s = schema_of(&[("la", ColumnKind::LongArray)]); + let b = decoded_of(2, vec![DecodedColumn::LongArray(buffers)]); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + match arrow_schema.field(0).data_type() { + DataType::List(inner) => { + 
assert_eq!(inner.data_type(), &DataType::Int64); + } + other => panic!("expected List(Int64), got {:?}", other), + } + let _ = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); +} + +#[test] +fn array_1d_double_builds_single_list_level() { + let mut data = Vec::new(); + for v in [1.0f64, 2.0, 3.0, 4.0, 5.0] { + data.extend_from_slice(&v.to_le_bytes()); + } + let buffers = crate::egress::decoder::ArrayBuffers { + data_offsets: vec![0, 16, 40], + data: bytes::Bytes::from(data), + shapes: vec![2, 3], + shape_offsets: vec![0, 1, 2], + validity: None, + }; + let s = schema_of(&[("a", ColumnKind::DoubleArray)]); + let b = decoded_of(2, vec![DecodedColumn::DoubleArray(buffers)]); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + match arrow_schema.field(0).data_type() { + DataType::List(inner) => { + assert_eq!(inner.data_type(), &DataType::Float64); + } + other => panic!("expected single List(Float64), got {:?}", other), + } + let _ = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); +} + +#[test] +fn array_3d_double_builds_three_list_levels() { + let mut data = Vec::new(); + for v in [1.0f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0] { + data.extend_from_slice(&v.to_le_bytes()); + } + let buffers = crate::egress::decoder::ArrayBuffers { + data_offsets: vec![0, 64], + data: bytes::Bytes::from(data), + shapes: vec![2, 2, 2], + shape_offsets: vec![0, 3], + validity: None, + }; + let s = schema_of(&[("a", ColumnKind::DoubleArray)]); + let b = decoded_of(1, vec![DecodedColumn::DoubleArray(buffers)]); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + fn depth(dt: &DataType) -> usize { + match dt { + DataType::List(inner) => 1 + depth(inner.data_type()), + _ => 0, + } + } + assert_eq!(depth(arrow_schema.field(0).data_type()), 3); + let _ = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); +} + +#[test] +fn array_with_null_row_skips_shape() { + let mut data = Vec::new(); + for 
v in [1.0f64, 2.0, 3.0] { + data.extend_from_slice(&v.to_le_bytes()); + } + let buffers = crate::egress::decoder::ArrayBuffers { + data_offsets: vec![0, 24, 24], + data: bytes::Bytes::from(data), + shapes: vec![3], + shape_offsets: vec![0, 1, 1], + validity: Some(bytes::Bytes::from(vec![0b0000_0010u8])), + }; + let s = schema_of(&[("a", ColumnKind::DoubleArray)]); + let b = decoded_of(2, vec![DecodedColumn::DoubleArray(buffers)]); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + let rb = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); + let col = rb + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert!(col.is_valid(0)); + assert!(col.is_null(1)); +} + +#[test] +fn symbol_with_local_dict_overrides_connection_dict() { + let mut local = SymbolDict::new(); + local + .apply_delta(0, [b"L0".as_slice(), b"L1".as_slice()]) + .unwrap(); + let connection = SymbolDict::new(); + let s = schema_of(&[("sym", ColumnKind::Symbol)]); + let b = decoded_of( + 2, + vec![DecodedColumn::Symbol { + codes: vec![0, 1], + validity: None, + local_dict: Some(local), + }], + ); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + let rb = batch_to_record_batch(arrow_schema, &s, b, &connection).unwrap(); + let dict_arr = rb + .column(0) + .as_any() + .downcast_ref::>() + .unwrap(); + let values = dict_arr + .values() + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(values.len(), 2); +} + +#[test] +fn empty_batch_produces_zero_row_record_batch() { + let s = schema_of(&[("v", ColumnKind::Long)]); + let b = decoded_of(0, vec![DecodedColumn::Long(buf(Vec::new(), None))]); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + let rb = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()).unwrap(); + assert_eq!(rb.num_rows(), 0); + assert_eq!(rb.num_columns(), 1); +} + +#[test] +fn ffi_round_trip_preserves_record_batch() { + let mut data = Vec::new(); + for v in [1i64, 2, 3] { + 
data.extend_from_slice(&v.to_le_bytes()); + } + let s = schema_of(&[("v", ColumnKind::Long)]); + let batch = decoded_of(3, vec![DecodedColumn::Long(buf(data, None))]); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &batch).unwrap()); + let rb = batch_to_record_batch(arrow_schema.clone(), &s, batch, &SymbolDict::new()).unwrap(); + let struct_array: arrow_array::StructArray = rb.into(); + let data = struct_array.into_data(); + let (ffi_array, ffi_schema) = arrow::ffi::to_ffi(&data).unwrap(); + let imported = unsafe { arrow::ffi::from_ffi(ffi_array, &ffi_schema) }.unwrap(); + let restored: arrow_array::StructArray = imported.into(); + assert_eq!(restored.len(), 3); + assert_eq!(restored.num_columns(), 1); +} + +#[test] +fn schemas_equal_detects_dtype_drift() { + let a = batch_arrow_schema( + &schema_of(&[("v", ColumnKind::Long)]), + &decoded_of(0, vec![DecodedColumn::Long(buf(Vec::new(), None))]), + ) + .unwrap(); + let b = batch_arrow_schema( + &schema_of(&[("v", ColumnKind::Int)]), + &decoded_of(0, vec![DecodedColumn::Int(buf(Vec::new(), None))]), + ) + .unwrap(); + assert!(!schemas_equal(&a, &b)); +} + +#[test] +fn empty_array_batch_emits_tentative_ndim_marker() { + let buffers = crate::egress::decoder::ArrayBuffers { + data_offsets: vec![], + data: bytes::Bytes::new(), + shapes: vec![], + shape_offsets: vec![], + validity: None, + }; + let s = schema_of(&[("a", ColumnKind::DoubleArray)]); + let b = decoded_of(0, vec![DecodedColumn::DoubleArray(buffers)]); + let arrow_schema = batch_arrow_schema(&s, &b).unwrap(); + let md = arrow_schema.field(0).metadata(); + assert_eq!( + md.get(crate::egress::arrow::metadata::ARRAY_DIM_TENTATIVE) + .map(String::as_str), + Some("true") + ); +} + +#[test] +fn firm_array_batch_has_no_tentative_marker() { + let mut data = Vec::new(); + for v in [1.0f64, 2.0, 3.0] { + data.extend_from_slice(&v.to_le_bytes()); + } + let buffers = crate::egress::decoder::ArrayBuffers { + data_offsets: vec![0, 24], + data: bytes::Bytes::from(data), 
+ shapes: vec![3], + shape_offsets: vec![0, 1], + validity: None, + }; + let s = schema_of(&[("a", ColumnKind::DoubleArray)]); + let b = decoded_of(1, vec![DecodedColumn::DoubleArray(buffers)]); + let arrow_schema = batch_arrow_schema(&s, &b).unwrap(); + let md = arrow_schema.field(0).metadata(); + assert!( + md.get(crate::egress::arrow::metadata::ARRAY_DIM_TENTATIVE) + .is_none() + ); +} + +#[test] +fn schemas_equal_accepts_tentative_to_firm_array_upgrade() { + let empty_buffers = crate::egress::decoder::ArrayBuffers { + data_offsets: vec![], + data: bytes::Bytes::new(), + shapes: vec![], + shape_offsets: vec![], + validity: None, + }; + let tentative = batch_arrow_schema( + &schema_of(&[("a", ColumnKind::DoubleArray)]), + &decoded_of(0, vec![DecodedColumn::DoubleArray(empty_buffers)]), + ) + .unwrap(); + + let mut data = Vec::new(); + for v in [1.0f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0] { + data.extend_from_slice(&v.to_le_bytes()); + } + let firm_buffers = crate::egress::decoder::ArrayBuffers { + data_offsets: vec![0, 64], + data: bytes::Bytes::from(data), + shapes: vec![2, 2, 2], + shape_offsets: vec![0, 3], + validity: None, + }; + let firm = batch_arrow_schema( + &schema_of(&[("a", ColumnKind::DoubleArray)]), + &decoded_of(1, vec![DecodedColumn::DoubleArray(firm_buffers)]), + ) + .unwrap(); + + assert!(schemas_equal(&tentative, &firm)); + assert!(schemas_equal(&firm, &tentative)); +} + +#[test] +fn schemas_equal_detects_array_dim_drift_when_both_firm() { + let mut data1 = Vec::new(); + for v in [1.0f64, 2.0, 3.0] { + data1.extend_from_slice(&v.to_le_bytes()); + } + let b1 = crate::egress::decoder::ArrayBuffers { + data_offsets: vec![0, 24], + data: bytes::Bytes::from(data1), + shapes: vec![3], + shape_offsets: vec![0, 1], + validity: None, + }; + let s1 = batch_arrow_schema( + &schema_of(&[("a", ColumnKind::DoubleArray)]), + &decoded_of(1, vec![DecodedColumn::DoubleArray(b1)]), + ) + .unwrap(); + let mut data2 = Vec::new(); + for v in [1.0f64, 2.0, 3.0, 4.0, 
5.0, 6.0, 7.0, 8.0] { + data2.extend_from_slice(&v.to_le_bytes()); + } + let b2 = crate::egress::decoder::ArrayBuffers { + data_offsets: vec![0, 64], + data: bytes::Bytes::from(data2), + shapes: vec![2, 2, 2], + shape_offsets: vec![0, 3], + validity: None, + }; + let s2 = batch_arrow_schema( + &schema_of(&[("a", ColumnKind::DoubleArray)]), + &decoded_of(1, vec![DecodedColumn::DoubleArray(b2)]), + ) + .unwrap(); + assert!(!schemas_equal(&s1, &s2)); +} + +// Force `ArrayDataBuilder::build()` to reject a malformed Decimal64 +// payload (10 rows promised, only 8 bytes supplied — one row's worth) +// and verify the failure surfaces as `ErrorCode::ArrowExport` through +// `batch_to_record_batch`. Regression guard against the export wrap +// being dropped on a future refactor: without it, the underlying +// arrow-rs error would propagate as a different code (or panic under +// `panic = "abort"`). +#[test] +fn arrow_export_surfaces_on_malformed_decimal64() { + use crate::egress::error::ErrorCode; + let values = vec![0u8; 8]; + let s = schema_of(&[("d", ColumnKind::Decimal64)]); + let b = decoded_of( + 10, + vec![DecodedColumn::Decimal64 { + buffer: buf(values, None), + scale: 2, + }], + ); + let arrow_schema = Arc::new(batch_arrow_schema(&s, &b).unwrap()); + let err = batch_to_record_batch(arrow_schema, &s, b, &SymbolDict::new()) + .expect_err("malformed Decimal64 must error, not panic"); + assert_eq!(err.code(), ErrorCode::ArrowExport); +} diff --git a/questdb-rs/src/egress/decoder.rs b/questdb-rs/src/egress/decoder.rs index c3463d65..5b3f3330 100644 --- a/questdb-rs/src/egress/decoder.rs +++ b/questdb-rs/src/egress/decoder.rs @@ -795,6 +795,21 @@ fn decode_decimal_wide( crate::egress::binds::MAX_DECIMAL_SCALE )); } + let per_width_max: i8 = match width { + 8 => 18, + 16 => 38, + 32 => crate::egress::binds::MAX_DECIMAL_SCALE, + _ => crate::egress::binds::MAX_DECIMAL_SCALE, + }; + if scale > per_width_max { + return Err(fmt!( + ProtocolError, + "DECIMAL{} scale {} exceeds 
per-width maximum {}", + width * 8, + scale, + per_width_max + )); + } // DECIMAL64 NULL is `Long.MIN_VALUE` (spec §11.5). DECIMAL128 NULL is // both halves `Long.MIN_VALUE` (server: `lo == LONG_NULL && hi == // LONG_NULL`); DECIMAL256 NULL is four halves `Long.MIN_VALUE` diff --git a/questdb-rs/src/egress/error.rs b/questdb-rs/src/egress/error.rs index f63c2144..2253b4c8 100644 --- a/questdb-rs/src/egress/error.rs +++ b/questdb-rs/src/egress/error.rs @@ -121,6 +121,31 @@ pub enum ErrorCode { /// Surfaced only mid-query — initial connect failover (before any /// batch is yielded) does not raise this and behaves transparently. FailoverWouldDuplicate, + + /// Streaming Arrow adapter saw a mid-stream schema change: a later + /// `RESULT_BATCH` decoded into an Arrow schema that differs from + /// the snapshot captured at adapter construction. The adapter is + /// poisoned; the underlying [`crate::egress::Cursor`] remains + /// usable and the caller may re-wrap it with a fresh + /// `as_arrow_reader()` call to snapshot the new schema. + /// + /// Only emitted on the `arrow` feature. + SchemaDrift, + + /// `Cursor::as_arrow_reader()` was called on a stream that + /// terminated before any `RESULT_BATCH` was decoded — there is no + /// schema to snapshot. Recoverable: the caller can either treat + /// this as a "no rows" result, or re-execute the query. + /// + /// Only emitted on the `arrow` feature. + NoSchema, + + /// Arrow C Data Interface export failed (e.g. arrow-rs rejected an + /// internal invariant on the produced `ArrayData`). Indicates a + /// crate bug; not user-recoverable. + /// + /// Only emitted on the `arrow` feature. + ArrowExport, } /// Upgrade-time topology rejection carried alongside an `Error`. 
diff --git a/questdb-rs/src/egress/mod.rs b/questdb-rs/src/egress/mod.rs index 353b1b0b..a0e3a789 100644 --- a/questdb-rs/src/egress/mod.rs +++ b/questdb-rs/src/egress/mod.rs @@ -44,6 +44,8 @@ // are surfaced via the top-level `pub use` block below; everything // else stays internal and is free to evolve without a breaking // change. +#[cfg(feature = "arrow")] +pub mod arrow; pub(crate) mod auth; pub(crate) mod binds; pub mod column; diff --git a/questdb-rs/src/egress/reader.rs b/questdb-rs/src/egress/reader.rs index 219ba761..91b62a18 100644 --- a/questdb-rs/src/egress/reader.rs +++ b/questdb-rs/src/egress/reader.rs @@ -190,6 +190,25 @@ const _: fn() = || { assert_send_sync::(); }; +// Two blanket impls of the same trait force method-resolution ambiguity +// iff the target type IS `Send`; the call thus compiles only when the +// type is `!Send`. +const _: fn() = || { + trait AmbiguousIfSend { + fn _disambiguate() {} + } + impl AmbiguousIfSend<()> for T {} + impl AmbiguousIfSend for T {} + fn assert_not_send() { + let _: fn() = >::_disambiguate; + } + assert_not_send::>(); + #[cfg(feature = "arrow")] + assert_not_send::>(); + #[cfg(feature = "polars")] + assert_not_send::>(); +}; + impl Reader { /// Open a new connection from a connect string. pub fn from_conf>(conf: T) -> Result { @@ -1445,6 +1464,156 @@ impl<'r> Cursor<'r> { } } + /// Wrap this cursor as an Arrow [`RecordBatchReader`]. Blocks until + /// the first `RESULT_BATCH` is decoded, then snapshots its schema. + /// Mid-stream schema drift poisons the adapter; re-wrap to resume. + /// Returns [`ErrorCode::NoSchema`] if the stream terminates before + /// any batch is produced. 
+ /// + /// [`RecordBatchReader`]: arrow_array::RecordBatchReader + /// [`ErrorCode::NoSchema`]: crate::egress::ErrorCode::NoSchema + #[cfg(feature = "arrow")] + pub fn as_arrow_reader<'c>( + &'c mut self, + ) -> Result> { + crate::egress::arrow::CursorRecordBatchReader::new(self) + } + + /// Eagerly drain every batch and return them together with the + /// pinned Arrow schema. Symmetric with + /// [`Cursor::fetch_all_polars`](crate::egress::Cursor::fetch_all_polars). + /// Errors as [`ErrorCode::NoSchema`] if the stream ends without + /// producing a batch; surfaces drift as + /// [`ErrorCode::SchemaDrift`]. + /// + /// [`ErrorCode::NoSchema`]: crate::egress::ErrorCode::NoSchema + /// [`ErrorCode::SchemaDrift`]: crate::egress::ErrorCode::SchemaDrift + #[cfg(feature = "arrow")] + pub fn fetch_all_arrow( + &mut self, + ) -> Result<(arrow_schema::SchemaRef, Vec)> { + let mut reader = self.as_arrow_reader()?; + let mut batches: Vec = Vec::new(); + for item in reader.by_ref() { + batches.push(item.map_err(|e| { + crate::egress::arrow::try_downcast_questdb(&e) + .cloned() + .unwrap_or_else(|| fmt!(ArrowExport, "{}", e)) + })?); + } + Ok((reader.schema(), batches)) + } + + /// Drift-checked iterator over Polars [`DataFrame`](polars::frame::DataFrame)s, + /// one per QWP batch. Snapshots the first batch's Arrow schema + /// and yields `Err(SchemaDrift)` then terminates if a + /// later batch diverges. Returns `Err(NoSchema)` if the stream + /// ends before any batch is produced. + /// + /// Use this in preference to a `while let Some(df) = cursor.next_polars()?` + /// loop when you care about schema consistency mid-stream. + #[cfg(feature = "polars")] + pub fn iter_polars<'c>(&'c mut self) -> Result> { + crate::egress::arrow::CursorPolarsIter::new(self) + } + + /// Next batch as an Arrow [`RecordBatch`](arrow_array::RecordBatch). + /// `Ok(None)` on stream end; replays terminal errors like + /// [`Cursor::next_batch`]. 
No drift check — use + /// [`Cursor::as_arrow_reader`] for that. + #[cfg(feature = "arrow")] + pub fn next_arrow_batch(&mut self) -> Result> { + self.next_arrow_batch_inner(None) + } + + #[cfg(feature = "arrow")] + #[doc(hidden)] + pub fn next_arrow_batch_inner( + &mut self, + expected_schema: Option<&arrow_schema::SchemaRef>, + ) -> Result> { + use crate::egress::arrow::{batch_arrow_schema, batch_to_record_batch, schemas_equal}; + use std::sync::Arc; + + if self.done { + return match self.terminal_error.as_ref() { + Some(e) => Err(e.clone()), + None => Ok(None), + }; + } + let outcome = match self.next_batch_inner() { + Ok(o) => o, + Err(e) => { + if self.done && self.terminal_error.is_none() { + self.terminal_error = Some(e.clone()); + } + return Err(e); + } + }; + match outcome { + NextOutcome::Done => Ok(None), + NextOutcome::HaveBatch => { + let decoded = self + .last_batch + .take() + .expect("HaveBatch implies last_batch"); + let egress_schema = match self.reader.registry.get(decoded.schema_id) { + Some(s) => s.clone(), + None => { + let e = fmt!( + ProtocolError, + "schema id {} missing from registry", + decoded.schema_id + ); + self.stash_arrow_terminal_error(&e); + return Err(e); + } + }; + let arrow_schema = match batch_arrow_schema(&egress_schema, &decoded) { + Ok(s) => Arc::new(s), + Err(e) => { + self.stash_arrow_terminal_error(&e); + return Err(e); + } + }; + if let Some(expected) = expected_schema + && !schemas_equal(expected.as_ref(), arrow_schema.as_ref()) + { + let e = fmt!( + SchemaDrift, + "mid-stream Arrow schema drift: expected schema differs from batch_seq={}", + decoded.batch_seq + ); + // Discard the drift batch but keep the cursor live — + // the caller may re-pin and resume from the next batch. 
+ return Err(e); + } + match batch_to_record_batch( + arrow_schema, + &egress_schema, + decoded, + &self.reader.dict, + ) { + Ok(rb) => Ok(Some(rb)), + Err(e) => { + self.stash_arrow_terminal_error(&e); + Err(e) + } + } + } + } + } + + // Replay-contract stash for Arrow-conversion errors raised after + // `next_batch_inner` returned: marks the cursor done, keeps the first error. + #[cfg(feature = "arrow")] + fn stash_arrow_terminal_error(&mut self, err: &Error) { + self.done = true; + if self.terminal_error.is_none() { + self.terminal_error = Some(err.clone()); + } + } + fn next_batch_inner(&mut self) -> Result { loop { // Transport read: a failure here (socket closed, TLS diff --git a/questdb-rs/src/error.rs b/questdb-rs/src/error.rs index 4d40655c..918c9674 100644 --- a/questdb-rs/src/error.rs +++ b/questdb-rs/src/error.rs @@ -84,6 +84,18 @@ pub enum ErrorCode { /// QWP/WebSocket server rejection or terminal protocol violation. ServerRejection, + + /// `Buffer::append_arrow` was passed a column whose Arrow / QuestDB + /// kind cannot be persisted to a QuestDB table (e.g. `ARRAY(LONG, N-D)` + /// is query-result-only on the egress side and has no QWP wire tag for + /// ingress). Only emitted on the `arrow` feature. + ArrowUnsupportedColumnKind, + + /// `Buffer::append_arrow` was passed a `RecordBatch` that failed + /// client-side structural validation (column count vs schema, name + /// encoding, Arrow C Data Interface invariants on a freshly imported + /// array, etc.). Only emitted on the `arrow` feature. + ArrowIngest, } /// An error that occurred when using QuestDB client library. 
diff --git a/questdb-rs/src/ingress.rs b/questdb-rs/src/ingress.rs index b1569abf..9ff76a76 100644 --- a/questdb-rs/src/ingress.rs +++ b/questdb-rs/src/ingress.rs @@ -68,6 +68,11 @@ pub use sender::*; mod decimal; pub use decimal::DecimalView; +#[cfg(feature = "arrow")] +pub mod arrow; +#[cfg(feature = "polars")] +pub mod polars; + const MAX_NAME_LEN_DEFAULT: usize = 127; /// The maximum allowed dimensions for arrays. diff --git a/questdb-rs/src/ingress/arrow.rs b/questdb-rs/src/ingress/arrow.rs new file mode 100644 index 00000000..7bd7cbdb --- /dev/null +++ b/questdb-rs/src/ingress/arrow.rs @@ -0,0 +1,4513 @@ +/******************************************************************************* + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2025 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +//! `RecordBatch → Buffer` ingress. Walks the batch row-major; column +//! type-hint resolution follows Decision 14 of the design doc +//! (`questdb.column_type` > `ARROW:extension:name` > Arrow type alone). 
+ +use arrow_array::types::{UInt8Type, UInt16Type, UInt32Type}; +use arrow_array::{ + Array, ArrayRef, BinaryArray, BinaryViewArray, BooleanArray, Date32Array, Date64Array, + Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array, DictionaryArray, + DurationMicrosecondArray, DurationMillisecondArray, DurationNanosecondArray, + DurationSecondArray, FixedSizeBinaryArray, FixedSizeListArray, Float16Array, Float32Array, + Float64Array, Int8Array, Int16Array, Int32Array, Int64Array, LargeBinaryArray, LargeListArray, + LargeStringArray, ListArray, RecordBatch, StringArray, StringViewArray, Time32MillisecondArray, + Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray, + TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt8Array, + UInt16Array, UInt32Array, UInt64Array, +}; +use arrow_schema::{DataType, TimeUnit}; + +use crate::error::{Error, ErrorCode}; +use crate::ingress::buffer::{ + ArrowBatchInfo, ArrowBulkCtx, ArrowDecimalSpec, QWP_DECIMAL_MAX_SCALE, QwpColumnKind, + QwpWsColumnarBuffer, +}; +use crate::ingress::{Buffer, ColumnName, TableName}; +use crate::{Result, fmt}; + +impl Buffer { + /// Append every row of `batch` to this buffer. Per-row designated + /// timestamp is omitted from the wire payload; the server stamps + /// each row on arrival (matches [`Buffer::at_now`](Buffer::at_now) + /// per-row semantics). + /// + /// Requires a QWP/WS buffer. On error, the buffer is rolled back + /// atomically to its pre-call state — no partial batch is committed. + /// + /// Use [`Buffer::append_arrow_at_column`] to source the timestamp + /// from a batch column. + /// + /// # Null encoding (data loss) + /// + /// QuestDB's `BOOLEAN`, `BYTE` and `SHORT` wire kinds have no null + /// representation. Nulls in an Arrow `Boolean` / `Int8` / `Int16` + /// column are silently coerced to the zero value (`false`, `0`, + /// `0`) when appended. 
Use the wider integer types if null + /// fidelity matters (Arrow `Int32`/`Int64` carry sentinels; + /// Arrow `UInt8` widens to QuestDB `INT` and preserves nulls via + /// the `i32::MIN` sentinel). + /// + /// # Schema rigidity across batches + /// + /// Multiple `append_arrow` calls against the same table-in-buffer + /// must supply the same set of columns. A batch that omits a + /// previously-seen column is rejected with [`ErrorCode::InvalidApiCall`] + /// at commit time. Project / re-order client-side if the producer + /// sends a different shape per batch. + /// + /// # Errors + /// + /// * [`ErrorCode::ArrowUnsupportedColumnKind`] — column's Arrow + /// type has no QWP wire mapping. + /// * [`ErrorCode::ArrowIngest`] — structural validation failed. + /// * [`ErrorCode::InvalidApiCall`] — non-QWP/WS buffer, row-by-row + /// row already in progress on the same table, or a previously- + /// seen column was omitted from the batch. + pub fn append_arrow(&mut self, table: TableName<'_>, batch: &RecordBatch) -> Result<()> { + self.append_arrow_inner(table, batch, None) + } + + /// Append every row of `batch`, sourcing the per-row designated + /// timestamp from `ts_column`. The column must be a + /// `Timestamp(Microsecond | Nanosecond | Millisecond, _)` with no + /// null rows; `Millisecond` is widened to µs on the wire. + /// + /// # Errors + /// + /// In addition to the errors from [`Buffer::append_arrow`]: + /// + /// * [`ErrorCode::ArrowIngest`] — `ts_column` is missing, not a + /// `Timestamp(_)` Arrow type, or has null rows. 
+ pub fn append_arrow_at_column( + &mut self, + table: TableName<'_>, + batch: &RecordBatch, + ts_column: ColumnName<'_>, + ) -> Result<()> { + self.append_arrow_inner(table, batch, Some(ts_column)) + } + + fn append_arrow_inner( + &mut self, + table: TableName<'_>, + batch: &RecordBatch, + ts_column: Option>, + ) -> Result<()> { + let schema = batch.schema(); + let row_count = batch.num_rows(); + let col_count = batch.num_columns(); + if schema.fields().len() != col_count { + return Err(fmt!( + ArrowIngest, + "RecordBatch schema/columns mismatch: schema={} columns={}", + schema.fields().len(), + col_count + )); + } + if row_count == 0 { + return Ok(()); + } + if row_count > MAX_ARROW_INGEST_ROWS { + return Err(fmt!( + ArrowIngest, + "row count {} exceeds maximum {} for a single append_arrow call", + row_count, + MAX_ARROW_INGEST_ROWS + )); + } + check_batch_data_bounds(batch)?; + let ts_col_idx = match ts_column { + Some(name) => Some(resolve_ts_column(batch, name)?), + None => None, + }; + let user_col_count = col_count - if ts_col_idx.is_some() { 1 } else { 0 }; + if user_col_count == 0 { + return Err(fmt!( + ArrowIngest, + "RecordBatch must have at least one non-timestamp column when row_count > 0" + )); + } + let effective_rows = u32::try_from(row_count) + .map_err(|_| fmt!(ArrowIngest, "row count {} exceeds u32::MAX", row_count))?; + let qwp_ws = self.as_qwp_ws_mut().ok_or_else(|| { + Error::new( + ErrorCode::InvalidApiCall, + "Buffer::append_arrow requires a QWP/WebSocket buffer (Buffer::new_qwp_ws)" + .to_string(), + ) + })?; + let ctx = qwp_ws.arrow_bulk_begin(table)?; + let mut guard = BulkGuard { + qwp_ws, + ctx: Some(ctx), + }; + let inner_result = emit_arrow_batch( + guard.qwp_ws, + guard.ctx.as_ref().expect("ctx is Some until committed"), + batch, + &schema, + ts_col_idx, + ); + match inner_result { + Ok(()) => { + let ctx = guard.ctx.as_ref().expect("ctx is Some until committed"); + match guard.qwp_ws.arrow_bulk_commit(ctx, effective_rows) { + Ok(()) 
=> { + let ctx = guard.ctx.take().expect("ctx is Some until committed"); + guard.qwp_ws.arrow_bulk_finish(ctx); + Ok(()) + } + Err(e) => Err(e), + } + } + Err(e) => Err(e), + } + } +} + +struct BulkGuard<'a> { + qwp_ws: &'a mut QwpWsColumnarBuffer, + ctx: Option, +} + +impl Drop for BulkGuard<'_> { + fn drop(&mut self) { + if let Some(ctx) = self.ctx.take() { + self.qwp_ws.arrow_bulk_rollback(ctx); + } + } +} + +#[inline] +fn emit_arrow_batch( + qwp_ws: &mut QwpWsColumnarBuffer, + ctx: &ArrowBulkCtx, + batch: &RecordBatch, + schema: &arrow_schema::SchemaRef, + ts_col_idx: Option, +) -> Result<()> { + for (idx, field) in schema.fields().iter().enumerate() { + if Some(idx) == ts_col_idx { + continue; + } + let col_name = + ColumnName::new(field.name()).map_err(|e| decorate_column(e, field.name()))?; + let kind = classify(field.as_ref(), batch.column(idx).as_ref()) + .map_err(|e| decorate_column(e, field.name()))?; + emit_arrow_column(qwp_ws, ctx, col_name, kind, batch.column(idx).as_ref()) + .map_err(|e| decorate_column(e, field.name()))?; + } + if let Some(idx) = ts_col_idx { + let arr = batch.column(idx); + let field_name = schema.field(idx).name(); + emit_arrow_designated_ts(qwp_ws, ctx, schema.field(idx).data_type(), arr.as_ref()) + .map_err(|e| decorate_column(e, field_name))?; + } + Ok(()) +} + +// `starts_with` (not `contains`) so a user column name containing the +// substring cannot bypass the double-wrap guard. 
+const COLUMN_ERR_PREFIX: &str = "[column='"; + +fn decorate_column(err: Error, column_name: &str) -> Error { + if err.msg().starts_with(COLUMN_ERR_PREFIX) { + return err; + } + Error::new( + err.code(), + format!("{}{}'] {}", COLUMN_ERR_PREFIX, column_name, err.msg()), + ) +} + +fn resolve_ts_column(batch: &RecordBatch, name: ColumnName<'_>) -> Result { + let target = name.as_ref(); + for (idx, field) in batch.schema().fields().iter().enumerate() { + if field.name() == target { + if !matches!(field.data_type(), DataType::Timestamp(_, _)) { + return Err(fmt!( + ArrowIngest, + "designated timestamp column '{}' is not Timestamp(_), got {:?}", + target, + field.data_type() + )); + } + return Ok(idx); + } + } + Err(fmt!( + ArrowIngest, + "designated timestamp column '{}' not found in RecordBatch schema", + target + )) +} + +fn emit_arrow_designated_ts( + qwp_ws: &mut QwpWsColumnarBuffer, + ctx: &ArrowBulkCtx, + dtype: &DataType, + arr: &dyn Array, +) -> Result<()> { + if arr.null_count() > 0 { + return Err(fmt!( + ArrowIngest, + "designated timestamp column must have no null rows" + )); + } + let rows = arr.len() as u32; + let info = ArrowBatchInfo { + bitmap: None, + rows, + non_null: rows, + }; + let le = cfg!(target_endian = "little"); + match dtype { + DataType::Timestamp(TimeUnit::Microsecond, _) => { + let a = arr + .as_any() + .downcast_ref::() + .unwrap(); + qwp_ws.arrow_bulk_set_designated_ts(ctx, QwpColumnKind::TimestampMicros, info, |out| { + if le { + // SAFETY: i64 has no padding; LE target → wire-format bytes. 
+ extend_le_bytes_checked(out, unsafe { typed_slice_as_le_bytes(a.values()) })?; + } else { + non_null_le_into(out, arr, |row| a.value(row).to_le_bytes())?; + } + Ok(()) + }) + } + DataType::Timestamp(TimeUnit::Nanosecond, _) => { + let a = arr + .as_any() + .downcast_ref::() + .unwrap(); + qwp_ws.arrow_bulk_set_designated_ts(ctx, QwpColumnKind::TimestampNanos, info, |out| { + if le { + extend_le_bytes_checked(out, unsafe { typed_slice_as_le_bytes(a.values()) })?; + } else { + non_null_le_into(out, arr, |row| a.value(row).to_le_bytes())?; + } + Ok(()) + }) + } + DataType::Timestamp(TimeUnit::Millisecond, _) => { + // QWP designated TS supports µs/ns only; widen ms → µs. + let a = arr + .as_any() + .downcast_ref::() + .unwrap(); + qwp_ws.arrow_bulk_set_designated_ts(ctx, QwpColumnKind::TimestampMicros, info, |out| { + try_non_null_le_into(out, arr, |row| { + let v = a.value(row); + v.checked_mul(1_000).map(i64::to_le_bytes).ok_or_else(|| { + fmt!( + ArrowIngest, + "designated timestamp ms→µs overflow at row {} (value {})", + row, + v + ) + }) + }) + }) + } + other => Err(fmt!( + ArrowIngest, + "designated timestamp column has unsupported Arrow type {:?}", + other + )), + } +} + +fn try_reserve_bytes(out: &mut Vec, additional: usize, label: &str) -> Result<()> { + out.try_reserve(additional).map_err(|_| { + fmt!( + ArrowIngest, + "{}: allocator could not reserve {} bytes", + label, + additional + ) + }) +} + +fn try_reserve_typed(v: &mut Vec, additional: usize, label: &str) -> Result<()> { + v.try_reserve(additional).map_err(|_| { + fmt!( + ArrowIngest, + "{}: allocator could not reserve {} elements", + label, + additional + ) + }) +} + +/// LE primitive fast-path: `try_reserve` then `extend_from_slice` of a +/// host-LE-equal slice. Funnels every LE no-null path through one +/// allocator-aware helper so OOM surfaces as `ArrowIngest` rather than +/// aborting under `panic = "abort"`. 
+/// +/// SAFETY: `bytes` must be a host-LE re-interpretation of `T`'s value +/// representation. Caller is responsible for that invariant — every +/// in-tree caller pipes `typed_slice_as_le_bytes` which encodes it +/// statically. +fn extend_le_bytes_checked(out: &mut Vec, bytes: &[u8]) -> Result<()> { + try_reserve_bytes(out, bytes.len(), "primitive LE fast-path")?; + out.extend_from_slice(bytes); + Ok(()) +} + +fn full_with_sentinel_into( + out: &mut Vec, + arr: &dyn Array, + sentinel: [u8; N], + mut get_bytes: impl FnMut(usize) -> [u8; N], +) -> Result<()> { + let row_count = arr.len(); + let bytes = row_count.checked_mul(N).ok_or_else(|| { + fmt!( + ArrowIngest, + "full_with_sentinel: row_count {} * elem {} overflows usize", + row_count, + N + ) + })?; + try_reserve_bytes(out, bytes, "primitive column")?; + for row in 0..row_count { + if arr.is_null(row) { + out.extend_from_slice(&sentinel); + } else { + out.extend_from_slice(&get_bytes(row)); + } + } + Ok(()) +} + +fn try_full_with_sentinel_into( + out: &mut Vec, + arr: &dyn Array, + sentinel: [u8; N], + mut get_bytes: impl FnMut(usize) -> Result<[u8; N]>, +) -> Result<()> { + let row_count = arr.len(); + let bytes = row_count.checked_mul(N).ok_or_else(|| { + fmt!( + ArrowIngest, + "try_full_with_sentinel: row_count {} * elem {} overflows usize", + row_count, + N + ) + })?; + try_reserve_bytes(out, bytes, "primitive column")?; + for row in 0..row_count { + if arr.is_null(row) { + out.extend_from_slice(&sentinel); + } else { + let bytes = get_bytes(row)?; + out.extend_from_slice(&bytes); + } + } + Ok(()) +} + +// Returns `len - null_count`, surfacing the inconsistency from +// `arrow::ffi::from_ffi` (which uses `new_unchecked` and does not enforce +// `null_count ≤ len`) as a structured error rather than letting the +// subtraction wrap to near-usize::MAX and trigger an allocator abort. 
+fn non_null_count(arr: &dyn Array, label: &str) -> Result { + let row_count = arr.len(); + let null_count = arr.null_count(); + if null_count > row_count { + return Err(fmt!( + ArrowIngest, + "{}: null_count {} exceeds len {}; inconsistent Arrow buffer", + label, + null_count, + row_count + )); + } + Ok(row_count - null_count) +} + +fn non_null_le_into( + out: &mut Vec, + arr: &dyn Array, + mut get_bytes: impl FnMut(usize) -> [u8; N], +) -> Result<()> { + let non_null = non_null_count(arr, "primitive column")?; + let row_count = arr.len(); + let bytes = non_null.checked_mul(N).ok_or_else(|| { + fmt!( + ArrowIngest, + "primitive column: non_null {} * elem {} overflows usize", + non_null, + N + ) + })?; + try_reserve_bytes(out, bytes, "primitive column")?; + for row in 0..row_count { + if arr.is_null(row) { + continue; + } + out.extend_from_slice(&get_bytes(row)); + } + Ok(()) +} + +fn try_non_null_le_into( + out: &mut Vec, + arr: &dyn Array, + mut get_bytes: impl FnMut(usize) -> Result<[u8; N]>, +) -> Result<()> { + let non_null = non_null_count(arr, "primitive column")?; + let row_count = arr.len(); + let bytes = non_null.checked_mul(N).ok_or_else(|| { + fmt!( + ArrowIngest, + "primitive column: non_null {} * elem {} overflows usize", + non_null, + N + ) + })?; + try_reserve_bytes(out, bytes, "primitive column")?; + for row in 0..row_count { + if arr.is_null(row) { + continue; + } + let bytes = get_bytes(row)?; + out.extend_from_slice(&bytes); + } + Ok(()) +} + +fn non_null_fsb_into(out: &mut Vec, arr: &FixedSizeBinaryArray, size: usize) -> Result<()> { + let non_null = non_null_count(arr, "FixedSizeBinary column")?; + let row_count = arr.len(); + let bytes = non_null.checked_mul(size).ok_or_else(|| { + fmt!( + ArrowIngest, + "FixedSizeBinary column: non_null {} * elem {} overflows usize", + non_null, + size + ) + })?; + try_reserve_bytes(out, bytes, "FixedSizeBinary column")?; + for row in 0..row_count { + if arr.is_null(row) { + continue; + } + 
out.extend_from_slice(arr.value(row)); + } + Ok(()) +} + +#[inline] +unsafe fn typed_slice_as_le_bytes(slice: &[T]) -> &[u8] { + unsafe { std::slice::from_raw_parts(slice.as_ptr() as *const u8, std::mem::size_of_val(slice)) } +} + +fn emit_arrow_column( + qwp_ws: &mut QwpWsColumnarBuffer, + ctx: &ArrowBulkCtx, + col_name: ColumnName<'_>, + kind: ColumnKind, + arr: &dyn Array, +) -> Result<()> { + let non_null_usize = non_null_count(arr, "column")?; + let rows = u32::try_from(arr.len()) + .map_err(|_| fmt!(ArrowIngest, "row count {} exceeds u32::MAX", arr.len()))?; + let non_null = u32::try_from(non_null_usize).map_err(|_| { + fmt!( + ArrowIngest, + "non-null count {} exceeds u32::MAX", + non_null_usize + ) + })?; + let null_count = arr.len() - non_null_usize; + let validity = if null_count > 0 { arr.nulls() } else { None }; + let info_full = ArrowBatchInfo { + bitmap: None, + rows, + non_null, + }; + let info_sparse = ArrowBatchInfo { + bitmap: validity, + rows, + non_null, + }; + let le_no_nulls = cfg!(target_endian = "little") && null_count == 0; + match kind { + ColumnKind::Bool => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_bool(ctx, col_name, info_full, |packed, existing_rows| { + pack_bool_bits_into(packed, existing_rows, a) + }) + } + ColumnKind::I8 => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::I8, info_full, |out| { + if le_no_nulls { + extend_le_bytes_checked(out, unsafe { typed_slice_as_le_bytes(a.values()) })?; + } else { + full_with_sentinel_into(out, arr, [0u8; 1], |row| [a.value(row) as u8])?; + } + Ok(()) + }) + } + ColumnKind::I16 => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::I16, info_full, |out| { + if le_no_nulls { + extend_le_bytes_checked(out, unsafe { typed_slice_as_le_bytes(a.values()) })?; + } else { + full_with_sentinel_into(out, arr, 0i16.to_le_bytes(), |row| { + 
a.value(row).to_le_bytes() + })?; + } + Ok(()) + }) + } + ColumnKind::I32 => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::I32, info_full, |out| { + if le_no_nulls { + extend_le_bytes_checked(out, unsafe { typed_slice_as_le_bytes(a.values()) })?; + } else { + full_with_sentinel_into(out, arr, i32::MIN.to_le_bytes(), |row| { + a.value(row).to_le_bytes() + })?; + } + Ok(()) + }) + } + ColumnKind::I64 => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::I64, info_full, |out| { + if le_no_nulls { + extend_le_bytes_checked(out, unsafe { typed_slice_as_le_bytes(a.values()) })?; + } else { + full_with_sentinel_into(out, arr, i64::MIN.to_le_bytes(), |row| { + a.value(row).to_le_bytes() + })?; + } + Ok(()) + }) + } + ColumnKind::F16ToF32 => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::F32, info_full, |out| { + if null_count == 0 { + let bytes = + a.values().len().checked_mul(4).ok_or_else(|| { + fmt!(ArrowIngest, "Float16 dense extend size overflow") + })?; + try_reserve_bytes(out, bytes, "Float16 column")?; + for &h in a.values() { + out.extend_from_slice(&h.to_f32().to_le_bytes()); + } + } else { + full_with_sentinel_into(out, arr, f32::NAN.to_le_bytes(), |row| { + a.value(row).to_f32().to_le_bytes() + })?; + } + Ok(()) + }) + } + ColumnKind::F32 => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::F32, info_full, |out| { + if le_no_nulls { + extend_le_bytes_checked(out, unsafe { typed_slice_as_le_bytes(a.values()) })?; + } else { + full_with_sentinel_into(out, arr, f32::NAN.to_le_bytes(), |row| { + a.value(row).to_le_bytes() + })?; + } + Ok(()) + }) + } + ColumnKind::F64 => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::F64, info_full, |out| { + if 
le_no_nulls { + extend_le_bytes_checked(out, unsafe { typed_slice_as_le_bytes(a.values()) })?; + } else { + full_with_sentinel_into(out, arr, f64::NAN.to_le_bytes(), |row| { + a.value(row).to_le_bytes() + })?; + } + Ok(()) + }) + } + ColumnKind::Char => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::Char, info_full, |out| { + if le_no_nulls { + extend_le_bytes_checked(out, unsafe { typed_slice_as_le_bytes(a.values()) })?; + } else { + full_with_sentinel_into(out, arr, 0u16.to_le_bytes(), |row| { + a.value(row).to_le_bytes() + })?; + } + Ok(()) + }) + } + ColumnKind::Ipv4 => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::Ipv4, info_sparse, |out| { + if le_no_nulls { + extend_le_bytes_checked(out, unsafe { typed_slice_as_le_bytes(a.values()) })?; + } else { + non_null_le_into(out, arr, |row| a.value(row).to_le_bytes())?; + } + Ok(()) + }) + } + ColumnKind::U8WidenToI32 => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::I32, info_full, |out| { + if null_count == 0 { + try_reserve_bytes( + out, + a.values() + .len() + .checked_mul(4) + .ok_or_else(|| fmt!(ArrowIngest, "U8 widen reservation overflow"))?, + "U8 widen column", + )?; + for &v in a.values() { + out.extend_from_slice(&(v as i32).to_le_bytes()); + } + } else { + full_with_sentinel_into(out, arr, i32::MIN.to_le_bytes(), |row| { + (a.value(row) as i32).to_le_bytes() + })?; + } + Ok(()) + }) + } + ColumnKind::U16WidenToI32 => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::I32, info_full, |out| { + if null_count == 0 { + try_reserve_bytes( + out, + a.values() + .len() + .checked_mul(4) + .ok_or_else(|| fmt!(ArrowIngest, "U16 widen reservation overflow"))?, + "U16 widen column", + )?; + for &v in a.values() { + out.extend_from_slice(&(v as i32).to_le_bytes()); 
+ } + } else { + full_with_sentinel_into(out, arr, i32::MIN.to_le_bytes(), |row| { + (a.value(row) as i32).to_le_bytes() + })?; + } + Ok(()) + }) + } + ColumnKind::U32WidenToI64 => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::I64, info_full, |out| { + if null_count == 0 { + try_reserve_bytes( + out, + a.values() + .len() + .checked_mul(8) + .ok_or_else(|| fmt!(ArrowIngest, "U32 widen reservation overflow"))?, + "U32 widen column", + )?; + for &v in a.values() { + out.extend_from_slice(&(v as i64).to_le_bytes()); + } + } else { + full_with_sentinel_into(out, arr, i64::MIN.to_le_bytes(), |row| { + (a.value(row) as i64).to_le_bytes() + })?; + } + Ok(()) + }) + } + ColumnKind::U64WidenToI64Checked => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::I64, info_full, |out| { + try_full_with_sentinel_into(out, arr, i64::MIN.to_le_bytes(), |row| { + let v = a.value(row); + if v > i64::MAX as u64 { + return Err(fmt!( + ArrowIngest, + "UInt64 value {} at row {} exceeds i64::MAX; \ + QuestDB QWP-WS encodes integers as signed i64", + v, + row + )); + } + Ok((v as i64).to_le_bytes()) + }) + }) + } + ColumnKind::TimestampSecondToMicros => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_fixed( + ctx, + col_name, + QwpColumnKind::TimestampMicros, + info_sparse, + |out| { + if null_count == 0 { + let src = a.values(); + let bytes = src.len().checked_mul(8).ok_or_else(|| { + fmt!(ArrowIngest, "TimestampSecond→µs reservation overflow") + })?; + try_reserve_bytes(out, bytes, "TimestampSecond column")?; + for (row, &v) in src.iter().enumerate() { + let widened = v.checked_mul(1_000_000).ok_or_else(|| { + fmt!( + ArrowIngest, + "Timestamp s→µs overflow at row {} (value {})", + row, + v + ) + })?; + out.extend_from_slice(&widened.to_le_bytes()); + } + Ok(()) + } else { + try_non_null_le_into(out, arr, |row| { + let v = a.value(row); 
+ v.checked_mul(1_000_000) + .map(i64::to_le_bytes) + .ok_or_else(|| { + fmt!( + ArrowIngest, + "Timestamp s→µs overflow at row {} (value {})", + row, + v + ) + }) + }) + } + }, + ) + } + ColumnKind::TimestampMicros => { + let a = arr + .as_any() + .downcast_ref::() + .unwrap(); + qwp_ws.arrow_bulk_set_fixed( + ctx, + col_name, + QwpColumnKind::TimestampMicros, + info_sparse, + |out| { + if le_no_nulls { + extend_le_bytes_checked(out, unsafe { + typed_slice_as_le_bytes(a.values()) + })?; + } else { + non_null_le_into(out, arr, |row| a.value(row).to_le_bytes())?; + } + Ok(()) + }, + ) + } + ColumnKind::TimestampNanos => { + let a = arr + .as_any() + .downcast_ref::() + .unwrap(); + qwp_ws.arrow_bulk_set_fixed( + ctx, + col_name, + QwpColumnKind::TimestampNanos, + info_sparse, + |out| { + if le_no_nulls { + extend_le_bytes_checked(out, unsafe { + typed_slice_as_le_bytes(a.values()) + })?; + } else { + non_null_le_into(out, arr, |row| a.value(row).to_le_bytes())?; + } + Ok(()) + }, + ) + } + ColumnKind::Date => { + let a = arr + .as_any() + .downcast_ref::() + .unwrap(); + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::Date, info_sparse, |out| { + if le_no_nulls { + extend_le_bytes_checked(out, unsafe { typed_slice_as_le_bytes(a.values()) })?; + } else { + non_null_le_into(out, arr, |row| a.value(row).to_le_bytes())?; + } + Ok(()) + }) + } + ColumnKind::Date32Days => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::Date, info_sparse, |out| { + if null_count == 0 { + let src = a.values(); + let bytes = src + .len() + .checked_mul(8) + .ok_or_else(|| fmt!(ArrowIngest, "Date32 days→ms reservation overflow"))?; + try_reserve_bytes(out, bytes, "Date32 column")?; + for (row, &d) in src.iter().enumerate() { + let ms = (d as i64).checked_mul(86_400_000).ok_or_else(|| { + fmt!( + ArrowIngest, + "Date32 days→ms overflow at row {} (value {})", + row, + d + ) + })?; + 
out.extend_from_slice(&ms.to_le_bytes()); + } + Ok(()) + } else { + try_non_null_le_into(out, arr, |row| { + let days = a.value(row) as i64; + days.checked_mul(86_400_000) + .map(i64::to_le_bytes) + .ok_or_else(|| { + fmt!( + ArrowIngest, + "Date32 days→ms overflow at row {} (value {})", + row, + days + ) + }) + }) + } + }) + } + ColumnKind::Date64Ms => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::Date, info_sparse, |out| { + if le_no_nulls { + extend_le_bytes_checked(out, unsafe { typed_slice_as_le_bytes(a.values()) })?; + } else { + non_null_le_into(out, arr, |row| a.value(row).to_le_bytes())?; + } + Ok(()) + }) + } + ColumnKind::TimeAsLong(unit) => { + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::I64, info_full, |out| { + build_time_as_long_into(out, arr, unit) + }) + } + ColumnKind::DurationAsLong(unit) => { + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::I64, info_full, |out| { + build_duration_as_long_into(out, arr, unit) + }) + } + ColumnKind::Utf8 => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_varlen( + ctx, + col_name, + QwpColumnKind::String, + info_sparse, + |offsets, data| build_varlen_from_string_into(offsets, data, a), + ) + } + ColumnKind::LargeUtf8 => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_varlen( + ctx, + col_name, + QwpColumnKind::String, + info_sparse, + |offsets, data| build_varlen_from_large_string_into(offsets, data, a), + ) + } + ColumnKind::Utf8View => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_varlen( + ctx, + col_name, + QwpColumnKind::String, + info_sparse, + |offsets, data| build_varlen_from_string_view_into(offsets, data, a), + ) + } + ColumnKind::Binary => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_varlen( + ctx, + col_name, + QwpColumnKind::Binary, + info_sparse, + |offsets, data| 
build_varlen_from_binary_into(offsets, data, a), + ) + } + ColumnKind::LargeBinary => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_varlen( + ctx, + col_name, + QwpColumnKind::Binary, + info_sparse, + |offsets, data| build_varlen_from_large_binary_into(offsets, data, a), + ) + } + ColumnKind::BinaryView => { + let a = arr.as_any().downcast_ref::().unwrap(); + qwp_ws.arrow_bulk_set_varlen( + ctx, + col_name, + QwpColumnKind::Binary, + info_sparse, + |offsets, data| build_varlen_from_binary_view_into(offsets, data, a), + ) + } + ColumnKind::Uuid => { + let a = arr.as_any().downcast_ref::().unwrap(); + let elem = a.value_length() as usize; + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::Uuid, info_sparse, |out| { + if null_count == 0 { + let start = a.offset() * elem; + out.extend_from_slice(&a.value_data()[start..start + a.len() * elem]); + } else { + non_null_fsb_into(out, a, elem)?; + } + Ok(()) + }) + } + ColumnKind::Long256 => { + let a = arr.as_any().downcast_ref::().unwrap(); + let elem = a.value_length() as usize; + qwp_ws.arrow_bulk_set_fixed(ctx, col_name, QwpColumnKind::Long256, info_sparse, |out| { + if null_count == 0 { + let start = a.offset() * elem; + out.extend_from_slice(&a.value_data()[start..start + a.len() * elem]); + } else { + non_null_fsb_into(out, a, elem)?; + } + Ok(()) + }) + } + ColumnKind::Geohash(precision) => { + qwp_ws.arrow_bulk_set_geohash(ctx, col_name, precision, info_sparse, |out| { + build_geohash_bytes_into(out, arr, precision) + }) + } + ColumnKind::SymbolDict { key, value } => { + let payload = build_symbol_payload_dyn(arr, key, value)?; + qwp_ws.arrow_bulk_set_symbol( + ctx, + col_name, + &payload.keys, + &payload.entries, + &payload.dict_data, + info_sparse, + ) + } + ColumnKind::Decimal32WidenToDecimal64 => { + let a = arr.as_any().downcast_ref::().unwrap(); + let scale = decimal_scale_u8(a.scale(), "Decimal32", 9)?; + qwp_ws.arrow_bulk_set_decimal( + ctx, + col_name, + 
QwpColumnKind::Decimal64, + ArrowDecimalSpec { + scale, + element_width: 8, + }, + info_sparse, + |out| { + build_decimal_bytes_i32_widen_into(out, a)?; + Ok(()) + }, + ) + } + ColumnKind::Decimal64 => { + let a = arr.as_any().downcast_ref::().unwrap(); + let scale = decimal_scale_u8(a.scale(), "Decimal64", 18)?; + qwp_ws.arrow_bulk_set_decimal( + ctx, + col_name, + QwpColumnKind::Decimal64, + ArrowDecimalSpec { + scale, + element_width: 8, + }, + info_sparse, + |out| { + if le_no_nulls { + // SAFETY: i64 has no padding; LE target → wire-format bytes. + extend_le_bytes_checked(out, unsafe { + typed_slice_as_le_bytes(a.values()) + })?; + } else { + build_decimal_bytes_i64_into(out, a)?; + } + Ok(()) + }, + ) + } + ColumnKind::Decimal128 => { + let a = arr.as_any().downcast_ref::().unwrap(); + let scale = decimal_scale_u8(a.scale(), "Decimal128", 38)?; + qwp_ws.arrow_bulk_set_decimal( + ctx, + col_name, + QwpColumnKind::Decimal128, + ArrowDecimalSpec { + scale, + element_width: 16, + }, + info_sparse, + |out| { + if le_no_nulls { + // SAFETY: i128 has no padding; LE target → wire-format bytes. + extend_le_bytes_checked(out, unsafe { + typed_slice_as_le_bytes(a.values()) + })?; + } else { + build_decimal_bytes_i128_into(out, a)?; + } + Ok(()) + }, + ) + } + ColumnKind::Decimal256 => { + let a = arr.as_any().downcast_ref::().unwrap(); + let scale = decimal_scale_u8(a.scale(), "Decimal256", QWP_DECIMAL_MAX_SCALE)?; + qwp_ws.arrow_bulk_set_decimal( + ctx, + col_name, + QwpColumnKind::Decimal, + ArrowDecimalSpec { + scale, + element_width: 32, + }, + info_sparse, + |out| { + if le_no_nulls { + // SAFETY: i256 is `#[repr(C)] { low: u128, high: i128 }`; + // on LE that's byte-identical to `to_le_bytes()` output. + // The static asserts on size + endianness fail to + // compile if a future arrow_buffer reshapes i256. 
+ const _: () = { + assert!(std::mem::size_of::() == 32); + assert!(std::mem::align_of::() <= 32); + }; + #[cfg(target_endian = "big")] + compile_error!("Decimal256 LE fast-path requires little-endian host"); + extend_le_bytes_checked(out, unsafe { + typed_slice_as_le_bytes(a.values()) + })?; + } else { + build_decimal_bytes_i256_into(out, a)?; + } + Ok(()) + }, + ) + } + ColumnKind::ArrayDouble(ndim) => qwp_ws.arrow_bulk_set_array( + ctx, + col_name, + QwpColumnKind::DoubleArray, + info_sparse, + |data| build_array_blob_data_into(data, arr, ndim), + ), + } +} + +/// Bit-pack `arr` directly into `out`, appending after `existing_rows` +/// already present. Skips the intermediate `Vec` allocation the old +/// `pack_bool_bits` returned. The destination is the column's owned +/// `packed_bits` buffer. +fn pack_bool_bits_into(out: &mut Vec, existing_rows: usize, arr: &BooleanArray) -> Result<()> { + let row_count = arr.len(); + let total_rows = existing_rows + row_count; + let total_bytes = total_rows.div_ceil(8); + if out.len() < total_bytes { + out.resize(total_bytes, 0); + } + let value_buf = arr.values(); + let null_buf = arr.nulls(); + let nulls_aligned = null_buf.is_none_or(|nb| nb.offset().is_multiple_of(8)); + if existing_rows.is_multiple_of(8) && value_buf.offset().is_multiple_of(8) && nulls_aligned { + let n_bytes = row_count.div_ceil(8); + let v_start = value_buf.offset() / 8; + let v_end = v_start.checked_add(n_bytes).ok_or_else(|| { + fmt!( + ArrowIngest, + "BOOL pack: value-buffer end offset overflow (start={}, n_bytes={})", + v_start, + n_bytes + ) + })?; + // `from_ffi` builds the Boolean array via `new_unchecked`; a + // truncated value buffer would slice-panic and abort the host. 
+ let raw = value_buf.values(); + if v_end > raw.len() { + return Err(fmt!( + ArrowIngest, + "BOOL pack: value buffer {} bytes shorter than required {} bytes", + raw.len(), + v_end + )); + } + let dst_off = existing_rows / 8; + let full_bytes = row_count / 8; + out[dst_off..dst_off + full_bytes].copy_from_slice(&raw[v_start..v_start + full_bytes]); + let trailing = row_count % 8; + if trailing != 0 { + let mask = (1u8 << trailing) - 1; + out[dst_off + full_bytes] |= raw[v_start + full_bytes] & mask; + } + if let Some(nb) = null_buf { + let n_start = nb.offset() / 8; + let n_end = n_start.checked_add(n_bytes).ok_or_else(|| { + fmt!( + ArrowIngest, + "BOOL pack: null-buffer end offset overflow (start={}, n_bytes={})", + n_start, + n_bytes + ) + })?; + let null_raw = nb.buffer().as_slice(); + if n_end > null_raw.len() { + return Err(fmt!( + ArrowIngest, + "BOOL pack: null buffer {} bytes shorter than required {} bytes", + null_raw.len(), + n_end + )); + } + for (p, &v) in out[dst_off..dst_off + full_bytes] + .iter_mut() + .zip(&null_raw[n_start..n_start + full_bytes]) + { + *p &= v; + } + if trailing != 0 { + let mask = (1u8 << trailing) - 1; + out[dst_off + full_bytes] &= null_raw[n_start + full_bytes] | !mask; + } + } + return Ok(()); + } + for row in 0..row_count { + if !arr.is_null(row) && arr.value(row) { + let target = existing_rows + row; + out[target / 8] |= 1 << (target % 8); + } + } + Ok(()) +} + +fn varlen_data_base(data: &[u8], label: &str) -> Result { + u32::try_from(data.len()) + .map_err(|_| fmt!(ArrowIngest, "{} data base offset exceeds u32::MAX", label)) +} + +fn build_varlen_from_string_into( + offsets: &mut Vec, + data: &mut Vec, + arr: &StringArray, +) -> Result<()> { + if arr.null_count() == 0 && arr.offset() == 0 { + return varlen_no_null_i32_into( + offsets, + data, + arr.value_offsets(), + arr.value_data(), + arr.len(), + "VARCHAR", + ); + } + let row_count = arr.len(); + let data_base = varlen_data_base(data, "VARCHAR")?; + let mut cumulative: 
u32 = 0; + try_reserve_typed( + offsets, + non_null_count(arr, "VARCHAR column")?, + "VARCHAR offsets", + )?; + try_reserve_bytes(data, arr.value_data().len(), "VARCHAR data")?; + for row in 0..row_count { + if arr.is_null(row) { + continue; + } + let s = arr.value(row).as_bytes(); + cumulative = cumulative + .checked_add(s.len() as u32) + .ok_or_else(|| fmt!(ArrowIngest, "VARCHAR cumulative offset exceeds u32::MAX"))?; + let absolute = data_base + .checked_add(cumulative) + .ok_or_else(|| fmt!(ArrowIngest, "VARCHAR cumulative offset exceeds u32::MAX"))?; + data.extend_from_slice(s); + offsets.push(absolute); + } + Ok(()) +} + +fn varlen_no_null_i32_into( + offsets: &mut Vec, + data: &mut Vec, + arr_offsets: &[i32], + arr_data: &[u8], + arr_len: usize, + label: &str, +) -> Result<()> { + if arr_offsets.len() != arr_len + 1 { + return Err(fmt!( + ArrowIngest, + "{} offsets length {} != arr_len + 1 ({})", + label, + arr_offsets.len(), + arr_len + 1 + )); + } + // Per-element validation. `arrow::ffi::from_ffi` uses `new_unchecked` + // and does not enforce monotonic non-negative offsets; without this + // pass an intermediate negative offset would reinterpret as a giant + // u32 in the fast path and produce wire-format garbage. 
+ let mut prev = 0i32; + for (i, &off) in arr_offsets.iter().enumerate() { + if off < 0 { + return Err(fmt!( + ArrowIngest, + "{} offset[{}] = {} is negative", + label, + i, + off + )); + } + if i > 0 && off < prev { + return Err(fmt!( + ArrowIngest, + "{} offsets not monotonic: offset[{}] = {} < offset[{}] = {}", + label, + i, + off, + i - 1, + prev + )); + } + prev = off; + } + let first = arr_offsets[0]; + let last = arr_offsets[arr_len]; + let first_u = first as u32; + let last_u = last as u32; + let used = last_u - first_u; + let last_usize = last as usize; + if last_usize > arr_data.len() { + return Err(fmt!( + ArrowIngest, + "{} last offset {} exceeds data len {}", + label, + last_usize, + arr_data.len() + )); + } + let data_base = varlen_data_base(data, label)?; + data_base + .checked_add(used) + .ok_or_else(|| fmt!(ArrowIngest, "{} cumulative offset exceeds u32::MAX", label))?; + try_reserve_typed(offsets, arr_len, "varlen offsets")?; + try_reserve_bytes(data, used as usize, "varlen data")?; + let rebase = data_base.wrapping_sub(first_u); + if first == 0 && data_base == 0 { + // SAFETY: every offset validated non-negative above; i32 and u32 + // have identical layout so the cast is a no-op bit reinterpret. + let as_u32: &[u32] = + unsafe { std::slice::from_raw_parts(arr_offsets[1..].as_ptr() as *const u32, arr_len) }; + offsets.extend_from_slice(as_u32); + } else { + for &off in &arr_offsets[1..] 
{ + offsets.push(rebase.wrapping_add(off as u32)); + } + } + data.extend_from_slice(&arr_data[first as usize..last_usize]); + Ok(()) +} + +fn varlen_no_null_i64_narrow_into( + offsets: &mut Vec, + data: &mut Vec, + arr_offsets: &[i64], + arr_data: &[u8], + arr_len: usize, + label: &str, +) -> Result<()> { + if arr_offsets.len() != arr_len + 1 { + return Err(fmt!( + ArrowIngest, + "{} offsets length {} != arr_len + 1 ({})", + label, + arr_offsets.len(), + arr_len + 1 + )); + } + let mut prev = 0i64; + for (i, &off) in arr_offsets.iter().enumerate() { + if off < 0 { + return Err(fmt!( + ArrowIngest, + "{} offset[{}] = {} is negative", + label, + i, + off + )); + } + if i > 0 && off < prev { + return Err(fmt!( + ArrowIngest, + "{} offsets not monotonic: offset[{}] = {} < offset[{}] = {}", + label, + i, + off, + i - 1, + prev + )); + } + prev = off; + } + let first = arr_offsets[0]; + let last = arr_offsets[arr_len]; + let first_u: u32 = u32::try_from(first).map_err(|_| { + fmt!( + ArrowIngest, + "{} first offset {} exceeds u32::MAX", + label, + first + ) + })?; + let last_u: u32 = u32::try_from(last).map_err(|_| { + fmt!( + ArrowIngest, + "{} last offset {} exceeds u32::MAX", + label, + last + ) + })?; + let used = last_u - first_u; + let last_usize = last as usize; + if last_usize > arr_data.len() { + return Err(fmt!( + ArrowIngest, + "{} last offset {} exceeds data len {}", + label, + last_usize, + arr_data.len() + )); + } + let data_base = varlen_data_base(data, label)?; + data_base + .checked_add(used) + .ok_or_else(|| fmt!(ArrowIngest, "{} cumulative offset exceeds u32::MAX", label))?; + try_reserve_typed(offsets, arr_len, "varlen offsets")?; + try_reserve_bytes(data, used as usize, "varlen data")?; + let rebase = data_base.wrapping_sub(first_u); + for &off in &arr_offsets[1..] 
{ + offsets.push(rebase.wrapping_add(off as u32)); + } + data.extend_from_slice(&arr_data[first as usize..last_usize]); + Ok(()) +} + +fn build_varlen_from_large_string_into( + offsets: &mut Vec, + data: &mut Vec, + arr: &LargeStringArray, +) -> Result<()> { + if arr.null_count() == 0 && arr.offset() == 0 { + return varlen_no_null_i64_narrow_into( + offsets, + data, + arr.value_offsets(), + arr.value_data(), + arr.len(), + "LargeUtf8", + ); + } + let row_count = arr.len(); + let data_base = varlen_data_base(data, "LargeUtf8")?; + let mut cumulative: u32 = 0; + try_reserve_typed( + offsets, + non_null_count(arr, "LargeUtf8 column")?, + "LargeUtf8 offsets", + )?; + try_reserve_bytes(data, arr.value_data().len(), "LargeUtf8 data")?; + for row in 0..row_count { + if arr.is_null(row) { + continue; + } + let s = arr.value(row).as_bytes(); + let len_u32 = u32::try_from(s.len()) + .map_err(|_| fmt!(ArrowIngest, "LargeUtf8 row length exceeds u32::MAX"))?; + cumulative = cumulative + .checked_add(len_u32) + .ok_or_else(|| fmt!(ArrowIngest, "LargeUtf8 cumulative offset exceeds u32::MAX"))?; + let absolute = data_base + .checked_add(cumulative) + .ok_or_else(|| fmt!(ArrowIngest, "LargeUtf8 cumulative offset exceeds u32::MAX"))?; + data.extend_from_slice(s); + offsets.push(absolute); + } + Ok(()) +} + +fn build_varlen_from_string_view_into( + offsets: &mut Vec, + data: &mut Vec, + arr: &StringViewArray, +) -> Result<()> { + let row_count = arr.len(); + let data_base = varlen_data_base(data, "VARCHAR")?; + let mut cumulative: u32 = 0; + try_reserve_typed( + offsets, + non_null_count(arr, "Utf8View column")?, + "Utf8View offsets", + )?; + for row in 0..row_count { + if arr.is_null(row) { + continue; + } + let s = arr.value(row).as_bytes(); + cumulative = cumulative + .checked_add(s.len() as u32) + .ok_or_else(|| fmt!(ArrowIngest, "VARCHAR cumulative offset exceeds u32::MAX"))?; + let absolute = data_base + .checked_add(cumulative) + .ok_or_else(|| fmt!(ArrowIngest, "VARCHAR 
cumulative offset exceeds u32::MAX"))?; + data.extend_from_slice(s); + offsets.push(absolute); + } + Ok(()) +} + +fn build_varlen_from_binary_into( + offsets: &mut Vec, + data: &mut Vec, + arr: &BinaryArray, +) -> Result<()> { + if arr.null_count() == 0 && arr.offset() == 0 { + return varlen_no_null_i32_into( + offsets, + data, + arr.value_offsets(), + arr.value_data(), + arr.len(), + "BINARY", + ); + } + let row_count = arr.len(); + let data_base = varlen_data_base(data, "BINARY")?; + let mut cumulative: u32 = 0; + try_reserve_typed( + offsets, + non_null_count(arr, "Binary column")?, + "Binary offsets", + )?; + try_reserve_bytes(data, arr.value_data().len(), "Binary data")?; + for row in 0..row_count { + if arr.is_null(row) { + continue; + } + let s = arr.value(row); + cumulative = cumulative + .checked_add(s.len() as u32) + .ok_or_else(|| fmt!(ArrowIngest, "BINARY cumulative offset exceeds u32::MAX"))?; + let absolute = data_base + .checked_add(cumulative) + .ok_or_else(|| fmt!(ArrowIngest, "BINARY cumulative offset exceeds u32::MAX"))?; + data.extend_from_slice(s); + offsets.push(absolute); + } + Ok(()) +} + +fn build_varlen_from_large_binary_into( + offsets: &mut Vec, + data: &mut Vec, + arr: &LargeBinaryArray, +) -> Result<()> { + if arr.null_count() == 0 && arr.offset() == 0 { + return varlen_no_null_i64_narrow_into( + offsets, + data, + arr.value_offsets(), + arr.value_data(), + arr.len(), + "LargeBinary", + ); + } + let row_count = arr.len(); + let data_base = varlen_data_base(data, "LargeBinary")?; + let mut cumulative: u32 = 0; + try_reserve_typed( + offsets, + non_null_count(arr, "LargeBinary column")?, + "LargeBinary offsets", + )?; + try_reserve_bytes(data, arr.value_data().len(), "LargeBinary data")?; + for row in 0..row_count { + if arr.is_null(row) { + continue; + } + let s = arr.value(row); + let len_u32 = u32::try_from(s.len()) + .map_err(|_| fmt!(ArrowIngest, "LargeBinary row length exceeds u32::MAX"))?; + cumulative = 
cumulative.checked_add(len_u32).ok_or_else(|| { + fmt!( + ArrowIngest, + "LargeBinary cumulative offset exceeds u32::MAX" + ) + })?; + let absolute = data_base.checked_add(cumulative).ok_or_else(|| { + fmt!( + ArrowIngest, + "LargeBinary cumulative offset exceeds u32::MAX" + ) + })?; + data.extend_from_slice(s); + offsets.push(absolute); + } + Ok(()) +} + +fn build_varlen_from_binary_view_into( + offsets: &mut Vec, + data: &mut Vec, + arr: &BinaryViewArray, +) -> Result<()> { + let row_count = arr.len(); + let data_base = varlen_data_base(data, "BINARY")?; + let mut cumulative: u32 = 0; + try_reserve_typed( + offsets, + non_null_count(arr, "BinaryView column")?, + "BinaryView offsets", + )?; + for row in 0..row_count { + if arr.is_null(row) { + continue; + } + let s = arr.value(row); + cumulative = cumulative + .checked_add(s.len() as u32) + .ok_or_else(|| fmt!(ArrowIngest, "BINARY cumulative offset exceeds u32::MAX"))?; + let absolute = data_base + .checked_add(cumulative) + .ok_or_else(|| fmt!(ArrowIngest, "BINARY cumulative offset exceeds u32::MAX"))?; + data.extend_from_slice(s); + offsets.push(absolute); + } + Ok(()) +} + +fn build_geohash_bytes_into(out: &mut Vec, arr: &dyn Array, precision_bits: u8) -> Result<()> { + if !(1..=60).contains(&precision_bits) { + return Err(fmt!( + ArrowIngest, + "geohash precision_bits {} out of range (1..=60)", + precision_bits + )); + } + let row_count = arr.len(); + let width = (precision_bits as usize).div_ceil(8); + let non_null = non_null_count(arr, "Geohash column")?; + let bytes = non_null + .checked_mul(width) + .ok_or_else(|| fmt!(ArrowIngest, "Geohash byte-buffer reservation overflow"))?; + try_reserve_bytes(out, bytes, "Geohash column")?; + for row in 0..row_count { + if arr.is_null(row) { + continue; + } + let v = geohash_value_from_array(arr, row)?; + let le = v.to_le_bytes(); + out.extend_from_slice(&le[..width]); + } + Ok(()) +} + +fn decimal_scale_u8(scale_i8: i8, label: &str, max_scale: u8) -> Result { + if 
scale_i8 < 0 { + return Err(fmt!( + ArrowIngest, + "Arrow {} negative scale {} not supported", + label, + scale_i8 + )); + } + let scale = scale_i8 as u8; + if scale > max_scale { + return Err(fmt!( + ArrowIngest, + "Arrow {} scale {} exceeds maximum {} for this Arrow decimal width", + label, + scale, + max_scale + )); + } + Ok(scale) +} + +fn build_decimal_bytes_i32_widen_into(out: &mut Vec, arr: &Decimal32Array) -> Result<()> { + if arr.null_count() == 0 { + let src = arr.values(); + let bytes = src + .len() + .checked_mul(8) + .ok_or_else(|| fmt!(ArrowIngest, "Decimal32 byte-buffer reservation overflow"))?; + try_reserve_bytes(out, bytes, "Decimal32 column")?; + for &v in src { + out.extend_from_slice(&(v as i64).to_le_bytes()); + } + return Ok(()); + } + let non_null = non_null_count(arr, "Decimal32 column")?; + let row_count = arr.len(); + let bytes = non_null + .checked_mul(8) + .ok_or_else(|| fmt!(ArrowIngest, "Decimal32 byte-buffer reservation overflow"))?; + try_reserve_bytes(out, bytes, "Decimal32 column")?; + for row in 0..row_count { + if arr.is_null(row) { + continue; + } + out.extend_from_slice(&(arr.value(row) as i64).to_le_bytes()); + } + Ok(()) +} + +fn build_decimal_bytes_i64_into(out: &mut Vec, arr: &Decimal64Array) -> Result<()> { + let non_null = non_null_count(arr, "Decimal64 column")?; + let row_count = arr.len(); + let bytes = non_null + .checked_mul(8) + .ok_or_else(|| fmt!(ArrowIngest, "Decimal64 byte-buffer reservation overflow"))?; + try_reserve_bytes(out, bytes, "Decimal64 column")?; + for row in 0..row_count { + if arr.is_null(row) { + continue; + } + out.extend_from_slice(&arr.value(row).to_le_bytes()); + } + Ok(()) +} + +fn build_decimal_bytes_i128_into(out: &mut Vec, arr: &Decimal128Array) -> Result<()> { + let non_null = non_null_count(arr, "Decimal128 column")?; + let row_count = arr.len(); + let bytes = non_null + .checked_mul(16) + .ok_or_else(|| fmt!(ArrowIngest, "Decimal128 byte-buffer reservation overflow"))?; + 
try_reserve_bytes(out, bytes, "Decimal128 column")?; + for row in 0..row_count { + if arr.is_null(row) { + continue; + } + out.extend_from_slice(&arr.value(row).to_le_bytes()); + } + Ok(()) +} + +fn build_decimal_bytes_i256_into(out: &mut Vec, arr: &Decimal256Array) -> Result<()> { + let non_null = non_null_count(arr, "Decimal256 column")?; + let row_count = arr.len(); + let bytes = non_null + .checked_mul(32) + .ok_or_else(|| fmt!(ArrowIngest, "Decimal256 byte-buffer reservation overflow"))?; + try_reserve_bytes(out, bytes, "Decimal256 column")?; + for row in 0..row_count { + if arr.is_null(row) { + continue; + } + out.extend_from_slice(&arr.value(row).to_le_bytes()); + } + Ok(()) +} + +fn build_array_blob_data_into(data: &mut Vec, arr: &dyn Array, ndim: usize) -> Result<()> { + let row_count = arr.len(); + let ndim_u8 = + u8::try_from(ndim).map_err(|_| fmt!(ArrowIngest, "ARRAY ndim {} exceeds u8::MAX", ndim))?; + let mut shape: Vec = Vec::with_capacity(ndim); + for row in 0..row_count { + if arr.is_null(row) { + continue; + } + shape.clear(); + let extract = extract_array_row(arr, ndim, row, &mut shape)?; + let leaf = extract + .leaf + .as_any() + .downcast_ref::() + .ok_or_else(|| { + Error::new( + ErrorCode::ArrowUnsupportedColumnKind, + format!( + "ARRAY leaf must be Float64, got {:?}", + extract.leaf.data_type() + ), + ) + })?; + let leaf_values = &leaf.values()[extract.leaf_start..extract.leaf_end]; + data.push(ndim_u8); + for &dim in shape.iter() { + let dim_u32 = u32::try_from(dim) + .map_err(|_| fmt!(ArrowIngest, "ARRAY dimension {} exceeds u32::MAX", dim))?; + data.extend_from_slice(&dim_u32.to_le_bytes()); + } + if cfg!(target_endian = "little") { + // SAFETY: f64 has no padding; LE target → wire-format bytes. 
+ data.extend_from_slice(unsafe { typed_slice_as_le_bytes(leaf_values) }); + } else { + for &v in leaf_values { + data.extend_from_slice(&v.to_le_bytes()); + } + } + } + Ok(()) +} + +fn walk_list_leaf(dt: &DataType) -> (DataType, usize) { + let mut current = dt; + let mut ndim = 0; + loop { + match current { + DataType::List(inner) | DataType::LargeList(inner) => { + ndim += 1; + current = inner.data_type(); + } + DataType::FixedSizeList(inner, _) => { + ndim += 1; + current = inner.data_type(); + } + _ => return (current.clone(), ndim), + } + } +} + +fn dict_key_for(dt: &DataType) -> Option { + match dt { + DataType::UInt8 => Some(DictKey::U8), + DataType::UInt16 => Some(DictKey::U16), + DataType::UInt32 => Some(DictKey::U32), + _ => None, + } +} + +fn dict_value_for(dt: &DataType) -> Option { + match dt { + DataType::Utf8 => Some(DictValue::Utf8), + DataType::LargeUtf8 => Some(DictValue::LargeUtf8), + DataType::Utf8View => Some(DictValue::Utf8View), + _ => None, + } +} + +fn emit_i32_widen_to_i64_full(out: &mut Vec, arr: &dyn Array, values: &[i32]) -> Result<()> { + let sentinel = i64::MIN.to_le_bytes(); + if arr.null_count() == 0 { + let bytes = values + .len() + .checked_mul(8) + .ok_or_else(|| fmt!(ArrowIngest, "i32→i64 widen dense extend size overflow"))?; + try_reserve_bytes(out, bytes, "i32→i64 column")?; + for &v in values { + out.extend_from_slice(&(v as i64).to_le_bytes()); + } + } else { + full_with_sentinel_into(out, arr, sentinel, |row| (values[row] as i64).to_le_bytes())?; + } + Ok(()) +} + +fn emit_i64_full(out: &mut Vec, arr: &dyn Array, values: &[i64]) -> Result<()> { + let sentinel = i64::MIN.to_le_bytes(); + if arr.null_count() == 0 && cfg!(target_endian = "little") { + // SAFETY: i64 has no padding; LE target → wire-format bytes. 
+ extend_le_bytes_checked(out, unsafe { typed_slice_as_le_bytes(values) })?; + } else if arr.null_count() == 0 { + let bytes = values + .len() + .checked_mul(8) + .ok_or_else(|| fmt!(ArrowIngest, "i64 dense extend size overflow"))?; + try_reserve_bytes(out, bytes, "i64 column")?; + for &v in values { + out.extend_from_slice(&v.to_le_bytes()); + } + } else { + full_with_sentinel_into(out, arr, sentinel, |row| values[row].to_le_bytes())?; + } + Ok(()) +} + +fn build_time_as_long_into(out: &mut Vec, arr: &dyn Array, unit: TimeUnit) -> Result<()> { + match unit { + TimeUnit::Second => { + let a = arr.as_any().downcast_ref::().unwrap(); + emit_i32_widen_to_i64_full(out, arr, a.values())?; + } + TimeUnit::Millisecond => { + let a = arr + .as_any() + .downcast_ref::() + .unwrap(); + emit_i32_widen_to_i64_full(out, arr, a.values())?; + } + TimeUnit::Microsecond => { + let a = arr + .as_any() + .downcast_ref::() + .unwrap(); + emit_i64_full(out, arr, a.values())?; + } + TimeUnit::Nanosecond => { + let a = arr + .as_any() + .downcast_ref::() + .unwrap(); + emit_i64_full(out, arr, a.values())?; + } + } + Ok(()) +} + +fn build_duration_as_long_into(out: &mut Vec, arr: &dyn Array, unit: TimeUnit) -> Result<()> { + match unit { + TimeUnit::Second => { + let a = arr.as_any().downcast_ref::().unwrap(); + emit_i64_full(out, arr, a.values())?; + } + TimeUnit::Millisecond => { + let a = arr + .as_any() + .downcast_ref::() + .unwrap(); + emit_i64_full(out, arr, a.values())?; + } + TimeUnit::Microsecond => { + let a = arr + .as_any() + .downcast_ref::() + .unwrap(); + emit_i64_full(out, arr, a.values())?; + } + TimeUnit::Nanosecond => { + let a = arr + .as_any() + .downcast_ref::() + .unwrap(); + emit_i64_full(out, arr, a.values())?; + } + } + Ok(()) +} + +fn dict_lookup_str(values: &ArrayRef, key_idx: usize, value: DictValue) -> Result<&str> { + fn check(arr: &A, key_idx: usize) -> Result<()> { + if key_idx >= arr.len() { + return Err(fmt!( + ArrowIngest, + "dict key {} out of range 
(dict size {})", + key_idx, + arr.len() + )); + } + if arr.is_null(key_idx) { + return Err(fmt!( + ArrowIngest, + "dictionary values for SYMBOL / VARCHAR must not contain nulls" + )); + } + Ok(()) + } + match value { + DictValue::Utf8 => { + let utf8 = values + .as_any() + .downcast_ref::() + .ok_or_else(|| { + fmt!( + ArrowIngest, + "dictionary values must be Utf8 for this column" + ) + })?; + check(utf8, key_idx)?; + Ok(utf8.value(key_idx)) + } + DictValue::LargeUtf8 => { + let utf8 = values + .as_any() + .downcast_ref::() + .ok_or_else(|| { + fmt!( + ArrowIngest, + "dictionary values must be LargeUtf8 for this column" + ) + })?; + check(utf8, key_idx)?; + Ok(utf8.value(key_idx)) + } + DictValue::Utf8View => { + let utf8 = values + .as_any() + .downcast_ref::() + .ok_or_else(|| { + fmt!( + ArrowIngest, + "dictionary values must be Utf8View for this column" + ) + })?; + check(utf8, key_idx)?; + Ok(utf8.value(key_idx)) + } + } +} + +fn dict_values_dyn(arr: &dyn Array, key: DictKey) -> &ArrayRef { + match key { + DictKey::U32 => arr + .as_any() + .downcast_ref::>() + .unwrap() + .values(), + DictKey::U16 => arr + .as_any() + .downcast_ref::>() + .unwrap() + .values(), + DictKey::U8 => arr + .as_any() + .downcast_ref::>() + .unwrap() + .values(), + } +} + +struct SymbolPayload { + keys: Vec, + entries: Vec<(u32, u32)>, + dict_data: Vec, +} + +// Bounds reserved sizes so a hostile FFI batch cannot trigger an +// allocator-OOM abort under `panic = "abort"`. +const MAX_ARROW_DICT_VALUES: usize = 16 * 1024 * 1024; +const MAX_ARROW_INGEST_ROWS: usize = 16 * 1024 * 1024; +const MAX_ARROW_INGEST_DATA_BYTES: usize = 1024 * 1024 * 1024; + +// Sum the data-buffer byte sizes that arrow-rs's internal validation / +// our own widening loops will visit, including dictionary value data, +// FixedSizeBinary backing bytes and the multi-buffer View arrays. Returns +// `None` for types whose data size is not bounded by a single byte-count +// (e.g. 
nested ListArray descends recursively below). +fn check_array_data_bounds_inner(arr: &dyn Array, depth: usize) -> Result<()> { + if depth > 32 { + return Err(fmt!( + ArrowIngest, + "nested array depth exceeds 32 in data-bounds check" + )); + } + let dt = arr.data_type(); + let bytes: Option = match dt { + DataType::Utf8 => arr + .as_any() + .downcast_ref::() + .map(|a| a.value_data().len()), + DataType::LargeUtf8 => arr + .as_any() + .downcast_ref::() + .map(|a| a.value_data().len()), + DataType::Binary => arr + .as_any() + .downcast_ref::() + .map(|a| a.value_data().len()), + DataType::LargeBinary => arr + .as_any() + .downcast_ref::() + .map(|a| a.value_data().len()), + DataType::Utf8View => arr + .as_any() + .downcast_ref::() + .map(|a| a.data_buffers().iter().map(|b| b.len()).sum()), + DataType::BinaryView => arr + .as_any() + .downcast_ref::() + .map(|a| a.data_buffers().iter().map(|b| b.len()).sum()), + DataType::FixedSizeBinary(width) => arr + .as_any() + .downcast_ref::() + .map(|a| (*width as usize).saturating_mul(a.len())), + DataType::Float64 => arr + .as_any() + .downcast_ref::() + .map(|a| a.values().len().saturating_mul(8)), + _ => None, + }; + if let Some(b) = bytes + && b > MAX_ARROW_INGEST_DATA_BYTES + { + return Err(fmt!( + ArrowIngest, + "data-buffer length {} exceeds {} byte cap", + b, + MAX_ARROW_INGEST_DATA_BYTES + )); + } + // Recurse into dictionary values, list/fixed-size-list children. 
+ if let Some(d) = arr.as_any().downcast_ref::>() { + check_array_data_bounds_inner(d.values().as_ref(), depth + 1)?; + } else if let Some(d) = arr.as_any().downcast_ref::>() { + check_array_data_bounds_inner(d.values().as_ref(), depth + 1)?; + } else if let Some(d) = arr.as_any().downcast_ref::>() { + check_array_data_bounds_inner(d.values().as_ref(), depth + 1)?; + } else if let Some(l) = arr.as_any().downcast_ref::() { + check_array_data_bounds_inner(l.values().as_ref(), depth + 1)?; + } else if let Some(l) = arr.as_any().downcast_ref::() { + check_array_data_bounds_inner(l.values().as_ref(), depth + 1)?; + } else if let Some(l) = arr.as_any().downcast_ref::() { + check_array_data_bounds_inner(l.values().as_ref(), depth + 1)?; + } + Ok(()) +} + +fn check_batch_data_bounds(batch: &RecordBatch) -> Result<()> { + for (idx, col) in batch.columns().iter().enumerate() { + check_array_data_bounds_inner(col.as_ref(), 0) + .map_err(|e| fmt!(ArrowIngest, "column #{}: {}", idx, e.msg()))?; + } + Ok(()) +} + +fn build_symbol_payload_dyn( + arr: &dyn Array, + key: DictKey, + value: DictValue, +) -> Result { + let values = dict_values_dyn(arr, key); + let value_count = values.len(); + if value_count > MAX_ARROW_DICT_VALUES { + return Err(fmt!( + ArrowIngest, + "SYMBOL dictionary has {} values exceeding limit {}", + value_count, + MAX_ARROW_DICT_VALUES + )); + } + let row_count = arr.len(); + let mut keys: Vec = Vec::new(); + try_reserve_typed(&mut keys, row_count, "SYMBOL keys")?; + fill_dict_keys_into(&mut keys, arr, key); + debug_assert_eq!(keys.len(), row_count); + // Skip unreferenced dict entries (Polars/Datafusion may leave + // nulls there after filter/projection); emit zero-length stubs + // so key→entry indexing on the wire stays intact. 
+ let mut referenced: Vec = Vec::new(); + try_reserve_typed(&mut referenced, value_count, "SYMBOL referenced bitmap")?; + referenced.resize(value_count, false); + let has_nulls = arr.null_count() != 0; + for (row, &k) in keys.iter().enumerate() { + if has_nulls && arr.is_null(row) { + continue; + } + let idx = k as usize; + if idx >= value_count { + return Err(fmt!( + ArrowIngest, + "SYMBOL dictionary key {} at row {} exceeds dict size {}", + k, + row, + value_count + )); + } + referenced[idx] = true; + } + let mut entries: Vec<(u32, u32)> = Vec::new(); + try_reserve_typed(&mut entries, value_count, "SYMBOL entries")?; + let mut dict_data: Vec = Vec::new(); + let mut cumulative: u32 = 0; + for (i, used) in referenced.iter().enumerate() { + if !*used { + entries.push((cumulative, 0)); + continue; + } + let s = dict_lookup_str(values, i, value)?; + let bytes = s.as_bytes(); + let len = u32::try_from(bytes.len()) + .map_err(|_| fmt!(ArrowIngest, "SYMBOL entry length exceeds u32::MAX"))?; + let next_cumulative = cumulative + .checked_add(len) + .ok_or_else(|| fmt!(ArrowIngest, "SYMBOL cumulative data exceeds u32::MAX"))?; + if (next_cumulative as usize) > MAX_ARROW_INGEST_DATA_BYTES { + return Err(fmt!( + ArrowIngest, + "SYMBOL cumulative data {} exceeds {} byte cap", + next_cumulative, + MAX_ARROW_INGEST_DATA_BYTES + )); + } + try_reserve_bytes(&mut dict_data, bytes.len(), "SYMBOL dict_data")?; + dict_data.extend_from_slice(bytes); + entries.push((cumulative, len)); + cumulative = next_cumulative; + } + Ok(SymbolPayload { + keys, + entries, + dict_data, + }) +} + +fn fill_dict_keys_into(out: &mut Vec, arr: &dyn Array, key: DictKey) { + let has_nulls = arr.null_count() != 0; + match key { + DictKey::U32 => { + let dict = arr + .as_any() + .downcast_ref::>() + .unwrap(); + let raw = dict.keys().values(); + if !has_nulls { + out.extend_from_slice(raw); + return; + } + for (row, &k) in raw.iter().enumerate() { + out.push(if arr.is_null(row) { 0 } else { k }); + } + } + 
DictKey::U16 => { + let dict = arr + .as_any() + .downcast_ref::>() + .unwrap(); + let raw = dict.keys().values(); + if !has_nulls { + for &k in raw { + out.push(k as u32); + } + } else { + for (row, &k) in raw.iter().enumerate() { + out.push(if arr.is_null(row) { 0 } else { k as u32 }); + } + } + } + DictKey::U8 => { + let dict = arr + .as_any() + .downcast_ref::>() + .unwrap(); + let raw = dict.keys().values(); + if !has_nulls { + for &k in raw { + out.push(k as u32); + } + } else { + for (row, &k) in raw.iter().enumerate() { + out.push(if arr.is_null(row) { 0 } else { k as u32 }); + } + } + } + } +} + +struct ArrayRowExtract { + leaf: ArrayRef, + leaf_start: usize, + leaf_end: usize, +} + +fn extract_array_row( + outer: &dyn Array, + ndim: usize, + row: usize, + shape: &mut Vec, +) -> Result { + let (mut start, mut end) = list_row_range(outer, row)?; + shape.push(end - start); + let mut current_values: ArrayRef = list_values(outer)?; + for _ in 1..ndim { + let (level_start, level_end, level_dim, next_values) = + list_level_descend(&*current_values, start, end)?; + shape.push(level_dim); + start = level_start; + end = level_end; + current_values = next_values; + } + Ok(ArrayRowExtract { + leaf: current_values, + leaf_start: start, + leaf_end: end, + }) +} + +fn checked_offset_i32(off: i32, idx: usize) -> Result { + if off < 0 { + return Err(fmt!( + ArrowIngest, + "ARRAY List offset[{}] = {} is negative", + idx, + off + )); + } + Ok(off as usize) +} + +fn checked_offset_i64(off: i64, idx: usize) -> Result { + if off < 0 { + return Err(fmt!( + ArrowIngest, + "ARRAY LargeList offset[{}] = {} is negative", + idx, + off + )); + } + usize::try_from(off).map_err(|_| { + fmt!( + ArrowIngest, + "ARRAY LargeList offset[{}] = {} exceeds usize::MAX", + idx, + off + ) + }) +} + +fn list_row_range(arr: &dyn Array, row: usize) -> Result<(usize, usize)> { + if let Some(la) = arr.as_any().downcast_ref::() { + let offsets = la.offsets(); + let start = 
checked_offset_i32(offsets[row], row)?; + let end = checked_offset_i32(offsets[row + 1], row + 1)?; + if end < start { + return Err(fmt!( + ArrowIngest, + "ARRAY List outer offsets non-monotonic at row {} (start={}, end={})", + row, + start, + end + )); + } + Ok((start, end)) + } else if let Some(la) = arr.as_any().downcast_ref::() { + let offsets = la.offsets(); + let start = checked_offset_i64(offsets[row], row)?; + let end = checked_offset_i64(offsets[row + 1], row + 1)?; + if end < start { + return Err(fmt!( + ArrowIngest, + "ARRAY LargeList outer offsets non-monotonic at row {} (start={}, end={})", + row, + start, + end + )); + } + Ok((start, end)) + } else if let Some(la) = arr.as_any().downcast_ref::() { + let stride = la.value_length() as usize; + let start = row.checked_mul(stride).ok_or_else(|| { + fmt!( + ArrowIngest, + "ARRAY FixedSizeList row {} * stride {} overflows usize", + row, + stride + ) + })?; + let end = row + .checked_add(1) + .and_then(|n| n.checked_mul(stride)) + .ok_or_else(|| { + fmt!( + ArrowIngest, + "ARRAY FixedSizeList row {} * stride {} overflows usize", + row + 1, + stride + ) + })?; + Ok((start, end)) + } else { + Err(fmt!( + ArrowIngest, + "expected List / LargeList / FixedSizeList at outer ARRAY level, got {:?}", + arr.data_type() + )) + } +} + +fn list_values(arr: &dyn Array) -> Result { + if let Some(la) = arr.as_any().downcast_ref::() { + Ok(la.values().clone()) + } else if let Some(la) = arr.as_any().downcast_ref::() { + Ok(la.values().clone()) + } else if let Some(la) = arr.as_any().downcast_ref::() { + Ok(la.values().clone()) + } else { + Err(fmt!( + ArrowIngest, + "expected List / LargeList / FixedSizeList, got {:?}", + arr.data_type() + )) + } +} + +fn list_level_descend( + arr: &dyn Array, + start: usize, + end: usize, +) -> Result<(usize, usize, usize, ArrayRef)> { + if let Some(la) = arr.as_any().downcast_ref::() { + let offsets = la.offsets(); + if end <= start { + return Ok((0, 0, 0, la.values().clone())); + } + let 
next_start = checked_offset_i32(offsets[start], start)?; + let first_end = checked_offset_i32(offsets[start + 1], start + 1)?; + let dim = first_end.checked_sub(next_start).ok_or_else(|| { + fmt!( + ArrowIngest, + "ARRAY List inner offsets non-monotonic at row {}", + start + ) + })?; + let next_end = checked_offset_i32(offsets[end], end)?; + if next_end.checked_sub(next_start) != dim.checked_mul(end - start) { + return Err(ragged_inner_error_i32(&offsets[..], start, end, dim)); + } + Ok((next_start, next_end, dim, la.values().clone())) + } else if let Some(la) = arr.as_any().downcast_ref::() { + let offsets = la.offsets(); + if end <= start { + return Ok((0, 0, 0, la.values().clone())); + } + let next_start = checked_offset_i64(offsets[start], start)?; + let first_end = checked_offset_i64(offsets[start + 1], start + 1)?; + let dim = first_end.checked_sub(next_start).ok_or_else(|| { + fmt!( + ArrowIngest, + "ARRAY LargeList inner offsets non-monotonic at row {}", + start + ) + })?; + let next_end = checked_offset_i64(offsets[end], end)?; + if next_end.checked_sub(next_start) != dim.checked_mul(end - start) { + return Err(ragged_inner_error_i64(&offsets[..], start, end, dim)); + } + Ok((next_start, next_end, dim, la.values().clone())) + } else if let Some(la) = arr.as_any().downcast_ref::() { + let stride = la.value_length() as usize; + if end <= start { + return Ok((0, 0, 0, la.values().clone())); + } + let next_start = start.checked_mul(stride).ok_or_else(|| { + fmt!( + ArrowIngest, + "ARRAY FixedSizeList descent start {} * stride {} overflows usize", + start, + stride + ) + })?; + let next_end = end.checked_mul(stride).ok_or_else(|| { + fmt!( + ArrowIngest, + "ARRAY FixedSizeList descent end {} * stride {} overflows usize", + end, + stride + ) + })?; + Ok((next_start, next_end, stride, la.values().clone())) + } else { + Err(fmt!( + ArrowIngest, + "expected List / LargeList / FixedSizeList in ARRAY descent, got {:?}", + arr.data_type() + )) + } +} + +fn 
geohash_on_unsigned_error(field: &arrow_schema::Field, dtype_name: &str) -> Error { + fmt!( + ArrowIngest, + "column '{}': 'questdb.geohash_bits' metadata is not supported on {} columns; use a signed integer type (Int8/Int16/Int32/Int64)", + field.name(), + dtype_name + ) +} + +#[cold] +#[inline(never)] +fn ragged_inner_error_i32(offsets: &[i32], start: usize, end: usize, dim: usize) -> Error { + for i in start..end { + let sz = (offsets[i + 1] - offsets[i]) as usize; + if sz != dim { + return fmt!( + ArrowIngest, + "ARRAY row has ragged inner-list sizes: inner #{} has size {} but row's first inner is {}; N-dim ARRAY ingest requires uniform inner sizes per row", + i - start, + sz, + dim + ); + } + } + fmt!( + ArrowIngest, + "ARRAY row has ragged inner-list sizes (unable to locate offending inner)" + ) +} + +#[cold] +#[inline(never)] +fn ragged_inner_error_i64(offsets: &[i64], start: usize, end: usize, dim: usize) -> Error { + for i in start..end { + let sz = (offsets[i + 1] - offsets[i]) as usize; + if sz != dim { + return fmt!( + ArrowIngest, + "ARRAY row has ragged inner-list sizes: inner #{} has size {} but row's first inner is {}; N-dim ARRAY ingest requires uniform inner sizes per row", + i - start, + sz, + dim + ); + } + } + fmt!( + ArrowIngest, + "ARRAY row has ragged inner-list sizes (unable to locate offending inner)" + ) +} + +fn geohash_value_from_array(arr: &dyn Array, row: usize) -> Result { + if let Some(a) = arr.as_any().downcast_ref::() { + Ok(a.value(row) as u8 as u64) + } else if let Some(a) = arr.as_any().downcast_ref::() { + Ok(a.value(row) as u16 as u64) + } else if let Some(a) = arr.as_any().downcast_ref::() { + Ok(a.value(row) as u32 as u64) + } else if let Some(a) = arr.as_any().downcast_ref::() { + Ok(a.value(row) as u64) + } else { + Err(fmt!( + ArrowIngest, + "geohash column has unsupported Arrow type {:?}", + arr.data_type() + )) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum DictKey { + U8, + U16, + U32, +} + 
/// Value half of `ColumnKind::SymbolDict`.
// NOTE(review): presumably the string encoding of the Arrow dictionary
// values, as decided by `dict_value_for` (defined elsewhere) — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DictValue {
    Utf8,
    LargeUtf8,
    Utf8View,
}

/// Ingest strategy selected by `classify` for one Arrow column.
#[derive(Debug, Clone, Copy)]
enum ColumnKind {
    Bool,
    I8,
    I16,
    I32,
    I64,
    /// Chosen for Arrow `Float16`.
    F16ToF32,
    F32,
    F64,
    /// Chosen for `UInt16` tagged with column type `"char"`.
    Char,
    /// Chosen for `UInt32` tagged with column type `"ipv4"`.
    Ipv4,
    U8WidenToI32,
    U16WidenToI32,
    U32WidenToI64,
    U64WidenToI64Checked,
    TimestampSecondToMicros,
    TimestampMicros,
    TimestampNanos,
    /// Chosen for `Timestamp(Millisecond, _)`.
    Date,
    Date32Days,
    Date64Ms,
    /// Chosen for both `Time32` and `Time64`; carries the source unit.
    TimeAsLong(TimeUnit),
    DurationAsLong(TimeUnit),
    Utf8,
    LargeUtf8,
    Utf8View,
    Binary,
    LargeBinary,
    BinaryView,
    /// Chosen for `FixedSizeBinary(16)` carrying UUID metadata.
    Uuid,
    /// Chosen for `FixedSizeBinary(32)`.
    Long256,
    /// Signed integer column with validated geohash precision bits.
    Geohash(u8),
    SymbolDict { key: DictKey, value: DictValue },
    Decimal32WidenToDecimal64,
    Decimal64,
    Decimal128,
    Decimal256,
    /// Nested list with a `Float64` leaf; carries the dimension count
    /// returned by `walk_list_leaf`.
    ArrayDouble(usize),
}

/// Decides how a single Arrow column is ingested.
///
/// The decision is driven by the field's Arrow data type plus three optional
/// metadata entries: `COLUMN_TYPE`, `ARROW_EXTENSION_NAME` and `GEOHASH_BITS`.
/// The array argument is currently unused.
///
/// # Errors
///
/// * `ArrowIngest` when geohash metadata is out of range for the integer
///   width, is attached to an unsigned column, or when a signed integer
///   column is tagged with a `geohash…` column type but has no usable
///   `questdb.geohash_bits` value.
/// * `ArrowUnsupportedColumnKind` for Arrow types (or list leaves) that have
///   no ingest mapping, including `FixedSizeBinary(16)` without UUID metadata.
fn classify(field: &arrow_schema::Field, _array: &dyn Array) -> Result<ColumnKind> {
    let md_type = field
        .metadata()
        .get(crate::egress::arrow::metadata::COLUMN_TYPE)
        .map(String::as_str);
    let md_ext = field
        .metadata()
        .get(crate::egress::arrow::metadata::ARROW_EXTENSION_NAME)
        .map(String::as_str);
    // A value that does not parse as `u8` is treated like absent metadata.
    let md_geo_bits = field
        .metadata()
        .get(crate::egress::arrow::metadata::GEOHASH_BITS)
        .and_then(|s| s.parse::<u8>().ok());

    // Validates that `bits` fits the integer width backing the column.
    let check_geohash_width = |bits: u8, max_bits: u8, dtype_name: &str| -> Result<u8> {
        if bits == 0 || bits > max_bits {
            return Err(fmt!(
                ArrowIngest,
                "geohash precision_bits {} out of range for {} column (must be 1..={})",
                bits,
                dtype_name,
                max_bits
            ));
        }
        Ok(bits)
    };

    // Shared rule for every signed integer width:
    //   * geohash bits present              -> geohash (after width check)
    //   * tagged "geohash…" but no bits     -> error, never a silent fallback
    //   * otherwise                         -> the plain integer kind
    let signed_int_kind =
        |plain: ColumnKind, max_bits: u8, dtype_name: &str| -> Result<ColumnKind> {
            match (md_geo_bits, md_type) {
                (Some(bits), _) => Ok(ColumnKind::Geohash(check_geohash_width(
                    bits, max_bits, dtype_name,
                )?)),
                (None, Some(name)) if name.starts_with("geohash") => Err(fmt!(
                    ArrowIngest,
                    "column '{}' has column_type='{}' but missing or invalid 'questdb.geohash_bits' metadata (1..=60 expected)",
                    field.name(),
                    name
                )),
                (None, _) => Ok(plain),
            }
        };

    // Error for Arrow types that have no ingest mapping at all.
    let unsupported_type = || {
        Error::new(
            ErrorCode::ArrowUnsupportedColumnKind,
            format!(
                "Arrow type {:?} on column '{}' is not supported by Buffer::append_arrow",
                field.data_type(),
                field.name()
            ),
        )
    };

    Ok(match (field.data_type(), md_type, md_ext) {
        (DataType::Boolean, _, _) => ColumnKind::Bool,

        // An explicit "byte" tag wins over any geohash metadata on Int8.
        (DataType::Int8, Some("byte"), _) => ColumnKind::I8,
        (DataType::Int8, _, _) => signed_int_kind(ColumnKind::I8, 8, "Int8")?,
        (DataType::Int16, _, _) => signed_int_kind(ColumnKind::I16, 16, "Int16")?,
        (DataType::Int32, _, _) => signed_int_kind(ColumnKind::I32, 32, "Int32")?,
        (DataType::Int64, _, _) => signed_int_kind(ColumnKind::I64, 60, "Int64")?,

        (DataType::Float16, _, _) => ColumnKind::F16ToF32,
        (DataType::Float32, _, _) => ColumnKind::F32,
        (DataType::Float64, _, _) => ColumnKind::F64,

        // Unsigned integers never carry geohashes; the geohash check runs
        // before the "char" / "ipv4" tags are considered.
        (DataType::UInt8, _, _) if md_geo_bits.is_some() => {
            return Err(geohash_on_unsigned_error(field, "UInt8"));
        }
        (DataType::UInt8, _, _) => ColumnKind::U8WidenToI32,
        (DataType::UInt16, _, _) if md_geo_bits.is_some() => {
            return Err(geohash_on_unsigned_error(field, "UInt16"));
        }
        (DataType::UInt16, Some("char"), _) => ColumnKind::Char,
        (DataType::UInt16, _, _) => ColumnKind::U16WidenToI32,
        (DataType::UInt32, _, _) if md_geo_bits.is_some() => {
            return Err(geohash_on_unsigned_error(field, "UInt32"));
        }
        (DataType::UInt32, Some("ipv4"), _) => ColumnKind::Ipv4,
        (DataType::UInt32, _, _) => ColumnKind::U32WidenToI64,
        (DataType::UInt64, _, _) if md_geo_bits.is_some() => {
            return Err(geohash_on_unsigned_error(field, "UInt64"));
        }
        (DataType::UInt64, _, _) => ColumnKind::U64WidenToI64Checked,

        (DataType::Timestamp(TimeUnit::Second, _), _, _) => ColumnKind::TimestampSecondToMicros,
        (DataType::Timestamp(TimeUnit::Microsecond, _), _, _) => ColumnKind::TimestampMicros,
        (DataType::Timestamp(TimeUnit::Nanosecond, _), _, _) => ColumnKind::TimestampNanos,
        (DataType::Timestamp(TimeUnit::Millisecond, _), _, _) => ColumnKind::Date,
        (DataType::Date32, _, _) => ColumnKind::Date32Days,
        (DataType::Date64, _, _) => ColumnKind::Date64Ms,
        (DataType::Time32(unit), _, _) => ColumnKind::TimeAsLong(*unit),
        (DataType::Time64(unit), _, _) => ColumnKind::TimeAsLong(*unit),
        (DataType::Duration(unit), _, _) => ColumnKind::DurationAsLong(*unit),

        (DataType::Utf8, _, _) => ColumnKind::Utf8,
        (DataType::LargeUtf8, _, _) => ColumnKind::LargeUtf8,
        (DataType::Utf8View, _, _) => ColumnKind::Utf8View,
        (DataType::Binary, _, _) => ColumnKind::Binary,
        (DataType::LargeBinary, _, _) => ColumnKind::LargeBinary,
        (DataType::BinaryView, _, _) => ColumnKind::BinaryView,

        (DataType::FixedSizeBinary(16), Some("uuid"), _) => ColumnKind::Uuid,
        (DataType::FixedSizeBinary(16), _, Some("arrow.uuid")) => ColumnKind::Uuid,
        (DataType::FixedSizeBinary(16), _, _) => {
            return Err(Error::new(
                ErrorCode::ArrowUnsupportedColumnKind,
                format!(
                    "FixedSizeBinary(16) column '{}' lacks UUID metadata; LONG128 ingress is not yet wired",
                    field.name()
                ),
            ));
        }
        (DataType::FixedSizeBinary(32), _, _) => ColumnKind::Long256,

        // Dictionaries with an unsupported key or value type get the same
        // error as any other unsupported Arrow type.
        (DataType::Dictionary(key, value), _, _) => {
            match (dict_key_for(key), dict_value_for(value)) {
                (Some(key), Some(value)) => ColumnKind::SymbolDict { key, value },
                _ => return Err(unsupported_type()),
            }
        }

        (DataType::Decimal32(_, _), _, _) => ColumnKind::Decimal32WidenToDecimal64,
        (DataType::Decimal64(_, _), _, _) => ColumnKind::Decimal64,
        (DataType::Decimal128(_, _), _, _) => ColumnKind::Decimal128,
        (DataType::Decimal256(_, _), _, _) => ColumnKind::Decimal256,

        (DataType::List(_) | DataType::LargeList(_) | DataType::FixedSizeList(_, _), _, _) => {
            let (leaf, ndim) = walk_list_leaf(field.data_type());
            match leaf {
                DataType::Float64 => ColumnKind::ArrayDouble(ndim),
                other => {
                    return Err(Error::new(
                        ErrorCode::ArrowUnsupportedColumnKind,
                        format!(
                            "Arrow nested-list column '{}' leaf {:?} is not supported; QuestDB ARRAY ingress requires Float64 leaf",
                            field.name(),
                            other
                        ),
                    ));
                }
            }
        }

        _ => return Err(unsupported_type()),
    })
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Arc;

    use arrow_array::builder::{
        BinaryBuilder, Decimal64Builder, Decimal128Builder, FixedSizeBinaryBuilder, Float64Builder,
        Int8Builder, Int16Builder, Int32Builder, Int64Builder, ListBuilder, StringBuilder,
        StringDictionaryBuilder, TimestampMicrosecondBuilder, TimestampMillisecondBuilder,
        TimestampNanosecondBuilder, UInt16Builder, UInt32Builder,
    };
    use arrow_array::types::UInt32Type;
    use arrow_array::{ArrayRef, RecordBatch};
    use arrow_schema::{DataType, Field, IntervalUnit, Schema as ArrowSchema, TimeUnit};

    use crate::ingress::{Buffer, TableName};

    fn arrow_schema_with(field: Field) -> Arc<ArrowSchema> {
        Arc::new(ArrowSchema::new(vec![field]))
    }

    fn fresh_buffer() -> Buffer {
        Buffer::qwp_ws_with_max_name_len(127)
    }

    fn table(name: &str) -> TableName<'_> {
        TableName::new(name).unwrap()
    }

    #[test]
    fn int_family_appends_through_widening_dispatch() {
        let i8a = Int8Builder::new();
        let i16a = Int16Builder::new();
        let i32a = Int32Builder::new();
        let i64a = Int64Builder::new();
        let u16a = UInt16Builder::new();
        let u32a = UInt32Builder::new();
        let mut all_builders = (i8a, i16a, i32a, i64a, u16a, u32a);
        all_builders.0.append_value(1);
        all_builders.0.append_value(-1);
        all_builders.1.append_value(2);
        all_builders.1.append_value(-2);
        all_builders.2.append_value(3);
        all_builders.2.append_value(-3);
        all_builders.3.append_value(4);
        all_builders.3.append_value(-4);
        all_builders.4.append_value(0x41);
        all_builders.4.append_value(0x42);
        all_builders.5.append_value(0x0100_007F);
all_builders.5.append_value(0x0101_A8C0); + let cols: Vec = vec![ + Arc::new(all_builders.0.finish()), + Arc::new(all_builders.1.finish()), + Arc::new(all_builders.2.finish()), + Arc::new(all_builders.3.finish()), + Arc::new(all_builders.4.finish()), + Arc::new(all_builders.5.finish()), + ]; + let fields = vec![ + Field::new("byte", DataType::Int8, true), + Field::new("short", DataType::Int16, true), + Field::new("int", DataType::Int32, true), + Field::new("long", DataType::Int64, true), + Field::new("char_u16", DataType::UInt16, true).with_metadata( + [( + crate::egress::arrow::metadata::COLUMN_TYPE.into(), + "char".into(), + )] + .into_iter() + .collect(), + ), + Field::new("ipv4", DataType::UInt32, true).with_metadata( + [( + crate::egress::arrow::metadata::COLUMN_TYPE.into(), + "ipv4".into(), + )] + .into_iter() + .collect(), + ), + ]; + let schema = Arc::new(ArrowSchema::new(fields)); + let rb = RecordBatch::try_new(schema, cols).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 2); + } + + #[test] + fn float_double_columns_append() { + let mut f64b = Float64Builder::new(); + f64b.append_value(1.5); + f64b.append_value(-2.5); + let schema = arrow_schema_with(Field::new("d", DataType::Float64, true)); + let rb = RecordBatch::try_new(schema, vec![Arc::new(f64b.finish()) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 2); + } + + #[test] + fn timestamp_columns_route_to_correct_setter() { + let mut us = TimestampMicrosecondBuilder::new(); + us.append_value(1_700_000_000_000_000); + let mut ns = TimestampNanosecondBuilder::new(); + ns.append_value(1_700_000_000_000_000_000); + let mut ms = TimestampMillisecondBuilder::new(); + ms.append_value(1_700_000_000_000); + let cols: Vec = vec![ + Arc::new(us.finish()), + Arc::new(ns.finish()), + Arc::new(ms.finish()), + ]; + let schema = Arc::new(ArrowSchema::new(vec![ + 
Field::new( + "ts_us", + DataType::Timestamp(TimeUnit::Microsecond, None), + true, + ), + Field::new( + "ts_ns", + DataType::Timestamp(TimeUnit::Nanosecond, None), + true, + ), + Field::new( + "ts_ms", + DataType::Timestamp(TimeUnit::Millisecond, None), + true, + ), + ])); + let rb = RecordBatch::try_new(schema, cols).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 1); + } + + #[test] + fn utf8_and_binary_append() { + let mut s = StringBuilder::new(); + s.append_value("hello"); + s.append_value(""); + s.append_value("yo"); + let mut bin = BinaryBuilder::new(); + bin.append_value([1u8, 2, 3]); + bin.append_value([]); + bin.append_value([0xFFu8]); + let cols: Vec = vec![Arc::new(s.finish()), Arc::new(bin.finish())]; + let schema = Arc::new(ArrowSchema::new(vec![ + Field::new("name", DataType::Utf8, true), + Field::new("blob", DataType::Binary, true), + ])); + let rb = RecordBatch::try_new(schema, cols).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn uuid_with_arrow_uuid_extension_routes_to_column_uuid() { + let mut b = FixedSizeBinaryBuilder::new(16); + let bytes = [ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, + 0x0F, 0x10, + ]; + b.append_value(bytes).unwrap(); + let field = Field::new("id", DataType::FixedSizeBinary(16), true).with_metadata( + [( + crate::egress::arrow::metadata::ARROW_EXTENSION_NAME.into(), + "arrow.uuid".into(), + )] + .into_iter() + .collect(), + ); + let schema = arrow_schema_with(field); + let rb = RecordBatch::try_new(schema, vec![Arc::new(b.finish()) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 1); + } + + #[test] + fn uuid_without_metadata_rejected() { + let mut b = FixedSizeBinaryBuilder::new(16); + b.append_value([0u8; 16]).unwrap(); + let schema = 
arrow_schema_with(Field::new("id", DataType::FixedSizeBinary(16), true)); + let rb = RecordBatch::try_new(schema, vec![Arc::new(b.finish()) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + let err = buf.append_arrow(table("t"), &rb).unwrap_err(); + assert_eq!( + err.code(), + crate::error::ErrorCode::ArrowUnsupportedColumnKind + ); + } + + #[test] + fn long256_routes_to_column_long256() { + let mut b = FixedSizeBinaryBuilder::new(32); + b.append_value([0u8; 32]).unwrap(); + let schema = arrow_schema_with(Field::new("l", DataType::FixedSizeBinary(32), true)); + let rb = RecordBatch::try_new(schema, vec![Arc::new(b.finish()) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 1); + } + + #[test] + fn symbol_dictionary_routes_to_symbol_setter() { + let mut b = StringDictionaryBuilder::::new(); + b.append("AAPL").unwrap(); + b.append("MSFT").unwrap(); + b.append("AAPL").unwrap(); + let arr = b.finish(); + let field = Field::new( + "sym", + DataType::Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)), + true, + ) + .with_metadata( + [(crate::egress::arrow::metadata::SYMBOL.into(), "true".into())] + .into_iter() + .collect(), + ); + let schema = arrow_schema_with(field); + let rb = RecordBatch::try_new(schema, vec![Arc::new(arr) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn dictionary_without_metadata_routes_to_symbol() { + let mut b = StringDictionaryBuilder::::new(); + b.append("x").unwrap(); + b.append("y").unwrap(); + let arr = b.finish(); + let field = Field::new( + "v", + DataType::Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)), + true, + ); + let schema = arrow_schema_with(field); + let rb = RecordBatch::try_new(schema, vec![Arc::new(arr) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), 
&rb).unwrap(); + assert_eq!(buf.row_count(), 2); + } + + #[test] + fn geohash_routes_via_metadata() { + let mut b = Int32Builder::new(); + b.append_value(0x0001_FFFF); + let field = Field::new("g", DataType::Int32, true).with_metadata( + [( + crate::egress::arrow::metadata::GEOHASH_BITS.into(), + "20".into(), + )] + .into_iter() + .collect(), + ); + let schema = arrow_schema_with(field); + let rb = RecordBatch::try_new(schema, vec![Arc::new(b.finish()) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 1); + } + + #[test] + fn decimal64_appends_via_be_mantissa() { + let mut b = Decimal64Builder::new(); + b.append_value(12345); + let arr = b.finish().with_precision_and_scale(18, 2).unwrap(); + let schema = arrow_schema_with(Field::new("d", DataType::Decimal64(18, 2), true)); + let rb = RecordBatch::try_new(schema, vec![Arc::new(arr) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 1); + } + + #[test] + fn decimal128_appends_via_be_mantissa() { + let mut b = Decimal128Builder::new(); + b.append_value(67890_i128); + let arr = b.finish().with_precision_and_scale(38, 3).unwrap(); + let schema = arrow_schema_with(Field::new("d", DataType::Decimal128(38, 3), true)); + let rb = RecordBatch::try_new(schema, vec![Arc::new(arr) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 1); + } + + #[test] + fn designated_timestamp_column_picks_per_row_value() { + let mut ts = TimestampMicrosecondBuilder::new(); + ts.append_value(1_700_000_000_000_000); + ts.append_value(1_700_000_000_000_001); + let ts_arr = ts.finish().with_timezone("UTC"); + let mut v = Int64Builder::new(); + v.append_value(10); + v.append_value(20); + let schema = Arc::new(ArrowSchema::new(vec![ + Field::new( + "ts", + DataType::Timestamp(TimeUnit::Microsecond, 
Some("UTC".into())), + false, + ), + Field::new("v", DataType::Int64, false), + ])); + let rb = RecordBatch::try_new( + schema, + vec![ + Arc::new(ts_arr) as ArrayRef, + Arc::new(v.finish()) as ArrayRef, + ], + ) + .unwrap(); + let mut buf = fresh_buffer(); + let ts_col = ColumnName::new("ts").unwrap(); + buf.append_arrow_at_column(table("t"), &rb, ts_col).unwrap(); + assert_eq!(buf.row_count(), 2); + } + + #[test] + fn ts_column_not_found_returns_arrow_ingest_error() { + let mut v = Int64Builder::new(); + v.append_value(10); + let schema = arrow_schema_with(Field::new("v", DataType::Int64, false)); + let rb = RecordBatch::try_new(schema, vec![Arc::new(v.finish()) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + let missing = ColumnName::new("missing_ts").unwrap(); + let err = buf + .append_arrow_at_column(table("t"), &rb, missing) + .unwrap_err(); + assert_eq!(err.code(), crate::error::ErrorCode::ArrowIngest); + } + + #[test] + fn ts_column_wrong_dtype_returns_arrow_ingest_error() { + let mut v = Int64Builder::new(); + v.append_value(10); + let schema = arrow_schema_with(Field::new("v", DataType::Int64, false)); + let rb = RecordBatch::try_new(schema, vec![Arc::new(v.finish()) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + let v_col = ColumnName::new("v").unwrap(); + let err = buf + .append_arrow_at_column(table("t"), &rb, v_col) + .unwrap_err(); + assert_eq!(err.code(), crate::error::ErrorCode::ArrowIngest); + } + + #[test] + fn nested_double_list_routes_to_column_arr() { + let mut single = ListBuilder::new(Float64Builder::new()); + single.values().append_value(1.0); + single.values().append_value(2.0); + single.values().append_value(3.0); + single.append(true); + let arr = single.finish(); + let field = Field::new( + "a", + DataType::List(Arc::new(Field::new("item", DataType::Float64, true))), + true, + ); + let schema = arrow_schema_with(field); + let rb = RecordBatch::try_new(schema, vec![Arc::new(arr) as ArrayRef]).unwrap(); + let mut buf 
= fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 1); + } + + #[test] + fn nested_int_list_rejected_as_unsupported() { + let mut single = ListBuilder::new(Int64Builder::new()); + single.values().append_value(1); + single.append(true); + let arr = single.finish(); + let field = Field::new( + "a", + DataType::List(Arc::new(Field::new("item", DataType::Int64, true))), + true, + ); + let schema = arrow_schema_with(field); + let rb = RecordBatch::try_new(schema, vec![Arc::new(arr) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + let err = buf.append_arrow(table("t"), &rb).unwrap_err(); + assert_eq!( + err.code(), + crate::error::ErrorCode::ArrowUnsupportedColumnKind + ); + } + + #[test] + fn empty_batch_is_noop() { + let mut v = Int64Builder::new(); + let schema = arrow_schema_with(Field::new("v", DataType::Int64, false)); + let rb = RecordBatch::try_new(schema, vec![Arc::new(v.finish()) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 0); + } + + #[test] + fn ilp_buffer_rejects_append_arrow() { + let mut v = Int64Builder::new(); + v.append_value(1); + let schema = arrow_schema_with(Field::new("v", DataType::Int64, false)); + let rb = RecordBatch::try_new(schema, vec![Arc::new(v.finish()) as ArrayRef]).unwrap(); + let mut buf = Buffer::new(crate::ingress::ProtocolVersion::V2); + let err = buf.append_arrow(table("t"), &rb).unwrap_err(); + assert_eq!(err.code(), crate::error::ErrorCode::InvalidApiCall); + } + + #[test] + fn i32_arrow_uses_min_sentinel_for_null_rows() { + let mut b = Int32Builder::new(); + b.append_value(7); + b.append_null(); + b.append_value(-3); + let schema = arrow_schema_with(Field::new("n", DataType::Int32, true)); + let rb = RecordBatch::try_new(schema, vec![Arc::new(b.finish()) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } 
+ + #[test] + fn f64_arrow_uses_nan_sentinel_for_null_rows() { + let mut b = Float64Builder::new(); + b.append_value(1.0); + b.append_null(); + b.append_value(2.0); + let schema = arrow_schema_with(Field::new("f", DataType::Float64, true)); + let rb = RecordBatch::try_new(schema, vec![Arc::new(b.finish()) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn timestamp_arrow_encodes_nulls_via_bitmap() { + let mut b = TimestampMicrosecondBuilder::new(); + b.append_value(1_700_000_000_000_000); + b.append_null(); + b.append_value(1_700_000_000_000_100); + let field = Field::new("ts", DataType::Timestamp(TimeUnit::Microsecond, None), true); + let schema = arrow_schema_with(field); + let rb = RecordBatch::try_new(schema, vec![Arc::new(b.finish()) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn varchar_arrow_encodes_null_rows() { + let mut b = StringBuilder::new(); + b.append_value("hello"); + b.append_null(); + b.append_value("world"); + let schema = arrow_schema_with(Field::new("v", DataType::Utf8, true)); + let rb = RecordBatch::try_new(schema, vec![Arc::new(b.finish()) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn symbol_arrow_builds_dict_and_dedups_keys() { + let mut b = StringDictionaryBuilder::::new(); + b.append_value("us-east"); + b.append_value("us-west"); + b.append_value("us-east"); + b.append_null(); + b.append_value("us-west"); + let arr = b.finish(); + let field = Field::new( + "region", + DataType::Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)), + true, + ) + .with_metadata( + [(crate::egress::arrow::metadata::SYMBOL.into(), "true".into())] + .into_iter() + .collect(), + ); + let schema = arrow_schema_with(field); + let 
rb = RecordBatch::try_new(schema, vec![Arc::new(arr) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 5); + } + + #[test] + fn decimal128_arrow_propagates_scale() { + let mut b = Decimal128Builder::new().with_data_type(DataType::Decimal128(10, 2)); + b.append_value(12345); + b.append_null(); + b.append_value(-67890); + let schema = arrow_schema_with(Field::new("amt", DataType::Decimal128(10, 2), true)); + let rb = RecordBatch::try_new(schema, vec![Arc::new(b.finish()) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn geohash_arrow_encodes_null_rows_via_bitmap() { + let mut b = Int32Builder::new(); + b.append_value(0x1234_5678); + b.append_null(); + b.append_value(0x0DEA_DBEE); + let field = Field::new("g", DataType::Int32, true).with_metadata( + [( + crate::egress::arrow::metadata::GEOHASH_BITS.into(), + "32".into(), + )] + .into_iter() + .collect(), + ); + let schema = arrow_schema_with(field); + let rb = RecordBatch::try_new(schema, vec![Arc::new(b.finish()) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn array_double_2d_arrow_encodes_per_row_blobs() { + let mut outer = ListBuilder::new(ListBuilder::new(Float64Builder::new())); + { + let mid = outer.values(); + let leaf = mid.values(); + leaf.append_value(1.0); + leaf.append_value(2.0); + mid.append(true); + let leaf = mid.values(); + leaf.append_value(3.0); + leaf.append_value(4.0); + mid.append(true); + } + outer.append(true); + { + let mid = outer.values(); + let leaf = mid.values(); + leaf.append_value(5.0); + mid.append(true); + } + outer.append(true); + let arr = outer.finish(); + let inner_field = Arc::new(Field::new( + "item", + DataType::List(Arc::new(Field::new("item", DataType::Float64, true))), + true, + )); 
+ let field = Field::new("a", DataType::List(inner_field), true); + let schema = arrow_schema_with(field); + let rb = RecordBatch::try_new(schema, vec![Arc::new(arr) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 2); + } + + #[test] + fn multi_batch_append_accumulates_rows() { + let schema = arrow_schema_with(Field::new("v", DataType::Int64, false)); + let mut buf = fresh_buffer(); + for value in [10i64, 20, 30] { + let mut b = Int64Builder::new(); + b.append_value(value); + let rb = RecordBatch::try_new(schema.clone(), vec![Arc::new(b.finish()) as ArrayRef]) + .unwrap(); + buf.append_arrow(table("t"), &rb).unwrap(); + } + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn mixed_row_by_row_after_arrow_errors() { + let mut b = Int64Builder::new(); + b.append_value(1); + let schema = arrow_schema_with(Field::new("v", DataType::Int64, false)); + let rb = RecordBatch::try_new(schema, vec![Arc::new(b.finish()) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + let err = buf + .table(table("t")) + .and_then(|b| b.column_i64("v", 99)) + .err(); + assert!(err.is_some()); + } + + #[test] + fn designated_ts_with_null_rejects() { + let mut v = Int64Builder::new(); + v.append_value(1); + v.append_value(2); + let mut ts = TimestampMicrosecondBuilder::new(); + ts.append_value(1_000); + ts.append_null(); + let cols: Vec = vec![Arc::new(v.finish()), Arc::new(ts.finish())]; + let schema = Arc::new(ArrowSchema::new(vec![ + Field::new("v", DataType::Int64, true), + Field::new("ts", DataType::Timestamp(TimeUnit::Microsecond, None), true), + ])); + let rb = RecordBatch::try_new(schema, cols).unwrap(); + let mut buf = fresh_buffer(); + let ts_name = ColumnName::new("ts").unwrap(); + let err = buf + .append_arrow_at_column(table("t"), &rb, ts_name) + .unwrap_err(); + assert_eq!(err.code(), crate::error::ErrorCode::ArrowIngest); + } + + #[test] + fn 
uint8_widens_to_int_appends() { + use arrow_array::builder::UInt8Builder; + let mut u = UInt8Builder::new(); + u.append_value(0); + u.append_value(0xFF); + u.append_null(); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("v", DataType::UInt8, true)), + vec![Arc::new(u.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn uint64_within_i64_range_appends() { + use arrow_array::builder::UInt64Builder; + let mut u = UInt64Builder::new(); + u.append_value(0); + u.append_value(i64::MAX as u64); + u.append_value(42); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("v", DataType::UInt64, true)), + vec![Arc::new(u.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn uint64_above_i64_max_is_rejected() { + use arrow_array::builder::UInt64Builder; + let mut u = UInt64Builder::new(); + u.append_value(0); + u.append_value(1u64 << 63); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("v", DataType::UInt64, true)), + vec![Arc::new(u.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + let err = buf.append_arrow(table("t"), &rb).unwrap_err(); + assert_eq!(err.code(), crate::error::ErrorCode::ArrowIngest); + assert!(err.msg().contains("UInt64 value")); + } + + #[test] + fn uint64_max_value_is_rejected() { + use arrow_array::builder::UInt64Builder; + let mut u = UInt64Builder::new(); + u.append_value(u64::MAX); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("v", DataType::UInt64, true)), + vec![Arc::new(u.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + let err = buf.append_arrow(table("t"), &rb).unwrap_err(); + assert_eq!(err.code(), crate::error::ErrorCode::ArrowIngest); + } + + #[test] + fn date32_days_appends_as_date_ms() { + use 
arrow_array::builder::Date32Builder; + let mut d = Date32Builder::new(); + d.append_value(0); + d.append_value(19_675); + d.append_null(); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("d", DataType::Date32, true)), + vec![Arc::new(d.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn time32_seconds_appends() { + use arrow_array::builder::Time32SecondBuilder; + let mut t = Time32SecondBuilder::new(); + t.append_value(0); + t.append_value(86_399); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("t", DataType::Time32(TimeUnit::Second), true)), + vec![Arc::new(t.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 2); + } + + #[test] + fn time64_nanoseconds_appends() { + use arrow_array::builder::Time64NanosecondBuilder; + let mut t = Time64NanosecondBuilder::new(); + t.append_value(0); + t.append_value(86_399 * 1_000_000_000); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new( + "t", + DataType::Time64(TimeUnit::Nanosecond), + true, + )), + vec![Arc::new(t.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 2); + } + + #[test] + fn duration_microseconds_appends() { + use arrow_array::builder::DurationMicrosecondBuilder; + let mut d = DurationMicrosecondBuilder::new(); + d.append_value(1_000_000); + d.append_value(-1); + d.append_null(); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new( + "d", + DataType::Duration(TimeUnit::Microsecond), + true, + )), + vec![Arc::new(d.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn dict_u32_large_utf8_routes_to_symbol() { + use 
arrow_array::DictionaryArray; + use arrow_array::types::UInt32Type; + let dict = DictionaryArray::::from_iter( + ["AAPL", "MSFT", "AAPL"].into_iter().map(Some), + ); + let large_values = LargeStringArray::from(vec!["AAPL", "MSFT"]); + let dict = + DictionaryArray::::try_new(dict.keys().clone(), Arc::new(large_values)) + .unwrap(); + let field = Field::new( + "s", + DataType::Dictionary(Box::new(DataType::UInt32), Box::new(DataType::LargeUtf8)), + true, + ); + let rb = RecordBatch::try_new(arrow_schema_with(field), vec![Arc::new(dict) as ArrayRef]) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn dict_u8_utf8_routes_to_symbol() { + use arrow_array::DictionaryArray; + use arrow_array::types::UInt8Type; + let dict = DictionaryArray::::from_iter( + ["red", "green", "blue", "red"].into_iter().map(Some), + ); + let field = Field::new( + "s", + DataType::Dictionary(Box::new(DataType::UInt8), Box::new(DataType::Utf8)), + true, + ); + let rb = RecordBatch::try_new(arrow_schema_with(field), vec![Arc::new(dict) as ArrayRef]) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 4); + } + + #[test] + fn dict_u32_utf8_view_routes_to_symbol() { + // polars 0.53 emits Categorical as Dictionary(UInt32, Utf8View). 
+ use arrow_array::DictionaryArray; + use arrow_array::types::UInt32Type; + let dict = DictionaryArray::::from_iter( + ["AAPL", "MSFT", "AAPL"].into_iter().map(Some), + ); + let view_values = StringViewArray::from(vec!["AAPL", "MSFT"]); + let dict = + DictionaryArray::::try_new(dict.keys().clone(), Arc::new(view_values)) + .unwrap(); + let field = Field::new( + "s", + DataType::Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8View)), + true, + ); + let rb = RecordBatch::try_new(arrow_schema_with(field), vec![Arc::new(dict) as ArrayRef]) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn large_utf8_no_null_takes_bulk_memcpy_path() { + let a = LargeStringArray::from(vec!["AAPL", "MSFT", "GOOG"]); + let b = LargeStringArray::from(vec!["alpha", "beta", "gamma"]); + let rb = RecordBatch::try_new( + Arc::new(ArrowSchema::new(vec![ + Field::new("a", DataType::LargeUtf8, true), + Field::new("b", DataType::LargeUtf8, true), + ])), + vec![Arc::new(a) as ArrayRef, Arc::new(b) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn large_binary_no_null_takes_bulk_memcpy_path() { + let rows: Vec<&[u8]> = vec![b"\x00\x01", b"\xff", b"\x02\x03\x04"]; + let a = LargeBinaryArray::from_iter_values(rows); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("a", DataType::LargeBinary, true)), + vec![Arc::new(a) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn large_utf8_with_nulls_still_works_via_slow_path() { + let a = LargeStringArray::from(vec![Some("x"), None, Some("yz")]); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("a", DataType::LargeUtf8, true)), + vec![Arc::new(a) as ArrayRef], + ) + .unwrap(); + let mut buf = 
fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn fixed_size_list_float64_appends_as_array_1d() { + use arrow_array::builder::FixedSizeListBuilder; + let mut b = FixedSizeListBuilder::new(Float64Builder::new(), 3); + b.values().append_value(1.0); + b.values().append_value(2.0); + b.values().append_value(3.0); + b.append(true); + b.values().append_value(4.0); + b.values().append_value(5.0); + b.values().append_value(6.0); + b.append(true); + let arr = b.finish(); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("a", arr.data_type().clone(), true)), + vec![Arc::new(arr) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 2); + } + + #[test] + fn time32_milliseconds_appends() { + use arrow_array::builder::Time32MillisecondBuilder; + let mut t = Time32MillisecondBuilder::new(); + t.append_value(0); + t.append_value(86_399_999); + t.append_null(); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new( + "t", + DataType::Time32(TimeUnit::Millisecond), + true, + )), + vec![Arc::new(t.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn time64_microseconds_appends() { + use arrow_array::builder::Time64MicrosecondBuilder; + let mut t = Time64MicrosecondBuilder::new(); + t.append_value(0); + t.append_value(86_399_999_999); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new( + "t", + DataType::Time64(TimeUnit::Microsecond), + true, + )), + vec![Arc::new(t.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 2); + } + + #[test] + fn duration_seconds_appends() { + use arrow_array::builder::DurationSecondBuilder; + let mut d = DurationSecondBuilder::new(); + d.append_value(0); 
+ d.append_value(-3600); + d.append_value(86_400); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("d", DataType::Duration(TimeUnit::Second), true)), + vec![Arc::new(d.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn duration_milliseconds_appends() { + use arrow_array::builder::DurationMillisecondBuilder; + let mut d = DurationMillisecondBuilder::new(); + d.append_value(1_500); + d.append_value(0); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new( + "d", + DataType::Duration(TimeUnit::Millisecond), + true, + )), + vec![Arc::new(d.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 2); + } + + #[test] + fn duration_nanoseconds_appends() { + use arrow_array::builder::DurationNanosecondBuilder; + let mut d = DurationNanosecondBuilder::new(); + d.append_value(0); + d.append_value(1_500_000_000); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new( + "d", + DataType::Duration(TimeUnit::Nanosecond), + true, + )), + vec![Arc::new(d.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 2); + } + + #[test] + fn dict_u16_utf8_routes_to_symbol() { + use arrow_array::DictionaryArray; + use arrow_array::types::UInt16Type; + let dict = + DictionaryArray::::from_iter(["x", "y", "x", "z"].into_iter().map(Some)); + let field = Field::new( + "s", + DataType::Dictionary(Box::new(DataType::UInt16), Box::new(DataType::Utf8)), + true, + ); + let rb = RecordBatch::try_new(arrow_schema_with(field), vec![Arc::new(dict) as ArrayRef]) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 4); + } + + #[test] + fn dict_u8_large_utf8_routes_to_symbol() { + use 
arrow_array::DictionaryArray; + use arrow_array::types::UInt8Type; + let keys = arrow_array::UInt8Array::from(vec![0u8, 1, 0, 1]); + let values = LargeStringArray::from(vec!["alpha", "beta"]); + let dict = DictionaryArray::::try_new(keys, Arc::new(values)).unwrap(); + let field = Field::new( + "s", + DataType::Dictionary(Box::new(DataType::UInt8), Box::new(DataType::LargeUtf8)), + true, + ); + let rb = RecordBatch::try_new(arrow_schema_with(field), vec![Arc::new(dict) as ArrayRef]) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 4); + } + + #[test] + fn symbol_dict_with_metadata_still_routes_to_symbol() { + use arrow_array::DictionaryArray; + use arrow_array::types::UInt32Type; + let dict = DictionaryArray::::from_iter(["A", "B", "A"].into_iter().map(Some)); + let field = Field::new( + "s", + DataType::Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)), + true, + ) + .with_metadata( + [( + crate::egress::arrow::metadata::SYMBOL.to_string(), + "true".to_string(), + )] + .into_iter() + .collect(), + ); + let rb = RecordBatch::try_new(arrow_schema_with(field), vec![Arc::new(dict) as ArrayRef]) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn date32_all_null_appends() { + use arrow_array::builder::Date32Builder; + let mut d = Date32Builder::new(); + d.append_null(); + d.append_null(); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("d", DataType::Date32, true)), + vec![Arc::new(d.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 2); + } + + #[test] + fn time64_ns_all_null_appends() { + use arrow_array::builder::Time64NanosecondBuilder; + let mut t = Time64NanosecondBuilder::new(); + t.append_null(); + t.append_null(); + t.append_null(); + let rb = RecordBatch::try_new( + 
arrow_schema_with(Field::new( + "t", + DataType::Time64(TimeUnit::Nanosecond), + true, + )), + vec![Arc::new(t.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn nested_list_ragged_inner_within_row_errors() { + use arrow_array::builder::ListBuilder; + let mut outer = ListBuilder::new(ListBuilder::new(Float64Builder::new())); + outer.values().values().append_value(1.0); + outer.values().values().append_value(2.0); + outer.values().append(true); + outer.values().values().append_value(3.0); + outer.values().append(true); + outer.append(true); + let arr = outer.finish(); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("a", arr.data_type().clone(), true)), + vec![Arc::new(arr) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + let err = buf.append_arrow(table("t"), &rb).unwrap_err(); + assert_eq!(err.code(), crate::error::ErrorCode::ArrowIngest); + assert!( + format!("{err}").contains("ragged inner-list sizes"), + "unexpected error: {err}" + ); + } + + #[test] + fn large_list_nested_float64_appends_as_array_2d() { + use arrow_array::builder::LargeListBuilder; + let mut outer = LargeListBuilder::new(LargeListBuilder::new(Float64Builder::new())); + for v in [1.0, 2.0] { + outer.values().values().append_value(v); + } + outer.values().append(true); + for v in [3.0, 4.0] { + outer.values().values().append_value(v); + } + outer.values().append(true); + outer.append(true); + for v in [5.0, 6.0, 7.0] { + outer.values().values().append_value(v); + } + outer.values().append(true); + for v in [8.0, 9.0, 10.0] { + outer.values().values().append_value(v); + } + outer.values().append(true); + outer.append(true); + let arr = outer.finish(); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("a", arr.data_type().clone(), true)), + vec![Arc::new(arr) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + 
buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 2); + } + + #[test] + fn float16_appends_as_double() { + use arrow_array::builder::Float16Builder; + use half::f16; + let mut b = Float16Builder::new(); + b.append_value(f16::from_f32(1.5)); + b.append_value(f16::from_f32(-2.5)); + b.append_null(); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("h", DataType::Float16, true)), + vec![Arc::new(b.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn date64_ms_appends_as_date() { + use arrow_array::builder::Date64Builder; + let mut d = Date64Builder::new(); + d.append_value(0); + d.append_value(1_700_000_000_000); + d.append_null(); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("d", DataType::Date64, true)), + vec![Arc::new(d.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn timestamp_second_widens_to_micros() { + use arrow_array::builder::TimestampSecondBuilder; + let mut ts = TimestampSecondBuilder::new(); + ts.append_value(1_700_000_000); + ts.append_value(0); + ts.append_null(); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new( + "ts", + DataType::Timestamp(TimeUnit::Second, None), + true, + )), + vec![Arc::new(ts.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn decimal32_widens_to_decimal64() { + use arrow_array::builder::Decimal32Builder; + let mut b = Decimal32Builder::new(); + b.append_value(12345); + b.append_value(-678); + b.append_null(); + let arr = b.finish().with_precision_and_scale(9, 2).unwrap(); + let schema = arrow_schema_with(Field::new("d", DataType::Decimal32(9, 2), true)); + let rb = RecordBatch::try_new(schema, 
vec![Arc::new(arr) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn decimal32_negative_scale_errors() { + use arrow_array::builder::Decimal32Builder; + let mut b = Decimal32Builder::new(); + b.append_value(1); + let arr = b.finish().with_precision_and_scale(9, -2).unwrap(); + let schema = arrow_schema_with(Field::new("d", DataType::Decimal32(9, -2), true)); + let rb = RecordBatch::try_new(schema, vec![Arc::new(arr) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + let err = buf.append_arrow(table("t"), &rb).unwrap_err(); + assert_eq!(err.code(), crate::error::ErrorCode::ArrowIngest); + } + + #[test] + fn decimal_scale_u8_enforces_per_width_caps() { + assert!(decimal_scale_u8(9, "Decimal32", 9).is_ok()); + let err = decimal_scale_u8(10, "Decimal32", 9).unwrap_err(); + assert_eq!(err.code(), crate::error::ErrorCode::ArrowIngest); + assert!(err.msg().contains("Decimal32")); + assert!(err.msg().contains("scale 10")); + + assert!(decimal_scale_u8(18, "Decimal64", 18).is_ok()); + assert!(decimal_scale_u8(19, "Decimal64", 18).is_err()); + + assert!(decimal_scale_u8(38, "Decimal128", 38).is_ok()); + assert!(decimal_scale_u8(39, "Decimal128", 38).is_err()); + + assert!( + decimal_scale_u8( + QWP_DECIMAL_MAX_SCALE as i8, + "Decimal256", + QWP_DECIMAL_MAX_SCALE + ) + .is_ok() + ); + assert!( + decimal_scale_u8( + (QWP_DECIMAL_MAX_SCALE as i8).saturating_add(1), + "Decimal256", + QWP_DECIMAL_MAX_SCALE, + ) + .is_err() + ); + + let err = decimal_scale_u8(-1, "Decimal64", 18).unwrap_err(); + assert_eq!(err.code(), crate::error::ErrorCode::ArrowIngest); + assert!(err.msg().contains("negative")); + } + + fn assert_unsupported_column(field: Field, arr: ArrayRef) { + let rb = RecordBatch::try_new(arrow_schema_with(field), vec![arr]).unwrap(); + let mut buf = fresh_buffer(); + let err = buf.append_arrow(table("t"), &rb).unwrap_err(); + assert_eq!( + err.code(), + 
crate::error::ErrorCode::ArrowUnsupportedColumnKind, + "expected ArrowUnsupportedColumnKind, got: {err}" + ); + } + + #[test] + fn interval_year_month_rejected_as_unsupported() { + use arrow_array::builder::IntervalYearMonthBuilder; + let mut b = IntervalYearMonthBuilder::new(); + b.append_value(12); + assert_unsupported_column( + Field::new("c", DataType::Interval(IntervalUnit::YearMonth), true), + Arc::new(b.finish()) as ArrayRef, + ); + } + + #[test] + fn interval_day_time_rejected_as_unsupported() { + use arrow_array::builder::IntervalDayTimeBuilder; + use arrow_array::types::IntervalDayTime; + let mut b = IntervalDayTimeBuilder::new(); + b.append_value(IntervalDayTime::new(1, 0)); + assert_unsupported_column( + Field::new("c", DataType::Interval(IntervalUnit::DayTime), true), + Arc::new(b.finish()) as ArrayRef, + ); + } + + #[test] + fn interval_month_day_nano_rejected_as_unsupported() { + use arrow_array::builder::IntervalMonthDayNanoBuilder; + use arrow_array::types::IntervalMonthDayNano; + let mut b = IntervalMonthDayNanoBuilder::new(); + b.append_value(IntervalMonthDayNano::new(1, 1, 1)); + assert_unsupported_column( + Field::new("c", DataType::Interval(IntervalUnit::MonthDayNano), true), + Arc::new(b.finish()) as ArrayRef, + ); + } + + #[test] + fn fixed_size_binary_non_uuid_rejected_as_unsupported() { + let mut b = FixedSizeBinaryBuilder::new(16); + b.append_value([0u8; 16]).unwrap(); + let arr = b.finish(); + assert_unsupported_column( + Field::new("c", DataType::FixedSizeBinary(16), true), + Arc::new(arr) as ArrayRef, + ); + } + + #[test] + fn fixed_size_binary_arbitrary_width_rejected_as_unsupported() { + let mut b = FixedSizeBinaryBuilder::new(8); + b.append_value([0u8; 8]).unwrap(); + assert_unsupported_column( + Field::new("c", DataType::FixedSizeBinary(8), true), + Arc::new(b.finish()) as ArrayRef, + ); + } + + #[test] + fn null_column_rejected_as_unsupported() { + use arrow_array::NullArray; + let arr = NullArray::new(3); + 
assert_unsupported_column( + Field::new("c", DataType::Null, true), + Arc::new(arr) as ArrayRef, + ); + } + + #[test] + fn struct_column_rejected_as_unsupported() { + use arrow_array::StructArray; + let mut inner = Int32Builder::new(); + inner.append_value(1); + let inner_arr = Arc::new(inner.finish()) as ArrayRef; + let inner_field = Arc::new(Field::new("v", DataType::Int32, true)); + let arr = StructArray::from(vec![(inner_field.clone(), inner_arr)]); + assert_unsupported_column( + Field::new("c", DataType::Struct(vec![inner_field].into()), true), + Arc::new(arr) as ArrayRef, + ); + } + + #[test] + fn map_column_rejected_as_unsupported() { + use arrow_array::builder::{MapBuilder, StringBuilder}; + let mut b = MapBuilder::new(None, StringBuilder::new(), Int32Builder::new()); + b.keys().append_value("k"); + b.values().append_value(1); + b.append(true).unwrap(); + let arr = b.finish(); + let dtype = arr.data_type().clone(); + assert_unsupported_column(Field::new("c", dtype, true), Arc::new(arr) as ArrayRef); + } + + #[test] + fn run_end_encoded_column_rejected_as_unsupported() { + use arrow_array::builder::PrimitiveRunBuilder; + use arrow_array::types::{Int32Type, Int64Type}; + let mut b = PrimitiveRunBuilder::::new(); + b.append_value(42); + b.append_value(42); + b.append_value(7); + let arr = b.finish(); + let dtype = arr.data_type().clone(); + assert_unsupported_column(Field::new("c", dtype, true), Arc::new(arr) as ArrayRef); + } + + #[test] + fn referenced_null_dict_entry_rejected_for_symbol() { + use arrow_array::DictionaryArray; + use arrow_array::types::UInt32Type; + let mut vb = StringBuilder::new(); + vb.append_value("a"); + vb.append_null(); + vb.append_value("c"); + let values = vb.finish(); + let keys = arrow_array::UInt32Array::from(vec![0u32, 1, 2]); + let dict = + DictionaryArray::::try_new(keys, Arc::new(values) as ArrayRef).unwrap(); + let field = Field::new( + "sym", + DataType::Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)), + 
true, + ) + .with_metadata( + [(crate::egress::arrow::metadata::SYMBOL.into(), "true".into())] + .into_iter() + .collect(), + ); + let schema = arrow_schema_with(field); + let rb = RecordBatch::try_new(schema, vec![Arc::new(dict) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + let err = buf.append_arrow(table("t"), &rb).unwrap_err(); + assert_eq!(err.code(), ErrorCode::ArrowIngest); + assert!( + err.msg().contains("dictionary values"), + "unexpected error message: {}", + err.msg() + ); + assert_eq!(buf.row_count(), 0, "buffer should roll back to 0 rows"); + } + + #[test] + fn referenced_null_dict_entry_rejected() { + use arrow_array::DictionaryArray; + use arrow_array::types::UInt32Type; + let mut vb = StringBuilder::new(); + vb.append_value("a"); + vb.append_null(); + let values = vb.finish(); + let keys = arrow_array::UInt32Array::from(vec![0u32, 1]); + let dict = + DictionaryArray::::try_new(keys, Arc::new(values) as ArrayRef).unwrap(); + let field = Field::new( + "v", + DataType::Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)), + true, + ); + let schema = arrow_schema_with(field); + let rb = RecordBatch::try_new(schema, vec![Arc::new(dict) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + let err = buf.append_arrow(table("t"), &rb).unwrap_err(); + assert_eq!(err.code(), ErrorCode::ArrowIngest); + assert!(err.msg().contains("dictionary values")); + } + + #[test] + fn unreferenced_null_dict_entry_accepted_for_symbol() { + use arrow_array::DictionaryArray; + use arrow_array::types::UInt32Type; + let mut vb = StringBuilder::new(); + vb.append_value("a"); + vb.append_null(); + vb.append_value("c"); + let values = vb.finish(); + let keys = arrow_array::UInt32Array::from(vec![0u32, 2, 0]); + let dict = + DictionaryArray::::try_new(keys, Arc::new(values) as ArrayRef).unwrap(); + let field = Field::new( + "sym", + DataType::Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)), + true, + ) + .with_metadata( + 
[(crate::egress::arrow::metadata::SYMBOL.into(), "true".into())] + .into_iter() + .collect(), + ); + let schema = arrow_schema_with(field); + let rb = RecordBatch::try_new(schema, vec![Arc::new(dict) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn unreferenced_null_dict_entry_accepted() { + use arrow_array::DictionaryArray; + use arrow_array::types::UInt32Type; + let mut vb = StringBuilder::new(); + vb.append_value("a"); + vb.append_null(); + let values = vb.finish(); + let keys = arrow_array::UInt32Array::from(vec![0u32, 0]); + let dict = + DictionaryArray::::try_new(keys, Arc::new(values) as ArrayRef).unwrap(); + let field = Field::new( + "v", + DataType::Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)), + true, + ); + let schema = arrow_schema_with(field); + let rb = RecordBatch::try_new(schema, vec![Arc::new(dict) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 2); + } + + #[test] + fn timestamp_ms_designated_overflow_rejected() { + let mut ts = TimestampMillisecondBuilder::new(); + ts.append_value(i64::MAX / 1000 + 1); + ts.append_value(0); + let mut v = Int64Builder::new(); + v.append_value(1); + v.append_value(2); + let schema = Arc::new(ArrowSchema::new(vec![ + Field::new( + "ts", + DataType::Timestamp(TimeUnit::Millisecond, None), + false, + ), + Field::new("v", DataType::Int64, false), + ])); + let rb = RecordBatch::try_new( + schema, + vec![ + Arc::new(ts.finish()) as ArrayRef, + Arc::new(v.finish()) as ArrayRef, + ], + ) + .unwrap(); + let mut buf = fresh_buffer(); + let err = buf + .append_arrow_at_column(table("t"), &rb, ColumnName::new("ts").unwrap()) + .unwrap_err(); + assert_eq!(err.code(), ErrorCode::ArrowIngest); + assert!( + err.msg().contains("ms→µs overflow"), + "expected overflow message, got: {}", + err.msg() + ); + 
assert_eq!(buf.row_count(), 0); + } + + #[test] + fn timestamp_second_to_micros_overflow_rejected() { + use arrow_array::builder::TimestampSecondBuilder; + let mut b = TimestampSecondBuilder::new(); + b.append_value(i64::MAX / 1_000_000 + 1); + let schema = arrow_schema_with(Field::new( + "t", + DataType::Timestamp(TimeUnit::Second, None), + true, + )); + let rb = RecordBatch::try_new(schema, vec![Arc::new(b.finish()) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + let err = buf.append_arrow(table("u"), &rb).unwrap_err(); + assert_eq!(err.code(), ErrorCode::ArrowIngest); + assert!( + err.msg().contains("s→µs overflow"), + "expected overflow message, got: {}", + err.msg() + ); + } + + #[test] + fn buffer_clear_after_arrow_allows_row_by_row_reuse() { + let mut buf = fresh_buffer(); + let mut b = Int64Builder::new(); + b.append_value(1); + b.append_value(2); + let schema = arrow_schema_with(Field::new("v", DataType::Int64, false)); + let rb = RecordBatch::try_new(schema, vec![Arc::new(b.finish()) as ArrayRef]).unwrap(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 2); + buf.clear(); + assert_eq!(buf.row_count(), 0); + buf.table(table("t")).unwrap(); + buf.column_i64("v", 99).unwrap(); + buf.at_now().unwrap(); + assert_eq!(buf.row_count(), 1); + } + + #[test] + fn append_arrow_error_rolls_back_columns() { + // Two columns: the second one will fail classification (Map), + // so the first column's bytes must not stick. 
+ use arrow_array::builder::{Int64Builder, MapBuilder, StringBuilder}; + let mut col1 = Int64Builder::new(); + col1.append_value(11); + col1.append_value(22); + let mut map = MapBuilder::new(None, StringBuilder::new(), Int32Builder::new()); + map.keys().append_value("k1"); + map.values().append_value(1); + map.append(true).unwrap(); + map.keys().append_value("k2"); + map.values().append_value(2); + map.append(true).unwrap(); + let map_arr = map.finish(); + let map_dtype = map_arr.data_type().clone(); + let schema = Arc::new(ArrowSchema::new(vec![ + Field::new("good", DataType::Int64, false), + Field::new("bad", map_dtype, true), + ])); + let rb = RecordBatch::try_new( + schema, + vec![ + Arc::new(col1.finish()) as ArrayRef, + Arc::new(map_arr) as ArrayRef, + ], + ) + .unwrap(); + let mut buf = fresh_buffer(); + let err = buf.append_arrow(table("t"), &rb).unwrap_err(); + assert_eq!(err.code(), ErrorCode::ArrowUnsupportedColumnKind); + assert_eq!( + buf.row_count(), + 0, + "rollback should leave buffer with 0 rows" + ); + // A retry on a valid batch must succeed cleanly. 
+ let mut c2 = Int64Builder::new(); + c2.append_value(7); + let schema2 = arrow_schema_with(Field::new("good", DataType::Int64, false)); + let rb2 = RecordBatch::try_new(schema2, vec![Arc::new(c2.finish()) as ArrayRef]).unwrap(); + buf.append_arrow(table("t"), &rb2).unwrap(); + assert_eq!(buf.row_count(), 1); + } + + #[test] + fn error_message_carries_column_name() { + let inner_field = Arc::new(Field::new("x", DataType::Int32, true)); + let mut b = Int32Builder::new(); + b.append_value(1); + let inner_arr = b.finish(); + let struct_arr = arrow_array::StructArray::from(vec![( + inner_field.clone(), + Arc::new(inner_arr) as ArrayRef, + )]); + let schema = arrow_schema_with(Field::new( + "my_struct_col", + DataType::Struct(vec![inner_field].into()), + true, + )); + let rb = RecordBatch::try_new(schema, vec![Arc::new(struct_arr) as ArrayRef]).unwrap(); + let mut buf = fresh_buffer(); + let err = buf.append_arrow(table("t"), &rb).unwrap_err(); + assert!( + err.msg().contains("my_struct_col"), + "column name missing from error: {}", + err.msg() + ); + } + + #[test] + fn multi_batch_arrow_appends_accumulate_rows() { + let mut buf = fresh_buffer(); + let schema = arrow_schema_with(Field::new("v", DataType::Int64, false)); + + let mut b1 = Int64Builder::new(); + b1.append_value(1); + b1.append_value(2); + let rb1 = + RecordBatch::try_new(schema.clone(), vec![Arc::new(b1.finish()) as ArrayRef]).unwrap(); + buf.append_arrow(table("t"), &rb1).unwrap(); + assert_eq!(buf.row_count(), 2); + + let mut b2 = Int64Builder::new(); + b2.append_value(3); + b2.append_value(4); + b2.append_value(5); + let rb2 = RecordBatch::try_new(schema, vec![Arc::new(b2.finish()) as ArrayRef]).unwrap(); + buf.append_arrow(table("t"), &rb2).unwrap(); + assert_eq!(buf.row_count(), 5); + } + + #[test] + fn sliced_int32_array_emits_sliced_window_only() { + let mut b = Int32Builder::new(); + for v in 0..8 { + b.append_value(v); + } + let full = b.finish(); + let sliced = full.slice(2, 4); + 
assert_eq!(sliced.len(), 4); + + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("v", DataType::Int32, false)), + vec![Arc::new(sliced) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 4); + } + + #[test] + fn sliced_utf8_array_emits_sliced_window_only() { + let mut b = arrow_array::builder::StringBuilder::new(); + for s in ["a", "bb", "ccc", "dddd", "eeeee"] { + b.append_value(s); + } + let full = b.finish(); + let sliced = full.slice(1, 3); + assert_eq!(sliced.len(), 3); + + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("s", DataType::Utf8, false)), + vec![Arc::new(sliced) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 3); + } + + #[test] + fn sliced_bool_array_with_offset_emits_sliced_window() { + let mut b = arrow_array::builder::BooleanBuilder::new(); + for v in [true, false, true, false, true, false, true, false, true] { + b.append_value(v); + } + let full = b.finish(); + let sliced = full.slice(3, 5); + assert_eq!(sliced.len(), 5); + + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("flag", DataType::Boolean, false)), + vec![Arc::new(sliced) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 5); + } + + #[test] + fn decimal256_negative_scale_rejected() { + use arrow_array::builder::Decimal256Builder; + use arrow_buffer::i256; + let mut b = Decimal256Builder::new() + .with_precision_and_scale(76, -1) + .unwrap(); + b.append_value(i256::ZERO); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("d", DataType::Decimal256(76, -1), false)), + vec![Arc::new(b.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + let err = buf.append_arrow(table("t"), &rb).unwrap_err(); + assert_eq!(err.code(), 
crate::error::ErrorCode::ArrowIngest); + assert!(err.msg().to_lowercase().contains("negative")); + } + + #[test] + fn geohash_int8_precision_above_8_rejected() { + let mut b = Int8Builder::new(); + b.append_value(0); + let mut md = std::collections::HashMap::new(); + md.insert("questdb.geohash_bits".to_string(), "20".to_string()); + let field = Field::new("g", DataType::Int8, true).with_metadata(md); + let rb = RecordBatch::try_new( + arrow_schema_with(field), + vec![Arc::new(b.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + let err = buf.append_arrow(table("t"), &rb).unwrap_err(); + assert_eq!(err.code(), crate::error::ErrorCode::ArrowIngest); + assert!(err.msg().contains("geohash")); + } + + #[test] + fn varlen_no_user_columns_rejected() { + let mut ts = TimestampMicrosecondBuilder::new(); + ts.append_value(0); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new( + "ts", + DataType::Timestamp(TimeUnit::Microsecond, None), + false, + )), + vec![Arc::new(ts.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + let err = buf + .append_arrow_at_column(table("t"), &rb, ColumnName::new("ts").unwrap()) + .unwrap_err(); + assert_eq!(err.code(), crate::error::ErrorCode::ArrowIngest); + assert!(err.msg().contains("non-timestamp column")); + } + + #[test] + fn single_row_int64_appends_one_row() { + let mut b = Int64Builder::new(); + b.append_value(0); + let rb = RecordBatch::try_new( + arrow_schema_with(Field::new("v", DataType::Int64, false)), + vec![Arc::new(b.finish()) as ArrayRef], + ) + .unwrap(); + let mut buf = fresh_buffer(); + buf.append_arrow(table("t"), &rb).unwrap(); + assert_eq!(buf.row_count(), 1); + } +} diff --git a/questdb-rs/src/ingress/buffer.rs b/questdb-rs/src/ingress/buffer.rs index 16d546c3..6f84facc 100644 --- a/questdb-rs/src/ingress/buffer.rs +++ b/questdb-rs/src/ingress/buffer.rs @@ -43,6 +43,11 @@ pub(crate) use self::qwp::QwpBuffer; pub(crate) use self::qwp::QwpSendScratch; 
#[cfg(all(test, feature = "_sender-qwp-ws"))] pub(crate) use self::qwp::SchemaRegistry; +#[cfg(all(feature = "_sender-qwp-ws", feature = "arrow"))] +pub(crate) use self::qwp::{ + ArrowBatchInfo, ArrowBulkCtx, ArrowDecimalSpec, ColumnKind as QwpColumnKind, + QWP_DECIMAL_MAX_SCALE, +}; #[cfg(feature = "_sender-qwp-ws")] pub(crate) use self::qwp::{QwpWsColumnarBuffer, QwpWsEncodeScratch, SymbolGlobalDict}; @@ -415,21 +420,33 @@ impl Buffer { } #[cfg(any(feature = "_sender-qwp-udp", feature = "_sender-qwp-ws"))] - /// Creates a new QWP/UDP buffer with default parameters. + /// Creates a new row-major QWP buffer with default parameters. + /// Used by the QWP/UDP transport and any QWP path that does not + /// require columnar layout. For the QWP/WebSocket Arrow ingest + /// path see [`Buffer::new_qwp_ws`]. pub fn new_qwp() -> Self { Self::qwp_with_max_name_len(127) } #[cfg(any(feature = "_sender-qwp-udp", feature = "_sender-qwp-ws"))] - /// Creates a new QWP/UDP buffer with a custom maximum name length. + /// Like [`Buffer::new_qwp`] with an explicit maximum name length. pub fn qwp_with_max_name_len(max_name_len: usize) -> Self { Self { inner: BufferInner::Qwp(Box::new(QwpBuffer::new(max_name_len))), } } + /// Creates a new QWP/WebSocket columnar buffer with a 127-byte name + /// length limit. Required by [`Buffer::append_arrow`]; also accepts + /// the row-by-row `table` / `symbol` / `column_*` / `at` API. #[cfg(feature = "_sender-qwp-ws")] - pub(crate) fn qwp_ws_with_max_name_len(max_name_len: usize) -> Self { + pub fn new_qwp_ws() -> Self { + Self::qwp_ws_with_max_name_len(127) + } + + /// Like [`Buffer::new_qwp_ws`] with an explicit maximum name length. 
+ #[cfg(feature = "_sender-qwp-ws")] + pub fn qwp_ws_with_max_name_len(max_name_len: usize) -> Self { Self { inner: BufferInner::QwpWs(Box::new(QwpWsColumnarBuffer::new(max_name_len))), } @@ -465,6 +482,16 @@ impl Buffer { } } + #[cfg(all(feature = "_sender-qwp-ws", feature = "arrow"))] + pub(crate) fn as_qwp_ws_mut(&mut self) -> Option<&mut QwpWsColumnarBuffer> { + match &mut self.inner { + BufferInner::Ilp(_) => None, + #[cfg(any(feature = "_sender-qwp-udp", feature = "_sender-qwp-ws"))] + BufferInner::Qwp(_) => None, + BufferInner::QwpWs(inner) => Some(inner.as_mut()), + } + } + /// Returns the protocol version associated with this buffer. /// /// For ILP buffers this is the ILP protocol version. For QWP/UDP buffers diff --git a/questdb-rs/src/ingress/buffer/qwp.rs b/questdb-rs/src/ingress/buffer/qwp.rs index 7446fa25..fc913a45 100644 --- a/questdb-rs/src/ingress/buffer/qwp.rs +++ b/questdb-rs/src/ingress/buffer/qwp.rs @@ -44,6 +44,8 @@ use std::hash::{BuildHasher, Hash, Hasher}; use super::op_state::{Op, OpState}; use super::{Bookmark, BufferBookmarkMeta, ColumnName, StoredBookmark, TableName}; +#[cfg(feature = "arrow")] +use arrow_buffer::NullBuffer; /// Wire layout of a QWP datagram header. 
/// @@ -109,7 +111,7 @@ pub(crate) const QWP_TYPE_IPV4: u8 = 0x18; const QWP_LONG256_BYTES: usize = 32; pub(crate) const QWP_VERSION_1: u8 = 1; const QWP_INLINE_SCHEMA_ID: u64 = 0; -const QWP_DECIMAL_MAX_SCALE: u8 = 76; +pub(crate) const QWP_DECIMAL_MAX_SCALE: u8 = 76; const QWP_DECIMAL_SCALE_UNSET: u8 = u8::MAX; const QWP_DECIMAL_MAG_LIMBS: usize = 4; const QWP_DECIMAL_MAG_BYTES: usize = QWP_DECIMAL_MAG_LIMBS * 8; @@ -565,7 +567,7 @@ impl DecimalValue { // --- Column kind --- #[derive(Clone, Copy, Debug, PartialEq, Eq)] -enum ColumnKind { +pub(crate) enum ColumnKind { Bool, Symbol, I8, @@ -2423,7 +2425,7 @@ struct QwpWsTableBuffer { in_progress_column_count: usize, column_access_cursor: usize, columns: Vec, - column_lookup: std::collections::HashMap, + column_lookup: std::collections::HashMap, usize>, row_mark: Option, } @@ -2431,8 +2433,9 @@ struct QwpWsTableBuffer { #[derive(Clone, Debug)] struct QwpWsColumnBuffer { name: Vec, - lower_ascii_name: Vec, + lower_name: Vec, packed_lower_ascii_name: u64, + name_is_ascii: bool, kind: ColumnKind, last_written_row: Option, non_null_count: u32, @@ -2523,6 +2526,55 @@ enum QwpWsColumnValues { cells: Vec, data: Vec, }, + #[cfg(feature = "arrow")] + ArrowFixed { + bitmap: Option>, + values: Vec, + row_count: u32, + }, + #[cfg(feature = "arrow")] + ArrowVarLen { + bitmap: Option>, + offsets: Vec, + data: Vec, + row_count: u32, + }, + #[cfg(feature = "arrow")] + ArrowBool { + bitmap: Option>, + packed_bits: Vec, + row_count: u32, + }, + #[cfg(feature = "arrow")] + ArrowSymbol { + bitmap: Option>, + dict: Vec, + dict_lookup: QwpWsLocalSymbolLookup, + dict_data: Vec, + keys: Vec, + row_count: u32, + }, + #[cfg(feature = "arrow")] + ArrowDecimal { + bitmap: Option>, + values: Vec, + decimal_scale: u8, + element_width: u8, + row_count: u32, + }, + #[cfg(feature = "arrow")] + ArrowGeohash { + bitmap: Option>, + values: Vec, + precision_bits: u8, + row_count: u32, + }, + #[cfg(feature = "arrow")] + ArrowArray { + bitmap: Option>, + 
data: Vec, + row_count: u32, + }, } #[cfg(feature = "_sender-qwp-ws")] @@ -2628,6 +2680,8 @@ pub(crate) struct QwpWsColumnarBuffer { bookmark: StoredBookmark, snapshots: Vec, max_name_len: usize, + #[cfg(feature = "arrow")] + arrow_rollback_marks_cache: Vec, } #[cfg(feature = "_sender-qwp-ws")] @@ -2645,6 +2699,8 @@ impl Clone for QwpWsColumnarBuffer { bookmark: self.bookmark, snapshots: self.snapshots.clone(), max_name_len: self.max_name_len, + #[cfg(feature = "arrow")] + arrow_rollback_marks_cache: Vec::new(), } } } @@ -2661,6 +2717,8 @@ impl QwpWsColumnarBuffer { bookmark: StoredBookmark::new(), snapshots: Vec::new(), max_name_len, + #[cfg(feature = "arrow")] + arrow_rollback_marks_cache: Vec::new(), } } @@ -2689,13 +2747,27 @@ impl QwpWsColumnarBuffer { for column in &table.columns { total += qwp_string_byte_len(column.name.len()) + 1; total += column.estimated_payload_len(table.row_count as usize); - if let QwpWsColumnValues::Symbol { dict, data, .. } = &column.values { - symbol_dict_count += dict.len(); - for entry in dict { - let bytes = - &data[entry.offset as usize..(entry.offset + entry.len) as usize]; - symbol_dict_bytes += qwp_string_byte_len(bytes.len()); + match &column.values { + QwpWsColumnValues::Symbol { dict, data, .. } => { + symbol_dict_count += dict.len(); + for entry in dict { + let bytes = + &data[entry.offset as usize..(entry.offset + entry.len) as usize]; + symbol_dict_bytes += qwp_string_byte_len(bytes.len()); + } + } + #[cfg(feature = "arrow")] + QwpWsColumnValues::ArrowSymbol { + dict, dict_data, .. 
+ } => { + symbol_dict_count += dict.len(); + for entry in dict { + let bytes = &dict_data + [entry.offset as usize..(entry.offset + entry.len) as usize]; + symbol_dict_bytes += qwp_string_byte_len(bytes.len()); + } } + _ => {} } } } @@ -2721,8 +2793,7 @@ impl QwpWsColumnarBuffer { cap += table.table_name.capacity(); cap += table.columns.capacity() * std::mem::size_of::(); for column in &table.columns { - cap += - column.name.capacity() + column.lower_ascii_name.capacity() + column.capacity(); + cap += column.name.capacity() + column.lower_name.capacity() + column.capacity(); } } cap @@ -3470,6 +3541,334 @@ impl QwpWsColumnarBuffer { Ok(()) } + #[cfg(feature = "arrow")] + pub(crate) fn arrow_bulk_begin( + &mut self, + table_name: TableName<'_>, + ) -> crate::Result { + self.check_op(Op::Table)?; + let table_bytes = table_name.as_ref().as_bytes(); + self.validate_max_name_len(table_name.as_ref())?; + let tables_len_before = self.tables.len(); + let idx = self.lookup_or_create_table(table_bytes)?; + if self.tables[idx].in_progress { + // Roll back any new entry pushed by `lookup_or_create_table` + // so a failed `arrow_bulk_begin` is byte-identical to no-op. + if self.tables.len() > tables_len_before { + self.tables.truncate(tables_len_before); + self.table_lookup.remove(table_bytes); + } + return Err(error::fmt!( + InvalidApiCall, + "QWP/WS bulk arrow append cannot start while a row is in progress on table '{}'", + table_name.as_ref() + )); + } + self.current_table_idx = Some(idx); + let table = &self.tables[idx]; + let starting_rows = table.row_count; + let table_mark = QwpWsTableRollbackMark { + row_count: table.row_count, + in_progress: table.in_progress, + in_progress_column_count: table.in_progress_column_count, + column_access_cursor: table.column_access_cursor, + columns_len: table.columns.len(), + }; + // Recycle the rollback-marks Vec across `append_arrow` calls. + // Avoids the per-batch heap allocation that scales with column + // count on wide schemas. 
+ let mut pre_column_marks = std::mem::take(&mut self.arrow_rollback_marks_cache); + pre_column_marks.clear(); + pre_column_marks.extend(table.columns.iter().map(|c| c.arrow_snapshot())); + Ok(ArrowBulkCtx { + table_idx: idx, + starting_rows, + table_mark, + pre_column_marks, + tables_len_before, + }) + } + + #[cfg(feature = "arrow")] + pub(crate) fn arrow_bulk_rollback(&mut self, mut ctx: ArrowBulkCtx) { + let table = &mut self.tables[ctx.table_idx]; + let pre_count = ctx.table_mark.columns_len; + if table.columns.len() > pre_count { + table.columns.truncate(pre_count); + } + for (col, mark) in table.columns.iter_mut().zip(ctx.pre_column_marks.drain(..)) { + col.arrow_restore(mark); + } + table.row_count = ctx.table_mark.row_count; + table.in_progress = ctx.table_mark.in_progress; + table.in_progress_column_count = ctx.table_mark.in_progress_column_count; + table.column_access_cursor = ctx.table_mark.column_access_cursor; + table.row_mark = None; + table.rebuild_column_lookup(); + if ctx.table_mark.row_count == 0 && !ctx.table_mark.in_progress { + self.current_table_idx = None; + } + if self.tables.len() > ctx.tables_len_before { + self.tables.truncate(ctx.tables_len_before); + self.rebuild_table_lookup(); + } + self.arrow_rollback_marks_cache = std::mem::take(&mut ctx.pre_column_marks); + } + + /// Reclaim the `pre_column_marks` Vec from a finished bulk-arrow ctx + /// into the per-buffer recycle cache. Call from the success path + /// (after `arrow_bulk_commit`) so the next batch can reuse the + /// allocation. No-op if the ctx has already been consumed by + /// `arrow_bulk_rollback`. 
+ #[cfg(feature = "arrow")] + pub(crate) fn arrow_bulk_finish(&mut self, mut ctx: ArrowBulkCtx) { + ctx.pre_column_marks.clear(); + self.arrow_rollback_marks_cache = std::mem::take(&mut ctx.pre_column_marks); + } + + #[cfg(feature = "arrow")] + pub(crate) fn arrow_bulk_set_fixed( + &mut self, + ctx: &ArrowBulkCtx, + column_name: ColumnName<'_>, + kind: ColumnKind, + info: ArrowBatchInfo<'_>, + write_values: F, + ) -> crate::Result<()> + where + F: FnOnce(&mut Vec) -> crate::Result<()>, + { + let col_bytes = column_name.as_ref().as_bytes(); + self.validate_max_name_len(column_name.as_ref())?; + let col_idx = self.lookup_or_create_arrow_column(ctx, col_bytes, kind)?; + self.tables[ctx.table_idx].columns[col_idx].append_arrow_fixed_batch( + kind, + info, + write_values, + ) + } + + #[cfg(feature = "arrow")] + pub(crate) fn arrow_bulk_set_varlen( + &mut self, + ctx: &ArrowBulkCtx, + column_name: ColumnName<'_>, + kind: ColumnKind, + info: ArrowBatchInfo<'_>, + write: F, + ) -> crate::Result<()> + where + F: FnOnce(&mut Vec, &mut Vec) -> crate::Result<()>, + { + let col_bytes = column_name.as_ref().as_bytes(); + self.validate_max_name_len(column_name.as_ref())?; + let col_idx = self.lookup_or_create_arrow_column(ctx, col_bytes, kind)?; + self.tables[ctx.table_idx].columns[col_idx].append_arrow_varlen_batch(kind, info, write) + } + + #[cfg(feature = "arrow")] + pub(crate) fn arrow_bulk_set_bool( + &mut self, + ctx: &ArrowBulkCtx, + column_name: ColumnName<'_>, + info: ArrowBatchInfo<'_>, + pack: F, + ) -> crate::Result<()> + where + F: FnOnce(&mut Vec, usize) -> crate::Result<()>, + { + let col_bytes = column_name.as_ref().as_bytes(); + self.validate_max_name_len(column_name.as_ref())?; + let col_idx = self.lookup_or_create_arrow_column(ctx, col_bytes, ColumnKind::Bool)?; + self.tables[ctx.table_idx].columns[col_idx].append_arrow_bool_batch(info, pack) + } + + #[cfg(feature = "arrow")] + pub(crate) fn arrow_bulk_set_symbol( + &mut self, + ctx: &ArrowBulkCtx, + 
column_name: ColumnName<'_>, + batch_keys: &[u32], + batch_dict_entries: &[(u32, u32)], + batch_dict_data: &[u8], + info: ArrowBatchInfo<'_>, + ) -> crate::Result<()> { + let col_bytes = column_name.as_ref().as_bytes(); + self.validate_max_name_len(column_name.as_ref())?; + let col_idx = self.lookup_or_create_arrow_column(ctx, col_bytes, ColumnKind::Symbol)?; + self.tables[ctx.table_idx].columns[col_idx].append_arrow_symbol_batch( + batch_keys, + batch_dict_entries, + batch_dict_data, + info, + ) + } + + #[cfg(feature = "arrow")] + pub(crate) fn arrow_bulk_set_decimal( + &mut self, + ctx: &ArrowBulkCtx, + column_name: ColumnName<'_>, + kind: ColumnKind, + spec: ArrowDecimalSpec, + info: ArrowBatchInfo<'_>, + write_values: F, + ) -> crate::Result<()> + where + F: FnOnce(&mut Vec) -> crate::Result<()>, + { + let col_bytes = column_name.as_ref().as_bytes(); + self.validate_max_name_len(column_name.as_ref())?; + let col_idx = self.lookup_or_create_arrow_column(ctx, col_bytes, kind)?; + self.tables[ctx.table_idx].columns[col_idx].append_arrow_decimal_batch( + kind, + spec, + info, + write_values, + ) + } + + #[cfg(feature = "arrow")] + pub(crate) fn arrow_bulk_set_geohash( + &mut self, + ctx: &ArrowBulkCtx, + column_name: ColumnName<'_>, + precision_bits: u8, + info: ArrowBatchInfo<'_>, + write_values: F, + ) -> crate::Result<()> + where + F: FnOnce(&mut Vec) -> crate::Result<()>, + { + let col_bytes = column_name.as_ref().as_bytes(); + self.validate_max_name_len(column_name.as_ref())?; + let col_idx = self.lookup_or_create_arrow_column(ctx, col_bytes, ColumnKind::Geohash)?; + self.tables[ctx.table_idx].columns[col_idx].append_arrow_geohash_batch( + precision_bits, + info, + write_values, + ) + } + + #[cfg(feature = "arrow")] + pub(crate) fn arrow_bulk_set_array( + &mut self, + ctx: &ArrowBulkCtx, + column_name: ColumnName<'_>, + kind: ColumnKind, + info: ArrowBatchInfo<'_>, + write_data: F, + ) -> crate::Result<()> + where + F: FnOnce(&mut Vec) -> crate::Result<()>, + 
{ + let col_bytes = column_name.as_ref().as_bytes(); + self.validate_max_name_len(column_name.as_ref())?; + let col_idx = self.lookup_or_create_arrow_column(ctx, col_bytes, kind)?; + self.tables[ctx.table_idx].columns[col_idx].append_arrow_array_batch(kind, info, write_data) + } + + #[cfg(feature = "arrow")] + pub(crate) fn arrow_bulk_set_designated_ts( + &mut self, + ctx: &ArrowBulkCtx, + kind: ColumnKind, + info: ArrowBatchInfo<'_>, + write_values: F, + ) -> crate::Result<()> + where + F: FnOnce(&mut Vec) -> crate::Result<()>, + { + if !matches!( + kind, + ColumnKind::TimestampMicros | ColumnKind::TimestampNanos + ) { + return Err(error::fmt!( + InvalidApiCall, + "QWP/WS designated timestamp must be TimestampMicros or TimestampNanos, got {:?}", + kind + )); + } + let col_idx = self.lookup_or_create_arrow_column(ctx, b"", kind)?; + self.tables[ctx.table_idx].columns[col_idx].append_arrow_fixed_batch( + kind, + info, + write_values, + ) + } + + #[cfg(feature = "arrow")] + pub(crate) fn arrow_bulk_commit( + &mut self, + ctx: &ArrowBulkCtx, + batch_rows: u32, + ) -> crate::Result<()> { + let table = &mut self.tables[ctx.table_idx]; + let expected_rows = ctx.starting_rows.checked_add(batch_rows).ok_or_else(|| { + error::fmt!( + InvalidApiCall, + "QWP/WS table row count overflow on '{}'", + String::from_utf8_lossy(&table.table_name) + ) + })?; + for column in &table.columns { + let arrow_rows = column.arrow_row_count(); + match arrow_rows { + Some(rows) if rows == expected_rows => {} + Some(rows) => { + return Err(error::fmt!( + InvalidApiCall, + "QWP/WS arrow column '{}' has {} rows after bulk batch but table expects {}", + String::from_utf8_lossy(&column.name), + rows, + expected_rows + )); + } + None => { + return Err(error::fmt!( + InvalidApiCall, + "QWP/WS column '{}' is not in arrow-fed mode; mixed bulk + row-by-row batches are not supported", + String::from_utf8_lossy(&column.name) + )); + } + } + } + table.row_count = expected_rows; + table.in_progress = false; 
+ table.in_progress_column_count = 0; + table.column_access_cursor = 0; + table.row_mark = None; + let added = batch_rows as usize; + self.state.row_count = self + .state + .row_count + .checked_add(added) + .ok_or_else(|| error::fmt!(InvalidApiCall, "QWP/WS buffer row count overflow"))?; + self.state.op_state.finish_row(); + Ok(()) + } + + #[cfg(feature = "arrow")] + fn lookup_or_create_arrow_column( + &mut self, + ctx: &ArrowBulkCtx, + column_name_bytes: &[u8], + kind: ColumnKind, + ) -> crate::Result { + let table = &mut self.tables[ctx.table_idx]; + let idx = match table.lookup_column(column_name_bytes)? { + Some(idx) => { + if table.columns[idx].kind != kind { + return Err(batched_type_change_error_ws(column_name_bytes)); + } + idx + } + None => table.create_column(column_name_bytes, kind)?, + }; + table.column_access_cursor = idx + 1; + Ok(idx) + } + fn rollback_current_row(&mut self) { let Some(table_idx) = self.current_table_idx else { return; @@ -3579,17 +3978,37 @@ impl QwpWsColumnarBuffer { for (col_idx, column) in table.columns.iter().enumerate() { let globals = &mut per_col[col_idx]; globals.clear(); - if let QwpWsColumnValues::Symbol { dict, data, .. } = &column.values { - globals.reserve(dict.len()); - for entry in dict { - let bytes = - &data[entry.offset as usize..(entry.offset + entry.len) as usize]; - let (gid, _) = global_dict.intern(bytes); - highest_referenced_symbol_id = Some( - highest_referenced_symbol_id.map_or(gid, |highest| highest.max(gid)), - ); - globals.push(gid); + match &column.values { + QwpWsColumnValues::Symbol { dict, data, .. 
} => { + globals.reserve(dict.len()); + for entry in dict { + let bytes = + &data[entry.offset as usize..(entry.offset + entry.len) as usize]; + let (gid, _) = global_dict.intern(bytes); + highest_referenced_symbol_id = Some( + highest_referenced_symbol_id + .map_or(gid, |highest| highest.max(gid)), + ); + globals.push(gid); + } + } + #[cfg(feature = "arrow")] + QwpWsColumnValues::ArrowSymbol { + dict, dict_data, .. + } => { + globals.reserve(dict.len()); + for entry in dict { + let bytes = &dict_data + [entry.offset as usize..(entry.offset + entry.len) as usize]; + let (gid, _) = global_dict.intern(bytes); + highest_referenced_symbol_id = Some( + highest_referenced_symbol_id + .map_or(gid, |highest| highest.max(gid)), + ); + globals.push(gid); + } } + _ => {} } } } @@ -3714,9 +4133,12 @@ impl QwpWsTableBuffer { #[inline(always)] fn lookup_column(&mut self, name: &[u8]) -> crate::Result> { - if self.column_access_cursor < self.columns.len() + let name_is_ascii = name.is_ascii(); + if name_is_ascii + && self.column_access_cursor < self.columns.len() + && self.columns[self.column_access_cursor].name_is_ascii && names_equal_lower_ascii( - &self.columns[self.column_access_cursor].lower_ascii_name, + &self.columns[self.column_access_cursor].lower_name, self.columns[self.column_access_cursor].packed_lower_ascii_name, name, ) @@ -3724,11 +4146,22 @@ impl QwpWsTableBuffer { return Ok(Some(self.column_access_cursor)); } + if name_is_ascii { + let mut stack: [u8; 128] = [0; 128]; + if name.len() <= stack.len() { + for (dst, src) in stack[..name.len()].iter_mut().zip(name.iter()) { + *dst = src.to_ascii_lowercase(); + } + if let Some(&idx) = self.column_lookup.get(&stack[..name.len()]) { + return Ok(Some(idx)); + } + return Ok(None); + } + } let lookup_key = column_lookup_key(name)?; - if let Some(&idx) = self.column_lookup.get(&lookup_key) { + if let Some(&idx) = self.column_lookup.get(&lookup_key[..]) { return Ok(Some(idx)); } - Ok(None) } @@ -3754,10 +4187,16 @@ impl 
QwpWsTableBuffer { #[cfg(feature = "_sender-qwp-ws")] impl QwpWsColumnBuffer { fn new(name: &[u8], kind: ColumnKind) -> Self { + let name_is_ascii = name.is_ascii(); Self { name: name.to_vec(), - lower_ascii_name: lowercase_ascii_bytes(name), - packed_lower_ascii_name: packed_lower_ascii_name(name), + lower_name: lowercase_name_bytes(name, name_is_ascii), + packed_lower_ascii_name: if name_is_ascii { + packed_lower_ascii_name(name) + } else { + 0 + }, + name_is_ascii, kind, last_written_row: None, non_null_count: 0, @@ -3816,12 +4255,49 @@ impl QwpWsColumnBuffer { cells.reserve(rows); data.reserve(rows * 16); } + #[cfg(feature = "arrow")] + QwpWsColumnValues::ArrowFixed { values, .. } + | QwpWsColumnValues::ArrowGeohash { values, .. } + | QwpWsColumnValues::ArrowDecimal { values, .. } => values.reserve(rows), + #[cfg(feature = "arrow")] + QwpWsColumnValues::ArrowVarLen { offsets, data, .. } => { + offsets.reserve(rows.saturating_add(1)); + data.reserve(rows.saturating_mul(8)); + } + #[cfg(feature = "arrow")] + QwpWsColumnValues::ArrowBool { packed_bits, .. } => { + packed_bits.reserve(rows.div_ceil(8)); + } + #[cfg(feature = "arrow")] + QwpWsColumnValues::ArrowSymbol { + dict, + dict_lookup, + dict_data, + keys, + .. + } => { + dict.reserve(rows); + dict_lookup.reserve(rows); + dict_data.reserve(rows.saturating_mul(8)); + keys.reserve(rows); + } + #[cfg(feature = "arrow")] + QwpWsColumnValues::ArrowArray { data, .. } => { + data.reserve(rows.saturating_mul(16)); + } } } fn clear_rows(&mut self) { self.last_written_row = None; self.non_null_count = 0; + // After Arrow bulk usage, reset the variant tag so the row-by-row + // setters don't reject the cleared column with type_mismatch_error_ws. 
+ #[cfg(feature = "arrow")] + if self.arrow_row_count().is_some() { + self.values = QwpWsColumnValues::new(self.kind); + return; + } self.values.clear_rows(); } @@ -4235,98 +4711,644 @@ impl QwpWsColumnBuffer { Ok(()) } - fn encode(&self, row_count: usize, globals: &[u64], out: &mut Vec) -> crate::Result<()> { - out.push(u8::from(self.uses_null_bitmap(row_count))); - if self.uses_null_bitmap(row_count) { - self.values.encode_null_bitmap(row_count, out)?; - } - self.values.encode(row_count, globals, out) + #[cfg(feature = "arrow")] + fn precheck_arrow_batch_overflows( + &self, + prior_row_count: u32, + info: &ArrowBatchInfo<'_>, + ) -> crate::Result<(u32, u32)> { + let new_row_count = prior_row_count.checked_add(info.rows).ok_or_else(|| { + error::fmt!( + InvalidApiCall, + "QWP/WS arrow row count overflow on column '{}'", + String::from_utf8_lossy(&self.name) + ) + })?; + let new_non_null = self + .non_null_count + .checked_add(info.non_null) + .ok_or_else(|| { + error::fmt!( + InvalidApiCall, + "QWP/WebSocket non-null value count exceeds maximum of {}", + u32::MAX + ) + })?; + Ok((new_row_count, new_non_null)) } -} -#[cfg(feature = "_sender-qwp-ws")] -impl QwpWsColumnValues { - fn new(kind: ColumnKind) -> Self { - match kind { - ColumnKind::Bool => Self::Bool { cells: Vec::new() }, - ColumnKind::I8 => Self::I8 { cells: Vec::new() }, - ColumnKind::I16 => Self::I16 { cells: Vec::new() }, - ColumnKind::I32 => Self::I32 { cells: Vec::new() }, - ColumnKind::I64 => Self::I64 { cells: Vec::new() }, - ColumnKind::F32 => Self::F32 { cells: Vec::new() }, - ColumnKind::F64 => Self::F64 { cells: Vec::new() }, - ColumnKind::TimestampMicros => Self::TimestampMicros { cells: Vec::new() }, - ColumnKind::TimestampNanos => Self::TimestampNanos { cells: Vec::new() }, - ColumnKind::String => Self::String { - cells: Vec::new(), - data: Vec::new(), - }, - ColumnKind::Symbol => Self::Symbol { - cells: Vec::new(), - dict: Vec::new(), - lookup: QwpWsLocalSymbolLookup::default(), - data: 
Vec::new(), - }, - ColumnKind::Decimal => Self::Decimal { - cells: Vec::new(), - decimal_scale: QWP_DECIMAL_SCALE_UNSET, - }, - ColumnKind::Decimal64 => Self::Decimal64 { - cells: Vec::new(), - decimal_scale: QWP_DECIMAL_SCALE_UNSET, - }, - ColumnKind::Decimal128 => Self::Decimal128 { - cells: Vec::new(), - decimal_scale: QWP_DECIMAL_SCALE_UNSET, - }, - ColumnKind::DoubleArray => Self::DoubleArray { - cells: Vec::new(), - data: Vec::new(), - }, - ColumnKind::Uuid => Self::Uuid { cells: Vec::new() }, - ColumnKind::Long256 => Self::Long256 { - cells: Vec::new(), - data: Vec::new(), - }, - ColumnKind::Ipv4 => Self::Ipv4 { cells: Vec::new() }, - ColumnKind::Date => Self::Date { cells: Vec::new() }, - ColumnKind::Char => Self::Char { cells: Vec::new() }, - ColumnKind::Binary => Self::Binary { - cells: Vec::new(), - data: Vec::new(), - }, - ColumnKind::Geohash => Self::Geohash { - cells: Vec::new(), - precision_bits: 0, - }, - ColumnKind::LongArray => Self::LongArray { - cells: Vec::new(), - data: Vec::new(), - }, + #[cfg(feature = "arrow")] + fn is_fresh(&self) -> bool { + self.last_written_row.is_none() && self.non_null_count == 0 + } + + #[cfg(feature = "arrow")] + fn arrow_row_count(&self) -> Option { + match &self.values { + QwpWsColumnValues::ArrowFixed { row_count, .. } + | QwpWsColumnValues::ArrowVarLen { row_count, .. } + | QwpWsColumnValues::ArrowBool { row_count, .. } + | QwpWsColumnValues::ArrowSymbol { row_count, .. } + | QwpWsColumnValues::ArrowDecimal { row_count, .. } + | QwpWsColumnValues::ArrowGeohash { row_count, .. } + | QwpWsColumnValues::ArrowArray { row_count, .. 
} => Some(*row_count), + _ => None, } } - fn clear_rows(&mut self) { - match self { - Self::Bool { cells } => cells.clear(), - Self::I8 { cells } => cells.clear(), - Self::I16 { cells } => cells.clear(), - Self::I32 { cells } => cells.clear(), - Self::I64 { cells } => cells.clear(), - Self::F32 { cells } => cells.clear(), - Self::F64 { cells } => cells.clear(), - Self::TimestampMicros { cells } => cells.clear(), - Self::TimestampNanos { cells } => cells.clear(), - Self::String { cells, data } - | Self::DoubleArray { cells, data } - | Self::Long256 { cells, data } - | Self::Binary { cells, data } - | Self::LongArray { cells, data } => { - cells.clear(); - data.clear(); - } - Self::Uuid { cells } => cells.clear(), - Self::Ipv4 { cells } => cells.clear(), + #[cfg(feature = "arrow")] + fn append_arrow_fixed_batch( + &mut self, + kind: ColumnKind, + info: ArrowBatchInfo<'_>, + write_values: F, + ) -> crate::Result<()> + where + F: FnOnce(&mut Vec) -> crate::Result<()>, + { + if self.kind != kind { + return Err(type_mismatch_error_ws(&self.name)); + } + let element_width = fixed_element_width(kind).ok_or_else(|| { + error::fmt!( + InvalidApiCall, + "QWP/WS arrow-fixed not valid for {:?} on column '{}'", + kind, + String::from_utf8_lossy(&self.name) + ) + })?; + let expected_rows = if kind_supports_sparse_nulls(kind) { + info.non_null as usize + } else { + info.rows as usize + }; + let expected_bytes = expected_rows.saturating_mul(element_width); + if !matches!(self.values, QwpWsColumnValues::ArrowFixed { .. }) { + if !self.is_fresh() { + return Err(arrow_bulk_mixing_error(&self.name)); + } + self.values = QwpWsColumnValues::ArrowFixed { + bitmap: None, + values: Vec::new(), + row_count: 0, + }; + } + let prior_rows = match &self.values { + QwpWsColumnValues::ArrowFixed { row_count, .. 
} => *row_count, + _ => unreachable!(), + }; + let (new_row_count, new_non_null) = + self.precheck_arrow_batch_overflows(prior_rows, &info)?; + let QwpWsColumnValues::ArrowFixed { + bitmap, + values, + row_count, + } = &mut self.values + else { + unreachable!() + }; + let prior_len = values.len(); + if let Err(e) = write_values(values) { + values.truncate(prior_len); + return Err(e); + } + let written = values.len() - prior_len; + if written != expected_bytes { + values.truncate(prior_len); + return Err(error::fmt!( + InvalidApiCall, + "QWP/WS arrow-fixed expects {} bytes ({} rows × {}), got {}", + expected_bytes, + expected_rows, + element_width, + written + )); + } + extend_qwp_bitmap(bitmap, prior_rows as usize, info.bitmap, info.rows as usize); + *row_count = new_row_count; + self.non_null_count = new_non_null; + Ok(()) + } + + #[cfg(feature = "arrow")] + fn append_arrow_varlen_batch( + &mut self, + kind: ColumnKind, + info: ArrowBatchInfo<'_>, + write: F, + ) -> crate::Result<()> + where + F: FnOnce(&mut Vec, &mut Vec) -> crate::Result<()>, + { + if self.kind != kind { + return Err(type_mismatch_error_ws(&self.name)); + } + if !matches!(self.values, QwpWsColumnValues::ArrowVarLen { .. }) { + if !self.is_fresh() { + return Err(arrow_bulk_mixing_error(&self.name)); + } + self.values = QwpWsColumnValues::ArrowVarLen { + bitmap: None, + offsets: vec![0u32], + data: Vec::new(), + row_count: 0, + }; + } + let prior_rows = match &self.values { + QwpWsColumnValues::ArrowVarLen { row_count, .. 
} => *row_count, + _ => unreachable!(), + }; + let (new_row_count, new_non_null) = + self.precheck_arrow_batch_overflows(prior_rows, &info)?; + let QwpWsColumnValues::ArrowVarLen { + bitmap, + offsets, + data, + row_count, + } = &mut self.values + else { + unreachable!() + }; + let prior_offsets_len = offsets.len(); + let prior_data_len = data.len(); + if let Err(e) = write(offsets, data) { + offsets.truncate(prior_offsets_len); + data.truncate(prior_data_len); + return Err(e); + } + let pushed = offsets.len() - prior_offsets_len; + if pushed != info.non_null as usize { + offsets.truncate(prior_offsets_len); + data.truncate(prior_data_len); + return Err(error::fmt!( + InvalidApiCall, + "QWP/WS arrow-varlen expects {} offsets pushed for {} non-null rows, got {}", + info.non_null, + info.non_null, + pushed + )); + } + extend_qwp_bitmap(bitmap, prior_rows as usize, info.bitmap, info.rows as usize); + *row_count = new_row_count; + self.non_null_count = new_non_null; + Ok(()) + } + + #[cfg(feature = "arrow")] + fn append_arrow_bool_batch(&mut self, info: ArrowBatchInfo<'_>, pack: F) -> crate::Result<()> + where + F: FnOnce(&mut Vec, usize) -> crate::Result<()>, + { + if self.kind != ColumnKind::Bool { + return Err(type_mismatch_error_ws(&self.name)); + } + if !matches!(self.values, QwpWsColumnValues::ArrowBool { .. }) { + if !self.is_fresh() { + return Err(arrow_bulk_mixing_error(&self.name)); + } + self.values = QwpWsColumnValues::ArrowBool { + bitmap: None, + packed_bits: Vec::new(), + row_count: 0, + }; + } + let prior_rows = match &self.values { + QwpWsColumnValues::ArrowBool { row_count, .. 
} => *row_count, + _ => unreachable!(), + }; + let (new_row_count, new_non_null) = + self.precheck_arrow_batch_overflows(prior_rows, &info)?; + let QwpWsColumnValues::ArrowBool { + bitmap, + packed_bits, + row_count, + } = &mut self.values + else { + unreachable!() + }; + pack(packed_bits, prior_rows as usize)?; + extend_qwp_bitmap(bitmap, prior_rows as usize, info.bitmap, info.rows as usize); + *row_count = new_row_count; + self.non_null_count = new_non_null; + Ok(()) + } + + #[cfg(feature = "arrow")] + fn append_arrow_symbol_batch( + &mut self, + batch_keys: &[u32], + batch_dict_entries: &[(u32, u32)], + batch_dict_data: &[u8], + info: ArrowBatchInfo<'_>, + ) -> crate::Result<()> { + if self.kind != ColumnKind::Symbol { + return Err(type_mismatch_error_ws(&self.name)); + } + if batch_keys.len() != info.rows as usize { + return Err(error::fmt!( + InvalidApiCall, + "QWP/WS arrow-symbol expects {} keys, got {}", + info.rows, + batch_keys.len() + )); + } + if !matches!(self.values, QwpWsColumnValues::ArrowSymbol { .. }) { + if !self.is_fresh() { + return Err(arrow_bulk_mixing_error(&self.name)); + } + self.values = QwpWsColumnValues::ArrowSymbol { + bitmap: None, + dict: Vec::new(), + dict_lookup: QwpWsLocalSymbolLookup::default(), + dict_data: Vec::new(), + keys: Vec::new(), + row_count: 0, + }; + } + let prior_rows = match &self.values { + QwpWsColumnValues::ArrowSymbol { row_count, .. 
} => *row_count, + _ => unreachable!(), + }; + let (new_row_count, new_non_null) = + self.precheck_arrow_batch_overflows(prior_rows, &info)?; + let QwpWsColumnValues::ArrowSymbol { + bitmap, + dict, + dict_lookup, + dict_data, + keys, + row_count, + } = &mut self.values + else { + unreachable!() + }; + let mut batch_to_local: Vec = Vec::with_capacity(batch_dict_entries.len()); + for &(off, len) in batch_dict_entries { + let bytes = &batch_dict_data[off as usize..(off + len) as usize]; + let hash = qwp_ws_symbol_hash(bytes); + let local_id = if let Some(existing) = dict_lookup.get(hash, bytes, dict, dict_data) { + existing + } else { + let id = checked_qwp_push_index(dict.len(), "QWP/WS symbol dictionary length")?; + let data_offset = + QwpBuffer::checked_arena_offset(dict_data.len(), bytes.len(), "QWP/WS symbol")?; + let qwp_len = checked_qwp_u32(bytes.len(), "QWP/WS symbol length")?; + dict_data.extend_from_slice(bytes); + dict.push(QwpWsSymbolEntry { + offset: data_offset, + len: qwp_len, + }); + dict_lookup.insert(hash, id); + id + }; + batch_to_local.push(local_id); + } + keys.reserve(info.rows as usize); + for (row_idx, &batch_key) in batch_keys.iter().enumerate() { + let is_null = info.bitmap.is_some_and(|nb| nb.is_null(row_idx)); + if is_null { + keys.push(0); + continue; + } + let mapped = batch_to_local + .get(batch_key as usize) + .copied() + .ok_or_else(|| { + error::fmt!( + InvalidApiCall, + "QWP/WS arrow-symbol key {} out of range (dict size {})", + batch_key, + batch_to_local.len() + ) + })?; + keys.push(mapped); + } + extend_qwp_bitmap(bitmap, prior_rows as usize, info.bitmap, info.rows as usize); + *row_count = new_row_count; + self.non_null_count = new_non_null; + Ok(()) + } + + #[cfg(feature = "arrow")] + fn append_arrow_decimal_batch( + &mut self, + kind: ColumnKind, + spec: ArrowDecimalSpec, + info: ArrowBatchInfo<'_>, + write_values: F, + ) -> crate::Result<()> + where + F: FnOnce(&mut Vec) -> crate::Result<()>, + { + if self.kind != kind { + 
return Err(type_mismatch_error_ws(&self.name)); + } + if !matches!( + kind, + ColumnKind::Decimal | ColumnKind::Decimal64 | ColumnKind::Decimal128 + ) { + return Err(error::fmt!( + InvalidApiCall, + "QWP/WS arrow-decimal only valid for Decimal / Decimal64 / Decimal128, got {:?}", + kind + )); + } + let expected_bytes = (info.non_null as usize).saturating_mul(spec.element_width as usize); + if !matches!(self.values, QwpWsColumnValues::ArrowDecimal { .. }) { + if !self.is_fresh() { + return Err(arrow_bulk_mixing_error(&self.name)); + } + self.values = QwpWsColumnValues::ArrowDecimal { + bitmap: None, + values: Vec::new(), + decimal_scale: spec.scale, + element_width: spec.element_width, + row_count: 0, + }; + } + let prior_rows = match &self.values { + QwpWsColumnValues::ArrowDecimal { row_count, .. } => *row_count, + _ => unreachable!(), + }; + let (new_row_count, new_non_null) = + self.precheck_arrow_batch_overflows(prior_rows, &info)?; + let QwpWsColumnValues::ArrowDecimal { + bitmap, + values, + decimal_scale, + element_width: stored_width, + row_count, + } = &mut self.values + else { + unreachable!() + }; + if *stored_width != spec.element_width { + return Err(error::fmt!( + InvalidApiCall, + "QWP/WS arrow-decimal element width mismatch on '{}': existing={}, batch={}", + String::from_utf8_lossy(&self.name), + stored_width, + spec.element_width + )); + } + if info.non_null > 0 + && *decimal_scale != QWP_DECIMAL_SCALE_UNSET + && *decimal_scale != spec.scale + { + return Err(error::fmt!( + InvalidApiCall, + "QWP/WS arrow-decimal scale changed on '{}': existing={}, batch={}", + String::from_utf8_lossy(&self.name), + decimal_scale, + spec.scale + )); + } + let prior_len = values.len(); + if let Err(e) = write_values(values) { + values.truncate(prior_len); + return Err(e); + } + let written = values.len() - prior_len; + if written != expected_bytes { + values.truncate(prior_len); + return Err(error::fmt!( + InvalidApiCall, + "QWP/WS arrow-decimal expects {} value 
bytes for {} non-null rows of width {}, got {}", + expected_bytes, + info.non_null, + spec.element_width, + written + )); + } + if info.non_null > 0 { + *decimal_scale = spec.scale; + } + extend_qwp_bitmap(bitmap, prior_rows as usize, info.bitmap, info.rows as usize); + *row_count = new_row_count; + self.non_null_count = new_non_null; + Ok(()) + } + + #[cfg(feature = "arrow")] + fn append_arrow_geohash_batch( + &mut self, + precision_bits: u8, + info: ArrowBatchInfo<'_>, + write_values: F, + ) -> crate::Result<()> + where + F: FnOnce(&mut Vec) -> crate::Result<()>, + { + if self.kind != ColumnKind::Geohash { + return Err(type_mismatch_error_ws(&self.name)); + } + let element_width = geohash_bytes_per_value(precision_bits); + let expected_bytes = (info.non_null as usize).saturating_mul(element_width); + if !matches!(self.values, QwpWsColumnValues::ArrowGeohash { .. }) { + if !self.is_fresh() { + return Err(arrow_bulk_mixing_error(&self.name)); + } + self.values = QwpWsColumnValues::ArrowGeohash { + bitmap: None, + values: Vec::new(), + precision_bits, + row_count: 0, + }; + } + let prior_rows = match &self.values { + QwpWsColumnValues::ArrowGeohash { row_count, .. 
} => *row_count, + _ => unreachable!(), + }; + let (new_row_count, new_non_null) = + self.precheck_arrow_batch_overflows(prior_rows, &info)?; + let QwpWsColumnValues::ArrowGeohash { + bitmap, + values, + precision_bits: stored_precision, + row_count, + } = &mut self.values + else { + unreachable!() + }; + if *stored_precision != precision_bits { + return Err(error::fmt!( + InvalidApiCall, + "QWP/WS arrow-geohash precision mismatch on '{}': existing={}, batch={}", + String::from_utf8_lossy(&self.name), + stored_precision, + precision_bits + )); + } + let prior_len = values.len(); + if let Err(e) = write_values(values) { + values.truncate(prior_len); + return Err(e); + } + let written = values.len() - prior_len; + if written != expected_bytes { + values.truncate(prior_len); + return Err(error::fmt!( + InvalidApiCall, + "QWP/WS arrow-geohash expects {} value bytes for {} non-null rows of width {}, got {}", + expected_bytes, + info.non_null, + element_width, + written + )); + } + extend_qwp_bitmap(bitmap, prior_rows as usize, info.bitmap, info.rows as usize); + *row_count = new_row_count; + self.non_null_count = new_non_null; + Ok(()) + } + + #[cfg(feature = "arrow")] + fn append_arrow_array_batch( + &mut self, + kind: ColumnKind, + info: ArrowBatchInfo<'_>, + write_data: F, + ) -> crate::Result<()> + where + F: FnOnce(&mut Vec) -> crate::Result<()>, + { + if self.kind != kind { + return Err(type_mismatch_error_ws(&self.name)); + } + if !matches!(kind, ColumnKind::DoubleArray | ColumnKind::LongArray) { + return Err(error::fmt!( + InvalidApiCall, + "QWP/WS arrow-array only valid for DoubleArray / LongArray, got {:?}", + kind + )); + } + if !matches!(self.values, QwpWsColumnValues::ArrowArray { .. 
}) { + if !self.is_fresh() { + return Err(arrow_bulk_mixing_error(&self.name)); + } + self.values = QwpWsColumnValues::ArrowArray { + bitmap: None, + data: Vec::new(), + row_count: 0, + }; + } + let prior_rows = match &self.values { + QwpWsColumnValues::ArrowArray { row_count, .. } => *row_count, + _ => unreachable!(), + }; + let (new_row_count, new_non_null) = + self.precheck_arrow_batch_overflows(prior_rows, &info)?; + let QwpWsColumnValues::ArrowArray { + bitmap, + data, + row_count, + } = &mut self.values + else { + unreachable!() + }; + let prior_len = data.len(); + if let Err(e) = write_data(data) { + data.truncate(prior_len); + return Err(e); + } + extend_qwp_bitmap(bitmap, prior_rows as usize, info.bitmap, info.rows as usize); + *row_count = new_row_count; + self.non_null_count = new_non_null; + Ok(()) + } + + fn encode(&self, row_count: usize, globals: &[u64], out: &mut Vec) -> crate::Result<()> { + out.push(u8::from(self.uses_null_bitmap(row_count))); + if self.uses_null_bitmap(row_count) { + self.values.encode_null_bitmap(row_count, out)?; + } + self.values.encode(row_count, globals, out) + } +} + +#[cfg(feature = "_sender-qwp-ws")] +impl QwpWsColumnValues { + fn new(kind: ColumnKind) -> Self { + match kind { + ColumnKind::Bool => Self::Bool { cells: Vec::new() }, + ColumnKind::I8 => Self::I8 { cells: Vec::new() }, + ColumnKind::I16 => Self::I16 { cells: Vec::new() }, + ColumnKind::I32 => Self::I32 { cells: Vec::new() }, + ColumnKind::I64 => Self::I64 { cells: Vec::new() }, + ColumnKind::F32 => Self::F32 { cells: Vec::new() }, + ColumnKind::F64 => Self::F64 { cells: Vec::new() }, + ColumnKind::TimestampMicros => Self::TimestampMicros { cells: Vec::new() }, + ColumnKind::TimestampNanos => Self::TimestampNanos { cells: Vec::new() }, + ColumnKind::String => Self::String { + cells: Vec::new(), + data: Vec::new(), + }, + ColumnKind::Symbol => Self::Symbol { + cells: Vec::new(), + dict: Vec::new(), + lookup: QwpWsLocalSymbolLookup::default(), + data: 
Vec::new(), + }, + ColumnKind::Decimal => Self::Decimal { + cells: Vec::new(), + decimal_scale: QWP_DECIMAL_SCALE_UNSET, + }, + ColumnKind::Decimal64 => Self::Decimal64 { + cells: Vec::new(), + decimal_scale: QWP_DECIMAL_SCALE_UNSET, + }, + ColumnKind::Decimal128 => Self::Decimal128 { + cells: Vec::new(), + decimal_scale: QWP_DECIMAL_SCALE_UNSET, + }, + ColumnKind::DoubleArray => Self::DoubleArray { + cells: Vec::new(), + data: Vec::new(), + }, + ColumnKind::Uuid => Self::Uuid { cells: Vec::new() }, + ColumnKind::Long256 => Self::Long256 { + cells: Vec::new(), + data: Vec::new(), + }, + ColumnKind::Ipv4 => Self::Ipv4 { cells: Vec::new() }, + ColumnKind::Date => Self::Date { cells: Vec::new() }, + ColumnKind::Char => Self::Char { cells: Vec::new() }, + ColumnKind::Binary => Self::Binary { + cells: Vec::new(), + data: Vec::new(), + }, + ColumnKind::Geohash => Self::Geohash { + cells: Vec::new(), + precision_bits: 0, + }, + ColumnKind::LongArray => Self::LongArray { + cells: Vec::new(), + data: Vec::new(), + }, + } + } + + fn clear_rows(&mut self) { + match self { + Self::Bool { cells } => cells.clear(), + Self::I8 { cells } => cells.clear(), + Self::I16 { cells } => cells.clear(), + Self::I32 { cells } => cells.clear(), + Self::I64 { cells } => cells.clear(), + Self::F32 { cells } => cells.clear(), + Self::F64 { cells } => cells.clear(), + Self::TimestampMicros { cells } => cells.clear(), + Self::TimestampNanos { cells } => cells.clear(), + Self::String { cells, data } + | Self::DoubleArray { cells, data } + | Self::Long256 { cells, data } + | Self::Binary { cells, data } + | Self::LongArray { cells, data } => { + cells.clear(); + data.clear(); + } + Self::Uuid { cells } => cells.clear(), + Self::Ipv4 { cells } => cells.clear(), Self::Date { cells } => cells.clear(), Self::Char { cells } => cells.clear(), Self::Geohash { cells, .. } => cells.clear(), @@ -4346,6 +5368,76 @@ impl QwpWsColumnValues { | Self::Decimal128 { cells, .. 
} => { cells.clear(); } + #[cfg(feature = "arrow")] + Self::ArrowFixed { + bitmap, + values, + row_count, + } + | Self::ArrowGeohash { + bitmap, + values, + row_count, + .. + } + | Self::ArrowDecimal { + bitmap, + values, + row_count, + .. + } => { + bitmap.take(); + values.clear(); + *row_count = 0; + } + #[cfg(feature = "arrow")] + Self::ArrowVarLen { + bitmap, + offsets, + data, + row_count, + } => { + bitmap.take(); + offsets.clear(); + data.clear(); + *row_count = 0; + } + #[cfg(feature = "arrow")] + Self::ArrowBool { + bitmap, + packed_bits, + row_count, + } => { + bitmap.take(); + packed_bits.clear(); + *row_count = 0; + } + #[cfg(feature = "arrow")] + Self::ArrowSymbol { + bitmap, + dict, + dict_lookup, + dict_data, + keys, + row_count, + } => { + bitmap.take(); + dict.clear(); + dict_lookup.clear(); + dict_data.clear(); + keys.clear(); + *row_count = 0; + } + #[cfg(feature = "arrow")] + Self::ArrowArray { + bitmap, + data, + row_count, + } => { + bitmap.take(); + data.clear(); + *row_count = 0; + } } } @@ -4390,6 +5482,46 @@ impl QwpWsColumnValues { | Self::Decimal128 { cells, .. } => { cells.capacity() * std::mem::size_of::() } + #[cfg(feature = "arrow")] + Self::ArrowFixed { bitmap, values, .. } + | Self::ArrowGeohash { bitmap, values, .. } + | Self::ArrowDecimal { bitmap, values, .. } => { + bitmap.as_ref().map(|b| b.capacity()).unwrap_or(0) + values.capacity() + } + #[cfg(feature = "arrow")] + Self::ArrowVarLen { + bitmap, + offsets, + data, + .. + } => { + bitmap.as_ref().map(|b| b.capacity()).unwrap_or(0) + + offsets.capacity() * std::mem::size_of::() + + data.capacity() + } + #[cfg(feature = "arrow")] + Self::ArrowBool { + bitmap, + packed_bits, + .. + } => bitmap.as_ref().map(|b| b.capacity()).unwrap_or(0) + packed_bits.capacity(), + #[cfg(feature = "arrow")] + Self::ArrowSymbol { + bitmap, + dict, + dict_data, + keys, + .. 
+ } => { + bitmap.as_ref().map(|b| b.capacity()).unwrap_or(0) + + dict.capacity() * std::mem::size_of::() + + dict_data.capacity() + + keys.capacity() * std::mem::size_of::() + } + #[cfg(feature = "arrow")] + Self::ArrowArray { bitmap, data, .. } => { + bitmap.as_ref().map(|b| b.capacity()).unwrap_or(0) + data.capacity() + } } } @@ -4483,6 +5615,14 @@ impl QwpWsColumnValues { false } } + #[cfg(feature = "arrow")] + Self::ArrowFixed { .. } + | Self::ArrowVarLen { .. } + | Self::ArrowBool { .. } + | Self::ArrowSymbol { .. } + | Self::ArrowDecimal { .. } + | Self::ArrowGeohash { .. } + | Self::ArrowArray { .. } => false, } } @@ -4539,10 +5679,29 @@ impl QwpWsColumnValues { .saturating_mul(geohash_bytes_per_value(*precision_bits)) } Self::LongArray { data, .. } => data.len(), + #[cfg(feature = "arrow")] + Self::ArrowFixed { values, .. } => values.len(), + #[cfg(feature = "arrow")] + Self::ArrowDecimal { values, .. } => 1 + values.len(), + #[cfg(feature = "arrow")] + Self::ArrowGeohash { values, .. } => 1 + values.len(), + #[cfg(feature = "arrow")] + Self::ArrowVarLen { offsets, data, .. } => offsets.len().saturating_mul(4) + data.len(), + #[cfg(feature = "arrow")] + Self::ArrowBool { packed_bits, .. } => packed_bits.len(), + #[cfg(feature = "arrow")] + Self::ArrowSymbol { keys, .. } => keys.iter().map(|&k| qwp_varint_size(k as u64)).sum(), + #[cfg(feature = "arrow")] + Self::ArrowArray { data, .. } => data.len(), } } fn encode_null_bitmap(&self, row_count: usize, out: &mut Vec) -> crate::Result<()> { + #[cfg(feature = "arrow")] + if let Some(prebuilt) = self.prebuilt_qwp_bitmap(row_count)? { + out.extend_from_slice(prebuilt); + return Ok(()); + } let mut packed = 0u8; let mut bit_idx = 0u8; let mut cursor = self.first_row_cursor(); @@ -4574,6 +5733,43 @@ impl QwpWsColumnValues { Ok(()) } + #[cfg(feature = "arrow")] + fn prebuilt_qwp_bitmap(&self, row_count: usize) -> crate::Result> { + let (bitmap, arrow_rows) = match self { + Self::ArrowFixed { + bitmap, row_count, .. 
+ } + | Self::ArrowVarLen { + bitmap, row_count, .. + } + | Self::ArrowBool { + bitmap, row_count, .. + } + | Self::ArrowSymbol { + bitmap, row_count, .. + } + | Self::ArrowDecimal { + bitmap, row_count, .. + } + | Self::ArrowGeohash { + bitmap, row_count, .. + } + | Self::ArrowArray { + bitmap, row_count, .. + } => (bitmap.as_deref(), *row_count as usize), + _ => return Ok(None), + }; + if arrow_rows != row_count { + return Err(error::fmt!( + InvalidApiCall, + "QWP/WS arrow column row mismatch: arrow holds {} rows, table has {}", + arrow_rows, + row_count + )); + } + Ok(bitmap) + } + fn encode(&self, row_count: usize, globals: &[u64], out: &mut Vec) -> crate::Result<()> { match self { Self::Bool { cells } => { @@ -4885,6 +6081,102 @@ impl QwpWsColumnValues { } Ok(()) } + #[cfg(feature = "arrow")] + Self::ArrowFixed { + values, + row_count: arrow_rows, + .. + } => { + ensure_arrow_row_count(*arrow_rows, row_count)?; + out.extend_from_slice(values); + Ok(()) + } + #[cfg(feature = "arrow")] + Self::ArrowVarLen { + offsets, + data, + row_count: arrow_rows, + .. + } => { + ensure_arrow_row_count(*arrow_rows, row_count)?; + for offset in offsets { + out.extend_from_slice(&offset.to_le_bytes()); + } + out.extend_from_slice(data); + Ok(()) + } + #[cfg(feature = "arrow")] + Self::ArrowBool { + packed_bits, + row_count: arrow_rows, + .. + } => { + ensure_arrow_row_count(*arrow_rows, row_count)?; + out.extend_from_slice(packed_bits); + Ok(()) + } + #[cfg(feature = "arrow")] + Self::ArrowSymbol { + bitmap, + keys, + row_count: arrow_rows, + .. 
+ } => { + ensure_arrow_row_count(*arrow_rows, row_count)?; + for (row_idx, &local_id) in keys.iter().enumerate() { + if let Some(bm) = bitmap.as_deref() + && (bm[row_idx / 8] >> (row_idx % 8)) & 1 == 1 + { + continue; + } + let gid = globals + .get(local_id as usize) + .copied() + .ok_or_else(|| { + error::fmt!( + InvalidApiCall, + "internal QWP/WS encoder error: missing global symbol id for column-local index {}", + local_id + ) + })?; + write_qwp_varint(out, gid); + } + Ok(()) + } + #[cfg(feature = "arrow")] + Self::ArrowDecimal { + values, + decimal_scale, + row_count: arrow_rows, + .. + } => { + ensure_arrow_row_count(*arrow_rows, row_count)?; + out.push(*decimal_scale); + out.extend_from_slice(values); + Ok(()) + } + #[cfg(feature = "arrow")] + Self::ArrowGeohash { + values, + precision_bits, + row_count: arrow_rows, + .. + } => { + ensure_arrow_row_count(*arrow_rows, row_count)?; + write_qwp_varint(out, *precision_bits as u64); + out.extend_from_slice(values); + Ok(()) + } + #[cfg(feature = "arrow")] + Self::ArrowArray { + data, + row_count: arrow_rows, + .. + } => { + ensure_arrow_row_count(*arrow_rows, row_count)?; + out.extend_from_slice(data); + Ok(()) + } } } @@ -4918,6 +6210,14 @@ impl QwpWsColumnValues { Self::Binary { cells, .. } => cells.get(cursor).map(|cell| cell.row_idx), Self::Geohash { cells, .. } => cells.get(cursor).map(|cell| cell.row_idx), Self::LongArray { cells, .. } => cells.get(cursor).map(|cell| cell.row_idx), + #[cfg(feature = "arrow")] + Self::ArrowFixed { .. } + | Self::ArrowVarLen { .. } + | Self::ArrowBool { .. } + | Self::ArrowSymbol { .. } + | Self::ArrowDecimal { .. } + | Self::ArrowGeohash { .. } + | Self::ArrowArray { .. 
} => None, } } @@ -4934,8 +6234,14 @@ impl QwpWsColumnValues { } #[cfg(feature = "_sender-qwp-ws")] -fn lowercase_ascii_bytes(name: &[u8]) -> Vec { - name.iter().map(|byte| byte.to_ascii_lowercase()).collect() +fn lowercase_name_bytes(name: &[u8], is_ascii: bool) -> Vec { + if is_ascii { + return name.iter().map(|b| b.to_ascii_lowercase()).collect(); + } + match std::str::from_utf8(name) { + Ok(s) => s.to_lowercase().into_bytes(), + Err(_) => name.iter().map(|b| b.to_ascii_lowercase()).collect(), + } } #[cfg(feature = "_sender-qwp-ws")] @@ -4992,15 +6298,8 @@ fn names_equal_lower_ascii(left_lower: &[u8], packed_left_lower: u64, right: &[u } #[cfg(feature = "_sender-qwp-ws")] -fn column_lookup_key(name: &[u8]) -> crate::Result { - let name = std::str::from_utf8(name).map_err(|err| { - error::fmt!( - InvalidApiCall, - "internal QWP/WS column name is not UTF-8: {}", - err - ) - })?; - Ok(name.to_lowercase()) +fn column_lookup_key(name: &[u8]) -> crate::Result> { + Ok(lowercase_name_bytes(name, name.is_ascii()).into_boxed_slice()) } #[cfg(feature = "_sender-qwp-ws")] @@ -5020,6 +6319,478 @@ fn batched_type_change_error_ws(entry_name: &[u8]) -> crate::Error { } #[cfg(feature = "_sender-qwp-ws")] +#[cfg(feature = "arrow")] +#[derive(Debug)] +pub(crate) struct ArrowBulkCtx { + table_idx: usize, + starting_rows: u32, + table_mark: QwpWsTableRollbackMark, + pre_column_marks: Vec, + tables_len_before: usize, +} + +#[cfg(feature = "_sender-qwp-ws")] +#[cfg(feature = "arrow")] +#[derive(Clone, Debug)] +enum ArrowColRollbackMark { + NonArrow { + last_written_row: Option, + non_null_count: u32, + }, + ArrowFixed { + bitmap_len: Option, + values_len: usize, + row_count: u32, + non_null_count: u32, + }, + ArrowVarLen { + bitmap_len: Option, + offsets_len: usize, + data_len: usize, + row_count: u32, + non_null_count: u32, + }, + ArrowBool { + bitmap_len: Option, + packed_bits_len: usize, + row_count: u32, + non_null_count: u32, + }, + ArrowSymbol { + bitmap_len: Option, + dict_len: 
usize, + dict_data_len: usize, + keys_len: usize, + row_count: u32, + non_null_count: u32, + }, + ArrowDecimal { + bitmap_len: Option, + values_len: usize, + row_count: u32, + non_null_count: u32, + }, + ArrowGeohash { + bitmap_len: Option, + values_len: usize, + row_count: u32, + non_null_count: u32, + }, + ArrowArray { + bitmap_len: Option, + data_len: usize, + row_count: u32, + non_null_count: u32, + }, +} + +#[cfg(feature = "arrow")] +impl QwpWsColumnBuffer { + fn arrow_snapshot(&self) -> ArrowColRollbackMark { + let bitmap_to_len = |b: &Option>| b.as_ref().map(|v| v.len()); + let non_null_count = self.non_null_count; + match &self.values { + QwpWsColumnValues::ArrowFixed { + bitmap, + values, + row_count, + } => ArrowColRollbackMark::ArrowFixed { + bitmap_len: bitmap_to_len(bitmap), + values_len: values.len(), + row_count: *row_count, + non_null_count, + }, + QwpWsColumnValues::ArrowVarLen { + bitmap, + offsets, + data, + row_count, + } => ArrowColRollbackMark::ArrowVarLen { + bitmap_len: bitmap_to_len(bitmap), + offsets_len: offsets.len(), + data_len: data.len(), + row_count: *row_count, + non_null_count, + }, + QwpWsColumnValues::ArrowBool { + bitmap, + packed_bits, + row_count, + } => ArrowColRollbackMark::ArrowBool { + bitmap_len: bitmap_to_len(bitmap), + packed_bits_len: packed_bits.len(), + row_count: *row_count, + non_null_count, + }, + QwpWsColumnValues::ArrowSymbol { + bitmap, + dict, + dict_data, + keys, + row_count, + .. + } => ArrowColRollbackMark::ArrowSymbol { + bitmap_len: bitmap_to_len(bitmap), + dict_len: dict.len(), + dict_data_len: dict_data.len(), + keys_len: keys.len(), + row_count: *row_count, + non_null_count, + }, + QwpWsColumnValues::ArrowDecimal { + bitmap, + values, + row_count, + .. + } => ArrowColRollbackMark::ArrowDecimal { + bitmap_len: bitmap_to_len(bitmap), + values_len: values.len(), + row_count: *row_count, + non_null_count, + }, + QwpWsColumnValues::ArrowGeohash { + bitmap, + values, + row_count, + .. 
+ } => ArrowColRollbackMark::ArrowGeohash { + bitmap_len: bitmap_to_len(bitmap), + values_len: values.len(), + row_count: *row_count, + non_null_count, + }, + QwpWsColumnValues::ArrowArray { + bitmap, + data, + row_count, + } => ArrowColRollbackMark::ArrowArray { + bitmap_len: bitmap_to_len(bitmap), + data_len: data.len(), + row_count: *row_count, + non_null_count, + }, + _ => ArrowColRollbackMark::NonArrow { + last_written_row: self.last_written_row, + non_null_count, + }, + } + } + + fn arrow_restore(&mut self, mark: ArrowColRollbackMark) { + let restore_bitmap = |bitmap: &mut Option>, target: Option| match target { + None => { + *bitmap = None; + } + Some(len) => { + debug_assert!( + bitmap.is_some(), + "arrow_restore: bitmap was Some({}) at snapshot but is None now \ + — invariant violated by a mid-batch reset", + len + ); + if let Some(b) = bitmap.as_mut() { + b.truncate(len); + } + } + }; + match (&mut self.values, mark) { + ( + QwpWsColumnValues::ArrowFixed { + bitmap, + values, + row_count, + }, + ArrowColRollbackMark::ArrowFixed { + bitmap_len, + values_len, + row_count: rc, + non_null_count: nn, + }, + ) => { + restore_bitmap(bitmap, bitmap_len); + values.truncate(values_len); + *row_count = rc; + self.non_null_count = nn; + } + ( + QwpWsColumnValues::ArrowVarLen { + bitmap, + offsets, + data, + row_count, + }, + ArrowColRollbackMark::ArrowVarLen { + bitmap_len, + offsets_len, + data_len, + row_count: rc, + non_null_count: nn, + }, + ) => { + restore_bitmap(bitmap, bitmap_len); + offsets.truncate(offsets_len); + data.truncate(data_len); + *row_count = rc; + self.non_null_count = nn; + } + ( + QwpWsColumnValues::ArrowBool { + bitmap, + packed_bits, + row_count, + }, + ArrowColRollbackMark::ArrowBool { + bitmap_len, + packed_bits_len, + row_count: rc, + non_null_count: nn, + }, + ) => { + restore_bitmap(bitmap, bitmap_len); + packed_bits.truncate(packed_bits_len); + *row_count = rc; + self.non_null_count = nn; + } + ( + QwpWsColumnValues::ArrowSymbol { + 
bitmap, + dict, + dict_lookup, + dict_data, + keys, + row_count, + }, + ArrowColRollbackMark::ArrowSymbol { + bitmap_len, + dict_len, + dict_data_len, + keys_len, + row_count: rc, + non_null_count: nn, + }, + ) => { + restore_bitmap(bitmap, bitmap_len); + dict.truncate(dict_len); + dict_data.truncate(dict_data_len); + keys.truncate(keys_len); + dict_lookup.retain_local_ids_below(dict_len); + *row_count = rc; + self.non_null_count = nn; + } + ( + QwpWsColumnValues::ArrowDecimal { + bitmap, + values, + row_count, + .. + }, + ArrowColRollbackMark::ArrowDecimal { + bitmap_len, + values_len, + row_count: rc, + non_null_count: nn, + }, + ) => { + restore_bitmap(bitmap, bitmap_len); + values.truncate(values_len); + *row_count = rc; + self.non_null_count = nn; + } + ( + QwpWsColumnValues::ArrowGeohash { + bitmap, + values, + row_count, + .. + }, + ArrowColRollbackMark::ArrowGeohash { + bitmap_len, + values_len, + row_count: rc, + non_null_count: nn, + }, + ) => { + restore_bitmap(bitmap, bitmap_len); + values.truncate(values_len); + *row_count = rc; + self.non_null_count = nn; + } + ( + QwpWsColumnValues::ArrowArray { + bitmap, + data, + row_count, + }, + ArrowColRollbackMark::ArrowArray { + bitmap_len, + data_len, + row_count: rc, + non_null_count: nn, + }, + ) => { + restore_bitmap(bitmap, bitmap_len); + data.truncate(data_len); + *row_count = rc; + self.non_null_count = nn; + } + ( + _, + ArrowColRollbackMark::NonArrow { + last_written_row, + non_null_count, + }, + ) => { + self.last_written_row = last_written_row; + self.non_null_count = non_null_count; + if self.arrow_row_count().is_some() { + self.values = QwpWsColumnValues::new(self.kind); + } + } + _ => { + self.values.clear_rows(); + self.non_null_count = 0; + } + } + } +} + +#[cfg(feature = "arrow")] +#[derive(Clone, Copy, Debug)] +pub(crate) struct ArrowBatchInfo<'a> { + pub bitmap: Option<&'a NullBuffer>, + pub rows: u32, + pub non_null: u32, +} + +#[cfg(feature = "arrow")] +#[derive(Clone, Copy, Debug)] 
+/// Per-batch decimal parameters handed to `append_arrow_decimal_batch`,
+/// which checks them against the scale and element width already stored
+/// on the column.
+pub(crate) struct ArrowDecimalSpec {
+    /// Decimal scale of the incoming batch.
+    pub scale: u8,
+    /// Width in bytes of one decimal value; multiplied by the batch's
+    /// non-null row count to get the number of value bytes expected.
+    pub element_width: u8,
+}
+
+/// Element width for the fixed-width column kinds matched below
+/// (1, 2, 4, 8, 16 or 32); `None` for every other kind.
+// NOTE(review): the return type reads as a bare `Option` — its type
+// argument looks lost in extraction; confirm against the original file.
+#[cfg(feature = "arrow")]
+fn fixed_element_width(kind: ColumnKind) -> Option {
+    Some(match kind {
+        ColumnKind::I8 => 1,
+        ColumnKind::I16 | ColumnKind::Char => 2,
+        ColumnKind::I32 | ColumnKind::F32 | ColumnKind::Ipv4 => 4,
+        ColumnKind::I64
+        | ColumnKind::F64
+        | ColumnKind::TimestampMicros
+        | ColumnKind::TimestampNanos
+        | ColumnKind::Date => 8,
+        ColumnKind::Uuid => 16,
+        ColumnKind::Long256 => 32,
+        // Any kind not listed above has no fixed width here.
+        _ => return None,
+    })
+}
+
+/// Check that an Arrow-backed column holds exactly `expected` rows.
+///
+/// Returns `Ok(())` on a match and an `InvalidApiCall` error naming both
+/// counts otherwise.
+#[cfg(feature = "arrow")]
+fn ensure_arrow_row_count(arrow_rows: u32, expected: usize) -> crate::Result<()> {
+    if arrow_rows as usize != expected {
+        return Err(error::fmt!(
+            InvalidApiCall,
+            "QWP/WS arrow column row mismatch: arrow={} table={}",
+            arrow_rows,
+            expected
+        ));
+    }
+    Ok(())
+}
+
+/// Build the `InvalidApiCall` error used when a bulk Arrow append targets a
+/// column that already has row-by-row writes. The column name is rendered
+/// with lossy UTF-8 decoding.
+#[cfg(feature = "arrow")]
+fn arrow_bulk_mixing_error(column_name: &[u8]) -> crate::Error {
+    error::fmt!(
+        InvalidApiCall,
+        "column '{}' has row-by-row writes; cannot switch to bulk arrow write within the same batch",
+        String::from_utf8_lossy(column_name)
+    )
+}
+
+// Arrow validity is valid=1; QWP wants null=1. OR-with-NOT inverts; the
+// trailing-byte mask prevents setting nulls past `incoming_rows`.
+#[cfg(feature = "arrow")]
+/// Append the null information of `incoming_rows` new rows to `existing`,
+/// the column's QWP null bitmap that already covers `existing_rows` rows.
+///
+/// The QWP bitmap stores one bit per row, least-significant bit first
+/// within each byte, with a set bit meaning "null".
+///
+/// * If both `existing` and `incoming` are `None`, nothing is allocated
+///   and `existing` stays `None`.
+/// * Otherwise the bitmap is materialised (zero-filled for the prior rows
+///   when it did not exist yet) and grown to cover all rows.
+/// * New rows are marked null only where `incoming` reports a null; a
+///   missing or null-free `incoming` leaves their bits at zero.
+fn extend_qwp_bitmap(
+    existing: &mut Option<Vec<u8>>,
+    existing_rows: usize,
+    incoming: Option<&NullBuffer>,
+    incoming_rows: usize,
+) {
+    let total_rows = existing_rows + incoming_rows;
+    if existing.is_none() && incoming.is_none() {
+        return;
+    }
+    let total_bytes = total_rows.div_ceil(8);
+    let mut bm = existing
+        .take()
+        .unwrap_or_else(|| vec![0u8; existing_rows.div_ceil(8)]);
+    if bm.len() < total_bytes {
+        bm.resize(total_bytes, 0);
+    }
+    if let Some(nulls) = incoming
+        && nulls.null_count() > 0
+    {
+        let arrow_offset_bits = nulls.offset();
+        let src_off_byte = arrow_offset_bits / 8;
+        let shift = arrow_offset_bits % 8;
+        if shift == 0 && existing_rows.is_multiple_of(8) {
+            // Byte-aligned source AND byte-aligned destination: straight
+            // bitwise NOT into place.
+            let src = nulls.validity();
+            let dst_off = existing_rows / 8;
+            let full_bytes = incoming_rows / 8;
+            for i in 0..full_bytes {
+                bm[dst_off + i] |= !src[src_off_byte + i];
+            }
+            let trailing = incoming_rows % 8;
+            if trailing != 0 {
+                let mask = (1u8 << trailing) - 1;
+                bm[dst_off + full_bytes] |= (!src[src_off_byte + full_bytes]) & mask;
+            }
+        } else if existing_rows.is_multiple_of(8) {
+            // Bit-misaligned source (Polars slice at non-byte boundary),
+            // byte-aligned destination: shift-and-OR pass. Each destination
+            // byte combines the high (8 - shift) bits of one source byte
+            // with the low `shift` bits of the next, then is bitwise-NOTted.
+            let src = nulls.validity();
+            let dst_off = existing_rows / 8;
+            let full_bytes = incoming_rows / 8;
+            let inv_shift = 8 - shift;
+            for i in 0..full_bytes {
+                let lo = src[src_off_byte + i] >> shift;
+                let hi = src[src_off_byte + i + 1] << inv_shift;
+                bm[dst_off + i] |= !(lo | hi);
+            }
+            let trailing = incoming_rows % 8;
+            if trailing != 0 {
+                let mask = (1u8 << trailing) - 1;
+                // The last byte may need one or two source bytes depending on
+                // whether the trailing window crosses a source byte boundary.
+                let lo = src[src_off_byte + full_bytes] >> shift;
+                let needs_next = shift + trailing > 8;
+                let merged = if needs_next {
+                    lo | (src[src_off_byte + full_bytes + 1] << inv_shift)
+                } else {
+                    lo
+                };
+                bm[dst_off + full_bytes] |= (!merged) & mask;
+            }
+        } else {
+            // Non-byte-aligned destination — rare (would require a prior
+            // batch with a non-multiple-of-8 row count). Stay on the
+            // per-row loop.
+            for i in 0..incoming_rows {
+                if nulls.is_null(i) {
+                    let target = existing_rows + i;
+                    bm[target / 8] |= 1 << (target % 8);
+                }
+            }
+        }
+    }
+    *existing = Some(bm);
+}
+
+#[cfg(feature = "_sender-qwp-ws")]
 fn type_mismatch_error_ws(entry_name: &[u8]) -> crate::Error {
     batched_type_change_error_ws(entry_name)
 }
diff --git a/questdb-rs/src/ingress/polars.rs b/questdb-rs/src/ingress/polars.rs
new file mode 100644
index 00000000..6b31408a
--- /dev/null
+++ b/questdb-rs/src/ingress/polars.rs
@@ -0,0 +1,557 @@
+//! Polars sub-feature: convert a [`DataFrame`] into Arrow
+//! [`RecordBatch`]es for consumption by [`Buffer::append_arrow`].
+//!
+//! [`dataframe_to_batches`] is the primary entry point. It returns an
+//! iterator that yields slices of at most `max_rows` rows each. Each
+//! emitted slice is taken from a single polars chunk per column. The
+//! conversion cost depends on the dtype:
+//!
+//! * **Primitive, String, Binary, Decimal at the newest compat level**:
+//!   the per-chunk Arrow C Data Interface handoff is a pure refcount
+//!   bump and the per-batch slice is zero-copy.
+//! * **`Column::Scalar` columns**: materialised once by polars (cached
+//!   in the column's `OnceLock`); subsequent batches slice that cache
+//!   zero-copy. Sending a scalar as columnar data requires the value to
+//!   exist in memory N times — there is no zero-copy alternative.
+//! * **Polars *logical* dtypes that arrow-rs lacks natively** (Datetime,
+//!   Date, Time, Duration, Categorical, Enum): incur a `cast_default`
+//!   per chunk per emitted batch. The converted Arrow chunk is cached
+//!   
only for the lifetime of the current chunk within the iterator +//! (not across `dataframe_to_batches` calls or across chunk +//! boundaries within one call), so a multi-chunk DataFrame with +//! timestamp/categorical columns re-pays the cast each time the +//! iterator crosses a chunk boundary. Acceptable for typical batch +//! sizes (10 K rows ≈ µs of cast vs ms of wire send) but worth +//! knowing if you slice into many small batches. +//! +//! # Per-chunk dtype stability +//! +//! `Categorical` (and other dictionary-backed) columns may emit +//! different Arrow value dtypes across chunks (e.g. `Utf8` vs +//! `LargeUtf8`) depending on per-chunk statistics. The iterator pins +//! the first chunk's dtype as the wire schema and rejects subsequent +//! chunks whose dtype differs with [`ErrorCode::ArrowIngest`]. To +//! avoid this, rechunk via `DataFrame::rechunk()` before calling +//! `dataframe_to_batches`, or cast Categorical columns to plain +//! `String` upstream. +//! +//! [`ErrorCode::ArrowIngest`]: crate::ErrorCode::ArrowIngest +//! +//! Flushing is the caller's responsibility: +//! +//! ```ignore +//! for rb in questdb::ingress::polars::dataframe_to_batches(&df, None) { +//! let rb = rb?; +//! buf.append_arrow(table, &rb)?; +//! sender.flush(&mut buf)?; +//! } +//! ``` +//! +//! [`Buffer::append_arrow`]: crate::ingress::Buffer::append_arrow + +use std::num::NonZeroUsize; +use std::sync::Arc; + +use arrow_array::{ArrayRef, RecordBatch}; +use arrow_schema::{Field, Schema as ArrowSchema}; +use polars::frame::DataFrame; +use polars::prelude::{Column, CompatLevel, Series}; + +use crate::{Result, fmt}; + +/// Suggested default chunk size for [`dataframe_to_batches`]. +pub const DEFAULT_MAX_BATCH_ROWS: usize = 10_000; + +// Both crates are `#[repr(C)]` impls of the same Arrow C Data Interface +// struct; size/align pinned by the spec, field order verified by the +// `dataframe_round_trip_*` tests. Re-validate on `polars-arrow` bumps. 
+// NOTE(review): the type arguments in these asserts and in the transmutes
+// below were reconstructed from the conversion signatures; confirm the
+// operand order against the original file (it does not affect the result).
+const _: () = assert!(
+    std::mem::size_of::<polars_arrow::ffi::ArrowArray>()
+        == std::mem::size_of::<arrow::ffi::FFI_ArrowArray>(),
+);
+const _: () = assert!(
+    std::mem::size_of::<polars_arrow::ffi::ArrowSchema>()
+        == std::mem::size_of::<arrow::ffi::FFI_ArrowSchema>(),
+);
+const _: () = assert!(
+    std::mem::align_of::<polars_arrow::ffi::ArrowArray>()
+        == std::mem::align_of::<arrow::ffi::FFI_ArrowArray>(),
+);
+const _: () = assert!(
+    std::mem::align_of::<polars_arrow::ffi::ArrowSchema>()
+        == std::mem::align_of::<arrow::ffi::FFI_ArrowSchema>(),
+);
+
+/// Reinterpret a polars-arrow C Data Interface array as the arrow-rs one.
+///
+/// SAFETY: layout-identical `#[repr(C)]` Arrow C Data Interface structs;
+/// release-callback ownership transfers — caller must not reuse input.
+#[inline]
+unsafe fn pa_array_into_rs(pa: polars_arrow::ffi::ArrowArray) -> arrow::ffi::FFI_ArrowArray {
+    unsafe { std::mem::transmute::<polars_arrow::ffi::ArrowArray, arrow::ffi::FFI_ArrowArray>(pa) }
+}
+
+/// Reinterpret a polars-arrow C Data Interface schema as the arrow-rs one.
+///
+/// SAFETY: see [`pa_array_into_rs`].
+#[inline]
+unsafe fn pa_schema_into_rs(pa: polars_arrow::ffi::ArrowSchema) -> arrow::ffi::FFI_ArrowSchema {
+    unsafe {
+        std::mem::transmute::<polars_arrow::ffi::ArrowSchema, arrow::ffi::FFI_ArrowSchema>(pa)
+    }
+}
+
+/// Reinterpret an arrow-rs C Data Interface array as the polars-arrow one.
+///
+/// SAFETY: see [`pa_array_into_rs`].
+#[inline]
+pub(crate) unsafe fn rs_array_into_pa(
+    rs: arrow::ffi::FFI_ArrowArray,
+) -> polars_arrow::ffi::ArrowArray {
+    unsafe { std::mem::transmute::<arrow::ffi::FFI_ArrowArray, polars_arrow::ffi::ArrowArray>(rs) }
+}
+
+/// Reinterpret an arrow-rs C Data Interface schema as the polars-arrow one.
+///
+/// SAFETY: see [`pa_array_into_rs`].
+#[inline]
+pub(crate) unsafe fn rs_schema_into_pa(
+    rs: arrow::ffi::FFI_ArrowSchema,
+) -> polars_arrow::ffi::ArrowSchema {
+    unsafe {
+        std::mem::transmute::<arrow::ffi::FFI_ArrowSchema, polars_arrow::ffi::ArrowSchema>(rs)
+    }
+}
+
+/// Yield [`RecordBatch`] slices of `df`, each capped at `max_rows`
+/// rows. `None` uses [`DEFAULT_MAX_BATCH_ROWS`]. Every emitted slice
+/// is taken from a single polars chunk per column, so slicing itself
+/// is zero-copy; per-dtype conversion costs (scalar materialisation,
+/// casts for logical dtypes) are listed in the module docs. Conversion
+/// errors surface through the iterator's `Item` rather than the
+/// constructor. 
+pub fn dataframe_to_batches( + df: &DataFrame, + max_rows: Option, +) -> DataFrameBatches<'_> { + let max_rows = max_rows.map_or(DEFAULT_MAX_BATCH_ROWS, NonZeroUsize::get); + let compat = CompatLevel::newest(); + let cursors: Vec> = df + .columns() + .iter() + .map(|c| ColumnCursor::new(c, compat)) + .collect(); + DataFrameBatches { + max_rows, + compat, + total_rows: df.height(), + rows_emitted: 0, + cursors, + schema: None, + poisoned: false, + } +} + +/// Iterator returned by [`dataframe_to_batches`]. One-shot error +/// contract: a `Some(Err(_))` poisons the iterator; subsequent +/// `next()` returns `None`. +pub struct DataFrameBatches<'a> { + max_rows: usize, + compat: CompatLevel, + total_rows: usize, + rows_emitted: usize, + cursors: Vec>, + schema: Option>, + poisoned: bool, +} + +struct ColumnCursor<'a> { + name: String, + series: &'a Series, + pa_field: polars_arrow::datatypes::Field, + chunk_lengths: Vec, + chunk_idx: usize, + offset_in_chunk: usize, + current: Option>, +} + +impl<'a> ColumnCursor<'a> { + fn new(column: &'a Column, compat: CompatLevel) -> Self { + let series = column.as_materialized_series(); + let pa_field = polars_arrow::datatypes::Field::new( + series.name().clone(), + series.dtype().to_arrow(compat), + true, + ); + Self { + name: column.name().as_str().to_string(), + series, + pa_field, + chunk_lengths: series.chunk_lengths().collect(), + chunk_idx: 0, + offset_in_chunk: 0, + current: None, + } + } + + fn skip_empty_chunks(&mut self) { + while self.chunk_idx < self.chunk_lengths.len() && self.chunk_lengths[self.chunk_idx] == 0 { + self.chunk_idx += 1; + self.offset_in_chunk = 0; + self.current = None; + } + } + + fn remaining_in_chunk(&self) -> usize { + if self.chunk_idx >= self.chunk_lengths.len() { + return 0; + } + self.chunk_lengths[self.chunk_idx] - self.offset_in_chunk + } + + fn current_chunk(&mut self, compat: CompatLevel) -> &dyn polars_arrow::array::Array { + let chunk_idx = self.chunk_idx; + let series = self.series; + 
let boxed = self + .current + .get_or_insert_with(|| series.to_arrow(chunk_idx, compat)); + &**boxed + } + + fn advance(&mut self, n: usize) { + self.offset_in_chunk += n; + if self.offset_in_chunk >= self.chunk_lengths[self.chunk_idx] { + self.chunk_idx += 1; + self.offset_in_chunk = 0; + self.current = None; + } + } +} + +impl Iterator for DataFrameBatches<'_> { + type Item = Result; + + fn next(&mut self) -> Option { + if self.poisoned || self.cursors.is_empty() || self.rows_emitted >= self.total_rows { + return None; + } + for cursor in &mut self.cursors { + cursor.skip_empty_chunks(); + } + let mut seg_len = self.max_rows; + for cursor in &self.cursors { + seg_len = seg_len.min(cursor.remaining_in_chunk()); + } + if seg_len == 0 { + return None; + } + let compat = self.compat; + let need_schema = self.schema.is_none(); + let mut fields: Vec = if need_schema { + Vec::with_capacity(self.cursors.len()) + } else { + Vec::new() + }; + let mut arrays: Vec = Vec::with_capacity(self.cursors.len()); + for cursor in &mut self.cursors { + let offset = cursor.offset_in_chunk; + let sliced = cursor.current_chunk(compat).sliced(offset, seg_len); + let array_data = match ffi_polars_to_arrow_rs(&cursor.pa_field, sliced, &cursor.name) { + Ok(d) => d, + Err(e) => { + self.poisoned = true; + return Some(Err(e)); + } + }; + if need_schema { + fields.push(Field::new( + cursor.name.clone(), + array_data.data_type().clone(), + true, + )); + } + arrays.push(arrow_array::make_array(array_data)); + } + let schema = match &self.schema { + Some(s) => s.clone(), + None => { + let s = Arc::new(ArrowSchema::new(fields)); + self.schema = Some(s.clone()); + s + } + }; + let rb = match RecordBatch::try_new(schema, arrays) { + Ok(rb) => rb, + Err(e) => { + self.poisoned = true; + return Some(Err(fmt!(ArrowIngest, "RecordBatch::try_new failed: {}", e))); + } + }; + for cursor in &mut self.cursors { + cursor.advance(seg_len); + } + self.rows_emitted += seg_len; + Some(Ok(rb)) + } +} + +fn 
ffi_polars_to_arrow_rs( + pa_field: &polars_arrow::datatypes::Field, + pa_array_box: Box, + col_name: &str, +) -> Result { + let pa_schema = polars_arrow::ffi::export_field_to_c(pa_field); + let pa_array = polars_arrow::ffi::export_array_to_c(pa_array_box); + let rs_schema = unsafe { pa_schema_into_rs(pa_schema) }; + let rs_array = unsafe { pa_array_into_rs(pa_array) }; + unsafe { arrow::ffi::from_ffi(rs_array, &rs_schema) } + .map_err(|e| fmt!(ArrowIngest, "from_ffi('{}'): {}", col_name, e)) +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::Int64Array; + use arrow_array::cast::AsArray; + use arrow_array::types::Int64Type; + use polars::prelude::{IntoColumn, NamedFrom, PlSmallStr, Series}; + + const TWO: NonZeroUsize = NonZeroUsize::new(2).unwrap(); + const HUNDRED: NonZeroUsize = NonZeroUsize::new(100).unwrap(); + const THOUSAND: NonZeroUsize = NonZeroUsize::new(1000).unwrap(); + + fn make_df() -> DataFrame { + let i = Series::new(PlSmallStr::from("i"), &[1i64, 2, 3]).into_column(); + let f = Series::new(PlSmallStr::from("f"), &[1.5f64, 2.5, 3.5]).into_column(); + let s = Series::new(PlSmallStr::from("s"), &["a", "b", "c"]).into_column(); + DataFrame::new(3, vec![i, f, s]).unwrap() + } + + fn collect_ok(it: DataFrameBatches<'_>) -> Vec { + it.map(|rb| rb.expect("conversion failed")).collect() + } + + fn one_batch(df: &DataFrame) -> RecordBatch { + let mut batches = collect_ok(dataframe_to_batches(df, None)); + assert_eq!(batches.len(), 1); + batches.pop().unwrap() + } + + #[test] + fn dataframe_to_batches_preserves_columns_and_height() { + let df = make_df(); + let rb = one_batch(&df); + assert_eq!(rb.num_columns(), 3); + assert_eq!(rb.num_rows(), 3); + assert_eq!(rb.schema().field(0).name(), "i"); + assert_eq!(rb.schema().field(1).name(), "f"); + assert_eq!(rb.schema().field(2).name(), "s"); + } + + #[test] + fn dataframe_round_trip_int_values_match() { + let df = make_df(); + let rb = one_batch(&df); + let back = 
crate::egress::arrow::polars::record_batch_to_dataframe(rb).unwrap(); + let series = back.columns()[0].as_materialized_series(); + let i64s = series.i64().unwrap(); + assert_eq!(i64s.get(0), Some(1)); + assert_eq!(i64s.get(1), Some(2)); + assert_eq!(i64s.get(2), Some(3)); + } + + #[test] + fn dataframe_round_trip_string_values_match() { + let df = make_df(); + let rb = one_batch(&df); + let back = crate::egress::arrow::polars::record_batch_to_dataframe(rb).unwrap(); + let series = back.columns()[2].as_materialized_series(); + let s = series.str().unwrap(); + assert_eq!(s.get(0), Some("a")); + assert_eq!(s.get(1), Some("b")); + assert_eq!(s.get(2), Some("c")); + } + + #[test] + fn dataframe_to_batches_yields_capped_slices() { + let df = make_df(); + let batches = collect_ok(dataframe_to_batches(&df, Some(TWO))); + assert_eq!(batches.len(), 2); + assert_eq!(batches[0].num_rows(), 2); + assert_eq!(batches[1].num_rows(), 1); + } + + #[test] + fn dataframe_to_batches_default_max_rows_when_none() { + let df = make_df(); + let batches = collect_ok(dataframe_to_batches(&df, None)); + assert_eq!(batches.len(), 1); + assert_eq!(batches[0].num_rows(), 3); + } + + #[test] + fn dataframe_to_batches_single_yield_when_under_max() { + let df = make_df(); + let batches = collect_ok(dataframe_to_batches(&df, Some(HUNDRED))); + assert_eq!(batches.len(), 1); + assert_eq!(batches[0].num_rows(), 3); + } + + #[test] + fn dataframe_to_batches_chunk_aligned_is_zero_copy() { + let mut left = DataFrame::new( + 2, + vec![Series::new(PlSmallStr::from("i"), &[10i64, 20]).into_column()], + ) + .unwrap(); + let right = DataFrame::new( + 2, + vec![Series::new(PlSmallStr::from("i"), &[30i64, 40]).into_column()], + ) + .unwrap(); + left.vstack_mut(&right).unwrap(); + assert_eq!(left.columns()[0].n_chunks(), 2); + + let polars_chunks: Vec<*const i64> = { + let s = left.columns()[0].as_materialized_series(); + (0..s.n_chunks()) + .map(|i| { + let arr = &s.chunks()[i]; + let prim: 
&polars_arrow::array::PrimitiveArray = + arr.as_any().downcast_ref().unwrap(); + prim.values().as_slice().as_ptr() + }) + .collect() + }; + + let batches = collect_ok(dataframe_to_batches(&left, Some(THOUSAND))); + assert_eq!(batches.len(), 2); + for (idx, rb) in batches.iter().enumerate() { + assert_eq!(rb.num_rows(), 2); + let col: &Int64Array = rb.column(0).as_primitive::(); + assert_eq!(col.values().as_ptr(), polars_chunks[idx]); + } + } + + #[test] + fn dataframe_to_batches_chunk_aligned_splits_within_chunk() { + let mut left = DataFrame::new( + 3, + vec![Series::new(PlSmallStr::from("i"), &[1i64, 2, 3]).into_column()], + ) + .unwrap(); + let right = DataFrame::new( + 3, + vec![Series::new(PlSmallStr::from("i"), &[4i64, 5, 6]).into_column()], + ) + .unwrap(); + left.vstack_mut(&right).unwrap(); + + let batches = collect_ok(dataframe_to_batches(&left, Some(TWO))); + let lens: Vec = batches.iter().map(|rb| rb.num_rows()).collect(); + assert_eq!(lens, vec![2, 1, 2, 1]); + } + + #[test] + fn dataframe_to_batches_misaligned_chunks_zero_copy() { + let a1 = Series::new(PlSmallStr::from("a"), &[1i64, 2]); + let a2 = Series::new(PlSmallStr::from("a"), &[3i64, 4]); + let b = Series::new(PlSmallStr::from("b"), &[10i64, 20, 30, 40]); + let mut left = + DataFrame::new(2, vec![a1.into_column(), b.slice(0, 2).into_column()]).unwrap(); + let right = DataFrame::new(2, vec![a2.into_column(), b.slice(2, 2).into_column()]).unwrap(); + left.vstack_mut(&right).unwrap(); + left.with_column(b.into_column()).unwrap(); + assert_ne!( + left.columns()[0] + .as_materialized_series() + .chunk_lengths() + .collect::>(), + left.columns()[1] + .as_materialized_series() + .chunk_lengths() + .collect::>(), + ); + + let b_chunk_ptr = { + let s = left.columns()[1].as_materialized_series(); + let arr = &s.chunks()[0]; + let prim: &polars_arrow::array::PrimitiveArray = + arr.as_any().downcast_ref().unwrap(); + prim.values().as_slice().as_ptr() + }; + + let batches = 
collect_ok(dataframe_to_batches(&left, Some(THOUSAND))); + assert_eq!(batches.len(), 2); + let a0: &Int64Array = batches[0].column(0).as_primitive::(); + let b0: &Int64Array = batches[0].column(1).as_primitive::(); + let a1: &Int64Array = batches[1].column(0).as_primitive::(); + let b1: &Int64Array = batches[1].column(1).as_primitive::(); + assert_eq!(a0.values().as_ref(), &[1, 2]); + assert_eq!(b0.values().as_ref(), &[10, 20]); + assert_eq!(a1.values().as_ref(), &[3, 4]); + assert_eq!(b1.values().as_ref(), &[30, 40]); + assert_eq!(b0.values().as_ptr(), b_chunk_ptr); + assert_eq!(b1.values().as_ptr(), unsafe { b_chunk_ptr.add(2) }); + } + + #[test] + fn dataframe_to_batches_scalar_column_materialises_once() { + use polars::prelude::Scalar; + let values = Series::new(PlSmallStr::from("v"), &[1i64, 2, 3, 4]); + let scalar = Column::new_scalar(PlSmallStr::from("k"), Scalar::from(7i64), 4); + let df = DataFrame::new(4, vec![values.into_column(), scalar]).unwrap(); + + let batches = collect_ok(dataframe_to_batches(&df, Some(TWO))); + assert_eq!(batches.len(), 2); + for rb in &batches { + assert_eq!(rb.num_rows(), 2); + let k: &Int64Array = rb.column(1).as_primitive::(); + assert_eq!(k.values().as_ref(), &[7, 7]); + } + + let materialised_ptr = { + let s = df.columns()[1].as_materialized_series(); + let arr = &s.chunks()[0]; + let prim: &polars_arrow::array::PrimitiveArray = + arr.as_any().downcast_ref().unwrap(); + prim.values().as_slice().as_ptr() + }; + let k0: &Int64Array = batches[0].column(1).as_primitive::(); + let k1: &Int64Array = batches[1].column(1).as_primitive::(); + assert_eq!(k0.values().as_ptr(), materialised_ptr); + assert_eq!(k1.values().as_ptr(), unsafe { materialised_ptr.add(2) }); + } + + #[test] + fn polars_categorical_routes_through_dictionary_to_symbol() { + use crate::ingress::{Buffer, TableName}; + use arrow_schema::DataType as ArrowDataType; + use polars::prelude::{CategoricalPhysical, Categories, DataType as PlDataType}; + + // Polars 
Categorical → arrow Dictionary(UInt32, LargeUtf8) + let cats = Categories::new( + PlSmallStr::from("syms"), + PlSmallStr::from("test"), + CategoricalPhysical::U32, + ); + let mapping = cats.mapping(); + let dtype = PlDataType::Categorical(cats, mapping); + + let strings = Series::new(PlSmallStr::from("c"), &["A", "B", "A", "C"]); + let cat_series = strings.cast(&dtype).unwrap(); + assert!(matches!(cat_series.dtype(), PlDataType::Categorical(_, _))); + + let df = DataFrame::new(4, vec![cat_series.into_column()]).unwrap(); + let batches = collect_ok(dataframe_to_batches(&df, None)); + assert_eq!(batches.len(), 1); + let rb = &batches[0]; + + // Arrow side must be Dictionary-encoded for the SYMBOL routing to kick in. + assert!( + matches!( + rb.schema().field(0).data_type(), + ArrowDataType::Dictionary(_, _) + ), + "expected Dictionary column, got {:?}", + rb.schema().field(0).data_type() + ); + + // Buffer::append_arrow classifies Dictionary → SymbolDict → SYMBOL wire. + let mut buf = Buffer::qwp_ws_with_max_name_len(127); + let t = TableName::new("polars_cat_sym").unwrap(); + buf.append_arrow(t, rb).unwrap(); + assert_eq!(buf.row_count(), 4); + } +} diff --git a/questdb-rs/src/tests/qwp_ws.rs b/questdb-rs/src/tests/qwp_ws.rs index c4d8d5e1..1ea46dbd 100644 --- a/questdb-rs/src/tests/qwp_ws.rs +++ b/questdb-rs/src/tests/qwp_ws.rs @@ -4127,8 +4127,11 @@ fn qwp_ws_from_conf_parses_java_reconnect_keys() { let zone_ignored = "qwpws::addr=localhost:9000;zone=dc-amsterdam;"; SenderBuilder::from_conf(zone_ignored).unwrap(); - let tcp_zone = "tcp::addr=localhost:9009;zone=dc-amsterdam;"; - SenderBuilder::from_conf(tcp_zone).unwrap(); + #[cfg(feature = "sync-sender-tcp")] + { + let tcp_zone = "tcp::addr=localhost:9009;zone=dc-amsterdam;"; + SenderBuilder::from_conf(tcp_zone).unwrap(); + } // Java Sender ignores unknown keys; this is parser compatibility, not // target-selection support. 
@@ -4163,13 +4166,16 @@ fn qwp_ws_from_conf_parses_java_reconnect_keys() { let err = SenderBuilder::from_conf(zero_port).unwrap_err(); assert!(err.msg().contains("invalid port"), "got: {}", err.msg()); - let repeated_tcp_addr = "tcp::addr=localhost:9009;addr=localhost:9010;"; - let err = SenderBuilder::from_conf(repeated_tcp_addr).unwrap_err(); - assert!( - err.msg().contains("DuplicateKey") || err.msg().contains("duplicate"), - "got: {}", - err.msg() - ); + #[cfg(feature = "sync-sender-tcp")] + { + let repeated_tcp_addr = "tcp::addr=localhost:9009;addr=localhost:9010;"; + let err = SenderBuilder::from_conf(repeated_tcp_addr).unwrap_err(); + assert!( + err.msg().contains("DuplicateKey") || err.msg().contains("duplicate"), + "got: {}", + err.msg() + ); + } let conf_async = "qwpws::addr=localhost:9000;initial_connect_retry=async;"; SenderBuilder::from_conf(conf_async).unwrap(); diff --git a/questdb-rs/tests/qwp_egress_bounds_fuzz.rs b/questdb-rs/tests/qwp_egress_bounds_fuzz.rs index 22a293a8..7afbd868 100644 --- a/questdb-rs/tests/qwp_egress_bounds_fuzz.rs +++ b/questdb-rs/tests/qwp_egress_bounds_fuzz.rs @@ -319,9 +319,12 @@ fn write_geohash(out: &mut Vec, rng: &mut SplitMix64, row_count: usize) { fn write_decimal(out: &mut Vec, rng: &mut SplitMix64, row_count: usize, elem_size: usize) { let non_null = write_validity(out, rng, row_count); - // Decimal scale must be in `0..=MAX_DECIMAL_SCALE` (38 per - // `egress::binds::MAX_DECIMAL_SCALE`). Stay well inside. 
- let scale: u8 = (rng.next_u64() % 20) as u8; + let max_scale: u64 = match elem_size { + 8 => 18, + 16 => 38, + _ => 38, + }; + let scale: u8 = (rng.next_u64() % (max_scale + 1)) as u8; out.push(scale); write_random_bytes(out, rng, non_null * elem_size); } diff --git a/system_test/arrow_alignment_fuzz.py b/system_test/arrow_alignment_fuzz.py new file mode 100644 index 00000000..5c4e7f41 --- /dev/null +++ b/system_test/arrow_alignment_fuzz.py @@ -0,0 +1,123 @@ +from __future__ import annotations + +import os +import sys +import unittest +from typing import Dict, List, Tuple + +import pyarrow as pa + +import arrow_fuzz_common as afc +from arrow_fuzz_common import KIND_REGISTRY, KindSpec + +_ITERATIONS = int(os.environ.get("ARROW_ALIGNMENT_FUZZ_ITERATIONS", "4")) +_ROWS_PER_ITER = int(os.environ.get("ARROW_ALIGNMENT_FUZZ_ROWS", "16")) + +# Each program forces a different pad-byte sum before the target +# column, exercising different residues mod each primitive width +# (1/2/4/8/16/32) on the wire. 
+_PAD_PROGRAM: List[List[str]] = [ + [], + ["boolean"], + ["byte"], + ["byte", "short"], + ["byte", "short", "int"], + ["byte", "short", "int", "long"], + ["short", "char"], + ["uuid", "byte"], + ["long256", "byte"], +] + +_TARGET_ROTATION = ["long", "double", "uuid", "long256", "timestamp"] + + +def _exercise_compute_kernels(rb: pa.RecordBatch, kinds: List[Tuple[str, KindSpec]]) -> None: + import pyarrow.compute as pc + for col_idx, (_, spec) in enumerate(kinds): + col = rb.column(col_idx) + name = spec.name + if name in {"boolean"}: + true_count = pc.sum(pc.cast(col, "int64")).as_py() or 0 + assert 0 <= int(true_count) <= rb.num_rows + elif name in {"byte", "short", "int", "long", "char", "ipv4"}: + total = pc.sum(pc.cast(col, "int64")).as_py() + min_v = pc.min(pc.cast(col, "int64")).as_py() + max_v = pc.max(pc.cast(col, "int64")).as_py() + assert total is not None + assert min_v is not None and max_v is not None + assert min_v <= max_v + elif name in {"float", "double"}: + total = pc.sum(col).as_py() + assert total is not None + elif name in {"uuid", "long256"}: + assert col.type.byte_width in (16, 32) + elif name in {"timestamp", "timestamp_ns", "date"}: + min_v = pc.min(col).as_py() + max_v = pc.max(col).as_py() + assert min_v is not None and max_v is not None + + +def _populate_via_ilp(sender, table: str, kinds, values_per_col, ts_base_us: int) -> None: + n = len(next(iter(values_per_col.values()))) + ordered = sorted(kinds, key=lambda kv: 0 if kv[1].name == "symbol" else 1) + for r in range(n): + sender.table(table) + for col_name, spec in ordered: + v = values_per_col[col_name][r] + if v is None: + continue + spec.ilp_set(sender, col_name, v) + sender.at_micros(ts_base_us + r) + sender.flush() + + +def _read_back(fixture, table: str, kinds) -> pa.RecordBatch: + cols_sql = ", ".join(f'"{c}"' for c, _ in kinds) + return afc.read_back_arrow_concat( + fixture, f"select {cols_sql} from '{table}' order by ts" + ) + + +class TestArrowAlignment(afc.ArrowFuzzBase): 
+ SUITE_LABEL = "arrow_alignment_fuzz" + + def _run_program(self, iter_idx: int, kind_order: List[str]): + table = self.fresh_table(f"arrow_aln_{iter_idx}") + kinds = [(f"c{i}_{n}", KIND_REGISTRY[n]) for i, n in enumerate(kind_order)] + afc.create_table_from_kinds(self._fixture, table, kinds) + n = _ROWS_PER_ITER + rnd = self._master_rng + values_per_col: Dict[str, list] = {} + for col_name, spec in kinds: + mask = afc.all_valid_mask(n) + values_per_col[col_name] = spec.generate_values(rnd, n, mask, edge=False) + with afc.existing_sender(self._fixture) as sender: + _populate_via_ilp(sender, table, kinds, values_per_col, + ts_base_us=1_700_000_000_000_000 + iter_idx * 1_000_000) + afc.wait_for_rows(self._fixture, table, n) + rb = _read_back(self._fixture, table, kinds) + self.assertEqual(rb.num_rows, n, self.label()) + return rb, kinds + + def test_misalignment_schedule_imports_and_computes(self): + for it in range(_ITERATIONS): + for prog_idx, pad in enumerate(_PAD_PROGRAM): + with self.subTest(iter=it, prog_idx=prog_idx): + target = _TARGET_ROTATION[prog_idx % len(_TARGET_ROTATION)] + kind_order = pad + [target] + rb, kinds = self._run_program(prog_idx + it * len(_PAD_PROGRAM), + kind_order) + _exercise_compute_kernels(rb, kinds) + + +def register(loop_registry): + loop_registry.append(TestArrowAlignment) + + +if __name__ == "__main__": + print( + "Note: arrow_alignment_fuzz tests require a live QuestDB fixture. 
" + "Run via `python test.py run --existing HOST:ILP:HTTP TestArrowAlignment`.", + file=sys.stderr, + ) + unittest.main() diff --git a/system_test/arrow_egress_fuzz.py b/system_test/arrow_egress_fuzz.py new file mode 100644 index 00000000..e59bbf56 --- /dev/null +++ b/system_test/arrow_egress_fuzz.py @@ -0,0 +1,304 @@ +from __future__ import annotations + +import os +import sys +import unittest +from typing import List, Tuple + +import pyarrow as pa + +import arrow_fuzz_common as afc +from arrow_fuzz_common import KIND_REGISTRY, KindSpec + +_FUZZ_ITERATIONS = int(os.environ.get("ARROW_EGRESS_FUZZ_ITERATIONS", "6")) +_ROWS_PER_BATCH = int(os.environ.get("ARROW_EGRESS_FUZZ_ROWS", "16")) + + +def _ilp_capable_kinds() -> List[Tuple[str, KindSpec]]: + return [(k, s) for k, s in KIND_REGISTRY.items() if s.supports_ilp_setter] + + +def _populate_table_via_ilp(sender, table: str, kinds, values_per_col, ts_base_us: int) -> None: + n = len(next(iter(values_per_col.values()))) if values_per_col else 0 + ordered = sorted(kinds, key=lambda kv: 0 if kv[1].name == "symbol" else 1) + for r in range(n): + sender.table(table) + wrote_any = False + for col_name, spec in ordered: + v = values_per_col[col_name][r] + if v is None: + continue + spec.ilp_set(sender, col_name, v) + wrote_any = True + if not wrote_any: + sender.column("_keep", True) + sender.at_micros(ts_base_us + r) + sender.flush() + +def _read_back_arrow(fixture, table: str, kinds) -> pa.RecordBatch: + cols_sql = ", ".join(f'"{c}"' for c, _ in kinds) + sql = f"select {cols_sql} from '{table}' order by ts" + return afc.read_back_arrow_concat(fixture, sql) + +def _ingest_and_read_back(testcase, table: str, kinds, *, null_mode: str + ) -> Tuple[pa.RecordBatch, dict]: + afc.create_table_from_kinds(testcase._fixture, table, kinds) + rnd = testcase._master_rng + n = _ROWS_PER_BATCH + values_per_col: dict = {} + for col_name, spec in kinds: + if null_mode == "valid": + mask = afc.all_valid_mask(n) + edge = False + elif 
null_mode == "partial": + mask = afc.partial_null_mask(rnd, n, null_p=0.3) + edge = False + elif null_mode == "all_null": + mask = afc.all_null_mask(n) + edge = False + elif null_mode == "edge": + mask = afc.all_valid_mask(n) + edge = True + else: + raise ValueError(null_mode) + values_per_col[col_name] = spec.generate_values(rnd, n, mask, edge=edge) + ts_base = 1_700_000_000_000_000 + rnd.next_int(1_000_000) + with afc.existing_sender(testcase._fixture) as sender: + _populate_table_via_ilp(sender, table, kinds, values_per_col, ts_base) + afc.wait_for_rows(testcase._fixture, table, n) + rb = _read_back_arrow(testcase._fixture, table, kinds) + return rb, values_per_col + +def _build_expected_arrow(kinds, values_per_col, num_rows: int) -> pa.RecordBatch: + arrays = [] + fields = [] + for col_name, spec in kinds: + arr = spec.build_arrow_array(values_per_col[col_name]) + arrays.append(arr) + fields.append(spec.make_field(col_name)) + return pa.RecordBatch.from_arrays(arrays, schema=pa.schema(fields)) + +class TestArrowEgressPerKind(afc.ArrowFuzzBase): + """One test method per kind covering all four null modes via sub-tests.""" + + SUITE_LABEL = "arrow_egress_per_kind" + + def _exercise_kind(self, kind_name: str) -> None: + spec = KIND_REGISTRY[kind_name] + if not spec.supports_ilp_setter: + self.skipTest(f"kind {kind_name!r} has no ILP setter (Arrow-ingest only)") + modes = ["valid", "edge"] + if spec.supports_server_null: + modes[1:1] = ["partial", "all_null"] + for null_mode in modes: + with self.subTest(null_mode=null_mode): + table = self.fresh_table(f"arrow_eg_{kind_name}_{null_mode}") + kinds = [(f"c_{kind_name}", spec)] + rb, values_per_col = _ingest_and_read_back( + self, table, kinds, null_mode=null_mode, + ) + self._assert_kind_round_trip(rb, kinds, values_per_col, null_mode) + + def _assert_kind_round_trip(self, rb, kinds, values_per_col, null_mode: str) -> None: + col_name, spec = kinds[0] + self.assertEqual(rb.num_columns, 1, 
self.label(f"kind={spec.name}")) + self.assertEqual(rb.num_rows, _ROWS_PER_BATCH, + self.label(f"row count kind={spec.name}")) + expected_dtype = spec.arrow_type() + actual_dtype = _storage_type(rb.column(0).type) + if not _dtype_compatible(actual_dtype, expected_dtype): + self.fail(self.label( + f"DataType mismatch kind={spec.name}: " + f"want {expected_dtype}, got {actual_dtype}" + )) + self._assert_field_metadata(rb.schema.field(0), spec) + expected_values = values_per_col[col_name] + for r in range(rb.num_rows): + expected = expected_values[r] + actual = _scalar_to_python(rb.column(0)[r], spec) + expected_canon = _canonicalise_for_compare(expected, spec) + actual_canon = _canonicalise_for_compare(actual, spec) + if not spec.compare(actual_canon, expected_canon): + self.fail(self.label( + f"kind={spec.name} mode={null_mode} row={r}: " + f"expected {expected_canon!r}, got {actual_canon!r}" + )) + + def _assert_field_metadata(self, field: pa.Field, spec: KindSpec) -> None: + expected_md = spec.metadata() or {} + if not expected_md: + return + actual_md = dict(field.metadata or {}) + ext_name = getattr(field.type, "extension_name", None) + for k, v in expected_md.items(): + key_bytes = k if isinstance(k, bytes) else k.encode() + val_bytes = v if isinstance(v, bytes) else v.encode() + if key_bytes == b"ARROW:extension:name" and ext_name is not None: + if ext_name.encode() == val_bytes: + continue + self.assertEqual( + actual_md.get(key_bytes), val_bytes, + self.label( + f"kind={spec.name}: field metadata " + f"{key_bytes!r} expected={val_bytes!r} " + f"actual={actual_md.get(key_bytes)!r}" + ), + ) + +def _storage_type(t: pa.DataType) -> pa.DataType: + storage = getattr(t, "storage_type", None) + return storage if storage is not None else t + + +def _dtype_compatible(actual: pa.DataType, expected: pa.DataType) -> bool: + if str(actual) == str(expected): + return True + a_str = str(actual) + e_str = str(expected) + if a_str.startswith("decimal") and 
e_str.startswith("decimal"): + a_args = a_str[a_str.index("("):] + e_args = e_str[e_str.index("("):] + return a_args == e_args + if "list" in a_str and "list" in e_str: + return _leaf_type(actual) == _leaf_type(expected) + return False + + +def _leaf_type(t: pa.DataType) -> str: + while pa.types.is_list(t) or pa.types.is_large_list(t): + t = t.value_type + return str(t) + + +def _scalar_to_python(scalar, spec: KindSpec): + if scalar is None: + return None + if spec.name in ("timestamp", "timestamp_ns", "date") and hasattr(scalar, "value"): + if not scalar.is_valid: + return None + return scalar.value + try: + return scalar.as_py() + except (ValueError, OverflowError): + return getattr(scalar, "value", None) + + +def _canonicalise_for_compare(value, spec: KindSpec): + if value is None: + return None + import datetime as _dt + from decimal import Decimal + if isinstance(value, _dt.datetime): + unit = spec.params.get("unit", "us") + divisor = {"s": 1, "ms": 1_000, "us": 1_000_000, "ns": 1_000_000_000}[unit] + epoch = _dt.datetime(1970, 1, 1, tzinfo=_dt.timezone.utc) + if value.tzinfo is None: + value = value.replace(tzinfo=_dt.timezone.utc) + delta_s = (value - epoch).total_seconds() + return int(round(delta_s * divisor)) + if isinstance(value, Decimal): + scale = spec.params.get("scale", 0) + return int(value.scaleb(scale)) + if spec.name == "uuid": + import uuid as _uuid + if isinstance(value, _uuid.UUID): + value = value.bytes + if isinstance(value, (bytes, bytearray)): + lo = int.from_bytes(value[:8], "little") + hi = int.from_bytes(value[8:], "little") + return (lo, hi) + return value + +# Inject one test method per kind so failures pinpoint the offending type. 
+for _kind_name in list(KIND_REGISTRY.keys()): + def _make(name): + def test(self): + self._exercise_kind(name) + test.__name__ = f"test_kind_{name}" + test.__qualname__ = f"TestArrowEgressPerKind.test_kind_{name}" + return test + setattr(TestArrowEgressPerKind, f"test_kind_{_kind_name}", _make(_kind_name)) + +class TestArrowEgressEmpty(afc.ArrowFuzzBase): + """Zero-row stream → cursor terminates cleanly (no half-filled batch).""" + + SUITE_LABEL = "arrow_egress_empty" + + def _assert_no_rows(self, sql: str) -> None: + try: + batches = afc.read_back_arrow_batches(self._fixture, sql) + except afc.ReaderError as e: + from arrow_ffi import ReaderErrorCode + self.assertEqual( + e.code, ReaderErrorCode.NO_SCHEMA, + self.label(f"unexpected ReaderError code={e.code} msg={e.message!r}") + ) + return + total_rows = sum(rb.num_rows for rb in batches) + self.assertEqual( + total_rows, 0, + self.label( + f"expected 0 total rows, got {total_rows} across {len(batches)} batch(es)" + ), + ) + + def test_empty_select_returns_no_batches(self): + self._assert_no_rows("select 1 from long_sequence(0)") + + def test_filter_yielding_no_rows(self): + table = self.fresh_table("arrow_eg_filter_empty") + kinds = [("c_int", KIND_REGISTRY["int"])] + rb, _ = _ingest_and_read_back(self, table, kinds, null_mode="valid") + self.assertGreater(rb.num_rows, 0) + self._assert_no_rows( + f"select c_int from '{table}' where c_int = -999999999" + ) + +class TestArrowEgressFuzz(afc.ArrowFuzzBase): + """Random subsets of ILP-capable kinds per iteration.""" + + SUITE_LABEL = "arrow_egress_fuzz" + + def test_random_schemas(self): + full_pool = _ilp_capable_kinds() + nullable_pool = [(n, s) for n, s in full_pool if s.supports_server_null] + for it in range(_FUZZ_ITERATIONS): + with self.subTest(iter=it): + null_mode = ("valid", "partial", "all_null")[it % 3] + pool = full_pool if null_mode == "valid" else nullable_pool + self._master_rng.shuffle(pool) + picked_kinds = pool[:4 + (it % 4)] + kinds = 
[(f"c{i}_{n}", s) for i, (n, s) in enumerate(picked_kinds)] + table = self.fresh_table(f"arrow_eg_fuzz_{it}") + rb, values_per_col = _ingest_and_read_back( + self, table, kinds, null_mode=null_mode, + ) + self.assertEqual(rb.num_rows, _ROWS_PER_BATCH, + self.label(f"iter={it}")) + self.assertEqual(rb.num_columns, len(kinds), self.label()) + # Per-cell comparison via each spec's canonicaliser. + for col_idx, (col_name, spec) in enumerate(kinds): + expected = values_per_col[col_name] + for r in range(rb.num_rows): + a = _canonicalise_for_compare( + _scalar_to_python(rb.column(col_idx)[r], spec), spec) + e = _canonicalise_for_compare(expected[r], spec) + if not spec.compare(a, e): + self.fail(self.label( + f"iter={it} kind={spec.name} col={col_name} row={r}: " + f"expected {e!r}, got {a!r}" + )) + +def register(loop_registry): + loop_registry.append(TestArrowEgressPerKind) + loop_registry.append(TestArrowEgressEmpty) + loop_registry.append(TestArrowEgressFuzz) + +if __name__ == "__main__": + print( + "Note: arrow_egress_fuzz tests require a live QuestDB fixture. " + "Run via `python test.py run --existing HOST:ILP:HTTP " + "TestArrowEgressPerKind` (or any of the other arrow egress classes).", + file=sys.stderr, + ) + unittest.main() diff --git a/system_test/arrow_ffi.py b/system_test/arrow_ffi.py new file mode 100644 index 00000000..4ab78b81 --- /dev/null +++ b/system_test/arrow_ffi.py @@ -0,0 +1,261 @@ +"""ctypes bindings for the Apache Arrow C Data Interface exports. + +Wraps `line_reader_cursor_next_arrow_batch` (egress) and +`line_sender_buffer_append_arrow` (ingress) from `libquestdb_client`. +Layout of `ArrowArray` / `ArrowSchema` mirrors the Apache Arrow spec: +. 
+""" + +from __future__ import annotations + +import ctypes +from typing import Optional, Tuple + +from questdb_line_sender import ( # type: ignore[attr-defined] + _DLL, + SenderError as _SenderError, + c_line_sender_error as _LineSenderError, + c_line_sender_error_p as _LineSenderErrorPtr, + c_line_sender_table_name as _LineSenderTableName, + c_line_sender_buffer as _LineSenderBuffer, +) +from qwp_egress_reader import ( # type: ignore[attr-defined] + _LineReaderCursor, + _LineReaderError, +) + + +class ArrowSenderError(_SenderError): + """`SenderError` carrying the `line_sender_error_code` discriminant.""" + + def __init__(self, message: str, code: int, qwp_ws_error=None) -> None: + super().__init__(message, qwp_ws_error) + self.code = code + + def __str__(self) -> str: + base = super().__str__() + return f"[code={self.code}] {base}" + + +def _take_sender_error(err_ptr) -> ArrowSenderError: + code = int(_DLL.line_sender_error_get_code(err_ptr)) + c_len = ctypes.c_size_t(0) + raw = _DLL.line_sender_error_msg(err_ptr, ctypes.byref(c_len)) + msg = ( + ctypes.string_at(raw, c_len.value).decode("utf-8", "replace") + if raw and c_len.value + else "" + ) + from questdb_line_sender import _qwpws_error_from_sender_error # late bind + qwp_view = _qwpws_error_from_sender_error(err_ptr) + _DLL.line_sender_error_free(err_ptr) + return ArrowSenderError(msg, code, qwp_view) + + +class ArrowArray(ctypes.Structure): + pass + + +ArrowArray._fields_ = [ + ("length", ctypes.c_int64), + ("null_count", ctypes.c_int64), + ("offset", ctypes.c_int64), + ("n_buffers", ctypes.c_int64), + ("n_children", ctypes.c_int64), + ("buffers", ctypes.POINTER(ctypes.c_void_p)), + ("children", ctypes.POINTER(ctypes.POINTER(ArrowArray))), + ("dictionary", ctypes.POINTER(ArrowArray)), + ("release", ctypes.CFUNCTYPE(None, ctypes.POINTER(ArrowArray))), + ("private_data", ctypes.c_void_p), +] + + +class ArrowSchema(ctypes.Structure): + pass + + +ArrowSchema._fields_ = [ + ("format", ctypes.c_char_p), + 
("name", ctypes.c_char_p), + ("metadata", ctypes.c_char_p), + ("flags", ctypes.c_int64), + ("n_children", ctypes.c_int64), + ("children", ctypes.POINTER(ctypes.POINTER(ArrowSchema))), + ("dictionary", ctypes.POINTER(ArrowSchema)), + ("release", ctypes.CFUNCTYPE(None, ctypes.POINTER(ArrowSchema))), + ("private_data", ctypes.c_void_p), +] + + +NEXT_ARROW_BATCH_OK = 0 +NEXT_ARROW_BATCH_END = 1 +NEXT_ARROW_BATCH_ERROR = 2 + + +class SenderErrorCode: + """`line_sender_error_code` discriminants. Pinned in + `questdb-rs-ffi/src/lib.rs::line_sender_error_code_discriminants_are_abi_stable`.""" + COULD_NOT_RESOLVE_ADDR = 0 + INVALID_API_CALL = 1 + SOCKET_ERROR = 2 + INVALID_UTF8 = 3 + INVALID_NAME = 4 + INVALID_TIMESTAMP = 5 + AUTH_ERROR = 6 + TLS_ERROR = 7 + HTTP_NOT_SUPPORTED = 8 + SERVER_FLUSH_ERROR = 9 + CONFIG_ERROR = 10 + ARRAY_ERROR = 11 + PROTOCOL_VERSION_ERROR = 12 + INVALID_DECIMAL = 13 + SERVER_REJECTION = 14 + ARROW_UNSUPPORTED_COLUMN_KIND = 15 + ARROW_INGEST = 16 + + +class ReaderErrorCode: + """`line_reader_error_code` discriminants. 
def _setsig(name, restype, *argtypes):
    """Bind a ctypes signature to the `_DLL` export called `name`.

    Looks the symbol up on `_DLL`, assigns `restype` and the positional
    `argtypes` to it, and returns the configured function object.
    """
    func = getattr(_DLL, name)
    func.argtypes = [*argtypes]
    func.restype = restype
    return func
def buffer_append_arrow(
    buf_ptr,
    table_name: _LineSenderTableName,
    array_ptr,
    schema_ptr,
    ts_column_name: Optional[bytes] = None,
) -> None:
    """Append an Arrow batch to a sender buffer through the FFI.

    Calls `line_sender_buffer_append_arrow`, or the `_at_column` variant
    when a non-empty `ts_column_name` is given. As stated by the original
    contract, ownership of `array_ptr` passes to the callee while
    `schema_ptr` stays with the caller.

    Raises the error built by `_take_sender_error` when the native call
    reports failure.
    """
    err_ref = ctypes.POINTER(_LineSenderError)()
    err_out = ctypes.byref(err_ref)
    if not ts_column_name:
        succeeded = _append_arrow(
            buf_ptr, table_name, array_ptr, schema_ptr, err_out
        )
    else:
        # Length-prefixed view over the caller's bytes object.
        ts_col = c_line_sender_column_name(
            len(ts_column_name),
            ctypes.c_char_p(ts_column_name),
        )
        succeeded = _append_arrow_at_column(
            buf_ptr, table_name, array_ptr, schema_ptr, ts_col, err_out
        )
    if succeeded:
        return
    raise _take_sender_error(err_ref)
def pyarrow_import_record_batch(arr: ArrowArray, sch: ArrowSchema):
    """Turn an exported `ArrowArray`/`ArrowSchema` pair into a RecordBatch.

    Inverse of `pyarrow_export_record_batch`: the pair is imported as a
    struct array via pyarrow's `_import_from_c` and then unwrapped into a
    `pa.RecordBatch`. The structs are consumed by the import.
    """
    import pyarrow as pa
    array_addr = ctypes.addressof(arr)
    schema_addr = ctypes.addressof(sch)
    as_struct = pa.Array._import_from_c(array_addr, schema_addr)
    return pa.RecordBatch.from_struct_array(as_struct)
def ingress_conf(fixture, **extras: str) -> str:
    """Build the `qwpws::` sender configuration string for `fixture`.

    Starts with the address taken from `fixture.host` and
    `fixture.http_server_port`, then appends one `key=value;` pair per
    keyword argument, in the order given.
    """
    conf = f"qwpws::addr={fixture.host}:{fixture.http_server_port};"
    for key, value in extras.items():
        conf += f"{key}={value};"
    return conf
def make_table_name(prefix: str, rnd: Rng) -> str:
    """Return `prefix` plus `_` and an 8-digit lowercase hex suffix.

    The suffix is one draw of `rnd.next_int(2**32)`, zero-padded to eight
    hex digits.
    """
    suffix = rnd.next_int(1 << 32)
    return "{}_{:08x}".format(prefix, suffix)
def read_back_arrow_batches(fixture, sql: str) -> List[pa.RecordBatch]:
    """Run `sql` through an Arrow cursor and collect every batch returned.

    Pulls batches with `next_arrow_batch` until it reports
    `NEXT_ARROW_BATCH_END`, importing each one into a `pa.RecordBatch`.
    Any return code other than OK or END raises `AssertionError`.
    """
    collected: List[pa.RecordBatch] = []
    with arrow_cursor(fixture, sql) as cursor:
        rc, arr, sch = next_arrow_batch(cursor)
        while rc != NEXT_ARROW_BATCH_END:
            if rc != NEXT_ARROW_BATCH_OK:
                raise AssertionError(f"unexpected next_arrow_batch rc={rc}")
            collected.append(pyarrow_import_record_batch(arr, sch))
            rc, arr, sch = next_arrow_batch(cursor)
    return collected
def partial_null_mask(rnd: Rng, n: int, *, null_p: float = 0.2) -> List[bool]:
    """Return `n` validity flags where roughly `null_p` of them are False.

    Each flag is one `rnd.next_int(1000)` draw; the entry is True (value
    kept) when the draw is at or above `int(null_p * 1000)`.
    """
    flags: List[bool] = []
    for _ in range(n):
        draw = rnd.next_int(1000)
        flags.append(draw >= int(null_p * 1000))
    return flags
def _gen_fixed_bytes(rnd: Rng, n: int, mask, *, edge: bool, width: int) -> List[Any]:
    """Generate `n` random byte strings of exactly `width` bytes each.

    With `edge` set, the first two slots (when present) are pinned to
    all-zero and all-0xFF payloads. `mask` is applied last through
    `_apply_mask`.
    """
    rows: List[Any] = []
    for _ in range(n):
        rows.append(bytes(rnd.next_int(256) for _ in range(width)))
    if edge:
        for slot, fill in enumerate((b"\x00", b"\xff")):
            if slot < n:
                rows[slot] = fill * width
    return _apply_mask(rows, mask)
def _gen_decimal_int(rnd: Rng, n: int, mask, *, edge: bool, bound: int) -> List[Any]:
    """Generate `n` unscaled decimal integers drawn from `[-bound, bound]`.

    With `edge` set, the leading slots (as far as `n` allows) are pinned
    to 0, `bound` and `-bound` in that order. `mask` is applied last
    through `_apply_mask`.
    """
    span = 2 * bound + 1
    rows: List[Any] = []
    for _ in range(n):
        rows.append(rnd.next_int(span) - bound)
    if edge:
        for slot, pinned in enumerate((0, bound, -bound)):
            if slot < n:
                rows[slot] = pinned
    return _apply_mask(rows, mask)
def _gen_double_array_3d(rnd: Rng, n: int, mask, *, edge: bool) -> List[Any]:
    """Generate `n` random 3-D nested lists of floats.

    Each of the three dimensions is 1 or 2 long; leaf values are
    `(rnd.next_int(1000) - 500) / 100.0`. `edge` is accepted for
    signature parity with the other generators but has no effect here.
    `mask` is applied last through `_apply_mask`.
    """
    def one():
        # Dimension sizes are drawn first, in depth/rows/cols order.
        depth, height, width = [rnd.next_int(2) + 1 for _ in range(3)]
        cube = []
        for _ in range(depth):
            plane = []
            for _ in range(height):
                plane.append(
                    [(rnd.next_int(1000) - 500) / 100.0 for _ in range(width)]
                )
            cube.append(plane)
        return cube

    rows: List[Any] = []
    for _ in range(n):
        rows.append(one())
    return _apply_mask(rows, mask)
hi.to_bytes(8, "little")) + return pa.array(payload, type=pa.binary(16)) + +def _arr_timestamp(values, *, params) -> pa.Array: + return pa.array(values, type=pa.timestamp(params["unit"], tz="UTC")) + +def _arr_symbol(values, *, params) -> pa.Array: + seen: Dict[str, int] = {} + dict_vals: List[str] = [] + idxs: List[Optional[int]] = [] + for v in values: + if v is None: + idxs.append(None) + else: + if v not in seen: + seen[v] = len(dict_vals) + dict_vals.append(v) + idxs.append(seen[v]) + idx_arr = pa.array(idxs, type=pa.uint32()) + dict_arr = pa.array(dict_vals, type=pa.string()) + return pa.DictionaryArray.from_arrays(idx_arr, dict_arr) + +def _arr_geohash_int(values, *, params) -> pa.Array: + return pa.array(values, type=params["arrow_dtype"]) + +def _unscaled_to_decimal(values, scale): + from decimal import Decimal + return [None if v is None else Decimal(int(v)).scaleb(-scale) for v in values] + +def _arr_decimal64(values, *, params) -> pa.Array: + scale = params["scale"] + precision = params.get("precision", 18) + factory = getattr(pa, "decimal64", None) + dtype = factory(precision, scale) if factory else pa.decimal128(precision, scale) + return pa.array(_unscaled_to_decimal(values, scale), type=dtype) + +def _arr_decimal128(values, *, params) -> pa.Array: + scale = params["scale"] + precision = params.get("precision", 38) + return pa.array( + _unscaled_to_decimal(values, scale), + type=pa.decimal128(precision, scale), + ) + +def _arr_decimal256(values, *, params) -> pa.Array: + scale = params["scale"] + precision = params.get("precision", 76) + return pa.array( + _unscaled_to_decimal(values, scale), + type=pa.decimal256(precision, scale), + ) + +def _arr_double_list(values, *, params) -> pa.Array: + ndim = params["ndim"] + leaf = pa.float64() + if ndim == 1: + return pa.array(values, type=pa.list_(leaf)) + if ndim == 2: + inner = pa.list_(leaf) + return pa.array(values, type=pa.list_(inner)) + if ndim == 3: + inner = pa.list_(pa.list_(leaf)) + return 
pa.array(values, type=pa.list_(inner)) + raise ValueError(f"unsupported ndim={ndim}") + +def _arr_long_list(values, *, params) -> pa.Array: + return pa.array(values, type=pa.list_(pa.int64())) + +def _set_bool(buf, name, v, *, params): + buf.column(name, bool(v)) + +def _set_i8(buf, name, v, *, params): + buf.column_i8(name, int(v)) + +def _set_i16(buf, name, v, *, params): + buf.column_i16(name, int(v)) + +def _set_i32(buf, name, v, *, params): + buf.column_i32(name, int(v)) + +def _set_i64(buf, name, v, *, params): + buf.column(name, int(v)) + +def _set_f32(buf, name, v, *, params): + buf.column_f32(name, float(v)) + +def _set_f64(buf, name, v, *, params): + buf.column(name, float(v)) + +def _set_char(buf, name, v, *, params): + buf.column_char(name, int(v)) + +def _set_ipv4(buf, name, v, *, params): + buf.column_ipv4(name, int(v)) + +def _set_varchar(buf, name, v, *, params): + buf.column(name, str(v)) + +def _set_binary(buf, name, v, *, params): + buf.column_binary(name, bytes(v)) + +def _set_symbol(buf, name, v, *, params): + buf.symbol(name, str(v)) + +def _set_uuid(buf, name, v, *, params): + lo, hi = v + buf.column_uuid(name, int(lo), int(hi)) + +def _set_long256(buf, name, v, *, params): + buf.column_long256(name, bytes(v)) + +def _set_date(buf, name, v, *, params): + buf.column_date(name, int(v)) + +def _set_ts_us(buf, name, v, *, params): + from questdb_line_sender import TimestampMicros + buf.column(name, TimestampMicros(int(v))) + +def _set_ts_ns(buf, name, v, *, params): + from questdb_line_sender import TimestampNanos + buf.column(name, TimestampNanos(int(v))) + +def _set_geohash(buf, name, v, *, params): + buf.column_geohash(name, int(v), int(params["bits"])) + +def _set_decimal_str(buf, name, v, *, params): + buf.column_dec_str(name, _format_decimal(int(v), params["scale"])) + +def _set_double_array(buf, name, v, *, params): + import numpy as np + arr = np.ascontiguousarray(np.asarray(v, dtype=np.float64)) + buf.column_f64_arr(name, arr) + +def 
_format_decimal(unscaled: int, scale: int) -> str: + if scale == 0: + return str(unscaled) + sign = "-" if unscaled < 0 else "" + digits = str(abs(unscaled)).rjust(scale + 1, "0") + int_part = digits[:-scale] + frac_part = digits[-scale:] + return f"{sign}{int_part}.{frac_part}" + +_INT_NULL_SENTINEL = -(1 << 31) +_LONG_NULL_SENTINEL = -(1 << 63) +_IPV4_NULL_SENTINEL = 0 + + +def _is_null_for(value, sentinel): + if value is None: + return True + try: + return int(value) == sentinel + except (TypeError, ValueError): + return False + + +def _cmp_default(a, e, *, params): + if a is None or e is None: + return a is None and e is None + return a == e + + +def _cmp_int_sentinel(a, e, *, params): + if _is_null_for(a, _INT_NULL_SENTINEL) and _is_null_for(e, _INT_NULL_SENTINEL): + return True + if a is None or e is None: + return False + return int(a) == int(e) + + +def _cmp_long_sentinel(a, e, *, params): + if _is_null_for(a, _LONG_NULL_SENTINEL) and _is_null_for(e, _LONG_NULL_SENTINEL): + return True + if a is None or e is None: + return False + return int(a) == int(e) + + +def _cmp_ipv4_sentinel(a, e, *, params): + if _is_null_for(a, _IPV4_NULL_SENTINEL) and _is_null_for(e, _IPV4_NULL_SENTINEL): + return True + if a is None or e is None: + return False + return int(a) == int(e) + + +def _cmp_geohash_sentinel(a, e, *, params): + bits = params["bits"] + storage_w = 8 if bits <= 7 else 16 if bits <= 15 else 32 if bits <= 32 else 64 + storage_sentinel = (1 << storage_w) - 1 + def _is_null(v): + if v is None: + return True + try: + return int(v) == storage_sentinel + except (TypeError, ValueError): + return False + if _is_null(a) and _is_null(e): + return True + if a is None or e is None: + return False + return int(a) == int(e) + +def _is_null_or_nan(v): + if v is None: + return True + try: + f = float(v) + return math.isnan(f) or math.isinf(f) + except (TypeError, ValueError): + return False + + +def _cmp_float(a, e, *, params): + if _is_null_or_nan(a) and 
_is_null_or_nan(e): + return True + if a is None or e is None: + return False + return float(a) == float(e) + + +def _cmp_float32(a, e, *, params): + if _is_null_or_nan(a) and _is_null_or_nan(e): + return True + if a is None or e is None: + return False + return _f32_round(float(a)) == _f32_round(float(e)) + +def _cmp_uuid_bytes(a, e, *, params): + if a is None or e is None: + return a is None and e is None + return bytes(a) == bytes(e) + + +def _cmp_uuid_tuple(a, e, *, params): + if a is None or e is None: + return a is None and e is None + return tuple(a) == tuple(e) + +def _cmp_symbol(a, e, *, params): + if a is None or e is None: + return a is None and e is None + return str(a) == str(e) + +def _cmp_timestamp(a, e, *, params): + if a is None or e is None: + return a is None and e is None + import datetime as _dt + if isinstance(a, _dt.datetime) and isinstance(e, _dt.datetime): + return a == e + if isinstance(a, _dt.datetime): + unit = params.get("unit", "us") + divisor = {"s": 1, "ms": 1_000, "us": 1_000_000, "ns": 1_000_000_000}[unit] + return int(a.timestamp() * divisor) == int(e) + return a == e + +def _cmp_decimal(a, e, *, params): + if a is None or e is None: + return a is None and e is None + from decimal import Decimal + if not isinstance(a, Decimal): + a = Decimal(str(a)) + if not isinstance(e, Decimal): + e = Decimal(str(e)) + return a.normalize() == e.normalize() + +def _cmp_double_array(a, e, *, params): + if a is None or e is None: + return a is None and e is None + return True + +def _deep_float_equal(a, e) -> bool: + if isinstance(a, list) and isinstance(e, list): + if len(a) != len(e): + return False + return all(_deep_float_equal(x, y) for x, y in zip(a, e)) + if isinstance(a, float) and isinstance(e, float): + if math.isnan(a) and math.isnan(e): + return True + return a == e + return a == e + +class KindSpec: + """Catalog entry for one column type tested via Arrow.""" + + def __init__( + self, + name: str, + ddl: str, + arrow_type_factory: 
Callable[[Dict[str, Any]], pa.DataType], + metadata_factory: Callable[[Dict[str, Any]], Optional[Dict[bytes, bytes]]], + value_generator: Callable[..., List[Any]], + arrow_array_builder: Callable[..., pa.Array], + ilp_setter: Optional[Callable[..., None]], + compare_fn: Callable[..., bool] = _cmp_default, + *, + round_trip_capable: bool = True, + supports_ilp_setter: bool = True, + supports_arrow_ingest: bool = True, + supports_arrow_egress: bool = True, + supports_server_null: bool = True, + params: Optional[Dict[str, Any]] = None, + ): + self.name = name + self.ddl = ddl + self._arrow_type_factory = arrow_type_factory + self._metadata_factory = metadata_factory + self._value_generator = value_generator + self._arrow_array_builder = arrow_array_builder + self._ilp_setter = ilp_setter + self._compare_fn = compare_fn + self.round_trip_capable = round_trip_capable + self.supports_ilp_setter = supports_ilp_setter + self.supports_arrow_ingest = supports_arrow_ingest + self.supports_arrow_egress = supports_arrow_egress + self.supports_server_null = supports_server_null + self.params: Dict[str, Any] = params or {} + + def arrow_type(self) -> pa.DataType: + return self._arrow_type_factory(self.params) + + def metadata(self) -> Optional[Dict[bytes, bytes]]: + return self._metadata_factory(self.params) + + def make_field(self, col_name: str, nullable: bool = True) -> pa.Field: + return pa.field( + col_name, self.arrow_type(), nullable=nullable, + metadata=self.metadata(), + ) + + def generate_values( + self, rnd: Rng, n: int, mask: List[bool], *, edge: bool = False + ) -> List[Any]: + return self._value_generator(rnd, n, mask, edge=edge, **self.params) + + def build_arrow_array(self, values: List[Any]) -> pa.Array: + return self._arrow_array_builder(values, params=self.params) + + def ilp_set(self, buf, col_name: str, value: Any) -> None: + if not self.supports_ilp_setter: + raise NotImplementedError( + f"kind {self.name!r} has no per-row ILP setter" + ) + 
self._ilp_setter(buf, col_name, value, params=self.params) + + def compare(self, actual: Any, expected: Any) -> bool: + return self._compare_fn(actual, expected, params=self.params) + +def _vg_bool(rnd, n, mask, *, edge, **_): + return _gen_bool(rnd, n, mask, edge=edge) + +def _vg_signed(corpus, bound): + def fn(rnd, n, mask, *, edge, **_): + return _gen_signed_int(rnd, n, mask, edge=edge, corpus=corpus, bound=bound) + return fn + +def _vg_unsigned(corpus, ubound): + def fn(rnd, n, mask, *, edge, **_): + return _gen_unsigned_int(rnd, n, mask, edge=edge, corpus=corpus, ubound=ubound) + return fn + +def _vg_float(dtype: str): + def fn(rnd, n, mask, *, edge, **_): + return _gen_float(rnd, n, mask, edge=edge, dtype=dtype) + return fn + +def _vg_string(rnd, n, mask, *, edge, **_): + return _gen_string(rnd, n, mask, edge=edge) + +def _vg_binary(rnd, n, mask, *, edge, **_): + return _gen_binary(rnd, n, mask, edge=edge) + +def _vg_fixed_bytes(width): + def fn(rnd, n, mask, *, edge, **_): + return _gen_fixed_bytes(rnd, n, mask, edge=edge, width=width) + return fn + +def _vg_uuid_lo_hi(rnd, n, mask, *, edge, **_): + return _gen_uuid_lo_hi(rnd, n, mask, edge=edge) + +def _vg_char(rnd, n, mask, *, edge, **_): + return _gen_char_codepoints(rnd, n, mask, edge=edge) + +def _vg_ipv4(rnd, n, mask, *, edge, **_): + return _gen_ipv4(rnd, n, mask, edge=edge) + +def _vg_date(rnd, n, mask, *, edge, **_): + return _gen_date_ms(rnd, n, mask, edge=edge) + +def _vg_ts_us(rnd, n, mask, *, edge, base=1_700_000_000_000_000, **_): + return _gen_ts_us(rnd, n, mask, edge=edge, base=base) + +def _vg_ts_ns(rnd, n, mask, *, edge, base=1_700_000_000_000_000_000, **_): + return _gen_ts_ns(rnd, n, mask, edge=edge, base=base) + +def _vg_symbol(rnd, n, mask, *, edge, **_): + return _gen_symbol(rnd, n, mask, edge=edge) + +def _vg_geohash(rnd, n, mask, *, edge, bits, **_): + return _gen_geohash(rnd, n, mask, edge=edge, bits=bits) + +def _vg_decimal(rnd, n, mask, *, edge, bound, **_): + return 
def _ty_decimal64(p):
    """Return the Arrow type for a 64-bit decimal column.

    Uses `pa.decimal64` when this pyarrow build exposes it and
    `pa.decimal128` otherwise. Precision comes from `p["precision"]`
    (default 18) and scale from `p["scale"]`.
    """
    make_type = getattr(pa, "decimal64", None)
    if make_type is None:
        make_type = pa.decimal128
    return make_type(p.get("precision", 18), p["scale"])
b"arrow.uuid"} + +def _md_symbol(p): + return {b"questdb.symbol": b"true"} + +def _md_geohash(p): + return {b"questdb.geohash_bits": str(p["bits"]).encode()} + +def _geohash_arrow_dtype_for_bits(bits: int) -> pa.DataType: + if bits <= 7: + return pa.int8() + if bits <= 15: + return pa.int16() + if bits <= 31: + return pa.int32() + return pa.int64() + +def _make_geohash_spec(bits: int) -> KindSpec: + arrow_dtype = _geohash_arrow_dtype_for_bits(bits) + name = f"geohash{bits}" + return KindSpec( + name=name, + ddl=f"GEOHASH({bits}b)", + arrow_type_factory=_ty_geohash_int, + metadata_factory=_md_geohash, + value_generator=_vg_geohash, + arrow_array_builder=_arr_geohash_int, + ilp_setter=_set_geohash, + compare_fn=_cmp_geohash_sentinel, + params={"bits": bits, "arrow_dtype": arrow_dtype}, + ) + +def _build_kind_registry() -> Dict[str, KindSpec]: + reg: Dict[str, KindSpec] = {} + + reg["boolean"] = KindSpec( + "boolean", "BOOLEAN", + _ty_bool, _md_none, + _vg_bool, _arr_bool, _set_bool, + supports_server_null=False, + ) + reg["byte"] = KindSpec( + "byte", "BYTE", + _ty_int8, _md_none, + _vg_signed(EDGE_INTS_I8, 100), _arr_int, _set_i8, + supports_server_null=False, + params={"arrow_dtype": pa.int8()}, + ) + reg["short"] = KindSpec( + "short", "SHORT", + _ty_int16, _md_none, + _vg_signed(EDGE_INTS_I16, 10_000), _arr_int, _set_i16, + supports_server_null=False, + params={"arrow_dtype": pa.int16()}, + ) + reg["int"] = KindSpec( + "int", "INT", + _ty_int32, _md_none, + _vg_signed(EDGE_INTS_I32, 1_000_000), _arr_int, _set_i32, + compare_fn=_cmp_int_sentinel, + params={"arrow_dtype": pa.int32()}, + ) + reg["long"] = KindSpec( + "long", "LONG", + _ty_int64, _md_none, + _vg_signed(EDGE_INTS_I64, 1_000_000_000), _arr_int, _set_i64, + compare_fn=_cmp_long_sentinel, + params={"arrow_dtype": pa.int64()}, + ) + reg["float"] = KindSpec( + "float", "FLOAT", + _ty_float32, _md_none, + _vg_float("float"), _arr_float, _set_f32, + compare_fn=_cmp_float32, + params={"arrow_dtype": 
pa.float32()}, + ) + reg["double"] = KindSpec( + "double", "DOUBLE", + _ty_float64, _md_none, + _vg_float("double"), _arr_float, _set_f64, + compare_fn=_cmp_float, + params={"arrow_dtype": pa.float64()}, + ) + reg["char"] = KindSpec( + "char", "CHAR", + _ty_uint16, _md_char, + _vg_char, _arr_uint16, _set_char, + supports_server_null=False, + ) + reg["ipv4"] = KindSpec( + "ipv4", "IPV4", + _ty_uint32, _md_ipv4, + _vg_ipv4, _arr_uint32, _set_ipv4, + compare_fn=_cmp_ipv4_sentinel, + ) + reg["varchar"] = KindSpec( + "varchar", "VARCHAR", + _ty_string, _md_none, + _vg_string, _arr_string, _set_varchar, + ) + reg["binary"] = KindSpec( + "binary", "BINARY", + _ty_binary, _md_none, + _vg_binary, _arr_binary, _set_binary, + ) + reg["symbol"] = KindSpec( + "symbol", "SYMBOL", + _ty_symbol, _md_symbol, + _vg_symbol, _arr_symbol, _set_symbol, + compare_fn=_cmp_symbol, + ) + reg["uuid"] = KindSpec( + "uuid", "UUID", + _ty_fsb16, _md_uuid, + _vg_uuid_lo_hi, _arr_uuid_lo_hi, _set_uuid, + compare_fn=_cmp_uuid_tuple, + params={"width": 16}, + ) + reg["long256"] = KindSpec( + "long256", "LONG256", + _ty_fsb32, _md_none, + _vg_fixed_bytes(32), _arr_fsb, _set_long256, + compare_fn=_cmp_uuid_bytes, + params={"width": 32}, + ) + reg["date"] = KindSpec( + "date", "DATE", + _ty_timestamp, _md_none, + _vg_date, _arr_timestamp, _set_date, + compare_fn=_cmp_timestamp, + params={"unit": "ms"}, + ) + reg["timestamp"] = KindSpec( + "timestamp", "TIMESTAMP", + _ty_timestamp, _md_none, + _vg_ts_us, _arr_timestamp, _set_ts_us, + compare_fn=_cmp_timestamp, + params={"unit": "us"}, + ) + reg["timestamp_ns"] = KindSpec( + "timestamp_ns", "TIMESTAMP_NS", + _ty_timestamp, _md_none, + _vg_ts_ns, _arr_timestamp, _set_ts_ns, + compare_fn=_cmp_timestamp, + params={"unit": "ns"}, + ) + for bits in EDGE_GEOHASH_BITS: + spec = _make_geohash_spec(bits) + reg[spec.name] = spec + reg["decimal64"] = KindSpec( + "decimal64", "DECIMAL(18,4)", + _ty_decimal64, _md_none, + _vg_decimal, _arr_decimal64, 
def build_record_batch(
    kinds: List[Tuple[str, KindSpec]],
    rnd: Rng,
    n: int,
    *,
    null_mode: str = "valid",  # "valid" | "partial" | "all_null" | "edge"
    null_p: float = 0.2,
    ts_base_us: int = 1_700_000_000_000_000,
) -> pa.RecordBatch:
    """Build an ``n``-row RecordBatch with one column per entry in ``kinds``.

    For every ``(column name, spec)`` pair a validity mask is chosen from
    ``null_mode``, values are generated by the spec and converted to an Arrow
    array. A trailing non-nullable ``ts`` column (microseconds, UTC) holding
    ``ts_base_us, ts_base_us + 1, ...`` is always appended.

    Raises:
        ValueError: if ``null_mode`` is not one of the four known modes
            (detected while processing the first column).
    """
    columns = []
    schema_fields = []
    for col_name, spec in kinds:
        # Only "edge" asks the generator for edge-case values; it uses a
        # fully valid mask just like "valid".
        edge = null_mode == "edge"
        if null_mode in ("valid", "edge"):
            mask = all_valid_mask(n)
        elif null_mode == "partial":
            mask = partial_null_mask(rnd, n, null_p=null_p)
        elif null_mode == "all_null":
            mask = all_null_mask(n)
        else:
            raise ValueError(f"unknown null_mode {null_mode!r}")
        generated = spec.generate_values(rnd, n, mask, edge=edge)
        columns.append(spec.build_arrow_array(generated))
        schema_fields.append(spec.make_field(col_name))

    ts_type = pa.timestamp("us", tz="UTC")
    columns.append(pa.array(list(range(ts_base_us, ts_base_us + n)), type=ts_type))
    schema_fields.append(pa.field("ts", ts_type, nullable=False))
    return pa.RecordBatch.from_arrays(columns, schema=pa.schema(schema_fields))
str: + return f"seed={self._seed_label}{(' ' + extra) if extra else ''}" diff --git a/system_test/arrow_ingress_fuzz.py b/system_test/arrow_ingress_fuzz.py new file mode 100644 index 00000000..ca64c546 --- /dev/null +++ b/system_test/arrow_ingress_fuzz.py @@ -0,0 +1,908 @@ +from __future__ import annotations + +import base64 +import ctypes +import datetime as _dt +import os +import sys +import unittest +import uuid as _uuid_mod +from decimal import Decimal +from typing import Any, Callable, Dict, List, Optional, Tuple + +import pyarrow as pa + +import arrow_fuzz_common as afc +from arrow_fuzz_common import KIND_REGISTRY, KindSpec +from arrow_ffi import ( + ArrowSenderError, + SenderErrorCode, +) +from questdb_line_sender import Buffer, Sender + +_FUZZ_ITERATIONS = int(os.environ.get("ARROW_INGRESS_FUZZ_ITERATIONS", "6")) +_ROWS_PER_BATCH = int(os.environ.get("ARROW_INGRESS_FUZZ_ROWS", "12")) + +def _epoch_us() -> _dt.datetime: + return _dt.datetime(1970, 1, 1, tzinfo=_dt.timezone.utc) + +def _iso_to_us(s: str) -> int: + """ISO datetime string → microseconds since epoch (handles ns suffix).""" + s = s.rstrip("Z") + if "." in s: + head, frac = s.split(".", 1) + if "T" not in head: + head = head.replace(" ", "T") + frac = frac.ljust(6, "0") + us = int(frac[:6]) + ns_tail = frac[6:] + if ns_tail and any(c != "0" for c in ns_tail): + us += int(round(int(ns_tail.ljust(3, "0")[:3]) / 1000.0)) + try: + base_dt = _dt.datetime.fromisoformat(head).replace( + tzinfo=_dt.timezone.utc + ) + except ValueError: + return -1 + return int((base_dt - _epoch_us()).total_seconds() * 1_000_000) + us + head = s.replace(" ", "T") if "T" not in s else s + try: + base_dt = _dt.datetime.fromisoformat(head).replace( + tzinfo=_dt.timezone.utc + ) + except ValueError: + return -1 + return int((base_dt - _epoch_us()).total_seconds() * 1_000_000) + +def _iso_to_ns(s: str) -> int: + s = s.rstrip("Z") + if "." 
in s: + head, frac = s.split(".", 1) + if "T" not in head: + head = head.replace(" ", "T") + frac = frac.ljust(9, "0")[:9] + ns_part = int(frac) + try: + base_dt = _dt.datetime.fromisoformat(head).replace( + tzinfo=_dt.timezone.utc + ) + except ValueError: + return -1 + return int((base_dt - _epoch_us()).total_seconds() * 1_000_000_000) + ns_part + head = s.replace(" ", "T") if "T" not in s else s + try: + base_dt = _dt.datetime.fromisoformat(head).replace( + tzinfo=_dt.timezone.utc + ) + except ValueError: + return -1 + return int((base_dt - _epoch_us()).total_seconds() * 1_000_000_000) + +def _iso_to_ms(s: str) -> int: + return _iso_to_us(s) // 1_000 + +_INT_NULL_SENTINEL = -(1 << 31) +_LONG_NULL_SENTINEL = -(1 << 63) +_IPV4_NULL_SENTINEL = 0 + + +def _cmp_int(expected, actual) -> bool: + if expected is None or actual is None or actual == "": + return expected is None and (actual is None or actual == "") + return int(expected) == int(actual) + + +def _cmp_int32(expected, actual) -> bool: + if expected == _INT_NULL_SENTINEL: + expected = None + return _cmp_int(expected, actual) + + +def _cmp_int64(expected, actual) -> bool: + if expected == _LONG_NULL_SENTINEL: + expected = None + return _cmp_int(expected, actual) + + +def _cmp_ipv4_with_sentinel(expected, actual) -> bool: + if expected == _IPV4_NULL_SENTINEL: + expected = None + if expected is None: + return actual is None or actual == "" + if isinstance(actual, str): + parts = list(int(expected).to_bytes(4, "big")) + return actual == ".".join(str(p) for p in parts) + return int(actual) == int(expected) + + +_GEOHASH_BASE32 = "0123456789bcdefghjkmnpqrstuvwxyz" + + +def _geohash_decode_server_str(s: str, bits: int) -> int: + if bits % 5 == 0: + result = 0 + for c in s: + try: + result = (result << 5) | _GEOHASH_BASE32.index(c) + except ValueError: + return -1 + return result + result = 0 + for c in s: + if c not in ("0", "1"): + return -1 + result = (result << 1) | (1 if c == "1" else 0) + return result + + +def 
_cmp_geohash_with_sentinel(bits: int): + storage_w = 8 if bits <= 7 else 16 if bits <= 15 else 32 if bits <= 32 else 64 + storage_sentinel = (1 << storage_w) - 1 + def fn(expected, actual) -> bool: + if expected == storage_sentinel: + expected = None + if expected is None: + return actual is None or actual == "" + if actual is None or actual == "": + return False + if isinstance(actual, str): + decoded = _geohash_decode_server_str(actual, bits) + return decoded == int(expected) + return int(actual) == int(expected) + return fn + +def _is_null_or_special(v): + import math + if v is None or v == "": + return True + try: + f = float(v) + return math.isnan(f) or math.isinf(f) + except (TypeError, ValueError): + return False + + +def _cmp_float(expected, actual) -> bool: + if _is_null_or_special(expected) and _is_null_or_special(actual): + return True + if _is_null_or_special(expected) or _is_null_or_special(actual): + return False + return float(expected) == float(actual) + + +def _cmp_float32(expected, actual) -> bool: + import struct, math + if _is_null_or_special(expected) and _is_null_or_special(actual): + return True + if _is_null_or_special(expected) or _is_null_or_special(actual): + return False + def _f32(v): + try: + return struct.unpack(" bool: + if expected is None: + return actual is None or actual == "" + return str(expected) == str(actual) + +def _cmp_bool(expected, actual) -> bool: + if expected is None: + return actual is None or actual == "" + if isinstance(actual, bool): + return bool(expected) == actual + if isinstance(actual, str): + return ("true" if expected else "false") == actual.lower() + return bool(expected) == bool(actual) + +def _cmp_binary(expected, actual) -> bool: + if expected is None: + return actual is None or actual == "" or actual == [] + if isinstance(actual, list): + if not actual: + return True + try: + return bytes(expected) == bytes(actual) + except (TypeError, ValueError): + return False + if isinstance(actual, str): + if 
actual.startswith("0x"): + try: + return bytes(expected) == bytes.fromhex(actual[2:]) + except ValueError: + return False + try: + return bytes(expected) == base64.b64decode(actual) + except Exception: + return False + return bytes(expected) == bytes(actual) + +def _cmp_uuid(expected, actual) -> bool: + if expected is None: + return actual is None or actual == "" + lo, hi = expected + expected_int = (hi << 64) | lo + if isinstance(actual, str): + try: + return _uuid_mod.UUID(actual).int == expected_int + except Exception: + return False + if isinstance(actual, (bytes, bytearray)): + return bytes(actual) == lo.to_bytes(8, "little") + hi.to_bytes(8, "little") + return False + +def _cmp_long256(expected, actual) -> bool: + if expected is None: + return actual is None or actual == "" + expected = bytes(expected) + if isinstance(actual, str): + if actual.startswith("0x"): + try: + actual_bytes = bytes.fromhex(actual[2:].zfill(64)) + except ValueError: + return False + return actual_bytes == expected[::-1] or actual_bytes == expected + return False + +def _cmp_decimal(expected, actual, scale: int) -> bool: + if expected is None: + return actual is None or actual == "" + if actual is None or actual == "": + return False + try: + a = Decimal(str(actual)).normalize() + e = (Decimal(int(expected)).scaleb(-scale)).normalize() + return a == e + except Exception: + return False + +def _cmp_date_ms(expected, actual) -> bool: + if expected is None: + return actual is None or actual == "" + if isinstance(actual, str): + return _iso_to_ms(actual) == int(expected) + return int(expected) == int(actual) + +def _cmp_timestamp_us(expected, actual) -> bool: + if expected is None: + return actual is None or actual == "" + if isinstance(actual, str): + return _iso_to_us(actual) == int(expected) + return int(expected) == int(actual) + +def _cmp_timestamp_ns(expected, actual) -> bool: + if expected is None: + return actual is None or actual == "" + if isinstance(actual, str): + return 
_iso_to_ns(actual) == int(expected) + return int(expected) == int(actual) + +def _cmp_char_codepoint(expected, actual) -> bool: + if expected is None: + return actual is None or actual == "" + if isinstance(actual, str): + if len(actual) == 0: + return expected == 0 + return ord(actual) == int(expected) + return int(actual) == int(expected) + +def _cmp_ipv4(expected, actual) -> bool: + if expected is None: + return actual is None or actual == "" + if isinstance(actual, str): + parts = list(int(expected).to_bytes(4, "big")) + return actual == ".".join(str(p) for p in parts) + return int(actual) == int(expected) + +def _cmp_passthrough(expected, actual) -> bool: + return True + +def _cmp_array(expected, actual) -> bool: + """Best-effort: shape and non-null status; full string parsing is brittle.""" + if expected is None: + return actual is None or actual == "" + return actual is not None and str(actual) != "" + +# kind name → (expected_value, actual_json_cell) -> bool +_INGRESS_ORACLES: Dict[str, Callable[[Any, Any], bool]] = { + "boolean": _cmp_bool, + "byte": _cmp_int, "short": _cmp_int, + "int": _cmp_int32, "long": _cmp_int64, + "float": _cmp_float32, "double": _cmp_float, + "char": _cmp_char_codepoint, + "ipv4": _cmp_ipv4_with_sentinel, + "varchar": _cmp_str, + "binary": _cmp_binary, + "symbol": _cmp_str, + "uuid": _cmp_uuid, + "long256": _cmp_long256, + "date": _cmp_date_ms, + "timestamp": _cmp_timestamp_us, + "timestamp_ns": _cmp_timestamp_ns, + "geohash1": _cmp_geohash_with_sentinel(1), + "geohash5": _cmp_geohash_with_sentinel(5), + "geohash32": _cmp_geohash_with_sentinel(32), + "geohash60": _cmp_geohash_with_sentinel(60), + "decimal64": lambda e, a: _cmp_decimal(e, a, scale=4), + "decimal128": lambda e, a: _cmp_decimal(e, a, scale=10), + "decimal256": lambda e, a: _cmp_decimal(e, a, scale=20), + "double_array_1d": _cmp_array, + "double_array_2d": _cmp_array, + "double_array_3d": _cmp_array, + "long_array_1d": _cmp_array, +} + +def _build_record_batch_with_ts( 
+ rnd: afc.Rng, n: int, kinds: List[Tuple[str, KindSpec]], + *, null_mode: str = "valid", null_p: float = 0.3, + ts_base_us: int = 1_700_000_000_000_000, + include_ts: bool = True, +) -> Tuple[pa.RecordBatch, Dict[str, List[Any]]]: + arrays: List[pa.Array] = [] + fields: List[pa.Field] = [] + values_per_col: Dict[str, List[Any]] = {} + for col_name, spec in kinds: + if null_mode == "valid": + mask = afc.all_valid_mask(n); edge = False + elif null_mode == "partial": + mask = afc.partial_null_mask(rnd, n, null_p=null_p); edge = False + elif null_mode == "all_null": + mask = afc.all_null_mask(n); edge = False + elif null_mode == "edge": + mask = afc.all_valid_mask(n); edge = True + else: + raise ValueError(null_mode) + values = spec.generate_values(rnd, n, mask, edge=edge) + values_per_col[col_name] = values + arrays.append(spec.build_arrow_array(values)) + fields.append(spec.make_field(col_name)) + if include_ts: + ts_values = [ts_base_us + i for i in range(n)] + arrays.append(pa.array(ts_values, type=pa.timestamp("us", tz="UTC"))) + fields.append(pa.field("ts", pa.timestamp("us", tz="UTC"), nullable=False)) + values_per_col["ts"] = ts_values + return pa.RecordBatch.from_arrays(arrays, schema=pa.schema(fields)), values_per_col + +def _read_back_json(fixture, table: str, kinds: List[Tuple[str, KindSpec]]) -> Tuple[list, list]: + cols_sql = ", ".join(f'"{c}"' for c, _ in kinds) + resp = fixture.http_sql_query( + f"select {cols_sql} from '{table}' order by ts" + ) + return resp["columns"], resp["dataset"] + + +def _read_back_arrow_cells(fixture, table: str, kinds: List[Tuple[str, KindSpec]]) -> list: + """Read column 0 cells back via Arrow C ABI (used for kinds that /exec + JSON cannot represent correctly, e.g. 
def _exercise_kind(self, kind_name: str) -> None:
    """Round-trip one registered kind through Arrow ingest under each null mode.

    Skips when the kind's spec does not support Arrow ingest. Kinds without
    server-side NULL support only run the "valid" and "edge" modes. Each mode
    runs as a sub-test against its own fresh table; BINARY is read back via
    Arrow, every other kind via the JSON query endpoint.
    """
    spec = KIND_REGISTRY[kind_name]
    if not spec.supports_arrow_ingest:
        self.skipTest(f"kind {kind_name!r} not supported by Arrow ingest")

    if spec.supports_server_null:
        modes = ["valid", "partial", "all_null", "edge"]
    else:
        modes = ["valid", "edge"]

    col_name = f"c_{kind_name}"
    for null_mode in modes:
        with self.subTest(null_mode=null_mode):
            table = self.fresh_table(f"arrow_in_{kind_name}_{null_mode}")
            kinds = [(col_name, spec)]
            afc.create_table_from_kinds(self._fixture, table, kinds)
            rb, vpc = _build_record_batch_with_ts(
                self._master_rng, _ROWS_PER_BATCH, kinds, null_mode=null_mode,
            )
            afc.ingest_via_arrow(self._fixture, table, rb)
            afc.wait_for_rows(self._fixture, table, rb.num_rows)
            expected_col = vpc[col_name]
            if kind_name != "binary":
                _columns, dataset = _read_back_json(self._fixture, table, kinds)
                self._assert_dataset_matches(
                    kind_name, spec, expected_col, dataset, null_mode,
                )
            else:
                cells = _read_back_arrow_cells(self._fixture, table, kinds)
                self._assert_arrow_binary_matches(
                    kind_name, expected_col, cells, null_mode,
                )
def _make(name):
    """Create the ``test_kind_<name>`` method that runs ``_exercise_kind(name)``."""
    method_name = f"test_kind_{name}"

    def test(self):
        self._exercise_kind(name)

    # Give the generated method a readable name in test reports.
    test.__name__ = method_name
    test.__qualname__ = f"TestArrowIngressPerKind.{method_name}"
    return test


# Attach one generated test method per registered kind.
for _kind_name in list(KIND_REGISTRY):
    setattr(TestArrowIngressPerKind, f"test_kind_{_kind_name}", _make(_kind_name))
def test_dts_default(self):
    """Ingest the small mixed batch without a ``ts`` column and ``ts_col=None``.

    The batch from ``_build_small_batch`` is rebuilt with every field except
    ``ts``, ingested into a fresh table, and the test waits for all rows.
    """
    batch, _kinds = self._build_small_batch()
    kept = [
        (field, batch.column(idx))
        for idx, field in enumerate(batch.schema)
        if field.name != "ts"
    ]
    rb_no_ts = pa.RecordBatch.from_arrays(
        [column for _field, column in kept],
        schema=pa.schema([field for field, _column in kept]),
    )
    table = self.fresh_table("arrow_in_dts_default")
    afc.ingest_via_arrow(self._fixture, table, rb_no_ts, ts_col=None)
    afc.wait_for_rows(self._fixture, table, rb_no_ts.num_rows)
def test_err_list_non_float_leaf(self):
    """A list column with a string leaf is rejected as an unsupported kind.

    Expects ``SenderErrorCode.ARROW_UNSUPPORTED_COLUMN_KIND`` from the ingest
    call made by ``_expect_code``.
    """
    n = 4
    # An int64 list is supported (as LONG_ARRAY), so a non-numeric leaf is
    # needed to reach the rejection path.
    str_list_type = pa.list_(pa.string())
    ts_type = pa.timestamp("us", tz="UTC")
    c_str_list = pa.array(
        [["a"], ["b", "c"], [], ["d"]],
        type=str_list_type,
    )
    ts_arr = pa.array(
        [1_700_000_000_000_000 + i for i in range(n)],
        type=ts_type,
    )
    schema = pa.schema([
        pa.field("c_str_list", str_list_type, nullable=True),
        pa.field("ts", ts_type, nullable=False),
    ])
    rb = pa.RecordBatch.from_arrays([c_str_list, ts_arr], schema=schema)
    self._expect_code(rb, SenderErrorCode.ARROW_UNSUPPORTED_COLUMN_KIND)
def test_extra_float16_widens_to_double(self):
    """Ingest an Arrow Float16 column and wait for the rows to land.

    Skipped when numpy is unavailable, since the half-float array is built
    from a numpy array.
    """
    # NOTE(review): the test name says "double" but the target column is
    # created as FLOAT; confirm which is intended.
    try:
        import numpy
    except ImportError:
        self.skipTest("numpy required to build Float16 arrays via pyarrow")
    half_values = numpy.array([1.5, -2.5, 0.0, 1.0], dtype=numpy.float16)
    arr = pa.array(half_values)
    self.assertEqual(arr.type, pa.float16())
    self._ingest_one_col(self.fresh_table("arrow_extra_f16"), "FLOAT", "c", arr)
def test_extra_decimal32_widens_to_decimal64(self):
    """Ingest an Arrow Decimal32(9, 2) column into a DECIMAL(18, 2) column.

    Skipped when the installed pyarrow has no ``decimal32`` factory, instead
    of failing with AttributeError.
    """
    # arrow_fuzz_common._ty_decimal64 probes pa.decimal64 with getattr for
    # the same reason.
    decimal32 = getattr(pa, "decimal32", None)
    if decimal32 is None:
        self.skipTest("pyarrow.decimal32 is not available in this pyarrow build")
    arr = pa.array([Decimal("1.23"), Decimal("-0.99"),
                    Decimal("99.99"), None],
                   type=decimal32(9, 2))
    table = self.fresh_table("arrow_extra_d32")
    self._ingest_one_col(table, "DECIMAL(18, 2)", "c", arr)
self._expect_unsupported(arr) + + def test_reject_struct(self): + arr = pa.StructArray.from_arrays( + [pa.array([1, 2], type=pa.int32()), + pa.array(["a", "b"], type=pa.string())], + names=["x", "y"], + ) + self._expect_unsupported(arr) + + def test_reject_dense_union(self): + arr = pa.UnionArray.from_dense( + pa.array([0, 1, 0], type=pa.int8()), + pa.array([0, 0, 1], type=pa.int32()), + [pa.array([1, 2]), pa.array(["x"])], + ["i", "s"], + ) + self._expect_unsupported(arr) + + def test_reject_run_end_encoded(self): + arr = pa.RunEndEncodedArray.from_arrays([3], pa.array([42])) + self._expect_unsupported(arr) + + def test_reject_fixed_size_binary_non_uuid_width(self): + arr = pa.array([b"12345678"], type=pa.binary(8)) + self._expect_unsupported(arr) + + def test_reject_null(self): + arr = pa.array([None, None, None], type=pa.null()) + self._expect_unsupported(arr) + + +class TestArrowIngressMultiBatch(afc.ArrowFuzzBase): + """Multiple `buffer_append_arrow` calls on one Buffer before flush.""" + + SUITE_LABEL = "arrow_ingress_multi_batch" + + def _ingest_two_batches(self, table: str, rb1: pa.RecordBatch, + rb2: pa.RecordBatch) -> None: + from arrow_ffi import ( + buffer_append_arrow, pyarrow_export_record_batch, + ) + from questdb_line_sender import _table_name as _c_table_name + with afc.existing_sender(self._fixture) as sender: + buf = Buffer.from_sender(sender._impl) + for rb in (rb1, rb2): + table_name = _c_table_name(table) + arr, sch = pyarrow_export_record_batch(rb) + try: + buffer_append_arrow( + buf._impl, table_name, + ctypes.byref(arr), ctypes.byref(sch), + ts_column_name=b"ts", + ) + finally: + if sch.release: + sch.release(ctypes.byref(sch)) + sender.flush(buf) + + def test_identical_schema_two_batches_accumulate(self): + table = self.fresh_table("arrow_in_mb_same") + kinds = [("c_int", KIND_REGISTRY["int"])] + rb1, _ = _build_record_batch_with_ts( + self._master_rng, 5, kinds, null_mode="valid", + ) + rb2, _ = _build_record_batch_with_ts( + 
self._master_rng, 7, kinds, null_mode="valid", + ts_base_us=1_700_000_010_000_000, + ) + self._ingest_two_batches(table, rb1, rb2) + afc.wait_for_rows(self._fixture, table, 12) + + def test_schema_grows_new_column_in_batch2_rejected(self): + # QWP/WS Arrow ingest requires consistent column set per buffer: + # adding a column in batch 2 leaves batch-1 columns short of rows + # and is rejected client-side. + table = self.fresh_table("arrow_in_mb_grow") + kinds1 = [("c_int", KIND_REGISTRY["int"])] + rb1, _ = _build_record_batch_with_ts( + self._master_rng, 4, kinds1, null_mode="valid", + ) + kinds2 = [ + ("c_int", KIND_REGISTRY["int"]), + ("c_sym", KIND_REGISTRY["symbol"]), + ] + rb2, _ = _build_record_batch_with_ts( + self._master_rng, 4, kinds2, null_mode="valid", + ts_base_us=1_700_000_010_000_000, + ) + with self.assertRaises(ArrowSenderError) as cm: + self._ingest_two_batches(table, rb1, rb2) + self.assertEqual(cm.exception.code, SenderErrorCode.INVALID_API_CALL, + self.label(f"msg={cm.exception}")) + + def test_schema_drops_column_in_batch2_rejected(self): + table = self.fresh_table("arrow_in_mb_drop") + kinds_a = [ + ("c_int", KIND_REGISTRY["int"]), + ("c_sym", KIND_REGISTRY["symbol"]), + ] + kinds_b = [("c_int", KIND_REGISTRY["int"])] + rb1, _ = _build_record_batch_with_ts( + self._master_rng, 4, kinds_a, null_mode="valid", + ) + rb2, _ = _build_record_batch_with_ts( + self._master_rng, 4, kinds_b, null_mode="valid", + ts_base_us=1_700_000_010_000_000, + ) + with self.assertRaises(ArrowSenderError) as cm: + self._ingest_two_batches(table, rb1, rb2) + self.assertEqual(cm.exception.code, SenderErrorCode.INVALID_API_CALL, + self.label(f"msg={cm.exception}")) + +class TestArrowIngressFuzz(afc.ArrowFuzzBase): + """Random subsets of kinds × random null modes × random DTS variants.""" + + SUITE_LABEL = "arrow_ingress_fuzz" + + def test_random_arrow_ingest(self): + full_pool = [ + (n, s) for n, s in KIND_REGISTRY.items() + if s.supports_arrow_ingest + ] + 
nullable_pool = [(n, s) for n, s in full_pool if s.supports_server_null] + for it in range(_FUZZ_ITERATIONS): + with self.subTest(iter=it): + null_mode = ("valid", "partial", "all_null")[it % 3] + pool = full_pool if null_mode == "valid" else nullable_pool + self._master_rng.shuffle(pool) + picked = pool[: 4 + (it % 4)] + kinds = [(f"c{i}_{n}", s) for i, (n, s) in enumerate(picked)] + rb, _vpc = _build_record_batch_with_ts( + self._master_rng, _ROWS_PER_BATCH, kinds, + null_mode=null_mode, + ) + table = self.fresh_table(f"arrow_in_fuzz_{it}") + afc.create_table_from_kinds(self._fixture, table, kinds) + afc.ingest_via_arrow(self._fixture, table, rb) + afc.wait_for_rows(self._fixture, table, rb.num_rows) + +def register(loop_registry): + loop_registry.append(TestArrowIngressPerKind) + loop_registry.append(TestArrowIngressDesignatedTs) + loop_registry.append(TestArrowIngressErrors) + loop_registry.append(TestArrowIngressExtraTypes) + loop_registry.append(TestArrowIngressUnsupportedTypes) + loop_registry.append(TestArrowIngressMultiBatch) + loop_registry.append(TestArrowIngressFuzz) + +if __name__ == "__main__": + print( + "Note: arrow_ingress_fuzz tests require a live QuestDB fixture. 
" + "Run via `python test.py run --existing HOST:ILP:HTTP " + "TestArrowIngressPerKind` (or any of the other arrow ingress classes).", + file=sys.stderr, + ) + unittest.main() diff --git a/system_test/arrow_polars_fuzz.py b/system_test/arrow_polars_fuzz.py new file mode 100644 index 00000000..fec0cc36 --- /dev/null +++ b/system_test/arrow_polars_fuzz.py @@ -0,0 +1,272 @@ +from __future__ import annotations + +import os +import unittest +from typing import Dict, List, Tuple + +import pyarrow as pa + +import arrow_fuzz_common as afc +from arrow_fuzz_common import KIND_REGISTRY, KindSpec + +_FUZZ_ITERATIONS = int(os.environ.get("ARROW_POLARS_FUZZ_ITERATIONS", "6")) +_ROWS_PER_BATCH = int(os.environ.get("ARROW_POLARS_FUZZ_ROWS", "10")) + + +def _require_polars(testcase: unittest.TestCase): + try: + import polars as pl + except ImportError: + testcase.skipTest("polars is required for the Arrow-Polars round-trip tests") + + +def _polars_round_trip_capable(spec: KindSpec) -> bool: + if not (spec.round_trip_capable + and spec.supports_arrow_ingest + and spec.supports_arrow_egress): + return False + if spec.metadata(): + return False + if spec.name == "long256": + return False + if spec.name in ("decimal64", "decimal128", "decimal256"): + return False + if spec.name.startswith("double_array") or spec.name == "long_array_1d": + return False + return True + + +def _polars_round_trip_kinds() -> List[Tuple[str, KindSpec]]: + return [(n, s) for n, s in KIND_REGISTRY.items() if _polars_round_trip_capable(s)] + + +def _build_batch( + rnd: afc.Rng, n: int, kinds: List[Tuple[str, KindSpec]], + *, null_mode: str, ts_base_us: int, +) -> Tuple[pa.RecordBatch, Dict[str, list]]: + arrays: List[pa.Array] = [] + fields: List[pa.Field] = [] + vpc: Dict[str, list] = {} + for col_name, spec in kinds: + if null_mode == "valid": + mask = afc.all_valid_mask(n) + edge = False + elif null_mode == "partial": + mask = afc.partial_null_mask(rnd, n, null_p=0.3) + edge = False + elif null_mode == 
"all_null": + mask = afc.all_null_mask(n) + edge = False + elif null_mode == "edge": + mask = afc.all_valid_mask(n) + edge = True + else: + raise ValueError(null_mode) + vs = spec.generate_values(rnd, n, mask, edge=edge) + vpc[col_name] = vs + arrays.append(spec.build_arrow_array(vs)) + fields.append(spec.make_field(col_name)) + ts_arr = pa.array( + [ts_base_us + i for i in range(n)], + type=pa.timestamp("us", tz="UTC"), + ) + arrays.append(ts_arr) + fields.append(pa.field("ts", pa.timestamp("us", tz="UTC"), nullable=False)) + return pa.RecordBatch.from_arrays(arrays, schema=pa.schema(fields)), vpc + + +def _rb_to_polars(rb: pa.RecordBatch): + import polars as pl + return pl.from_arrow(rb) + + +def _polars_to_rb(df) -> pa.RecordBatch: + arrow_obj = df.to_arrow() + if isinstance(arrow_obj, pa.Table): + batches = arrow_obj.to_batches() + if len(batches) != 1: + raise AssertionError( + f"polars.to_arrow() produced {len(batches)} batches, expected 1" + ) + return batches[0] + return arrow_obj + + +def _read_back(fixture, table: str, kinds: List[Tuple[str, KindSpec]]) -> pa.RecordBatch: + cols_sql = ", ".join(f'"{c}"' for c, _ in kinds) + sql = f"select {cols_sql} from '{table}' order by ts" + return afc.read_back_arrow_concat(fixture, sql) + + +def _scalar_to_python(scalar, spec: KindSpec): + if scalar is None: + return None + if spec.name in ("timestamp", "timestamp_ns", "date") and hasattr(scalar, "value"): + if not scalar.is_valid: + return None + return scalar.value + try: + return scalar.as_py() + except (ValueError, OverflowError): + return getattr(scalar, "value", None) + + +def _canonicalise_value(value, spec: KindSpec): + if value is None: + return None + import datetime as _dt + from decimal import Decimal + if isinstance(value, _dt.datetime): + unit = spec.params.get("unit", "us") + divisor = {"s": 1, "ms": 1_000, "us": 1_000_000, "ns": 1_000_000_000}[unit] + if value.tzinfo is None: + value = value.replace(tzinfo=_dt.timezone.utc) + epoch = 
_dt.datetime(1970, 1, 1, tzinfo=_dt.timezone.utc) + return int(round((value - epoch).total_seconds() * divisor)) + if isinstance(value, Decimal): + scale = spec.params.get("scale", 0) + return int(value.scaleb(scale)) + return value + + +class TestArrowPolarsRoundTripPerKind(afc.ArrowFuzzBase): + SUITE_LABEL = "arrow_polars_round_trip_per_kind" + + def setUp(self) -> None: + super().setUp() + _require_polars(self) + + def _exercise_kind(self, kind_name: str) -> None: + spec = KIND_REGISTRY[kind_name] + if not _polars_round_trip_capable(spec): + self.skipTest( + f"kind {kind_name!r} not currently round-trippable via polars" + ) + modes = ["valid", "edge"] + if spec.supports_server_null: + modes[1:1] = ["partial", "all_null"] + for null_mode in modes: + with self.subTest(null_mode=null_mode): + table = self.fresh_table(f"arrow_pl_{kind_name}_{null_mode}") + kinds = [(f"c_{kind_name}", spec)] + afc.create_table_from_kinds(self._fixture, table, kinds) + ts_base = 1_700_000_000_000_000 + self._master_rng.next_int(1_000_000) + rb_orig, _vpc = _build_batch( + self._master_rng, _ROWS_PER_BATCH, kinds, + null_mode=null_mode, ts_base_us=ts_base, + ) + df_send = _rb_to_polars(rb_orig) + rb_send = _polars_to_rb(df_send) + afc.ingest_via_arrow(self._fixture, table, rb_send) + afc.wait_for_rows(self._fixture, table, rb_send.num_rows) + rb_recv = _read_back(self._fixture, table, kinds) + df_recv = _rb_to_polars(rb_recv) + rb_recv_pl = _polars_to_rb(df_recv) + self._assert_polars_round_trip( + rb_orig, rb_recv_pl, kinds, null_mode, + ) + + def _assert_polars_round_trip( + self, rb_in: pa.RecordBatch, rb_out: pa.RecordBatch, + kinds: List[Tuple[str, KindSpec]], null_mode: str, + ) -> None: + col_name, spec = kinds[0] + self.assertEqual( + rb_out.num_rows, rb_in.num_rows, + self.label(f"row count kind={spec.name} mode={null_mode}"), + ) + for r in range(rb_in.num_rows): + ev = _canonicalise_value( + _scalar_to_python(rb_in.column(0)[r], spec), spec) + av = _canonicalise_value( + 
_scalar_to_python(rb_out.column(0)[r], spec), spec) + if not spec.compare(av, ev): + self.fail(self.label( + f"kind={spec.name} mode={null_mode} row={r}: " + f"in={ev!r} out={av!r}" + )) + + +for _kind_name in list(KIND_REGISTRY.keys()): + if not _polars_round_trip_capable(KIND_REGISTRY[_kind_name]): + continue + + + def _make(name): + def test(self): + self._exercise_kind(name) + + test.__name__ = f"test_pl_{name}" + test.__qualname__ = f"TestArrowPolarsRoundTripPerKind.test_pl_{name}" + return test + + + setattr(TestArrowPolarsRoundTripPerKind, f"test_pl_{_kind_name}", _make(_kind_name)) + + +class TestArrowPolarsFuzz(afc.ArrowFuzzBase): + SUITE_LABEL = "arrow_polars_fuzz" + + def setUp(self) -> None: + super().setUp() + _require_polars(self) + + def _run_iteration(self, it: int, null_mode: str) -> None: + full_pool = _polars_round_trip_kinds() + if null_mode in ("partial", "all_null"): + pool = [(n, s) for n, s in full_pool if s.supports_server_null] + else: + pool = full_pool + self._master_rng.shuffle(pool) + picked = pool[: 3 + (it % 3)] + if not picked: + return + kinds = [(f"c{i}_{n}", s) for i, (n, s) in enumerate(picked)] + table = self.fresh_table(f"arrow_pl_fuzz_{it}") + afc.create_table_from_kinds(self._fixture, table, kinds) + ts_base = 1_700_000_000_000_000 + it * 10_000_000 + rb_orig, _vpc = _build_batch( + self._master_rng, _ROWS_PER_BATCH, kinds, + null_mode=null_mode, ts_base_us=ts_base, + ) + df_send = _rb_to_polars(rb_orig) + rb_send = _polars_to_rb(df_send) + afc.ingest_via_arrow(self._fixture, table, rb_send) + afc.wait_for_rows(self._fixture, table, rb_send.num_rows) + rb_recv = _read_back(self._fixture, table, kinds) + df_recv = _rb_to_polars(rb_recv) + rb_recv_pl = _polars_to_rb(df_recv) + self.assertEqual( + rb_recv_pl.num_rows, rb_orig.num_rows, + self.label(f"iter={it} mode={null_mode}"), + ) + for col_idx, (col_name, spec) in enumerate(kinds): + for r in range(rb_orig.num_rows): + ev = _canonicalise_value( + 
_scalar_to_python(rb_orig.column(col_idx)[r], spec), spec) + av = _canonicalise_value( + _scalar_to_python(rb_recv_pl.column(col_idx)[r], spec), spec) + if not spec.compare(av, ev): + self.fail(self.label( + f"iter={it} mode={null_mode} kind={spec.name} " + f"col={col_name} row={r}: in={ev!r} out={av!r}" + )) + + def test_random_valid(self): + for it in range(_FUZZ_ITERATIONS): + with self.subTest(iter=it): + self._run_iteration(it, "valid") + + def test_random_partial_null(self): + for it in range(_FUZZ_ITERATIONS): + with self.subTest(iter=it): + self._run_iteration(it, "partial") + + +def register(loop_registry): + loop_registry.append(TestArrowPolarsRoundTripPerKind) + loop_registry.append(TestArrowPolarsFuzz) + + +if __name__ == "__main__": + unittest.main() diff --git a/system_test/arrow_polars_per_dtype.py b/system_test/arrow_polars_per_dtype.py new file mode 100644 index 00000000..ce46fae0 --- /dev/null +++ b/system_test/arrow_polars_per_dtype.py @@ -0,0 +1,594 @@ +from __future__ import annotations + +import os +import sys +import unittest +from typing import Any, Callable, Optional + +import pyarrow as pa + +import arrow_fuzz_common as afc +from arrow_ffi import ArrowSenderError, SenderErrorCode + + +_ROWS = 4 +_TS_BASE_US = 1_700_000_000_000_000 + + +def _require_polars(testcase: unittest.TestCase): + try: + import polars as pl + except ImportError: + testcase.skipTest("polars is required for the Arrow-Polars dtype coverage tests") + + +def _polars_to_rb(df) -> pa.RecordBatch: + arrow_obj = df.to_arrow() + if isinstance(arrow_obj, pa.Table): + batches = arrow_obj.to_batches() + if len(batches) != 1: + raise AssertionError( + f"polars.to_arrow() produced {len(batches)} batches, expected 1" + ) + return batches[0] + return arrow_obj + + +def _ts_series_ns(pl, n: int): + return pl.Series( + "ts", + [_TS_BASE_US * 1000 + i for i in range(n)], + dtype=pl.Datetime("ns", time_zone="UTC"), + ) + + +def _create_table(fixture, table: str, ddl_body: str) -> None: + 
afc.exec_ddl( + fixture, + f"CREATE TABLE '{table}' ({ddl_body}, ts TIMESTAMP) " + f"TIMESTAMP(ts) PARTITION BY DAY WAL", + ) + + +def _try_ingest(testcase, table: str, df) -> Optional[Exception]: + try: + rb = _polars_to_rb(df) + afc.ingest_via_arrow(testcase._fixture, table, rb, ts_col=b"ts") + return None + except unittest.SkipTest: + # Let unittest propagate the skip; never wrap it as a returned error. + raise + except Exception as e: + return e + + +def _wait_or_zero(testcase, table: str, expected: int, timeout: float = 8.0) -> int: + import time as _t + deadline = _t.monotonic() + timeout + last = 0 + while _t.monotonic() < deadline: + try: + resp = testcase._fixture.http_sql_query( + f"select count() from '{table}'") + last = int(resp["dataset"][0][0]) + if last >= expected: + return last + except Exception: + pass + _t.sleep(0.1) + return last + + +class TestArrowPolarsPerDtype(afc.ArrowFuzzBase): + """One test method per polars data type. Supported dtypes must + round-trip cleanly; unsupported ones must surface a deterministic + error — either a client-side ``ArrowSenderError`` with a specific + ``line_sender_error_code`` or a server-side rejection that leaves + the pre-created table at 0 rows.""" + + SUITE_LABEL = "arrow_polars_per_dtype" + + def setUp(self) -> None: + super().setUp() + _require_polars(self) + + def _expect_success(self, table: str, df, ddl_body: str) -> None: + _create_table(self._fixture, table, ddl_body) + err = _try_ingest(self, table, df) + if err is not None: + self.fail(self.label( + f"polars round-trip expected to succeed; " + f"got {type(err).__name__}: {err}" + )) + rows = _wait_or_zero(self, table, df.height) + self.assertEqual(rows, df.height, self.label( + f"row count after polars round-trip; got {rows} want {df.height}")) + + def _expect_client_reject(self, df, expected_code: int) -> None: + table = self.fresh_table("polars_reject") + err = _try_ingest(self, table, df) + if not isinstance(err, ArrowSenderError): + 
self.fail(self.label( + f"expected ArrowSenderError, got {type(err).__name__ if err else 'None'}: {err}" + )) + self.assertEqual( + err.code, expected_code, + self.label(f"expected code={expected_code} got code={err.code} msg={err}") + ) + + def _expect_server_reject(self, df, ddl_body: str) -> None: + table = self.fresh_table("polars_server_reject") + _create_table(self._fixture, table, ddl_body) + _try_ingest(self, table, df) + rows = _wait_or_zero(self, table, 1, timeout=3.0) + self.assertEqual( + rows, 0, + self.label(f"expected server to reject batch (0 rows); got {rows}") + ) + + def _maybe_skip(self, fn: Callable[[], Any], reason_prefix: str) -> Any: + try: + return fn() + except Exception as e: + self.skipTest(f"{reason_prefix}: {e}") + + # ---- Supported: round-trip required --------------------------------- + + def test_dtype_boolean(self): + import polars as pl + table = self.fresh_table("polars_boolean") + df = pl.DataFrame({ + "c": pl.Series([True, False, True, False], dtype=pl.Boolean), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" BOOLEAN') + + def test_dtype_int8(self): + import polars as pl + table = self.fresh_table("polars_int8") + df = pl.DataFrame({ + "c": pl.Series([1, -2, 0, 3], dtype=pl.Int8), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" BYTE') + + def test_dtype_int16(self): + import polars as pl + table = self.fresh_table("polars_int16") + df = pl.DataFrame({ + "c": pl.Series([100, -100, 0, 200], dtype=pl.Int16), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" SHORT') + + def test_dtype_int32(self): + import polars as pl + table = self.fresh_table("polars_int32") + df = pl.DataFrame({ + "c": pl.Series([1, -1, 0, 1_000_000], dtype=pl.Int32), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" INT') + + def test_dtype_int64(self): + import polars as pl + table = self.fresh_table("polars_int64") + df = pl.DataFrame({ + "c": 
pl.Series([1, -1, 0, 1_000_000_000_000], dtype=pl.Int64), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" LONG') + + def test_dtype_float32(self): + import polars as pl + table = self.fresh_table("polars_float32") + df = pl.DataFrame({ + "c": pl.Series([1.5, -2.5, 0.0, 3.25], dtype=pl.Float32), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" FLOAT') + + def test_dtype_float64(self): + import polars as pl + table = self.fresh_table("polars_float64") + df = pl.DataFrame({ + "c": pl.Series([1.5, -2.5, 0.0, 1e10], dtype=pl.Float64), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" DOUBLE') + + def test_dtype_utf8(self): + import polars as pl + table = self.fresh_table("polars_utf8") + df = pl.DataFrame({ + "c": pl.Series(["a", "bb", "", "日本語"], dtype=pl.Utf8), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" VARCHAR') + + def test_dtype_binary(self): + import polars as pl + table = self.fresh_table("polars_binary") + df = pl.DataFrame({ + "c": pl.Series([b"\x01", b"\x02\x03", b"", b"\xff"], dtype=pl.Binary), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" BINARY') + + def test_dtype_datetime_us(self): + import polars as pl + table = self.fresh_table("polars_datetime_us") + df = pl.DataFrame({ + "c": pl.Series( + [_TS_BASE_US + i for i in range(_ROWS)], + dtype=pl.Datetime("us", time_zone="UTC"), + ), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" TIMESTAMP') + + def test_dtype_datetime_ns(self): + import polars as pl + table = self.fresh_table("polars_datetime_ns") + df = pl.DataFrame({ + "c": pl.Series( + [_TS_BASE_US * 1000 + i for i in range(_ROWS)], + dtype=pl.Datetime("ns", time_zone="UTC"), + ), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" TIMESTAMP_NS') + + def test_dtype_datetime_ms(self): + import polars as pl + table = 
self.fresh_table("polars_datetime_ms") + df = pl.DataFrame({ + "c": pl.Series( + [_TS_BASE_US // 1000 + i for i in range(_ROWS)], + dtype=pl.Datetime("ms", time_zone="UTC"), + ), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" DATE') + + def test_dtype_decimal(self): + import polars as pl + from decimal import Decimal + decimal_factory = getattr(pl, "Decimal", None) + if decimal_factory is None: + self.skipTest("this polars version has no Decimal dtype") + dt = self._maybe_skip( + lambda: decimal_factory(precision=18, scale=4), + "polars Decimal construction", + ) + df = self._maybe_skip( + lambda: pl.DataFrame({ + "c": pl.Series( + [Decimal("1.2345"), Decimal("-1.2345"), + Decimal("0"), Decimal("99.9999")], + dtype=dt, + ), + "ts": _ts_series_ns(pl, _ROWS), + }), + "polars Decimal DataFrame construction", + ) + table = self.fresh_table("polars_decimal") + self._expect_success(table, df, '"c" DECIMAL(18,4)') + + def test_dtype_categorical_becomes_symbol(self): + import polars as pl + df = self._maybe_skip( + lambda: pl.DataFrame({ + "c": pl.Series(["AAPL", "MSFT", "AAPL", "GOOG"], + dtype=pl.Categorical), + "ts": _ts_series_ns(pl, _ROWS), + }), + "polars Categorical DataFrame construction", + ) + table = self.fresh_table("polars_cat") + self._expect_success(table, df, '"c" SYMBOL') + + def test_dtype_enum_becomes_symbol(self): + import polars as pl + enum_factory = getattr(pl, "Enum", None) + if enum_factory is None: + self.skipTest("this polars version has no Enum dtype") + dt = self._maybe_skip( + lambda: enum_factory(["AAPL", "MSFT", "GOOG"]), + "polars Enum construction", + ) + df = self._maybe_skip( + lambda: pl.DataFrame({ + "c": pl.Series(["AAPL", "MSFT", "AAPL", "GOOG"], dtype=dt), + "ts": _ts_series_ns(pl, _ROWS), + }), + "polars Enum DataFrame construction", + ) + table = self.fresh_table("polars_enum") + self._expect_success(table, df, '"c" SYMBOL') + + def test_dtype_datetime_us_naive(self): + import polars as pl + table = 
self.fresh_table("polars_datetime_us_naive") + df = pl.DataFrame({ + "c": pl.Series( + [_TS_BASE_US + i for i in range(_ROWS)], + dtype=pl.Datetime("us"), + ), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" TIMESTAMP') + + def test_dtype_decimal_high_scale(self): + import polars as pl + from decimal import Decimal + decimal_factory = getattr(pl, "Decimal", None) + if decimal_factory is None: + self.skipTest("this polars version has no Decimal dtype") + dt = self._maybe_skip( + lambda: decimal_factory(precision=38, scale=10), + "polars Decimal(38, 10) construction", + ) + df = self._maybe_skip( + lambda: pl.DataFrame({ + "c": pl.Series( + [Decimal("1.2345678901"), Decimal("-1.2345678901"), + Decimal("0"), Decimal("99.9999999999")], + dtype=dt, + ), + "ts": _ts_series_ns(pl, _ROWS), + }), + "polars Decimal(38, 10) DataFrame construction", + ) + table = self.fresh_table("polars_decimal_p38s10") + self._expect_success(table, df, '"c" DECIMAL(38,10)') + + def test_dtype_list_float64(self): + import polars as pl + table = self.fresh_table("polars_list_f64") + df = pl.DataFrame({ + "c": pl.Series( + [[1.0, 2.0], [3.0], [], [4.0, 5.0, 6.0]], + dtype=pl.List(pl.Float64), + ), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" DOUBLE[]') + + def test_dtype_list_list_float64_ragged_within_row_rejected(self): + import polars as pl + df = self._maybe_skip( + lambda: pl.DataFrame({ + "c": pl.Series( + [[[1.0, 2.0], [3.0]], + [[4.0, 5.0], [6.0, 7.0]], + [[8.0], [9.0]], + [[10.0, 11.0]]], + dtype=pl.List(pl.List(pl.Float64)), + ), + "ts": _ts_series_ns(pl, _ROWS), + }), + "polars 2D ragged List(List(Float64)) construction", + ) + self._expect_client_reject(df, SenderErrorCode.ARROW_INGEST) + + def test_dtype_list_list_float64(self): + import polars as pl + table = self.fresh_table("polars_list2d_f64") + df = self._maybe_skip( + lambda: pl.DataFrame({ + "c": pl.Series( + [[[1.0, 2.0], [3.0, 4.0]], + [[5.0, 6.0]], + [[7.0, 
8.0, 9.0], [10.0, 11.0, 12.0]], + [[13.0], [14.0], [15.0]]], + dtype=pl.List(pl.List(pl.Float64)), + ), + "ts": _ts_series_ns(pl, _ROWS), + }), + "polars 2D List(List(Float64)) construction", + ) + self._expect_success(table, df, '"c" DOUBLE[][]') + + def test_dtype_array_float64(self): + import polars as pl + array_factory = getattr(pl, "Array", None) + if array_factory is None: + self.skipTest("this polars version has no Array (fixed-size list) dtype") + df = self._maybe_skip( + lambda: pl.DataFrame({ + "c": pl.Series( + [[1.0, 2.0, 3.0]] * _ROWS, + dtype=array_factory(pl.Float64, 3), + ), + "ts": _ts_series_ns(pl, _ROWS), + }), + "polars Array (fixed-size list) construction", + ) + table = self.fresh_table("polars_array_f64") + self._expect_success(table, df, '"c" DOUBLE[]') + + # ---- Widened/coerced dtypes: ingest must succeed; unsupported dtypes: must be rejected ---- + + def test_dtype_uint16_widens_to_int(self): + import polars as pl + table = self.fresh_table("polars_uint16") + df = pl.DataFrame({ + "c": pl.Series([1, 2, 3, 4], dtype=pl.UInt16), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" INT') + + def test_dtype_uint32_widens_to_long(self): + import polars as pl + table = self.fresh_table("polars_uint32") + df = pl.DataFrame({ + "c": pl.Series([1, 2, 3, 4], dtype=pl.UInt32), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" LONG') + + def test_dtype_uint8_widens_to_short(self): + import polars as pl + table = self.fresh_table("polars_uint8") + df = pl.DataFrame({ + "c": pl.Series([1, 2, 3, 4], dtype=pl.UInt8), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" SHORT') + + def test_dtype_uint64_reinterprets_as_long(self): + import polars as pl + table = self.fresh_table("polars_uint64") + df = pl.DataFrame({ + "c": pl.Series([1, 2, 3, 4], dtype=pl.UInt64), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" LONG') + + def test_dtype_int128_rejected_if_present(self): 
+ import polars as pl + dt = getattr(pl, "Int128", None) + if dt is None: + self.skipTest("this polars version has no Int128 dtype") + df = self._maybe_skip( + lambda: pl.DataFrame({ + "c": pl.Series([1, -1, 0, 10**30], dtype=dt), + "ts": _ts_series_ns(pl, _ROWS), + }), + "polars Int128 DataFrame construction", + ) + table = self.fresh_table("polars_int128") + err = _try_ingest(self, table, df) + if err is None: + self.fail(self.label("expected polars Int128 ingest to be rejected")) + + def test_dtype_date(self): + import polars as pl + import datetime as _dt + table = self.fresh_table("polars_date") + df = pl.DataFrame({ + "c": pl.Series( + [_dt.date(2023, 11, 14) for _ in range(_ROWS)], + dtype=pl.Date, + ), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" DATE') + + def test_dtype_time(self): + import polars as pl + import datetime as _dt + table = self.fresh_table("polars_time") + df = pl.DataFrame({ + "c": pl.Series( + [_dt.time(12, 30, 0) for _ in range(_ROWS)], + dtype=pl.Time, + ), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" LONG') + + def test_dtype_duration(self): + import polars as pl + import datetime as _dt + table = self.fresh_table("polars_duration") + df = pl.DataFrame({ + "c": pl.Series( + [_dt.timedelta(seconds=i) for i in range(_ROWS)], + dtype=pl.Duration("us"), + ), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_success(table, df, '"c" LONG') + + def test_dtype_struct_rejected(self): + import polars as pl + df = self._maybe_skip( + lambda: pl.DataFrame({ + "c": pl.Series( + [{"x": i, "y": float(i) * 0.5} for i in range(_ROWS)], + dtype=pl.Struct({"x": pl.Int32, "y": pl.Float64}), + ), + "ts": _ts_series_ns(pl, _ROWS), + }), + "polars Struct DataFrame construction", + ) + self._expect_client_reject(df, SenderErrorCode.ARROW_UNSUPPORTED_COLUMN_KIND) + + def test_dtype_list_utf8_rejected(self): + import polars as pl + df = pl.DataFrame({ + "c": pl.Series( + [["a"], ["b", "c"], 
[], ["d"]], + dtype=pl.List(pl.Utf8), + ), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_client_reject(df, SenderErrorCode.ARROW_UNSUPPORTED_COLUMN_KIND) + + def test_dtype_list_int64_rejected(self): + import polars as pl + df = pl.DataFrame({ + "c": pl.Series( + [[1, 2], [3], [], [4, 5, 6]], + dtype=pl.List(pl.Int64), + ), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_client_reject(df, SenderErrorCode.ARROW_UNSUPPORTED_COLUMN_KIND) + + def test_dtype_list_boolean_rejected(self): + import polars as pl + df = pl.DataFrame({ + "c": pl.Series( + [[True, False], [True], [], [False]], + dtype=pl.List(pl.Boolean), + ), + "ts": _ts_series_ns(pl, _ROWS), + }) + self._expect_client_reject(df, SenderErrorCode.ARROW_UNSUPPORTED_COLUMN_KIND) + + def test_dtype_object_rejected(self): + import polars as pl + dt = getattr(pl, "Object", None) + if dt is None: + self.skipTest("this polars version has no Object dtype") + df = self._maybe_skip( + lambda: pl.DataFrame({ + "c": pl.Series([{"k": i} for i in range(_ROWS)], dtype=dt), + "ts": _ts_series_ns(pl, _ROWS), + }), + "polars Object DataFrame construction", + ) + err = _try_ingest(self, self.fresh_table("polars_object"), df) + if err is None: + self.fail(self.label("expected polars Object to be rejected")) + + def test_dtype_null_rejected(self): + import polars as pl + dt = getattr(pl, "Null", None) + if dt is None: + self.skipTest("this polars version has no Null dtype") + df = self._maybe_skip( + lambda: pl.DataFrame({ + "c": pl.Series([None] * _ROWS, dtype=dt), + "ts": _ts_series_ns(pl, _ROWS), + }), + "polars Null DataFrame construction", + ) + self._expect_client_reject(df, SenderErrorCode.ARROW_UNSUPPORTED_COLUMN_KIND) + + +def register(loop_registry): + loop_registry.append(TestArrowPolarsPerDtype) + + +if __name__ == "__main__": + print( + "Note: arrow_polars_per_dtype tests require a live QuestDB fixture + polars. 
" + "Run via `python test.py run --existing HOST:ILP:HTTP " + "TestArrowPolarsPerDtype`.", + file=sys.stderr, + ) + unittest.main() diff --git a/system_test/arrow_round_trip_fuzz.py b/system_test/arrow_round_trip_fuzz.py new file mode 100644 index 00000000..6082017f --- /dev/null +++ b/system_test/arrow_round_trip_fuzz.py @@ -0,0 +1,284 @@ +from __future__ import annotations + +import os +import sys +import unittest +from typing import Dict, List, Tuple + +import pyarrow as pa + +import arrow_fuzz_common as afc +from arrow_fuzz_common import KIND_REGISTRY, KindSpec + +_FUZZ_ITERATIONS = int(os.environ.get("ARROW_ROUND_TRIP_FUZZ_ITERATIONS", "6")) +_ROWS_PER_BATCH = int(os.environ.get("ARROW_ROUND_TRIP_FUZZ_ROWS", "10")) + + +def _round_trip_capable(spec: KindSpec) -> bool: + return ( + spec.round_trip_capable + and spec.supports_arrow_ingest + and spec.supports_arrow_egress + ) + + +def _round_trip_capable_kinds() -> List[Tuple[str, KindSpec]]: + return [(n, s) for n, s in KIND_REGISTRY.items() if _round_trip_capable(s)] + + +def _build_batch( + rnd: afc.Rng, n: int, kinds: List[Tuple[str, KindSpec]], + *, null_mode: str, ts_base_us: int, +) -> Tuple[pa.RecordBatch, Dict[str, list]]: + arrays: List[pa.Array] = [] + fields: List[pa.Field] = [] + vpc: Dict[str, list] = {} + for col_name, spec in kinds: + if null_mode == "valid": + mask = afc.all_valid_mask(n); + edge = False + elif null_mode == "partial": + mask = afc.partial_null_mask(rnd, n, null_p=0.3); + edge = False + elif null_mode == "all_null": + mask = afc.all_null_mask(n); + edge = False + elif null_mode == "edge": + mask = afc.all_valid_mask(n); + edge = True + else: + raise ValueError(null_mode) + vs = spec.generate_values(rnd, n, mask, edge=edge) + vpc[col_name] = vs + arrays.append(spec.build_arrow_array(vs)) + fields.append(spec.make_field(col_name)) + ts_arr = pa.array( + [ts_base_us + i for i in range(n)], + type=pa.timestamp("us", tz="UTC"), + ) + arrays.append(ts_arr) + fields.append(pa.field("ts", 
pa.timestamp("us", tz="UTC"), nullable=False)) + return pa.RecordBatch.from_arrays(arrays, schema=pa.schema(fields)), vpc + + +def _read_back(fixture, table: str, kinds: List[Tuple[str, KindSpec]]) -> pa.RecordBatch: + cols_sql = ", ".join(f'"{c}"' for c, _ in kinds) + sql = f"select {cols_sql} from '{table}' order by ts" + return afc.read_back_arrow_concat(fixture, sql) + + +class TestArrowRoundTripPerKind(afc.ArrowFuzzBase): + """Per-kind round-trip. Failure pinpoints the single offending type.""" + + SUITE_LABEL = "arrow_round_trip_per_kind" + + def _exercise_kind(self, kind_name: str) -> None: + spec = KIND_REGISTRY[kind_name] + if not _round_trip_capable(spec): + self.skipTest(f"kind {kind_name!r} not round-trip capable") + modes = ["valid", "edge"] + if spec.supports_server_null: + modes[1:1] = ["partial", "all_null"] + for null_mode in modes: + with self.subTest(null_mode=null_mode): + table = self.fresh_table(f"arrow_rt_{kind_name}_{null_mode}") + kinds = [(f"c_{kind_name}", spec)] + afc.create_table_from_kinds(self._fixture, table, kinds) + ts_base = 1_700_000_000_000_000 + self._master_rng.next_int(1_000_000) + rb_in, vpc = _build_batch( + self._master_rng, _ROWS_PER_BATCH, kinds, + null_mode=null_mode, ts_base_us=ts_base, + ) + afc.ingest_via_arrow(self._fixture, table, rb_in) + afc.wait_for_rows(self._fixture, table, rb_in.num_rows) + rb_out = _read_back(self._fixture, table, kinds) + self._assert_kind_round_trip(rb_in, rb_out, kinds, null_mode) + + def _assert_kind_round_trip( + self, rb_in: pa.RecordBatch, rb_out: pa.RecordBatch, + kinds: List[Tuple[str, KindSpec]], null_mode: str, + ) -> None: + col_name, spec = kinds[0] + self.assertEqual(rb_out.num_rows, rb_in.num_rows, + self.label(f"row count kind={spec.name} mode={null_mode}")) + expected_dtype = spec.arrow_type() + actual_dtype = _storage_type(rb_out.column(0).type) + if not _dtype_compatible(actual_dtype, expected_dtype): + self.fail(self.label(f"DataType kind={spec.name}: " + f"want 
{expected_dtype}, got {actual_dtype}")) + expected_md = spec.metadata() or {} + actual_md = dict(rb_out.schema.field(0).metadata or {}) + ext_name = getattr(rb_out.schema.field(0).type, "extension_name", None) + for k, v in expected_md.items(): + key_bytes = k if isinstance(k, bytes) else k.encode() + val_bytes = v if isinstance(v, bytes) else v.encode() + if key_bytes == b"ARROW:extension:name" and ext_name is not None: + if ext_name.encode() == val_bytes: + continue + self.assertEqual( + actual_md.get(key_bytes), val_bytes, + self.label(f"kind={spec.name} field metadata mismatch " + f"key={key_bytes!r} expected={val_bytes!r} " + f"actual={actual_md.get(key_bytes)!r}"), + ) + for r in range(rb_in.num_rows): + ev_canon = _canonicalise_value( + _scalar_to_python(rb_in.column(0)[r], spec), spec) + av_canon = _canonicalise_value( + _scalar_to_python(rb_out.column(0)[r], spec), spec) + if not spec.compare(av_canon, ev_canon): + self.fail(self.label( + f"kind={spec.name} mode={null_mode} row={r}: " + f"in={ev_canon!r} out={av_canon!r}" + )) + + +def _storage_type(t: pa.DataType) -> pa.DataType: + storage = getattr(t, "storage_type", None) + return storage if storage is not None else t + + +def _leaf_type(t: pa.DataType) -> str: + while pa.types.is_list(t) or pa.types.is_large_list(t): + t = t.value_type + return str(t) + + +def _dtype_compatible(actual: pa.DataType, expected: pa.DataType) -> bool: + if str(actual) == str(expected): + return True + a_str = str(actual) + e_str = str(expected) + if a_str.startswith("decimal") and e_str.startswith("decimal"): + return a_str[a_str.index("("):] == e_str[e_str.index("("):] + if "list" in a_str and "list" in e_str: + return _leaf_type(actual) == _leaf_type(expected) + return False + + +def _scalar_to_python(scalar, spec=None): + if scalar is None: + return None + if spec is not None and spec.name in ("timestamp", "timestamp_ns", "date") \ + and hasattr(scalar, "value"): + if not scalar.is_valid: + return None + return 
scalar.value + try: + return scalar.as_py() + except (ValueError, OverflowError): + return getattr(scalar, "value", None) + + +def _canonicalise_value(value, spec: KindSpec): + if value is None: + return None + import datetime as _dt + import uuid as _uuid + from decimal import Decimal + if isinstance(value, _dt.datetime): + unit = spec.params.get("unit", "us") + divisor = {"s": 1, "ms": 1_000, "us": 1_000_000, "ns": 1_000_000_000}[unit] + if value.tzinfo is None: + value = value.replace(tzinfo=_dt.timezone.utc) + epoch = _dt.datetime(1970, 1, 1, tzinfo=_dt.timezone.utc) + return int(round((value - epoch).total_seconds() * divisor)) + if isinstance(value, Decimal): + scale = spec.params.get("scale", 0) + return int(value.scaleb(scale)) + if spec.name == "uuid": + if isinstance(value, _uuid.UUID): + value = value.bytes + if isinstance(value, (bytes, bytearray)): + lo = int.from_bytes(value[:8], "little") + hi = int.from_bytes(value[8:], "little") + return (lo, hi) + return value + + +for _kind_name in list(KIND_REGISTRY.keys()): + spec = KIND_REGISTRY[_kind_name] + if not _round_trip_capable(spec): + continue + + + def _make(name): + def test(self): + self._exercise_kind(name) + + test.__name__ = f"test_rt_{name}" + test.__qualname__ = f"TestArrowRoundTripPerKind.test_rt_{name}" + return test + + + setattr(TestArrowRoundTripPerKind, f"test_rt_{_kind_name}", _make(_kind_name)) + + +class TestArrowRoundTripFuzz(afc.ArrowFuzzBase): + """Random subsets of kinds, random null modes.""" + + SUITE_LABEL = "arrow_round_trip_fuzz" + + def _run_random_iteration(self, it: int, null_mode: str, + *, include_edge: bool = False) -> None: + full_pool = _round_trip_capable_kinds() + mode = "edge" if include_edge else null_mode + if mode in ("partial", "all_null"): + pool = [(n, s) for n, s in full_pool if s.supports_server_null] + else: + pool = full_pool + self._master_rng.shuffle(pool) + picked = pool[: 3 + (it % 4)] + kinds = [(f"c{i}_{n}", s) for i, (n, s) in enumerate(picked)] + 
table = self.fresh_table(f"arrow_rt_fuzz_{it}") + afc.create_table_from_kinds(self._fixture, table, kinds) + ts_base = 1_700_000_000_000_000 + it * 10_000_000 + rb_in, _vpc = _build_batch( + self._master_rng, _ROWS_PER_BATCH, kinds, + null_mode=mode, ts_base_us=ts_base, + ) + afc.ingest_via_arrow(self._fixture, table, rb_in) + afc.wait_for_rows(self._fixture, table, rb_in.num_rows) + rb_out = _read_back(self._fixture, table, kinds) + self.assertEqual(rb_out.num_rows, rb_in.num_rows, self.label()) + for col_idx, (col_name, spec) in enumerate(kinds): + for r in range(rb_in.num_rows): + ev = _canonicalise_value( + _scalar_to_python(rb_in.column(col_idx)[r], spec), spec) + av = _canonicalise_value( + _scalar_to_python(rb_out.column(col_idx)[r], spec), spec) + if not spec.compare(av, ev): + self.fail(self.label( + f"iter={it} mode={mode} kind={spec.name} " + f"col={col_name} row={r}: in={ev!r} out={av!r}" + )) + + def test_random_schemas_all_valid(self): + for it in range(_FUZZ_ITERATIONS): + with self.subTest(iter=it): + self._run_random_iteration(it, "valid") + + def test_random_schemas_partial_null(self): + for it in range(_FUZZ_ITERATIONS): + with self.subTest(iter=it): + self._run_random_iteration(it, "partial") + + def test_random_schemas_edge_values(self): + for it in range(_FUZZ_ITERATIONS): + with self.subTest(iter=it): + self._run_random_iteration(it, "edge", include_edge=True) + + +def register(loop_registry): + loop_registry.append(TestArrowRoundTripPerKind) + loop_registry.append(TestArrowRoundTripFuzz) + + +if __name__ == "__main__": + print( + "Note: arrow_round_trip_fuzz tests require a live QuestDB fixture. 
" + "Run via `python test.py run --existing HOST:ILP:HTTP " + "TestArrowRoundTripPerKind` (or TestArrowRoundTripFuzz).", + file=sys.stderr, + ) + unittest.main() diff --git a/system_test/questdb_line_sender.py b/system_test/questdb_line_sender.py index bec6b0c8..c4024ce8 100644 --- a/system_test/questdb_line_sender.py +++ b/system_test/questdb_line_sender.py @@ -257,13 +257,12 @@ def set_sig(fn, restype, *argtypes): set_sig( dll.line_sender_error_get_code, - c_line_sender_error_p, c_int, - c_void_p) + c_line_sender_error_p) set_sig( dll.line_sender_error_msg, - c_line_sender_error_p, c_void_p, + c_line_sender_error_p, c_size_t_p) set_sig( dll.line_sender_error_free, diff --git a/system_test/test.py b/system_test/test.py index 77537d05..da910c97 100755 --- a/system_test/test.py +++ b/system_test/test.py @@ -27,6 +27,9 @@ import sys sys.dont_write_bytecode = True + +sys.modules.setdefault('test', sys.modules[__name__]) + import os import pathlib import math @@ -43,6 +46,40 @@ import questdb_line_sender as qls import qwp_ws_fuzz import uuid + +from arrow_egress_fuzz import ( + TestArrowEgressPerKind, + TestArrowEgressEmpty, + TestArrowEgressFuzz, +) +from arrow_ingress_fuzz import ( + TestArrowIngressPerKind, + TestArrowIngressDesignatedTs, + TestArrowIngressErrors, + TestArrowIngressExtraTypes, + TestArrowIngressUnsupportedTypes, + TestArrowIngressMultiBatch, + TestArrowIngressFuzz, +) +from arrow_round_trip_fuzz import ( + TestArrowRoundTripPerKind, + TestArrowRoundTripFuzz, +) +from arrow_polars_fuzz import ( + TestArrowPolarsRoundTripPerKind, + TestArrowPolarsFuzz, +) +from arrow_polars_per_dtype import ( + TestArrowPolarsPerDtype, +) +from arrow_alignment_fuzz import TestArrowAlignment +from test_arrow_fuzz_common_unit import ( + TestKindRegistryCompleteness, + TestCompareSemantics, + TestRngDeterminism, + TestBuildRecordBatch, + TestEdgeCorpora, +) from fixture import ( Project, QuestDbFixtureBase, @@ -85,6 +122,40 @@ def sql_query(query: str): return 
QDB_FIXTURE.http_sql_query(query) +_QWP_WS_UNSUPPORTED_MARKERS = ( + 'unsupported protocol', + 'unknown protocol', + 'unknown scheme', + 'missing endpoint', + 'endpoint not found', + # Ingest (Sender → qwpws://) error phrasing + 'websocket upgrade failed: http status 404', + 'websocket upgrade failed: http status 405', + 'websocket upgrade failed: http status 501', + # Egress (Reader → ws://) error phrasing + 'websocket handshake failed with http 404', + 'websocket handshake failed with http 405', + 'websocket handshake failed with http 501', +) + + +def is_unsupported_qwp_ws_fixture_error(error) -> bool: + msg = str(error).lower() + return any(m in msg for m in _QWP_WS_UNSUPPORTED_MARKERS) + + +def skip_if_unsupported_qwp_ws_fixture(error, fixture) -> None: + if not is_unsupported_qwp_ws_fixture_error(error): + return + root_dir = getattr(fixture, '_root_dir', None) + is_repo_master = root_dir is not None and root_dir.name == 'repo' + if is_repo_master: + return + raise unittest.SkipTest( + f'QWP/WebSocket is not supported by this QuestDB fixture: {error}' + ) from error + + class _ParsedUnittestProgram(unittest.TestProgram): def runTests(self): pass @@ -117,7 +188,7 @@ def _suite_kind(test): return SUITE_QWP_WS_PROTOCOL if class_name == 'TestQwpWsRestart': return SUITE_QWP_WS_RESTART - if class_name == 'TestQwpWsFuzz': + if class_name == 'TestQwpWsFuzz' or class_name.startswith('TestArrow'): return SUITE_QWP_WS_FUZZ return SUITE_MATRIX @@ -1484,21 +1555,6 @@ def _sender_conf( conf.append(f'{key}={value};') return ''.join(conf) - @staticmethod - def _is_unsupported_qwp_ws_fixture_error(error): - message = str(error).lower() - unsupported_markers = ( - 'unsupported protocol', - 'unknown protocol', - 'unknown scheme', - 'missing endpoint', - 'endpoint not found', - 'websocket upgrade failed: http status 404', - 'websocket upgrade failed: http status 405', - 'websocket upgrade failed: http status 501', - ) - return any(marker in message for marker in 
unsupported_markers) - def _connect_sender(self, conf): sender = None try: @@ -1508,12 +1564,7 @@ def _connect_sender(self, conf): except qls.SenderError as e: if sender is not None: sender.close(False) - root_dir = getattr(QDB_FIXTURE, '_root_dir', None) - if ( - root_dir is not None and - root_dir.name != 'repo' and - self._is_unsupported_qwp_ws_fixture_error(e)): - self.skipTest(f'QWP/WebSocket is not supported by this QuestDB fixture: {e}') + skip_if_unsupported_qwp_ws_fixture(e, QDB_FIXTURE) raise return sender @@ -1679,13 +1730,7 @@ def _assert_auth_rejected(self, sender_id, sf_dir, include_auth, password=None): with self.assertRaises(qls.SenderError) as ctx: sender.connect() native_error = ctx.exception.__cause__ or ctx.exception - root_dir = getattr(QDB_FIXTURE, '_root_dir', None) - if ( - root_dir is not None and - root_dir.name != 'repo' and - self._is_unsupported_qwp_ws_fixture_error(native_error)): - self.skipTest( - f'QWP/WebSocket is not supported by this QuestDB fixture: {native_error}') + skip_if_unsupported_qwp_ws_fixture(native_error, QDB_FIXTURE) self.assertRegex( str(native_error), r'(?i)(401|403|unauthor|forbidden|authentication)') diff --git a/system_test/test_arrow_fuzz_common_unit.py b/system_test/test_arrow_fuzz_common_unit.py new file mode 100644 index 00000000..76f6713c --- /dev/null +++ b/system_test/test_arrow_fuzz_common_unit.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +import math +import unittest + +import pyarrow as pa + +import arrow_fuzz_common as afc + + +class TestKindRegistryCompleteness(unittest.TestCase): + """Every registry entry must satisfy the KindSpec contract.""" + + def test_all_specs_resolve(self): + self.assertGreater(len(afc.KIND_REGISTRY), 20, + "registry should contain ~28 entries") + for name, spec in afc.KIND_REGISTRY.items(): + with self.subTest(kind=name): + self.assertEqual(spec.name, name) + self.assertIsInstance(spec.ddl, str) + self.assertTrue(spec.ddl, "DDL fragment must be non-empty") + 
dtype = spec.arrow_type() + self.assertIsInstance(dtype, pa.DataType) + # `metadata()` returns either None or a dict[bytes, bytes]. + md = spec.metadata() + if md is not None: + self.assertIsInstance(md, dict) + for k, v in md.items(): + self.assertIsInstance(k, (bytes, str)) + self.assertIsInstance(v, (bytes, str)) + + def test_each_spec_builds_valid_arrow_array(self): + rnd = afc.Rng(0xDEADBEEF) + for name, spec in afc.KIND_REGISTRY.items(): + with self.subTest(kind=name): + mask = afc.all_valid_mask(8) + values = spec.generate_values(rnd, 8, mask, edge=False) + self.assertEqual(len(values), 8) + arr = spec.build_arrow_array(values) + self.assertEqual(len(arr), 8) + self.assertEqual(arr.null_count, 0) + + def test_each_spec_handles_null_mask(self): + rnd = afc.Rng(0xCAFEBABE) + for name, spec in afc.KIND_REGISTRY.items(): + with self.subTest(kind=name): + mask = [True, False, True, False, True, False, True, False] + values = spec.generate_values(rnd, 8, mask, edge=False) + arr = spec.build_arrow_array(values) + self.assertEqual(arr.null_count, 4, + f"{name}: expected 4 nulls") + + def test_each_spec_handles_all_null(self): + rnd = afc.Rng(0x12345678) + for name, spec in afc.KIND_REGISTRY.items(): + with self.subTest(kind=name): + mask = afc.all_null_mask(8) + values = spec.generate_values(rnd, 8, mask, edge=False) + arr = spec.build_arrow_array(values) + self.assertEqual(arr.null_count, 8, + f"{name}: expected 8 nulls") + + def test_field_construction_carries_metadata(self): + for name, spec in afc.KIND_REGISTRY.items(): + with self.subTest(kind=name): + field = spec.make_field(f"c_{name}") + if spec.metadata() is not None: + self.assertIsNotNone(field.metadata, + f"{name}: field metadata stripped") + + def test_edge_mode_produces_distinct_values(self): + rnd = afc.Rng(0xFEEDFACE) + for name, spec in afc.KIND_REGISTRY.items(): + with self.subTest(kind=name): + mask = afc.all_valid_mask(8) + normal = spec.generate_values(rnd, 8, mask, edge=False) + edge = 
spec.generate_values(rnd, 8, mask, edge=True) + self.assertEqual(len(normal), len(edge)) + + +class TestCompareSemantics(unittest.TestCase): + def test_default_equality(self): + spec = afc.KIND_REGISTRY["int"] + self.assertTrue(spec.compare(42, 42)) + self.assertFalse(spec.compare(42, 43)) + self.assertTrue(spec.compare(None, None)) + self.assertFalse(spec.compare(None, 0)) + + def test_float_nan_compares_equal_to_itself(self): + spec = afc.KIND_REGISTRY["double"] + nan = float("nan") + self.assertTrue(spec.compare(nan, nan)) + self.assertFalse(spec.compare(nan, 0.0)) + self.assertTrue(spec.compare(float("inf"), float("inf"))) + self.assertTrue(spec.compare(float("inf"), float("-inf"))) + self.assertTrue(spec.compare(float("nan"), float("inf"))) + + def test_float32_rounding_tolerated(self): + spec = afc.KIND_REGISTRY["float"] + self.assertTrue(spec.compare(0.5, 0.5)) + self.assertFalse(spec.compare(0.1, 0.2)) + + def test_decimal_normalises(self): + from decimal import Decimal + spec = afc.KIND_REGISTRY["decimal64"] + self.assertTrue(spec.compare(Decimal("1.10"), Decimal("1.1"))) + self.assertTrue(spec.compare(Decimal("0"), Decimal("0.000"))) + + +class TestRngDeterminism(unittest.TestCase): + def test_two_rngs_same_seed_match(self): + a = afc.Rng(0xAA55AA55) + b = afc.Rng(0xAA55AA55) + for _ in range(20): + self.assertEqual(a.next_int(1_000_000), b.next_int(1_000_000)) + + def test_seed_label_round_trips(self): + for seed in (0x0, 0x1, 0xDEADBEEF, (1 << 63)): + label = afc.format_seed(seed) + self.assertEqual(label, f"0x{seed:016x}") + + +class TestBuildRecordBatch(unittest.TestCase): + def test_build_minimal_batch(self): + rnd = afc.Rng(0xBEEF1234) + kinds = [ + ("c_int", afc.KIND_REGISTRY["int"]), + ("c_double", afc.KIND_REGISTRY["double"]), + ("c_symbol", afc.KIND_REGISTRY["symbol"]), + ] + rb = afc.build_record_batch(kinds, rnd, 4, null_mode="valid") + self.assertEqual(rb.num_rows, 4) + self.assertEqual(rb.num_columns, 4) # 3 kinds + ts + 
self.assertEqual(rb.column(3).type, pa.timestamp("us", tz="UTC")) + + def test_partial_null_mode_inserts_some_nulls(self): + rnd = afc.Rng(0xABCD) + kinds = [("c_int", afc.KIND_REGISTRY["int"])] + rb = afc.build_record_batch(kinds, rnd, 100, null_mode="partial", + null_p=0.5) + nulls = rb.column(0).null_count + self.assertGreater(nulls, 10, "expected >10 nulls in 100-row sample") + self.assertLess(nulls, 90) + + def test_all_null_mode(self): + rnd = afc.Rng(0x9999) + kinds = [("c_uuid", afc.KIND_REGISTRY["uuid"])] + rb = afc.build_record_batch(kinds, rnd, 8, null_mode="all_null") + self.assertEqual(rb.column(0).null_count, 8) + + +class TestEdgeCorpora(unittest.TestCase): + def test_edge_floats_contain_nan_inf_minus_zero(self): + self.assertTrue(any(math.isnan(v) for v in afc.EDGE_FLOATS)) + self.assertTrue(any(v == float("inf") for v in afc.EDGE_FLOATS)) + self.assertTrue(any(v == float("-inf") for v in afc.EDGE_FLOATS)) + zeros = [v for v in afc.EDGE_FLOATS if v == 0.0] + self.assertEqual(len(zeros), 2, "should include +0.0 and -0.0") + + def test_edge_ints_cover_min_max(self): + self.assertIn(-128, afc.EDGE_INTS_I8) + self.assertIn(127, afc.EDGE_INTS_I8) + self.assertIn(-(1 << 63), afc.EDGE_INTS_I64) + self.assertIn((1 << 63) - 1, afc.EDGE_INTS_I64) + + def test_edge_strings_include_empty_and_unicode(self): + self.assertIn("", afc.EDGE_STRINGS) + self.assertTrue( + any(ord(c) > 0x7F for s in afc.EDGE_STRINGS for c in s), + "expected at least one non-ASCII edge string", + ) + + +if __name__ == "__main__": + unittest.main()