From b1ce4dfe0b820070f6ea2a29759a4a753218de70 Mon Sep 17 00:00:00 2001 From: Carsten Brachem Date: Sat, 11 Apr 2026 19:46:50 +0200 Subject: [PATCH 1/3] feat: reading data from hstore columns --- src/CMakeLists.txt | 1 + src/include/postgres_hstore.hpp | 17 +++ src/postgres_extension.cpp | 3 + src/postgres_hstore.cpp | 231 +++++++++++++++++++++++++++++ test/sql/misc/postgres_hstore.test | 223 ++++++++++++++++++++++++++++ 5 files changed, 475 insertions(+) create mode 100644 src/include/postgres_hstore.hpp create mode 100644 src/postgres_hstore.cpp create mode 100644 test/sql/misc/postgres_hstore.test diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a11df8b01..5b99d6e43 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -15,6 +15,7 @@ add_library( postgres_execute.cpp postgres_extension.cpp postgres_filter_pushdown.cpp + postgres_hstore.cpp postgres_parameters.cpp postgres_query.cpp postgres_scanner.cpp diff --git a/src/include/postgres_hstore.hpp b/src/include/postgres_hstore.hpp new file mode 100644 index 000000000..11e76883d --- /dev/null +++ b/src/include/postgres_hstore.hpp @@ -0,0 +1,17 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// postgres_hstore.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb.hpp" + +namespace duckdb { + +void RegisterHstoreFunctions(ExtensionLoader &loader); + +} // namespace duckdb diff --git a/src/postgres_extension.cpp b/src/postgres_extension.cpp index 4fbe8fc69..e46fbbf36 100644 --- a/src/postgres_extension.cpp +++ b/src/postgres_extension.cpp @@ -21,6 +21,7 @@ #include "duckdb/main/connection_manager.hpp" #include "duckdb/common/error_data.hpp" #include "postgres_logging.hpp" +#include "postgres_hstore.hpp" using namespace duckdb; @@ -181,6 +182,8 @@ static void LoadInternal(ExtensionLoader &loader) { PostgresConfigurePoolFunction configure_pool_function; loader.RegisterFunction(configure_pool_function); + RegisterHstoreFunctions(loader); + // Register the new type SecretType secret_type; secret_type.name = "postgres"; diff --git a/src/postgres_hstore.cpp b/src/postgres_hstore.cpp new file mode 100644 index 000000000..74c1ab996 --- /dev/null +++ b/src/postgres_hstore.cpp @@ -0,0 +1,231 @@ +#include "postgres_hstore.hpp" +#include "duckdb/common/exception.hpp" +#include "duckdb/function/scalar_function.hpp" + +#include + +namespace duckdb { + +struct HstorePair { + std::string key; + std::optional value; +}; + +namespace { + +bool IsSpace(const char c) { + const auto u = static_cast(c); + return std::isspace(u); +} + +void SkipWhitespace(std::string_view input, size_t &pos) { + while (pos < input.size() && IsSpace(input[pos])) { + ++pos; + } +} + +bool IsNullLiteral(const std::string_view &s) { + return s.size() == 4 && std::tolower(static_cast(s[0])) == 'n' && + std::tolower(static_cast(s[1])) == 'u' && + std::tolower(static_cast(s[2])) == 'l' && + std::tolower(static_cast(s[3])) == 'l'; +} + +std::optional ReadToken(std::string_view input, size_t &pos, bool is_key) { + SkipWhitespace(input, pos); + if (pos >= input.size()) { + return std::nullopt; + } + + std::string result; + + // quoted + if (input[pos] == '"') { + ++pos; // skip opening quote + while (pos < input.size()) { + char c = input[pos]; + ++pos; + if (c == '"') { + return std::move(result); + } + if (c == '\\') { + if (pos >= input.size()) { + break; + } + result += input[pos]; + ++pos; + } else { + result += c; + } + } + throw InvalidInputException("syntax error in hstore: unexpected end of string"); + } + + // unquoted + auto terminator = is_key ? '=' : ','; + while (pos < input.size()) { + char c = input[pos]; + if (std::isspace(static_cast(c)) || c == terminator) { + break; + } + if (c == '\\') { + ++pos; + if (pos >= input.size()) { + throw InvalidInputException("syntax error in hstore: unexpected end of string"); + } + result += input[pos]; + } else { + result += c; + } + ++pos; + } + + if (is_key && result.empty()) { + throw InvalidInputException("syntax error in hstore, near \"%c\" at position %d", input[pos], + static_cast(pos)); + } + if (!is_key && IsNullLiteral(result)) { + return std::nullopt; + } + return std::move(result); +} + +void ExpectArrow(std::string_view input, size_t &pos) { + SkipWhitespace(input, pos); + if (pos + 1 >= input.size() || input[pos] != '=' || input[pos + 1] != '>') { + throw InvalidInputException("syntax error in hstore, near \"%c\" at position %d", + pos < input.size() ? input[pos] : '?', static_cast(pos)); + } + pos += 2; +} + +std::vector ParseHstore(std::string_view input) { + std::vector pairs; + size_t pos = 0; + + SkipWhitespace(input, pos); + while (pos < input.size()) { + auto key = ReadToken(input, pos, /* is_key = */ true); + D_ASSERT(key.has_value()); + ExpectArrow(input, pos); + auto value = ReadToken(input, pos, /* is_key = */ false); + pairs.push_back({std::move(*key), std::move(value)}); + + // Expect comma or end + SkipWhitespace(input, pos); + if (pos >= input.size()) { + break; + } + if (input[pos] != ',') { + throw InvalidInputException("syntax error in hstore, near \"%c\" at position %d", input[pos], + static_cast(pos)); + } + ++pos; + SkipWhitespace(input, pos); + } + + return pairs; +} + +void JsonEscapeString(std::string &out, const std::string &s) { + out += '"'; + for (char c : s) { + switch (c) { + case '"': + out += "\\\""; + break; + case '\\': + out += "\\\\"; + break; + case '\b': + out += "\\b"; + break; + case '\f': + out += "\\f"; + break; + case '\n': + out += "\\n"; + break; + case '\r': + out += "\\r"; + break; + case '\t': + out += "\\t"; + break; + default: + if (static_cast(c) < 0x20) { + char buf[8]; + snprintf(buf, sizeof(buf), "\\u%04x", static_cast(c)); + out += buf; + } else { + out += c; + } + break; + } + } + out += '"'; +} + +void PostgresHstoreGetFun(DataChunk &args, ExpressionState &state, Vector &result) { + auto &hstore_vector = args.data[0]; + auto &key_vector = args.data[1]; + + BinaryExecutor::ExecuteWithNulls( + hstore_vector, key_vector, result, args.size(), + [&](string_t hstore, string_t key, ValidityMask &mask, idx_t idx) -> string_t { + auto pairs = ParseHstore(hstore.GetString()); + + for (auto it = pairs.rbegin(); it != pairs.rend(); ++it) { + if (it->key == key.GetString()) { + if (!it->value.has_value()) { + mask.SetInvalid(idx); + return string_t {}; + } + return StringVector::AddString(result, *it->value); + } + } + mask.SetInvalid(idx); + return string_t {}; + }); +} + +void PostgresHstoreToJsonFun(DataChunk &args, ExpressionState &state, Vector &result) { + auto &hstore_vector = args.data[0]; + + UnaryExecutor::Execute(hstore_vector, result, args.size(), + [&](string_t hstore_str) -> string_t { + auto pairs = ParseHstore(hstore_str.GetString()); + std::string json; + json += '{'; + bool first = true; + for (auto &pair : pairs) { + if (!first) { + json += ", "; + } + first = false; + JsonEscapeString(json, pair.key); + json += ": "; + if (pair.value.has_value()) { + JsonEscapeString(json, *pair.value); + } else { + json += "null"; + } + } + json += '}'; + return StringVector::AddString(result, json); + }); +} + +} // anonymous namespace + +void RegisterHstoreFunctions(ExtensionLoader &loader) { + auto hstore_get = ScalarFunction("postgres_hstore_get", {LogicalType::VARCHAR, LogicalType::VARCHAR}, + LogicalType::VARCHAR, PostgresHstoreGetFun); + loader.RegisterFunction(hstore_get); + + auto hstore_to_json = + ScalarFunction("postgres_hstore_to_json", {LogicalType::VARCHAR}, LogicalType::JSON(), PostgresHstoreToJsonFun); + loader.RegisterFunction(hstore_to_json); +} + +} // namespace duckdb diff --git a/test/sql/misc/postgres_hstore.test b/test/sql/misc/postgres_hstore.test new file mode 100644 index 000000000..0e84a4197 --- /dev/null +++ b/test/sql/misc/postgres_hstore.test @@ -0,0 +1,223 @@ +# name: test/sql/misc/postgres_hstore.test +# description: test postgres hstore scalar functions +# group: [misc] + +# Before we load the extension, this will fail +statement error +SELECT postgres_hstore_get('a=>b', 'a'); +---- +Catalog Error: Scalar Function with name postgres_hstore_get does not exist! + +# Require statement will ensure this test is run with this extension loaded +require postgres_scanner + +statement ok +PRAGMA enable_verification + +# Confirm the extension works +query I +SELECT postgres_hstore_get('a=>b', 'a'); +---- +b + +# --- postgres_hstore_get: basic lookups --- + +query I +SELECT postgres_hstore_get('aa=>b, c=>d, b=>16', 'c'); +---- +d + +query I +SELECT postgres_hstore_get('aa=>b, c=>d, b=>16', 'b'); +---- +16 + +query I +SELECT postgres_hstore_get('aa=>b, c=>d, b=>16', 'aa'); +---- +b + +# --- postgres_hstore_get: duplicate key, last one wins --- + +query I +SELECT postgres_hstore_get('a=>b, a=>c', 'a'); +---- +c + +# --- postgres_hstore_get: key not found returns NULL --- + +query I +SELECT postgres_hstore_get('aa=>b, c=>d, b=>16', 'gg'); +---- +NULL + +# --- postgres_hstore_get: empty hstore returns NULL --- + +query I +SELECT postgres_hstore_get('', 'x'); +---- +NULL + +# --- postgres_hstore_get: hstore NULL value returns NULL --- + +query I +SELECT postgres_hstore_get('aa=>NULL, c=>d, b=>16', 'aa'); +---- +NULL + +query I +SELECT postgres_hstore_get('aa=>NuLl, c=>d', 'aa'); +---- +NULL + +# --- postgres_hstore_get: quoted "NULL" is NOT null, it's a literal string --- + +query I +SELECT postgres_hstore_get('aa=>"NuLl", c=>d', 'aa'); +---- +NuLl + +# --- postgres_hstore_get: quoted key with space --- + +query I +SELECT postgres_hstore_get('"a key" =>1, b => t', 'a key'); +---- +1 + +# --- postgres_hstore_get: quoted value with escaped characters --- + +query I +SELECT postgres_hstore_get('k=>"val \"quoted\""', 'k'); +---- +val "quoted" + +# --- postgres_hstore_get: whitespace variations --- + +query I +SELECT postgres_hstore_get(' a => b ', 'a'); +---- +b + +query I +SELECT postgres_hstore_get('a=>b', 'a'); +---- +b + +query I +SELECT postgres_hstore_get(' a=>b', 'a'); +---- +b + +query I +SELECT postgres_hstore_get('a =>b', 'a'); +---- +b + +query I +SELECT postgres_hstore_get('a=> b', 'a'); +---- +b + +query I +SELECT postgres_hstore_get('a=>b ', 'a'); +---- +b + +# --- postgres_hstore_get: NULL input propagation (DEFAULT_NULL_HANDLING) --- + +query I +SELECT postgres_hstore_get(NULL, 'a'); +---- +NULL + +query I +SELECT postgres_hstore_get('a=>1', NULL); +---- +NULL + +# --- postgres_hstore_get: backslash escaping in unquoted tokens --- + +query I +SELECT postgres_hstore_get('\=a=>q=w', '=a'); +---- +q=w + +# --- postgres_hstore_get: empty string key (quoted) --- + +query I +SELECT postgres_hstore_get('""=>1', ''); +---- +1 + +# --- postgres_hstore_get: error cases --- + +statement error +SELECT postgres_hstore_get(' =>null', 'x'); +---- +Invalid Input Error + +statement error +SELECT postgres_hstore_get('a=b', 'x'); +---- +Invalid Input Error + +statement error +SELECT postgres_hstore_get('aa=>"', 'x'); +---- +Invalid Input Error + +# --- postgres_hstore_to_json: basic conversion --- + +query I +SELECT postgres_hstore_to_json('"a key" =>1, b => t, c => null, d=> 12345, e => 012345, f=> 1.234, g=> 2.345e+4'); +---- +{"a key": "1", "b": "t", "c": null, "d": "12345", "e": "012345", "f": "1.234", "g": "2.345e+4"} + +# --- postgres_hstore_to_json: empty hstore --- + +query I +SELECT postgres_hstore_to_json(''); +---- +{} + +# --- postgres_hstore_to_json: NULL input --- + +query I +SELECT postgres_hstore_to_json(NULL); +---- +NULL + +# --- postgres_hstore_to_json: quoted key with space --- + +query I +SELECT postgres_hstore_to_json('"a key"=>1, b=>2'); +---- +{"a key": "1", "b": "2"} + +# --- postgres_hstore_to_json: quoted value with escaped quotes --- + +query I +SELECT postgres_hstore_to_json('k=>"val \"q\""'); +---- +{"k": "val \"q\""} + +# --- postgres_hstore_to_json: hstore NULL value becomes json null --- + +query I +SELECT postgres_hstore_to_json('a=>null, b=>2'); +---- +{"a": null, "b": "2"} + +# --- postgres_hstore_to_json: preserves input order --- + +query I +SELECT postgres_hstore_to_json('z=>1, a=>2, m=>3'); +---- +{"z": "1", "a": "2", "m": "3"} + +# --- postgres_hstore_to_json: value type is JSON --- + +query T +SELECT typeof(postgres_hstore_to_json('a=>1')); +---- +JSON From af3920a2c1da0c8274f6b6ebae9f1bb5eb5721ce Mon Sep 17 00:00:00 2001 From: Carsten Brachem Date: Sun, 12 Apr 2026 11:55:46 +0200 Subject: [PATCH 2/3] remove c++17 usage --- src/postgres_hstore.cpp | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/postgres_hstore.cpp b/src/postgres_hstore.cpp index 74c1ab996..3a5a0cc69 100644 --- a/src/postgres_hstore.cpp +++ b/src/postgres_hstore.cpp @@ -2,13 +2,11 @@ #include "duckdb/common/exception.hpp" #include "duckdb/function/scalar_function.hpp" -#include - namespace duckdb { struct HstorePair { std::string key; - std::optional value; + unique_ptr value; }; namespace { @@ -18,23 +16,23 @@ bool IsSpace(const char c) { return std::isspace(u); } -void SkipWhitespace(std::string_view input, size_t &pos) { +void SkipWhitespace(const std::string &input, size_t &pos) { while (pos < input.size() && IsSpace(input[pos])) { ++pos; } } -bool IsNullLiteral(const std::string_view &s) { +bool IsNullLiteral(const std::string &s) { return s.size() == 4 && std::tolower(static_cast(s[0])) == 'n' && std::tolower(static_cast(s[1])) == 'u' && std::tolower(static_cast(s[2])) == 'l' && std::tolower(static_cast(s[3])) == 'l'; } -std::optional ReadToken(std::string_view input, size_t &pos, bool is_key) { +unique_ptr ReadToken(const std::string &input, size_t &pos, bool is_key) { SkipWhitespace(input, pos); if (pos >= input.size()) { - return std::nullopt; + return nullptr; } std::string result; @@ -46,7 +44,7 @@ std::optional ReadToken(std::string_view input, size_t &pos, bool i char c = input[pos]; ++pos; if (c == '"') { - return std::move(result); + return make_uniq(std::move(result)); } if (c == '\\') { if (pos >= input.size()) { @@ -85,12 +83,12 @@ std::optional ReadToken(std::string_view input, size_t &pos, bool i static_cast(pos)); } if (!is_key && IsNullLiteral(result)) { - return std::nullopt; + return nullptr; } - return std::move(result); + return make_uniq(std::move(result)); } -void ExpectArrow(std::string_view input, size_t &pos) { +void ExpectArrow(const std::string &input, size_t &pos) { SkipWhitespace(input, pos); if (pos + 1 >= input.size() || input[pos] != '=' || input[pos + 1] != '>') { throw InvalidInputException("syntax error in hstore, near \"%c\" at position %d", @@ -99,14 +97,14 @@ void ExpectArrow(std::string_view input, size_t &pos) { pos += 2; } -std::vector ParseHstore(std::string_view input) { +std::vector ParseHstore(const std::string &input) { std::vector pairs; size_t pos = 0; SkipWhitespace(input, pos); while (pos < input.size()) { auto key = ReadToken(input, pos, /* is_key = */ true); - D_ASSERT(key.has_value()); + D_ASSERT(key); ExpectArrow(input, pos); auto value = ReadToken(input, pos, /* is_key = */ false); pairs.push_back({std::move(*key), std::move(value)}); @@ -177,7 +175,7 @@ void PostgresHstoreGetFun(DataChunk &args, ExpressionState &state, Vector &resul for (auto it = pairs.rbegin(); it != pairs.rend(); ++it) { if (it->key == key.GetString()) { - if (!it->value.has_value()) { + if (!it->value) { mask.SetInvalid(idx); return string_t {}; } @@ -205,7 +203,7 @@ void PostgresHstoreToJsonFun(DataChunk &args, ExpressionState &state, Vector &re first = false; JsonEscapeString(json, pair.key); json += ": "; - if (pair.value.has_value()) { + if (pair.value) { JsonEscapeString(json, *pair.value); } else { json += "null"; From 958443edb9a1489cb34981808d6a508f7e667e7b Mon Sep 17 00:00:00 2001 From: Carsten Brachem Date: Sun, 12 Apr 2026 19:55:24 +0200 Subject: [PATCH 3/3] load extension in test / test file cleanup --- test/sql/misc/postgres_hstore.test | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/test/sql/misc/postgres_hstore.test b/test/sql/misc/postgres_hstore.test index 0e84a4197..7f4c228a6 100644 --- a/test/sql/misc/postgres_hstore.test +++ b/test/sql/misc/postgres_hstore.test @@ -2,23 +2,13 @@ # description: test postgres hstore scalar functions # group: [misc] -# Before we load the extension, this will fail -statement error -SELECT postgres_hstore_get('a=>b', 'a'); ----- -Catalog Error: Scalar Function with name postgres_hstore_get does not exist! - -# Require statement will ensure this test is run with this extension loaded require postgres_scanner statement ok -PRAGMA enable_verification +LOAD postgres_scanner; -# Confirm the extension works -query I -SELECT postgres_hstore_get('a=>b', 'a'); ----- -b +statement ok +PRAGMA enable_verification # --- postgres_hstore_get: basic lookups ---