diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a11df8b01..5b99d6e43 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -15,6 +15,7 @@ add_library(
   postgres_execute.cpp
   postgres_extension.cpp
   postgres_filter_pushdown.cpp
+  postgres_hstore.cpp
   postgres_parameters.cpp
   postgres_query.cpp
   postgres_scanner.cpp
diff --git a/src/include/postgres_hstore.hpp b/src/include/postgres_hstore.hpp
new file mode 100644
index 000000000..11e76883d
--- /dev/null
+++ b/src/include/postgres_hstore.hpp
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+// DuckDB
+//
+// postgres_hstore.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "duckdb.hpp"
+
+namespace duckdb {
+
+void RegisterHstoreFunctions(ExtensionLoader &loader);
+
+} // namespace duckdb
diff --git a/src/postgres_extension.cpp b/src/postgres_extension.cpp
index 4fbe8fc69..e46fbbf36 100644
--- a/src/postgres_extension.cpp
+++ b/src/postgres_extension.cpp
@@ -21,6 +21,7 @@
 #include "duckdb/main/connection_manager.hpp"
 #include "duckdb/common/error_data.hpp"
 #include "postgres_logging.hpp"
+#include "postgres_hstore.hpp"
 
 using namespace duckdb;
 
@@ -181,6 +182,8 @@ static void LoadInternal(ExtensionLoader &loader) {
 	PostgresConfigurePoolFunction configure_pool_function;
 	loader.RegisterFunction(configure_pool_function);
 
+	RegisterHstoreFunctions(loader);
+
 	// Register the new type
 	SecretType secret_type;
 	secret_type.name = "postgres";
diff --git a/src/postgres_hstore.cpp b/src/postgres_hstore.cpp
new file mode 100644
index 000000000..3a5a0cc69
--- /dev/null
+++ b/src/postgres_hstore.cpp
@@ -0,0 +1,283 @@
+#include "postgres_hstore.hpp"
+#include "duckdb/common/exception.hpp"
+#include "duckdb/function/scalar_function.hpp"
+
+#include <cctype>
+#include <cstdio>
+#include <string>
+#include <vector>
+
+namespace duckdb {
+
+//! One "key => value" entry of a parsed hstore; a null value pointer stands for the hstore NULL.
+struct HstorePair {
+	std::string key;
+	unique_ptr<std::string> value;
+};
+
+namespace {
+
+//! ASCII-only whitespace test: the same characters std::isspace accepts in the "C" locale.
+//! Kept locale-independent on purpose, because a locale-aware classifier may report bytes >= 0x80
+//! as whitespace, which would split multi-byte UTF-8 keys and values.
+bool IsSpace(const char c) {
+	switch (c) {
+	case ' ':
+	case '\t':
+	case '\n':
+	case '\v':
+	case '\f':
+	case '\r':
+		return true;
+	default:
+		return false;
+	}
+}
+
+//! Advances pos past any whitespace in input.
+void SkipWhitespace(const std::string &input, size_t &pos) {
+	while (pos < input.size() && IsSpace(input[pos])) {
+		++pos;
+	}
+}
+
+//! Returns true if s is the four-letter word NULL in any letter case.
+bool IsNullLiteral(const std::string &s) {
+	return s.size() == 4 && std::tolower(static_cast<unsigned char>(s[0])) == 'n' &&
+	       std::tolower(static_cast<unsigned char>(s[1])) == 'u' &&
+	       std::tolower(static_cast<unsigned char>(s[2])) == 'l' &&
+	       std::tolower(static_cast<unsigned char>(s[3])) == 'l';
+}
+
+//! Reads one key or value token starting at pos (after skipping whitespace) and leaves pos just past it.
+//! Quoted tokens run to the closing '"'; unquoted tokens stop at whitespace or at the terminator
+//! ('=' for keys, ',' for values). A backslash takes the following character literally in both forms.
+//! Returns nullptr when only whitespace remains, or when an unquoted value spells NULL.
+//! Throws InvalidInputException on an unterminated quote, a trailing backslash or an empty unquoted key.
+unique_ptr<std::string> ReadToken(const std::string &input, size_t &pos, bool is_key) {
+	SkipWhitespace(input, pos);
+	if (pos >= input.size()) {
+		return nullptr;
+	}
+
+	std::string result;
+
+	// quoted
+	if (input[pos] == '"') {
+		++pos; // skip opening quote
+		while (pos < input.size()) {
+			char c = input[pos];
+			++pos;
+			if (c == '"') {
+				return make_uniq<std::string>(std::move(result));
+			}
+			if (c == '\\') {
+				if (pos >= input.size()) {
+					break;
+				}
+				result += input[pos];
+				++pos;
+			} else {
+				result += c;
+			}
+		}
+		throw InvalidInputException("syntax error in hstore: unexpected end of string");
+	}
+
+	// unquoted
+	auto terminator = is_key ? '=' : ',';
+	while (pos < input.size()) {
+		char c = input[pos];
+		if (IsSpace(c) || c == terminator) {
+			break;
+		}
+		if (c == '\\') {
+			++pos;
+			if (pos >= input.size()) {
+				throw InvalidInputException("syntax error in hstore: unexpected end of string");
+			}
+			result += input[pos];
+		} else {
+			result += c;
+		}
+		++pos;
+	}
+
+	if (is_key && result.empty()) {
+		// The loop can only leave result empty by stopping on the terminator, so pos is in range.
+		throw InvalidInputException("syntax error in hstore, near \"%c\" at position %d", input[pos],
+		                            static_cast<int>(pos));
+	}
+	if (!is_key && IsNullLiteral(result)) {
+		return nullptr;
+	}
+	return make_uniq<std::string>(std::move(result));
+}
+
+//! Consumes the "=>" separator between a key and its value, skipping leading whitespace.
+//! Throws InvalidInputException if the input ends before the separator is complete or if anything
+//! other than "=>" is found.
+void ExpectArrow(const std::string &input, size_t &pos) {
+	SkipWhitespace(input, pos);
+	// Running out of input (nothing left, or a lone trailing '=') gets the same message as other truncations.
+	if (pos >= input.size() || (input[pos] == '=' && pos + 1 >= input.size())) {
+		throw InvalidInputException("syntax error in hstore: unexpected end of string");
+	}
+	if (input[pos] != '=' || input[pos + 1] != '>') {
+		throw InvalidInputException("syntax error in hstore, near \"%c\" at position %d", input[pos],
+		                            static_cast<int>(pos));
+	}
+	pos += 2;
+}
+
+//! Parses the text form of an hstore ("k1=>v1, k2=>v2, ...") into its pairs, in input order.
+//! Duplicate keys are all kept; it is up to the caller to decide which occurrence wins.
+//! Throws InvalidInputException on malformed input, including a key whose value is missing.
+std::vector<HstorePair> ParseHstore(const std::string &input) {
+	std::vector<HstorePair> pairs;
+	size_t pos = 0;
+
+	SkipWhitespace(input, pos);
+	while (pos < input.size()) {
+		auto key = ReadToken(input, pos, /* is_key = */ true);
+		// ReadToken only yields nullptr for a key when no input is left, which the loop condition rules out.
+		D_ASSERT(key);
+		ExpectArrow(input, pos);
+
+		// ReadToken reports both "input exhausted" and "NULL literal" as nullptr, so rule out the
+		// former here: "a=>" has no value and must be rejected rather than read as NULL.
+		SkipWhitespace(input, pos);
+		if (pos >= input.size()) {
+			throw InvalidInputException("syntax error in hstore: unexpected end of string");
+		}
+		auto value = ReadToken(input, pos, /* is_key = */ false);
+		pairs.push_back({std::move(*key), std::move(value)});
+
+		// Expect comma or end
+		SkipWhitespace(input, pos);
+		if (pos >= input.size()) {
+			break;
+		}
+		if (input[pos] != ',') {
+			throw InvalidInputException("syntax error in hstore, near \"%c\" at position %d", input[pos],
+			                            static_cast<int>(pos));
+		}
+		++pos;
+		SkipWhitespace(input, pos);
+	}
+
+	return pairs;
+}
+
+//! Appends s to out as a double-quoted JSON string, escaping '"', '\' and control characters below 0x20.
+void JsonEscapeString(std::string &out, const std::string &s) {
+	out += '"';
+	for (char c : s) {
+		switch (c) {
+		case '"':
+			out += "\\\"";
+			break;
+		case '\\':
+			out += "\\\\";
+			break;
+		case '\b':
+			out += "\\b";
+			break;
+		case '\f':
+			out += "\\f";
+			break;
+		case '\n':
+			out += "\\n";
+			break;
+		case '\r':
+			out += "\\r";
+			break;
+		case '\t':
+			out += "\\t";
+			break;
+		default:
+			if (static_cast<unsigned char>(c) < 0x20) {
+				// Remaining control characters have no short form; emit them as \u00XX.
+				char buf[8];
+				snprintf(buf, sizeof(buf), "\\u%04x", static_cast<unsigned int>(c));
+				out += buf;
+			} else {
+				out += c;
+			}
+			break;
+		}
+	}
+	out += '"';
+}
+
+//! postgres_hstore_get(hstore, key): returns the value stored under key, or NULL when the key is
+//! absent or maps to the hstore NULL. When a key occurs more than once the last occurrence wins.
+void PostgresHstoreGetFun(DataChunk &args, ExpressionState &state, Vector &result) {
+	auto &hstore_vector = args.data[0];
+	auto &key_vector = args.data[1];
+
+	BinaryExecutor::ExecuteWithNulls<string_t, string_t, string_t>(
+	    hstore_vector, key_vector, result, args.size(),
+	    [&](string_t hstore, string_t key, ValidityMask &mask, idx_t idx) -> string_t {
+		    auto pairs = ParseHstore(hstore.GetString());
+		    // Materialize the lookup key once rather than once per comparison.
+		    const auto lookup_key = key.GetString();
+
+		    // Walk back to front so that the last occurrence of a duplicated key wins.
+		    for (auto it = pairs.rbegin(); it != pairs.rend(); ++it) {
+			    if (it->key == lookup_key) {
+				    if (!it->value) {
+					    mask.SetInvalid(idx);
+					    return string_t {};
+				    }
+				    return StringVector::AddString(result, *it->value);
+			    }
+		    }
+		    mask.SetInvalid(idx);
+		    return string_t {};
+	    });
+}
+
+//! postgres_hstore_to_json(hstore): renders the hstore as a JSON object with string (or null) values.
+//! Pairs are emitted in input order; duplicate keys are not merged.
+void PostgresHstoreToJsonFun(DataChunk &args, ExpressionState &state, Vector &result) {
+	auto &hstore_vector = args.data[0];
+
+	UnaryExecutor::Execute<string_t, string_t>(
+	    hstore_vector, result, args.size(), [&](string_t hstore_str) -> string_t {
+		    auto pairs = ParseHstore(hstore_str.GetString());
+		    std::string json;
+		    json += '{';
+		    bool first = true;
+		    for (auto &pair : pairs) {
+			    if (!first) {
+				    json += ", ";
+			    }
+			    first = false;
+			    JsonEscapeString(json, pair.key);
+			    json += ": ";
+			    if (pair.value) {
+				    JsonEscapeString(json, *pair.value);
+			    } else {
+				    json += "null";
+			    }
+		    }
+		    json += '}';
+		    return StringVector::AddString(result, json);
+	    });
+}
+
+} // anonymous namespace
+
+//! Registers the postgres_hstore_get and postgres_hstore_to_json scalar functions with loader.
+void RegisterHstoreFunctions(ExtensionLoader &loader) {
+	auto hstore_get = ScalarFunction("postgres_hstore_get", {LogicalType::VARCHAR, LogicalType::VARCHAR},
+	                                 LogicalType::VARCHAR, PostgresHstoreGetFun);
+	loader.RegisterFunction(hstore_get);
+
+	auto hstore_to_json =
+	    ScalarFunction("postgres_hstore_to_json", {LogicalType::VARCHAR}, LogicalType::JSON(), PostgresHstoreToJsonFun);
+	loader.RegisterFunction(hstore_to_json);
+}
+
+} // namespace duckdb
diff --git a/test/sql/misc/postgres_hstore.test b/test/sql/misc/postgres_hstore.test
new file mode 100644
index 000000000..7f4c228a6
--- /dev/null
+++ b/test/sql/misc/postgres_hstore.test
@@ -0,0 +1,213 @@
+# name: test/sql/misc/postgres_hstore.test
+# description: test postgres hstore scalar functions
+# group: [misc]
+
+require postgres_scanner
+
+statement ok
+LOAD postgres_scanner;
+
+statement ok
+PRAGMA enable_verification
+
+# --- postgres_hstore_get: basic lookups ---
+
+query I
+SELECT postgres_hstore_get('aa=>b, c=>d, b=>16', 'c');
+----
+d
+
+query I
+SELECT postgres_hstore_get('aa=>b, c=>d, b=>16', 'b');
+----
+16
+
+query I
+SELECT postgres_hstore_get('aa=>b, c=>d, b=>16', 'aa');
+----
+b
+
+# --- postgres_hstore_get: duplicate key, last one wins ---
+
+query I
+SELECT postgres_hstore_get('a=>b, a=>c', 'a');
+----
+c
+
+# --- postgres_hstore_get: key
not found returns NULL --- + +query I +SELECT postgres_hstore_get('aa=>b, c=>d, b=>16', 'gg'); +---- +NULL + +# --- postgres_hstore_get: empty hstore returns NULL --- + +query I +SELECT postgres_hstore_get('', 'x'); +---- +NULL + +# --- postgres_hstore_get: hstore NULL value returns NULL --- + +query I +SELECT postgres_hstore_get('aa=>NULL, c=>d, b=>16', 'aa'); +---- +NULL + +query I +SELECT postgres_hstore_get('aa=>NuLl, c=>d', 'aa'); +---- +NULL + +# --- postgres_hstore_get: quoted "NULL" is NOT null, it's a literal string --- + +query I +SELECT postgres_hstore_get('aa=>"NuLl", c=>d', 'aa'); +---- +NuLl + +# --- postgres_hstore_get: quoted key with space --- + +query I +SELECT postgres_hstore_get('"a key" =>1, b => t', 'a key'); +---- +1 + +# --- postgres_hstore_get: quoted value with escaped characters --- + +query I +SELECT postgres_hstore_get('k=>"val \"quoted\""', 'k'); +---- +val "quoted" + +# --- postgres_hstore_get: whitespace variations --- + +query I +SELECT postgres_hstore_get(' a => b ', 'a'); +---- +b + +query I +SELECT postgres_hstore_get('a=>b', 'a'); +---- +b + +query I +SELECT postgres_hstore_get(' a=>b', 'a'); +---- +b + +query I +SELECT postgres_hstore_get('a =>b', 'a'); +---- +b + +query I +SELECT postgres_hstore_get('a=> b', 'a'); +---- +b + +query I +SELECT postgres_hstore_get('a=>b ', 'a'); +---- +b + +# --- postgres_hstore_get: NULL input propagation (DEFAULT_NULL_HANDLING) --- + +query I +SELECT postgres_hstore_get(NULL, 'a'); +---- +NULL + +query I +SELECT postgres_hstore_get('a=>1', NULL); +---- +NULL + +# --- postgres_hstore_get: backslash escaping in unquoted tokens --- + +query I +SELECT postgres_hstore_get('\=a=>q=w', '=a'); +---- +q=w + +# --- postgres_hstore_get: empty string key (quoted) --- + +query I +SELECT postgres_hstore_get('""=>1', ''); +---- +1 + +# --- postgres_hstore_get: error cases --- + +statement error +SELECT postgres_hstore_get(' =>null', 'x'); +---- +Invalid Input Error + +statement error +SELECT 
postgres_hstore_get('a=b', 'x'); +---- +Invalid Input Error + +statement error +SELECT postgres_hstore_get('aa=>"', 'x'); +---- +Invalid Input Error + +# --- postgres_hstore_to_json: basic conversion --- + +query I +SELECT postgres_hstore_to_json('"a key" =>1, b => t, c => null, d=> 12345, e => 012345, f=> 1.234, g=> 2.345e+4'); +---- +{"a key": "1", "b": "t", "c": null, "d": "12345", "e": "012345", "f": "1.234", "g": "2.345e+4"} + +# --- postgres_hstore_to_json: empty hstore --- + +query I +SELECT postgres_hstore_to_json(''); +---- +{} + +# --- postgres_hstore_to_json: NULL input --- + +query I +SELECT postgres_hstore_to_json(NULL); +---- +NULL + +# --- postgres_hstore_to_json: quoted key with space --- + +query I +SELECT postgres_hstore_to_json('"a key"=>1, b=>2'); +---- +{"a key": "1", "b": "2"} + +# --- postgres_hstore_to_json: quoted value with escaped quotes --- + +query I +SELECT postgres_hstore_to_json('k=>"val \"q\""'); +---- +{"k": "val \"q\""} + +# --- postgres_hstore_to_json: hstore NULL value becomes json null --- + +query I +SELECT postgres_hstore_to_json('a=>null, b=>2'); +---- +{"a": null, "b": "2"} + +# --- postgres_hstore_to_json: preserves input order --- + +query I +SELECT postgres_hstore_to_json('z=>1, a=>2, m=>3'); +---- +{"z": "1", "a": "2", "m": "3"} + +# --- postgres_hstore_to_json: value type is JSON --- + +query T +SELECT typeof(postgres_hstore_to_json('a=>1')); +---- +JSON