diff --git a/be/src/vec/exprs/table_function/table_function_factory.cpp b/be/src/vec/exprs/table_function/table_function_factory.cpp index 0990b7754ed385..900118734e635f 100644 --- a/be/src/vec/exprs/table_function/table_function_factory.cpp +++ b/be/src/vec/exprs/table_function/table_function_factory.cpp @@ -33,6 +33,7 @@ #include "vec/exprs/table_function/vexplode_map.h" #include "vec/exprs/table_function/vexplode_numbers.h" #include "vec/exprs/table_function/vexplode_v2.h" +#include "vec/exprs/table_function/vjson_each.h" #include "vec/utils/util.hpp" namespace doris::vectorized { @@ -50,6 +51,8 @@ const std::unordered_map()}, {"explode_map", TableFunctionCreator {}}, {"explode_json_object", TableFunctionCreator {}}, + {"json_each", TableFunctionCreator {}}, + {"json_each_text", TableFunctionCreator {}}, {"posexplode", TableFunctionCreator {}}, {"explode", TableFunctionCreator {}}, {"explode_variant_array_old", TableFunctionCreator()}, diff --git a/be/src/vec/exprs/table_function/vjson_each.cpp b/be/src/vec/exprs/table_function/vjson_each.cpp new file mode 100644 index 00000000000000..459eb3b7462d29 --- /dev/null +++ b/be/src/vec/exprs/table_function/vjson_each.cpp @@ -0,0 +1,203 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/exprs/table_function/vjson_each.h" + +#include + +#include +#include + +#include "common/status.h" +#include "util/jsonb_document.h" +#include "util/jsonb_utils.h" +#include "util/jsonb_writer.h" +#include "vec/columns/column.h" +#include "vec/columns/column_const.h" +#include "vec/columns/column_struct.h" +#include "vec/common/assert_cast.h" +#include "vec/common/string_ref.h" +#include "vec/core/block.h" +#include "vec/core/column_with_type_and_name.h" +#include "vec/exprs/vexpr.h" +#include "vec/exprs/vexpr_context.h" + +namespace doris::vectorized { +#include "common/compile_check_begin.h" + +template +VJsonEachTableFunction::VJsonEachTableFunction() { + _fn_name = TEXT_MODE ? "vjson_each_text" : "vjson_each"; +} + +template +Status VJsonEachTableFunction::process_init(Block* block, RuntimeState* /*state*/) { + int value_column_idx = -1; + RETURN_IF_ERROR(_expr_context->root()->children()[0]->execute(_expr_context.get(), block, + &value_column_idx)); + auto [col, is_const] = unpack_if_const(block->get_by_position(value_column_idx).column); + _json_column = col; + _is_const = is_const; + return Status::OK(); +} + +// Helper: insert one JsonbValue as plain text into a ColumnNullable. +// For strings: raw blob content (quotes stripped, matching json_each_text PG semantics). +// For null JSON values: SQL NULL (insert_default). +// For all others (numbers, bools, objects, arrays): JSON text representation. +static void insert_value_as_text(const JsonbValue* value, MutableColumnPtr& col) { + if (value == nullptr || value->isNull()) { + col->insert_default(); + return; + } + if (value->isString()) { + const auto* str_val = value->unpack(); + col->insert_data(str_val->getBlob(), str_val->getBlobLen()); + } else { + JsonbToJson converter; + std::string text = converter.to_json_string(value); + col->insert_data(text.data(), text.size()); + } +} + +// Helper: insert one JsonbValue in JSONB binary form into a ColumnNullable. +// For null JSON values: SQL NULL (insert_default). +// For all others: write JSONB binary via JsonbWriter. +static void insert_value_as_json(const JsonbValue* value, MutableColumnPtr& col, + JsonbWriter& writer) { + if (value == nullptr || value->isNull()) { + col->insert_default(); + return; + } + writer.reset(); + writer.writeValue(value); + const auto* buf = writer.getOutput()->getBuffer(); + size_t len = writer.getOutput()->getSize(); + col->insert_data(buf, len); +} + +template +void VJsonEachTableFunction::process_row(size_t row_idx) { + TableFunction::process_row(row_idx); + + StringRef text; + const size_t idx = _is_const ? 0 : row_idx; + if (const auto* nullable_col = check_and_get_column(*_json_column)) { + if (nullable_col->is_null_at(idx)) { + return; + } + text = assert_cast(nullable_col->get_nested_column()).get_data_at(idx); + } else { + text = assert_cast(*_json_column).get_data_at(idx); + } + + const JsonbDocument* doc = nullptr; + auto st = JsonbDocument::checkAndCreateDocument(text.data, text.size, &doc); + if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { + return; + } + + const JsonbValue* jv = doc->getValue(); + if (!jv->isObject()) { + return; + } + + const auto* obj = jv->unpack(); + _cur_size = obj->numElem(); + if (_cur_size == 0) { + return; + } + + _kv_pairs.first = ColumnNullable::create(ColumnString::create(), ColumnUInt8::create()); + _kv_pairs.second = ColumnNullable::create(ColumnString::create(), ColumnUInt8::create()); + _kv_pairs.first->reserve(_cur_size); + _kv_pairs.second->reserve(_cur_size); + + if constexpr (TEXT_MODE) { + for (const auto& kv : *obj) { + _kv_pairs.first->insert_data(kv.getKeyStr(), kv.klen()); + insert_value_as_text(kv.value(), _kv_pairs.second); + } + } else { + JsonbWriter writer; + for (const auto& kv : *obj) { + _kv_pairs.first->insert_data(kv.getKeyStr(), kv.klen()); + insert_value_as_json(kv.value(), _kv_pairs.second, writer); + } + } +} + +template +void VJsonEachTableFunction::process_close() { + _json_column = nullptr; + _kv_pairs.first = nullptr; + _kv_pairs.second = nullptr; +} + +template +void VJsonEachTableFunction::get_same_many_values(MutableColumnPtr& column, int length) { + if (current_empty()) { + column->insert_many_defaults(length); + return; + } + + ColumnStruct* ret; + if (_is_nullable) { + auto* nullable = assert_cast(column.get()); + ret = assert_cast(nullable->get_nested_column_ptr().get()); + assert_cast(nullable->get_null_map_column_ptr().get()) + ->insert_many_defaults(length); + } else { + ret = assert_cast(column.get()); + } + + ret->get_column(0).insert_many_from(*_kv_pairs.first, _cur_offset, length); + ret->get_column(1).insert_many_from(*_kv_pairs.second, _cur_offset, length); +} + +template +int VJsonEachTableFunction::get_value(MutableColumnPtr& column, int max_step) { + max_step = std::min(max_step, (int)(_cur_size - _cur_offset)); + + if (current_empty()) { + column->insert_default(); + max_step = 1; + } else { + ColumnStruct* struct_col = nullptr; + if (_is_nullable) { + auto* nullable_col = assert_cast(column.get()); + struct_col = assert_cast(nullable_col->get_nested_column_ptr().get()); + assert_cast(nullable_col->get_null_map_column_ptr().get()) + ->insert_many_defaults(max_step); + } else { + struct_col = assert_cast(column.get()); + } + + struct_col->get_column(0).insert_range_from(*_kv_pairs.first, _cur_offset, max_step); + struct_col->get_column(1).insert_range_from(*_kv_pairs.second, _cur_offset, max_step); + } + + forward(max_step); + return max_step; +} + +// // Explicit template instantiations +template class VJsonEachTableFunction; // json_each +template class VJsonEachTableFunction; // json_each_text + +#include "common/compile_check_end.h" +} // namespace doris::vectorized diff --git a/be/src/vec/exprs/table_function/vjson_each.h b/be/src/vec/exprs/table_function/vjson_each.h new file mode 100644 index 00000000000000..3b54cae8e5e99d --- /dev/null +++ b/be/src/vec/exprs/table_function/vjson_each.h @@ -0,0 +1,68 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "common/status.h" +#include "vec/data_types/data_type.h" +#include "vec/exprs/table_function/table_function.h" + +namespace doris::vectorized { +#include "common/compile_check_begin.h" +class Block; + +// json_each('{"a":"foo","b":123}') → +// | key | value | +// | a | "foo" (JSON) | +// | b | 123 (JSON) | +// +// json_each_text('{"a":"foo","b":123}') → +// | key | value | +// | a | foo | ← string unquoted +// | b | 123 | ← number as text +// +// TEXT_MODE=false → json_each (value column type: JSONB binary) +// TEXT_MODE=true → json_each_text (value column type: plain STRING) +template +class VJsonEachTableFunction : public TableFunction { + ENABLE_FACTORY_CREATOR(VJsonEachTableFunction); + +public: + VJsonEachTableFunction(); + + ~VJsonEachTableFunction() override = default; + + Status process_init(Block* block, RuntimeState* state) override; + void process_row(size_t row_idx) override; + void process_close() override; + void get_same_many_values(MutableColumnPtr& column, int length) override; + int get_value(MutableColumnPtr& column, int max_step) override; + +private: + ColumnPtr _json_column; + // _kv_pairs.first : ColumnNullable key (always plain text) + // _kv_pairs.second : ColumnNullable value (JSONB bytes or plain text) + std::pair _kv_pairs; +}; + +using VJsonEachTableFn = VJsonEachTableFunction; +using VJsonEachTextTableFn = VJsonEachTableFunction; + +#include "common/compile_check_end.h" +} // namespace doris::vectorized diff --git a/be/src/vec/functions/function_fake.cpp b/be/src/vec/functions/function_fake.cpp index 6997d9c93855c1..f4739db1e4c696 100644 --- a/be/src/vec/functions/function_fake.cpp +++ b/be/src/vec/functions/function_fake.cpp @@ -151,6 +151,34 @@ struct FunctionExplodeJsonObject { static std::string get_error_msg() { return "Fake function do not support execute"; } }; +// json_each(json) -> Nullable(Struct(key Nullable(String), value Nullable(JSONB))) +struct FunctionJsonEach { + static DataTypePtr get_return_type_impl(const DataTypes& arguments) { + DCHECK_EQ(arguments[0]->get_primitive_type(), PrimitiveType::TYPE_JSONB) + << " json_each " << arguments[0]->get_name() << " not supported"; + DataTypes fieldTypes(2); + fieldTypes[0] = make_nullable(std::make_shared()); + fieldTypes[1] = make_nullable(std::make_shared()); + return make_nullable(std::make_shared(fieldTypes)); + } + static DataTypes get_variadic_argument_types() { return {}; } + static std::string get_error_msg() { return "Fake function do not support execute"; } +}; + +// json_each_text(json) -> Nullable(Struct(key Nullable(String), value Nullable(String))) +struct FunctionJsonEachText { + static DataTypePtr get_return_type_impl(const DataTypes& arguments) { + DCHECK_EQ(arguments[0]->get_primitive_type(), PrimitiveType::TYPE_JSONB) + << " json_each_text " << arguments[0]->get_name() << " not supported"; + DataTypes fieldTypes(2); + fieldTypes[0] = make_nullable(std::make_shared()); + fieldTypes[1] = make_nullable(std::make_shared()); + return make_nullable(std::make_shared(fieldTypes)); + } + static DataTypes get_variadic_argument_types() { return {}; } + static std::string get_error_msg() { return "Fake function do not support execute"; } +}; + struct FunctionEsquery { static DataTypePtr get_return_type_impl(const DataTypes& arguments) { return FunctionFakeBaseImpl::get_return_type_impl(arguments); @@ -239,6 +267,8 @@ void register_function_fake(SimpleFunctionFactory& factory) { register_table_function_expand_outer(factory, "explode_map"); register_table_function_expand_outer(factory, "explode_json_object"); + register_function(factory, "json_each"); + register_function(factory, "json_each_text"); register_table_function_expand_outer_default(factory, "explode_split"); register_table_function_expand_outer_default(factory, "explode_numbers"); register_table_function_expand_outer_default(factory, diff --git a/be/test/vec/function/table_function_test.cpp b/be/test/vec/function/table_function_test.cpp index 64708135d2ba1d..36dcdf01f7bcf9 100644 --- a/be/test/vec/function/table_function_test.cpp +++ b/be/test/vec/function/table_function_test.cpp @@ -25,11 +25,22 @@ #include #include "exprs/mock_vexpr.h" +#include "runtime/jsonb_value.h" +#include "runtime/runtime_state.h" #include "testutil/any_type.h" +#include "util/jsonb_document.h" +#include "util/jsonb_utils.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_string.h" +#include "vec/columns/column_struct.h" +#include "vec/common/assert_cast.h" #include "vec/core/types.h" +#include "vec/data_types/data_type_nullable.h" +#include "vec/data_types/data_type_struct.h" #include "vec/exprs/table_function/vexplode.h" #include "vec/exprs/table_function/vexplode_numbers.h" #include "vec/exprs/table_function/vexplode_v2.h" +#include "vec/exprs/table_function/vjson_each.h" #include "vec/function/function_test_util.h" namespace doris::vectorized { @@ -308,4 +319,297 @@ TEST_F(TableFunctionTest, vexplode_numbers) { } } +// --------------------------------------------------------------------------- +// Direct-API helpers for json_each / json_each_text tests. +// The test framework's check_vec_table_function does not properly support +// TYPE_STRUCT output (insert_cell always expects ColumnNullable wrapping the +// struct column), so we drive the table function API directly. +// --------------------------------------------------------------------------- + +// Build a one-column JSONB input block. An empty string means SQL NULL. +static std::unique_ptr build_jsonb_input_block(const std::vector& json_rows) { + auto str_col = ColumnString::create(); + auto null_col = ColumnUInt8::create(); + for (const auto& json : json_rows) { + if (json.empty()) { + str_col->insert_default(); + null_col->insert_value(1); + } else { + JsonBinaryValue jbv; + if (jbv.from_json_string(json.c_str(), json.size()).ok()) { + str_col->insert_data(jbv.value(), jbv.size()); + null_col->insert_value(0); + } else { + str_col->insert_default(); + null_col->insert_value(1); + } + } + } + auto col = ColumnNullable::create(std::move(str_col), std::move(null_col)); + auto block = Block::create_unique(); + block->insert({std::move(col), + make_nullable(DataTypeFactory::instance().create_data_type( + doris::PrimitiveType::TYPE_JSONB, false)), + "jval"}); + return block; +} + +// Run the given table function over all rows in block. +// Returns list of (key, value) pairs where value == "__NULL__" means SQL NULL. +// val_is_jsonb controls whether the value column is decoded as JSONB→JSON text or plain text. +static std::vector> run_json_each_fn(TableFunction* fn, + Block* block, + bool val_is_jsonb) { + // Output type: Nullable(Struct(Nullable(VARCHAR key), Nullable(VARCHAR/JSONB value))) + DataTypePtr key_dt = make_nullable(DataTypeFactory::instance().create_data_type( + doris::PrimitiveType::TYPE_VARCHAR, false)); + DataTypePtr val_dt = make_nullable(DataTypeFactory::instance().create_data_type( + val_is_jsonb ? doris::PrimitiveType::TYPE_JSONB : doris::PrimitiveType::TYPE_VARCHAR, + false)); + DataTypePtr struct_dt = + make_nullable(std::make_shared(DataTypes {key_dt, val_dt})); + + auto out_col = struct_dt->create_column(); + fn->set_nullable(); + + TQueryOptions q_opts; + TQueryGlobals q_globals; + RuntimeState rs(q_opts, q_globals); + EXPECT_TRUE(fn->process_init(block, &rs).ok()); + + for (size_t row = 0; row < block->rows(); ++row) { + fn->process_row(row); + if (!fn->current_empty()) { + do { + fn->get_value(out_col, 1); + } while (!fn->eos()); + } + } + fn->process_close(); + + std::vector> result; + const auto& nullable_out = assert_cast(*out_col); + const auto& struct_col = assert_cast(nullable_out.get_nested_column()); + const auto& key_col = assert_cast(struct_col.get_column(0)); + const auto& val_col = assert_cast(struct_col.get_column(1)); + + for (size_t i = 0; i < struct_col.size(); ++i) { + if (nullable_out.is_null_at(i)) { + result.emplace_back("__NULL_ROW__", "__NULL_ROW__"); + continue; + } + std::string key; + if (!key_col.is_null_at(i)) { + StringRef sr = key_col.get_nested_column().get_data_at(i); + key.assign(sr.data, sr.size); + } + std::string val; + if (val_col.is_null_at(i)) { + val = "__NULL__"; + } else { + StringRef sr = val_col.get_nested_column().get_data_at(i); + if (val_is_jsonb) { + // JSONB binary → JSON text for comparison + const JsonbDocument* doc = nullptr; + if (JsonbDocument::checkAndCreateDocument(sr.data, sr.size, &doc).ok() && doc && + doc->getValue()) { + val = JsonbToJson().to_json_string(doc->getValue()); + } else { + val = "__BAD_JSONB__"; + } + } else { + val.assign(sr.data, sr.size); + } + } + result.emplace_back(std::move(key), std::move(val)); + } + return result; +} + +TEST_F(TableFunctionTest, vjson_each) { + init_expr_context(1); + VJsonEachTableFn fn; + fn.set_expr_context(_ctx); + + // basic: string and numeric values; JSONB value column shows JSON text with quotes + { + auto block = build_jsonb_input_block({{R"({"a":"foo","b":123})"}}); + auto rows = run_json_each_fn(&fn, block.get(), true); + ASSERT_EQ(2u, rows.size()); + EXPECT_EQ("a", rows[0].first); + EXPECT_EQ("\"foo\"", rows[0].second); // JSONB string → JSON text includes quotes + EXPECT_EQ("b", rows[1].first); + EXPECT_EQ("123", rows[1].second); + } + + // JSON null value → SQL NULL + { + auto block = build_jsonb_input_block({{R"({"x":null})"}}); + auto rows = run_json_each_fn(&fn, block.get(), true); + ASSERT_EQ(1u, rows.size()); + EXPECT_EQ("x", rows[0].first); + EXPECT_EQ("__NULL__", rows[0].second); + } + + // boolean and negative int + { + auto block = build_jsonb_input_block({{R"({"flag":true,"neg":-1})"}}); + auto rows = run_json_each_fn(&fn, block.get(), true); + ASSERT_EQ(2u, rows.size()); + bool ok_flag = false, ok_neg = false; + for (auto& kv : rows) { + if (kv.first == "flag") { + EXPECT_EQ("true", kv.second); + ok_flag = true; + } + if (kv.first == "neg") { + EXPECT_EQ("-1", kv.second); + ok_neg = true; + } + } + EXPECT_TRUE(ok_flag) << "key 'flag' not found"; + EXPECT_TRUE(ok_neg) << "key 'neg' not found"; + } + + // SQL NULL input → 0 rows + { + auto block = build_jsonb_input_block({{""}}); // empty string → SQL NULL + auto rows = run_json_each_fn(&fn, block.get(), true); + EXPECT_EQ(0u, rows.size()); + } + + // empty object → 0 rows + { + auto block = build_jsonb_input_block({{"{}"}}); + auto rows = run_json_each_fn(&fn, block.get(), true); + EXPECT_EQ(0u, rows.size()); + } + + // non-object input → 0 rows + { + auto block = build_jsonb_input_block({{"[1,2,3]"}}); + auto rows = run_json_each_fn(&fn, block.get(), true); + EXPECT_EQ(0u, rows.size()); + } +} + +TEST_F(TableFunctionTest, vjson_each_text) { + init_expr_context(1); + VJsonEachTextTableFn fn; + fn.set_expr_context(_ctx); + + // basic: strings unquoted (text mode), numbers as plain text + { + auto block = build_jsonb_input_block({{R"({"a":"foo","b":123})"}}); + auto rows = run_json_each_fn(&fn, block.get(), false); + ASSERT_EQ(2u, rows.size()); + EXPECT_EQ("a", rows[0].first); + EXPECT_EQ("foo", rows[0].second); // string unquoted in text mode + EXPECT_EQ("b", rows[1].first); + EXPECT_EQ("123", rows[1].second); + } + + // booleans + { + auto block = build_jsonb_input_block({{R"({"t":true,"f":false})"}}); + auto rows = run_json_each_fn(&fn, block.get(), false); + ASSERT_EQ(2u, rows.size()); + bool ok_t = false, ok_f = false; + for (auto& kv : rows) { + if (kv.first == "t") { + EXPECT_EQ("true", kv.second); + ok_t = true; + } + if (kv.first == "f") { + EXPECT_EQ("false", kv.second); + ok_f = true; + } + } + EXPECT_TRUE(ok_t) << "key 't' not found"; + EXPECT_TRUE(ok_f) << "key 'f' not found"; + } + + // JSON null → SQL NULL + { + auto block = build_jsonb_input_block({{R"({"x":null})"}}); + auto rows = run_json_each_fn(&fn, block.get(), false); + ASSERT_EQ(1u, rows.size()); + EXPECT_EQ("x", rows[0].first); + EXPECT_EQ("__NULL__", rows[0].second); + } + + // SQL NULL input → 0 rows + { + auto block = build_jsonb_input_block({{""}}); + auto rows = run_json_each_fn(&fn, block.get(), false); + EXPECT_EQ(0u, rows.size()); + } + + // empty object → 0 rows + { + auto block = build_jsonb_input_block({{"{}"}}); + auto rows = run_json_each_fn(&fn, block.get(), false); + EXPECT_EQ(0u, rows.size()); + } +} +TEST_F(TableFunctionTest, vjson_each_get_same_many_values) { + init_expr_context(1); + VJsonEachTableFn fn; + fn.set_expr_context(_ctx); + fn.set_nullable(); + + DataTypePtr key_dt = make_nullable(DataTypeFactory::instance().create_data_type( + doris::PrimitiveType::TYPE_VARCHAR, false)); + DataTypePtr val_dt = make_nullable( + DataTypeFactory::instance().create_data_type(doris::PrimitiveType::TYPE_JSONB, false)); + DataTypePtr struct_dt = + make_nullable(std::make_shared(DataTypes {key_dt, val_dt})); + + TQueryOptions q_opts; + TQueryGlobals q_globals; + RuntimeState rs(q_opts, q_globals); + + // Case 1: normal object — get_same_many_values replicates the entry at _cur_offset. + // Simulates a non-last table function being asked to copy its current value 3 times + // to match 3 rows emitted by the driving (last) function in the same pass. + { + auto block = build_jsonb_input_block({{R"({"k0":"v0","k1":"v1"})"}}); + ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); + fn.process_row(0); + ASSERT_FALSE(fn.current_empty()); + + auto out_col = struct_dt->create_column(); + fn.get_same_many_values(out_col, 3); + + const auto& nullable_out = assert_cast(*out_col); + ASSERT_EQ(3u, nullable_out.size()); + const auto& struct_col = assert_cast(nullable_out.get_nested_column()); + const auto& key_col = assert_cast(struct_col.get_column(0)); + // All 3 output rows should carry the entry at _cur_offset=0 ("k0") + for (size_t i = 0; i < 3; ++i) { + EXPECT_FALSE(nullable_out.is_null_at(i)); + ASSERT_FALSE(key_col.is_null_at(i)); + StringRef k = key_col.get_nested_column().get_data_at(i); + EXPECT_EQ("k0", std::string(k.data, k.size)); + } + fn.process_close(); + } + + // Case 2: SQL NULL input — current_empty() is true → insert_many_defaults. + { + auto block = build_jsonb_input_block({{""}}); // empty string → SQL NULL + ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); + fn.process_row(0); + ASSERT_TRUE(fn.current_empty()); + + auto out_col = struct_dt->create_column(); + fn.get_same_many_values(out_col, 2); + + ASSERT_EQ(2u, out_col->size()); + const auto& nullable_out = assert_cast(*out_col); + EXPECT_TRUE(nullable_out.is_null_at(0)); + EXPECT_TRUE(nullable_out.is_null_at(1)); + fn.process_close(); + } +} } // namespace doris::vectorized diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinTableGeneratingFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinTableGeneratingFunctions.java index 1ad8b5ccf4ba8f..5c1bd477f8a9d9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinTableGeneratingFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinTableGeneratingFunctions.java @@ -38,6 +38,8 @@ import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeSplit; import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeSplitOuter; import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeVariantArray; +import org.apache.doris.nereids.trees.expressions.functions.generator.JsonEach; +import org.apache.doris.nereids.trees.expressions.functions.generator.JsonEachText; import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplode; import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplodeOuter; import org.apache.doris.nereids.trees.expressions.functions.generator.Unnest; @@ -63,6 +65,8 @@ public class BuiltinTableGeneratingFunctions implements FunctionHelper { tableGenerating(ExplodeMapOuter.class, "explode_map_outer"), tableGenerating(ExplodeJsonObject.class, "explode_json_object"), tableGenerating(ExplodeJsonObjectOuter.class, "explode_json_object_outer"), + tableGenerating(JsonEach.class, "json_each"), + tableGenerating(JsonEachText.class, "json_each_text"), tableGenerating(ExplodeNumbers.class, "explode_numbers"), tableGenerating(ExplodeNumbersOuter.class, "explode_numbers_outer"), tableGenerating(ExplodeBitmap.class, "explode_bitmap"), @@ -89,7 +93,8 @@ public class BuiltinTableGeneratingFunctions implements FunctionHelper { .add("explode_json_array_string").add("explode_json_array_json").add("explode_json_array_int_outer") .add("explode_json_array_double_outer").add("explode_json_array_string_outer") .add("explode_json_array_json_outer").add("explode_split").add("explode_split_outer") - .add("posexplode").add("posexplode_outer").build(); + .add("posexplode").add("posexplode_outer") + .add("json_each").add("json_each_text").build(); public Set getReturnManyColumnFunctions() { return RETURN_MULTI_COLUMNS_FUNCTIONS; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/JsonEach.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/JsonEach.java new file mode 100644 index 00000000000000..acb7a1a0891ee3 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/JsonEach.java @@ -0,0 +1,79 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.generator; + +import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.literal.StructLiteral; +import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; +import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.JsonType; +import org.apache.doris.nereids.types.StringType; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * json_each(json) expands the top-level JSON object into a set of key/value + * pairs. + * Returns: Struct(key VARCHAR, value JSON) — one row per top-level key. + * + * Example: + * SELECT key, value FROM LATERAL VIEW json_each('{"a":"foo","b":"bar"}') t AS + * key, value + * → key="a", value="foo" (JSON-formatted) + * → key="b", value="bar" + */ +public class JsonEach extends TableGeneratingFunction implements UnaryExpression, AlwaysNullable { + + public static final List SIGNATURES = ImmutableList.of( + FunctionSignature.ret(StructLiteral.constructStructType( + ImmutableList.of(StringType.INSTANCE, JsonType.INSTANCE))) + .args(JsonType.INSTANCE)); + + /** + * Constructor with 1 argument. + */ + public JsonEach(Expression arg) { + super("json_each", arg); + } + + /** Constructor for withChildren and reuse signature. */ + private JsonEach(GeneratorFunctionParams functionParams) { + super(functionParams); + } + + @Override + public JsonEach withChildren(List children) { + Preconditions.checkArgument(children.size() == 1); + return new JsonEach(getFunctionParams(children)); + } + + @Override + public List getSignatures() { + return SIGNATURES; + } + + @Override + public R accept(ExpressionVisitor visitor, C context) { + return visitor.visitJsonEach(this, context); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/JsonEachText.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/JsonEachText.java new file mode 100644 index 00000000000000..21faaf47de34cc --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/JsonEachText.java @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.generator; + +import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.literal.StructLiteral; +import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; +import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.JsonType; +import org.apache.doris.nereids.types.StringType; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * json_each_text(json) expands the top-level JSON object into a set of + * key/value pairs. + * Returns: Struct(key VARCHAR, value VARCHAR) — the JSON value is returned as + * plain text. + * + * Example: + * SELECT key, value FROM LATERAL VIEW json_each_text('{"a":"foo","b":"bar"}') t + * AS key, value + * → key="a", value=foo (plain string, not JSON-quoted) + * → key="b", value=bar + */ +public class JsonEachText extends TableGeneratingFunction implements UnaryExpression, AlwaysNullable { + + public static final List SIGNATURES = ImmutableList.of( + FunctionSignature.ret(StructLiteral.constructStructType( + ImmutableList.of(StringType.INSTANCE, StringType.INSTANCE))) + .args(JsonType.INSTANCE)); + + /** + * Constructor with 1 argument. + */ + public JsonEachText(Expression arg) { + super("json_each_text", arg); + } + + /** Constructor for withChildren and reuse signature. */ + private JsonEachText(GeneratorFunctionParams functionParams) { + super(functionParams); + } + + @Override + public JsonEachText withChildren(List children) { + Preconditions.checkArgument(children.size() == 1); + return new JsonEachText(getFunctionParams(children)); + } + + @Override + public List getSignatures() { + return SIGNATURES; + } + + @Override + public R accept(ExpressionVisitor visitor, C context) { + return visitor.visitJsonEachText(this, context); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/TableGeneratingFunctionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/TableGeneratingFunctionVisitor.java index 85af01ebd616c1..f61585a7aadc3a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/TableGeneratingFunctionVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/TableGeneratingFunctionVisitor.java @@ -38,6 +38,8 @@ import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeSplit; import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeSplitOuter; import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeVariantArray; +import org.apache.doris.nereids.trees.expressions.functions.generator.JsonEach; +import org.apache.doris.nereids.trees.expressions.functions.generator.JsonEachText; import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplode; import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplodeOuter; import org.apache.doris.nereids.trees.expressions.functions.generator.TableGeneratingFunction; @@ -79,6 +81,14 @@ default R visitExplodeJsonObjectOuter(ExplodeJsonObjectOuter explodeOuter, C con return visitTableGeneratingFunction(explodeOuter, context); } + default R visitJsonEach(JsonEach jsonEach, C context) { + return visitTableGeneratingFunction(jsonEach, context); + } + + default R visitJsonEachText(JsonEachText jsonEachText, C context) { + return visitTableGeneratingFunction(jsonEachText, context); + } + default R visitExplodeNumbers(ExplodeNumbers explodeNumbers, C context) { return visitTableGeneratingFunction(explodeNumbers, context); } diff --git a/regression-test/data/query_p0/sql_functions/table_function/json_each.out b/regression-test/data/query_p0/sql_functions/table_function/json_each.out new file mode 100644 index 00000000000000..9bff76da1dd035 --- /dev/null +++ b/regression-test/data/query_p0/sql_functions/table_function/json_each.out @@ -0,0 +1,71 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !json_each_basic -- +1 a "foo" +1 b "bar" + +-- !json_each_mixed -- +2 x 1 +2 y true +2 z \N + +-- !json_each_empty -- + +-- !json_each_null_input -- + +-- !json_each_all -- +1 a "foo" +1 b "bar" +2 x 1 +2 y true +2 z \N + +-- !json_each_literal -- +name "doris" +version 3 + +-- !json_each_neg_false -- +5 bool_f false +5 neg -1 + +-- !json_each_unicode -- +6 cn "中文" + +-- !json_each_non_object_str -- + +-- !json_each_non_object_arr -- + +-- !json_each_text_basic -- +1 a foo +1 b bar + +-- !json_each_text_mixed -- +2 x 1 +2 y true +2 z \N + +-- !json_each_text_empty -- + +-- !json_each_text_null_input -- + +-- !json_each_text_all -- +1 a foo +1 b bar +2 x 1 +2 y true +2 z \N + +-- !json_each_text_literal -- +name doris +version 3 + +-- !json_each_text_neg_false -- +5 bool_f false +5 neg -1 + +-- !json_each_text_unicode -- +6 cn 中文 + +-- !json_each_text_non_object_str -- + +-- !json_each_text_non_object_arr -- + diff --git a/regression-test/suites/query_p0/sql_functions/table_function/json_each.groovy b/regression-test/suites/query_p0/sql_functions/table_function/json_each.groovy new file mode 100644 index 00000000000000..7dee7c65f36f4f --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/table_function/json_each.groovy @@ -0,0 +1,240 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite('json_each') { + sql ''' DROP TABLE IF EXISTS jdata ''' + sql ''' + CREATE TABLE IF NOT EXISTS jdata ( + id INT, + jval JSONB + ) DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES("replication_num" = "1") + ''' + + sql """ INSERT INTO jdata VALUES + (1, '{"a":"foo","b":"bar"}'), + (2, '{"x":1,"y":true,"z":null}'), + (3, '{}'), + (4, NULL), + (5, '{"neg":-1,"bool_f":false}'), + (6, '{"cn":"\u4e2d\u6587"}'), + (7, '"a_string"'), + (8, '[1,2,3]'), + (9, '{"arr":[1,2],"sub":{"x":1}}') + """ + + // ---------- json_each ---------- + + // basic string values: value is JSONB, shown with JSON quotes + qt_json_each_basic ''' + SELECT id, k, v + FROM jdata + LATERAL VIEW json_each(jval) t AS k, v + WHERE id = 1 + ORDER BY id, k + ''' + + // int / bool true / JSON null → SQL NULL + qt_json_each_mixed ''' + SELECT id, k, v + FROM jdata + LATERAL VIEW json_each(jval) t AS k, v + WHERE id = 2 + ORDER BY id, k + ''' + + // empty object → 0 rows + qt_json_each_empty ''' + SELECT id, k, v + FROM jdata + LATERAL VIEW json_each(jval) t AS k, v + WHERE id = 3 + ORDER BY id, k + ''' + + // SQL NULL input, non-outer → 0 rows + qt_json_each_null_input ''' + SELECT id, k, v + FROM jdata + LATERAL VIEW json_each(jval) t AS k, v + WHERE id = 4 + ORDER BY id, k + ''' + + // ids 1-4 combined + qt_json_each_all ''' + SELECT id, k, v + FROM jdata + LATERAL VIEW json_each(jval) t AS k, v + WHERE id IN (1, 2, 3, 4) + ORDER BY id, k + ''' + + // inline literal + qt_json_each_literal """ + SELECT k, v + FROM (SELECT 1) dummy + LATERAL VIEW json_each('{"name":"doris","version":3}') t AS k, v + ORDER BY k + """ + + // negative int, boolean false + qt_json_each_neg_false ''' + SELECT id, k, v + FROM jdata + LATERAL VIEW json_each(jval) t AS k, v + WHERE id = 5 + ORDER BY id, k + ''' + + // unicode string value + qt_json_each_unicode ''' + SELECT id, k, v + FROM jdata + LATERAL VIEW json_each(jval) t AS k, v + WHERE id = 6 + ORDER BY id, k + ''' + + // non-object input: JSON string → 0 rows + qt_json_each_non_object_str ''' + SELECT id, k, v + FROM jdata + LATERAL VIEW json_each(jval) t AS k, v + WHERE id = 7 + ORDER BY id, k + ''' + + // non-object input: JSON array → 0 rows + qt_json_each_non_object_arr ''' + SELECT id, k, v + FROM jdata + LATERAL VIEW json_each(jval) t AS k, v + WHERE id = 8 + ORDER BY id, k + ''' + + // complex value types (nested obj + array): functional coverage only + sql ''' + SELECT id, k + FROM jdata + LATERAL VIEW json_each(jval) t AS k, v + WHERE id = 9 + ORDER BY id, k + ''' + + // ---------- json_each_text ---------- + + // string values unquoted in text mode + qt_json_each_text_basic ''' + SELECT id, k, v + FROM jdata + LATERAL VIEW json_each_text(jval) t AS k, v + WHERE id = 1 + ORDER BY id, k + ''' + + // int / bool / JSON null → SQL NULL + qt_json_each_text_mixed ''' + SELECT id, k, v + FROM jdata + LATERAL VIEW json_each_text(jval) t AS k, v + WHERE id = 2 + ORDER BY id, k + ''' + + // empty object → 0 rows + qt_json_each_text_empty ''' + SELECT id, k, v + FROM jdata + LATERAL VIEW json_each_text(jval) t AS k, v + WHERE id = 3 + ORDER BY id, k + ''' + + // SQL NULL input, non-outer → 0 rows + qt_json_each_text_null_input ''' + SELECT id, k, v + FROM jdata + LATERAL VIEW json_each_text(jval) t AS k, v + WHERE id = 4 + ORDER BY id, k + ''' + + // ids 1-4 combined + qt_json_each_text_all ''' + SELECT id, k, v + FROM jdata + LATERAL VIEW json_each_text(jval) t AS k, v + WHERE id IN (1, 2, 3, 4) + ORDER BY id, k + ''' + + // inline literal: strings unquoted + qt_json_each_text_literal """ + SELECT k, v + FROM (SELECT 1) dummy + LATERAL VIEW json_each_text('{"name":"doris","version":3}') t AS k, v + ORDER BY k + """ + + // negative int, boolean false + qt_json_each_text_neg_false ''' + SELECT id, k, v + FROM jdata + LATERAL VIEW json_each_text(jval) t AS k, v + WHERE id = 5 + ORDER BY id, k + ''' + + // unicode string value: unquoted in text mode + qt_json_each_text_unicode ''' + SELECT id, k, v + FROM jdata + LATERAL VIEW json_each_text(jval) t AS k, v + WHERE id = 6 + ORDER BY id, k + ''' + + // non-object input: JSON string → 0 rows + qt_json_each_text_non_object_str ''' + SELECT id, k, v + FROM jdata + LATERAL VIEW json_each_text(jval) t AS k, v + WHERE id = 7 + ORDER BY id, k + ''' + + // non-object input: JSON array → 0 rows + qt_json_each_text_non_object_arr ''' + SELECT id, k, v + FROM jdata + LATERAL VIEW json_each_text(jval) t AS k, v + WHERE id = 8 + ORDER BY id, k + ''' + + // complex value types in text mode: functional coverage only + sql ''' + SELECT id, k + FROM jdata + LATERAL VIEW json_each_text(jval) t AS k, v + WHERE id = 9 + ORDER BY id, k + ''' +}