Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions be/src/vec/common/variant_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,9 @@ struct GlobRegexCacheEntry {
std::list<std::string>::iterator lru_it;
};

std::mutex g_glob_regex_cache_mutex;
std::list<std::string> g_glob_regex_cache_lru;
std::unordered_map<std::string, GlobRegexCacheEntry> g_glob_regex_cache;
static std::mutex g_glob_regex_cache_mutex;
static std::list<std::string> g_glob_regex_cache_lru;
static std::unordered_map<std::string, GlobRegexCacheEntry> g_glob_regex_cache;

std::shared_ptr<RE2> get_or_build_re2(const std::string& glob_pattern) {
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ private boolean containsVariantTypeOutsideCast(Expression expr) {
}

private boolean containsVariantTypeOutsideCast(Expression expr, boolean underCast) {
boolean nextUnderCast = underCast || expr instanceof Cast;
boolean nextUnderCast = underCast || (expr instanceof Cast && !expr.getDataType().isVariantType());
if (!nextUnderCast && expr.getDataType().isVariantType()) {
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -735,10 +735,7 @@ private boolean isEnableVariantSchemaAutoCast(ExpressionRewriteContext context)
return false;
}
SessionVariable sessionVariable = context.cascadesContext.getConnectContext().getSessionVariable();
if (sessionVariable == null || !sessionVariable.isEnableVariantSchemaAutoCast()) {
return false;
}
return sessionVariable.isEnableVariantSchemaAutoCast();
return sessionVariable != null && sessionVariable.isEnableVariantSchemaAutoCast();
}

private Expression wrapVariantElementAtWithCast(Expression expr) {
Expand Down Expand Up @@ -808,6 +805,9 @@ private Expression maybeCastAliasExpression(Alias alias, ExpressionRewriteContex
return alias;
}
Expression child = alias.child();
if (!(child instanceof ElementAt)) {
return alias;
}
Expression casted = wrapVariantElementAtWithCast(child);
if (casted == child) {
return alias;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,7 @@ private Plan checkChildren(LogicalFilter<? extends Plan> filter) {
for (Expression expr : expressions) {
if (expr instanceof Match) {
Match matchExpression = (Match) expr;
boolean isSlotReference = matchExpression.left() instanceof SlotReference;
boolean isCastChildWithSlotReference = (matchExpression.left() instanceof Cast
&& matchExpression.left().child(0) instanceof SlotReference);
if (!(isSlotReference || isCastChildWithSlotReference)
if (!isSlotOrCastChainOnSlot(matchExpression.left())
|| !(matchExpression.right() instanceof Literal)) {
throw new AnalysisException(String.format("Only support match left operand is SlotRef,"
+ " right operand is Literal. But meet expression %s", matchExpression));
Expand All @@ -61,4 +58,12 @@ private Plan checkChildren(LogicalFilter<? extends Plan> filter) {
}
return filter;
}

/**
 * Checks whether an expression is a slot reference, either directly or
 * underneath any number of nested casts.
 *
 * @param expression the expression to inspect
 * @return {@code true} if peeling off every enclosing {@code Cast} (following
 *         child 0 each time) ends at a {@code SlotReference}; {@code false} otherwise
 */
private boolean isSlotOrCastChainOnSlot(Expression expression) {
    Expression unwrapped = expression;
    for (; unwrapped instanceof Cast; unwrapped = unwrapped.child(0)) {
        // keep peeling: each pass discards one enclosing Cast
    }
    return unwrapped instanceof SlotReference;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2442,7 +2442,7 @@ public boolean isEnableHboNonStrictMatchingMode() {
+ "因为外表会存在表的 schema 中 char 或者 varchar 列的最大长度和底层 parquet 或者 orc 文件中的 schema 不一致"
+ "的情况。此时开启改选项,会按照表的 schema 中的最大长度进行截断。",
"Whether to truncate char or varchar columns according to the table's schema. "
+ "The default is false.\n"
+ "The default is true.\n"
+ "Because the maximum length of the char or varchar column in the schema of the table"
+ " is inconsistent with the schema in the underlying parquet or orc file."
+ " At this time, if the option is turned on, it will be truncated according to the maximum length"
Expand Down Expand Up @@ -3297,12 +3297,12 @@ public boolean isEnableESParallelScroll() {
needForward = true,
affectQueryResultInExecution = true,
description = {
"是否启用基于 schema template 的 variant 自动 cast,默认关闭。",
"是否启用基于 schema template 的 variant 自动 cast,默认开启。",
"Whether to enable schema-template-based auto cast for variant expressions. "
+ "The default is false."
+ "The default is true."
}
)
public boolean enableVariantSchemaAutoCast = false;
public boolean enableVariantSchemaAutoCast = true;

@VariableMgr.VarAttr(
name = DEFAULT_VARIANT_ENABLE_TYPED_PATHS_TO_SPARSE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -299,3 +299,44 @@ charlie 50
-- !leaf_having_mixed --
3033333 3033333
4044444 4044444

-- !glob_wild_match --
2 3

-- !glob_literal_match --
2

-- !nonleaf_auto_cast_on --
\\N

-- !nonleaf_auto_cast_off --
{"level1_num_1":1011111,"level1_num_2":102}

-- !explicit_cast_chain_select_2 --
10
30
50
15

-- !explicit_cast_chain_where_3 --
2
3

-- !explicit_cast_chain_order_by_4 --
3
2
4
1

-- !explicit_cast_chain_group_having_4 --
15 1
30 1
50 1

-- !explicit_cast_chain_match_2 --
1
4

-- !explicit_cast_chain_match_4 --
1
4
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
suite("test_predefine_typed_to_sparse", "p0"){
sql """ set enable_common_expr_pushdown = true """
sql """ set default_variant_enable_doc_mode = false """
sql """ set enable_variant_schema_auto_cast = false """
def count = new Random().nextInt(10) + 1

def load_json_data = {table_name, file_name ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,4 +307,108 @@ suite("test_schema_template_auto_cast", "p0") {
ORDER BY data['int_nested.level1_num_1'] """

sql "DROP TABLE IF EXISTS ${leafTable}"
}

// Test 16: backslash escaping in schema template pattern
def globWildTable = "test_variant_schema_auto_cast_glob_wild"
def globLiteralTable = "test_variant_schema_auto_cast_glob_literal"
def globLiteralPattern = "a\\*b" // SQL sees a\*b, glob sees a\*b (literal *)

sql "DROP TABLE IF EXISTS ${globWildTable}"
sql "DROP TABLE IF EXISTS ${globLiteralTable}"

sql """CREATE TABLE ${globWildTable} (
`id` bigint NULL,
`data` variant<'a*b': BIGINT> NOT NULL
) ENGINE=OLAP DUPLICATE KEY(`id`)
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES ( "replication_allocation" = "tag.location.default: 1")"""

sql """CREATE TABLE ${globLiteralTable} (
`id` bigint NULL,
`data` variant<'${globLiteralPattern}': BIGINT> NOT NULL
) ENGINE=OLAP DUPLICATE KEY(`id`)
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES ( "replication_allocation" = "tag.location.default: 1")"""

sql """insert into ${globWildTable} values(1, '{\"a*b\": 1, \"axb\": 2}')"""
sql """insert into ${globLiteralTable} values(1, '{\"a*b\": 1, \"axb\": 2}')"""

// wildcard a*b matches both a*b and axb
qt_glob_wild_match """ SELECT data['a*b'] + 1 AS v1, data['axb'] + 1 AS v2
FROM ${globWildTable} ORDER BY id """

// literal a\*b matches only a*b
qt_glob_literal_match """ SELECT data['a*b'] + 1 AS v1 FROM ${globLiteralTable} ORDER BY id """
test {
sql """ SELECT data['axb'] + 1 FROM ${globLiteralTable} """
exception "Cannot cast from variant"
}

sql "DROP TABLE IF EXISTS ${globWildTable}"
sql "DROP TABLE IF EXISTS ${globLiteralTable}"


// Test 17: non-leaf path auto cast limitation
def nonleafTable = "test_variant_schema_auto_cast_nonleaf_limit"
sql "DROP TABLE IF EXISTS ${nonleafTable}"
sql """CREATE TABLE ${nonleafTable} (
`id` int NULL,
`data` variant<'int_*': INT> NOT NULL
) ENGINE=OLAP DUPLICATE KEY(`id`)
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES ( "replication_allocation" = "tag.location.default: 1")"""

sql """insert into ${nonleafTable} values(
1, '{"int_1": 1, "int_nested": {"level1_num_1": 1011111, "level1_num_2": 102}}')"""

// auto cast enabled: non-leaf path matches int_* and returns NULL
sql "set enable_variant_schema_auto_cast = true"
qt_nonleaf_auto_cast_on """ SELECT data['int_nested'] FROM ${nonleafTable} ORDER BY id """

// auto cast disabled: return original object
sql "set enable_variant_schema_auto_cast = false"
qt_nonleaf_auto_cast_off """ SELECT data['int_nested'] FROM ${nonleafTable} ORDER BY id """

// restore default
sql "set enable_variant_schema_auto_cast = true"
sql "DROP TABLE IF EXISTS ${nonleafTable}"


// Test 18: multi-layer explicit cast chain (2~4), including MATCH clause
def castChainTable = "test_variant_schema_auto_cast_cast_chain"
sql "DROP TABLE IF EXISTS ${castChainTable}"
sql """CREATE TABLE ${castChainTable} (
`id` bigint NULL,
`data` variant<'num_*': BIGINT, 'str_*': STRING> NOT NULL
) ENGINE=OLAP DUPLICATE KEY(`id`)
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES ( "replication_allocation" = "tag.location.default: 1")"""

sql """insert into ${castChainTable} values(1, '{\"num_a\": 10, \"num_b\": 20, \"str_name\": \"alice\"}')"""
sql """insert into ${castChainTable} values(2, '{\"num_a\": 30, \"num_b\": 40, \"str_name\": \"bob\"}')"""
sql """insert into ${castChainTable} values(3, '{\"num_a\": 50, \"num_b\": 60, \"str_name\": \"charlie\"}')"""
sql """insert into ${castChainTable} values(4, '{\"num_a\": 15, \"num_b\": 25, \"str_name\": \"alice\"}')"""

qt_explicit_cast_chain_select_2 """ SELECT CAST(CAST(data['num_a'] AS BIGINT) AS BIGINT)
FROM ${castChainTable} ORDER BY id """
qt_explicit_cast_chain_where_3 """ SELECT id FROM ${castChainTable}
WHERE CAST(CAST(CAST(data['num_a'] AS BIGINT) AS BIGINT) AS BIGINT) > 20 ORDER BY id """
qt_explicit_cast_chain_order_by_4 """ SELECT id FROM ${castChainTable}
ORDER BY CAST(CAST(CAST(CAST(data['num_b'] AS BIGINT) AS BIGINT) AS BIGINT) AS BIGINT) DESC, id """
qt_explicit_cast_chain_group_having_4 """ SELECT
CAST(CAST(CAST(CAST(data['num_a'] AS BIGINT) AS BIGINT) AS BIGINT) AS BIGINT) AS v, COUNT(*)
FROM ${castChainTable}
GROUP BY CAST(CAST(CAST(CAST(data['num_a'] AS BIGINT) AS BIGINT) AS BIGINT) AS BIGINT)
HAVING CAST(CAST(CAST(CAST(data['num_a'] AS BIGINT) AS BIGINT) AS BIGINT) AS BIGINT) >= 15
ORDER BY v """

sql """ set enable_match_without_inverted_index = true """
qt_explicit_cast_chain_match_2 """ SELECT id FROM ${castChainTable}
WHERE CAST(CAST(data['str_name'] AS STRING) AS VARCHAR) MATCH 'alice' ORDER BY id """
qt_explicit_cast_chain_match_4 """ SELECT id FROM ${castChainTable}
WHERE CAST(CAST(CAST(CAST(data['str_name'] AS STRING) AS VARCHAR) AS STRING) AS VARCHAR) MATCH 'alice' ORDER BY id """
sql """ set enable_match_without_inverted_index = false """

sql "DROP TABLE IF EXISTS ${castChainTable}"

}
Loading