Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/query/expression/src/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -991,7 +991,7 @@ impl DataBlock {
BlockEntry::Const(s, data_type, _) => {
s.as_ref().estimated_scalar_repeat_size(num_rows, data_type)
}
_ => entry.memory_size(),
_ => entry.memory_size_with_options(true),
})
.sum()
}
Expand Down
6 changes: 3 additions & 3 deletions src/query/expression/src/values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -907,13 +907,13 @@ impl ScalarRef<'_> {
ScalarRef::Decimal(_) => n * self.memory_size(),
ScalarRef::Boolean(_) => n.div_ceil(8),
ScalarRef::Binary(s) => s.len() * n + (n + 1) * 8,
ScalarRef::String(s) => n * 16 + if s.len() > 12 && n > 0 { s.len() } else { 0 },
ScalarRef::String(s) => n * 16 + s.len() * n,
ScalarRef::Timestamp(_) => n * 8,
ScalarRef::TimestampTz(_) => n * 16,
ScalarRef::Date(_) => n * 4,
ScalarRef::Interval(_) => n * 16,
ScalarRef::Array(col) => col.memory_size(false) * n + (n + 1) * 8,
ScalarRef::Map(col) => col.memory_size(false) * n + (n + 1) * 8,
ScalarRef::Array(col) => col.memory_size(true) * n + (n + 1) * 8,
ScalarRef::Map(col) => col.memory_size(true) * n + (n + 1) * 8,
ScalarRef::Bitmap(b) => b.len() * n + (n + 1) * 8,
ScalarRef::Tuple(fields) => {
let DataType::Tuple(fields_ty) = data_type else {
Expand Down
33 changes: 30 additions & 3 deletions src/query/expression/tests/it/kernel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -598,7 +598,7 @@ fn assert_estimated_scalar_repeat_size(scalar: ScalarRef, num_rows: usize, ty: D
let col = builder.build();
assert_eq!(
scalar.estimated_scalar_repeat_size(num_rows, &ty),
col.memory_size(false)
col.memory_size(true)
);
}

Expand Down Expand Up @@ -643,20 +643,34 @@ fn test_estimated_scalar_repeat_size() {
assert_estimated_scalar_repeat_size(scalar, num_rows, ty);
}

// string
// string short
{
let scalar = ScalarRef::String("abc");
let ty = DataType::String;
assert_estimated_scalar_repeat_size(scalar, num_rows, ty);
}

// string
// string short, single row
{
let scalar = ScalarRef::String("abc");
let ty = DataType::String;
assert_estimated_scalar_repeat_size(scalar, 1, ty);
}

// long string
{
let scalar = ScalarRef::String("abcdefghijklmn123");
let ty = DataType::String;
assert_estimated_scalar_repeat_size(scalar, num_rows, ty);
}

// nullable(string)
{
let scalar = ScalarRef::String("abc");
let ty = DataType::Nullable(Box::new(DataType::String));
assert_estimated_scalar_repeat_size(scalar, num_rows, ty);
}

// binary
{
let scalar = ScalarRef::Binary(&[1, 133, 244, 123]);
Expand Down Expand Up @@ -697,4 +711,17 @@ fn test_estimated_scalar_repeat_size() {
])));
assert_estimated_scalar_repeat_size(scalar, num_rows, ty);
}

// tuple
{
let scalar = ScalarRef::Tuple(vec![
ScalarRef::Number(NumberScalar::UInt8(1)),
ScalarRef::String("abc"),
]);
let ty = DataType::Tuple(vec![
DataType::Number(NumberDataType::UInt8),
DataType::String,
]);
assert_estimated_scalar_repeat_size(scalar, num_rows, ty);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
q1.parquet 637 1
>>>> streaming load: q1.parquet error :
+ curl -sS -H x-databend-query-id:load-q1 -H 'X-Databend-SQL:insert into streaming_load_parquet(c2,c3) from @_databend_load file_format = (type='\''parquet'\'', missing_field_as=error, null_if=())' -F upload=@/tmp/streaming_load_parquet/q1.parquet -u root: -XPUT http://localhost:8000/v1/streaming_load
{"id":"load-q1","stats":{"rows":1,"bytes":27}}
{"id":"load-q1","stats":{"rows":1,"bytes":29}}
<<<<
>>>> select * from streaming_load_parquet;
ok 1 2021-01-01
Expand All @@ -26,7 +26,7 @@ q2.parquet 431 1
q3.parquet 431 1
>>>> streaming load: q3.parquet field_default :
+ curl -sS -H x-databend-query-id:load-q3 -H 'X-Databend-SQL:insert into streaming_load_parquet(c2,c3) from @_databend_load file_format = (type='\''parquet'\'', missing_field_as=field_default, null_if=())' -F upload=@/tmp/streaming_load_parquet/q3.parquet -u root: -XPUT http://localhost:8000/v1/streaming_load
{"id":"load-q3","stats":{"rows":1,"bytes":23}}
{"id":"load-q3","stats":{"rows":1,"bytes":25}}
<<<<
>>>> select * from streaming_load_parquet;
ok NULL 2021-01-01
Expand All @@ -37,7 +37,7 @@ ok NULL 2021-01-01
q4.parquet 655 1
>>>> streaming load: q4.parquet error :
+ curl -sS -H x-databend-query-id:load-q4 -H 'X-Databend-SQL:insert into streaming_load_parquet(c1,c3) from @_databend_load file_format = (type='\''parquet'\'', missing_field_as=error, null_if=())' -F upload=@/tmp/streaming_load_parquet/q4.parquet -u root: -XPUT http://localhost:8000/v1/streaming_load
{"id":"load-q4","stats":{"rows":1,"bytes":34}}
{"id":"load-q4","stats":{"rows":1,"bytes":30}}
<<<<
>>>> select * from streaming_load_parquet;
my_null NULL 2021-01-01
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
>>>> copy into @streaming_load_07/data.csv from (select '2020-01-02' as c4, 110 as c2) file_format=(type='csv') single=true include_query_id=false use_raw_path=true detailed_output=true overwrite=true;
data.csv 17 1
+ curl -sS -H x-databend-query-id:load-csv -H 'X-Databend-SQL:insert into streaming_load_07(c3, c4, c2) values ('\''a'\'', ?, ?) from @_databend_load file_format = (type=csv)' -F upload=@/tmp/streaming_load_07/data.csv -u root: -XPUT http://localhost:8000/v1/streaming_load
{"id":"load-csv","stats":{"rows":1,"bytes":44}}
{"id":"load-csv","stats":{"rows":1,"bytes":47}}
<<<<
>>>> select * from streaming_load_07;
ok 110 a 2020-01-02
Expand All @@ -14,7 +14,7 @@ ok 110 a 2020-01-02
>>>> copy into @streaming_load_07/data.text from (select '2020-01-02' as c4, 110 as c2) file_format=(type='text') single=true include_query_id=false use_raw_path=true detailed_output=true overwrite=true;
data.text 15 1
+ curl -sS -H x-databend-query-id:load-text -H 'X-Databend-SQL:insert into streaming_load_07(c3, c4, c2) values ('\''a'\'', ?, ?) from @_databend_load file_format = (type=text)' -F upload=@/tmp/streaming_load_07/data.text -u root: -XPUT http://localhost:8000/v1/streaming_load
{"id":"load-text","stats":{"rows":1,"bytes":44}}
{"id":"load-text","stats":{"rows":1,"bytes":47}}
<<<<
>>>> select * from streaming_load_07;
ok 110 a 2020-01-02
Expand All @@ -24,7 +24,7 @@ ok 110 a 2020-01-02
>>>> copy into @streaming_load_07/data.ndjson from (select '2020-01-02' as c4, 110 as c2) file_format=(type='ndjson') single=true include_query_id=false use_raw_path=true detailed_output=true overwrite=true;
data.ndjson 29 1
+ curl -sS -H x-databend-query-id:load-ndjson -H 'X-Databend-SQL:insert into streaming_load_07(c3, c4, c2) values ('\''a'\'', ?, ?) from @_databend_load file_format = (type=ndjson)' -F upload=@/tmp/streaming_load_07/data.ndjson -u root: -XPUT http://localhost:8000/v1/streaming_load
{"id":"load-ndjson","stats":{"rows":1,"bytes":44}}
{"id":"load-ndjson","stats":{"rows":1,"bytes":47}}
<<<<
>>>> select * from streaming_load_07;
ok 110 a 2020-01-02
Expand All @@ -34,7 +34,7 @@ ok 110 a 2020-01-02
>>>> copy into @streaming_load_07/data.parquet from (select '2020-01-02' as c4, 110 as c2) file_format=(type='parquet') single=true include_query_id=false use_raw_path=true detailed_output=true overwrite=true;
data.parquet 678 1
+ curl -sS -H x-databend-query-id:load-parquet -H 'X-Databend-SQL:insert into streaming_load_07(c3, c4, c2) values ('\''a'\'', ?, ?) from @_databend_load file_format = (type=parquet)' -F upload=@/tmp/streaming_load_07/data.parquet -u root: -XPUT http://localhost:8000/v1/streaming_load
{"id":"load-parquet","stats":{"rows":1,"bytes":44}}
{"id":"load-parquet","stats":{"rows":1,"bytes":47}}
<<<<
>>>> select * from streaming_load_07;
ok 110 a 2020-01-02
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
>>>> copy into @streaming_load_08/data.csv from (select '2020-01-02', 110) file_format=(type=csv) single=true include_query_id=false use_raw_path=true detailed_output=true overwrite=true;
data.csv 17 1
{"need_sticky":false,"need_keep_alive":false,"internal":"{\"last_query_ids\":[\"load-csv\"]}"}
{"id":"load-csv","stats":{"rows":1,"bytes":22}}
{"id":"load-csv","stats":{"rows":1,"bytes":22}}
{"id":"load-csv","stats":{"rows":1,"bytes":32}}
{"id":"load-csv","stats":{"rows":1,"bytes":32}}
Loading