diff --git a/src/query/expression/src/block.rs b/src/query/expression/src/block.rs
index b72ab86c11c91..4e27fd5e8c7e5 100644
--- a/src/query/expression/src/block.rs
+++ b/src/query/expression/src/block.rs
@@ -991,7 +991,7 @@ impl DataBlock {
                 BlockEntry::Const(s, data_type, _) => {
                     s.as_ref().estimated_scalar_repeat_size(num_rows, data_type)
                 }
-                _ => entry.memory_size(),
+                _ => entry.memory_size_with_options(true),
             })
             .sum()
     }
diff --git a/src/query/expression/src/values.rs b/src/query/expression/src/values.rs
index 3e53ef6d60dfa..5a6f71cea7810 100755
--- a/src/query/expression/src/values.rs
+++ b/src/query/expression/src/values.rs
@@ -907,13 +907,13 @@ impl ScalarRef<'_> {
             ScalarRef::Decimal(_) => n * self.memory_size(),
             ScalarRef::Boolean(_) => n.div_ceil(8),
             ScalarRef::Binary(s) => s.len() * n + (n + 1) * 8,
-            ScalarRef::String(s) => n * 16 + if s.len() > 12 && n > 0 { s.len() } else { 0 },
+            ScalarRef::String(s) => n * 16 + s.len() * n,
             ScalarRef::Timestamp(_) => n * 8,
             ScalarRef::TimestampTz(_) => n * 16,
             ScalarRef::Date(_) => n * 4,
             ScalarRef::Interval(_) => n * 16,
-            ScalarRef::Array(col) => col.memory_size(false) * n + (n + 1) * 8,
-            ScalarRef::Map(col) => col.memory_size(false) * n + (n + 1) * 8,
+            ScalarRef::Array(col) => col.memory_size(true) * n + (n + 1) * 8,
+            ScalarRef::Map(col) => col.memory_size(true) * n + (n + 1) * 8,
             ScalarRef::Bitmap(b) => b.len() * n + (n + 1) * 8,
             ScalarRef::Tuple(fields) => {
                 let DataType::Tuple(fields_ty) = data_type else {
diff --git a/src/query/expression/tests/it/kernel.rs b/src/query/expression/tests/it/kernel.rs
index 75f43f25ad151..e71e5dea072dc 100644
--- a/src/query/expression/tests/it/kernel.rs
+++ b/src/query/expression/tests/it/kernel.rs
@@ -598,7 +598,7 @@ fn assert_estimated_scalar_repeat_size(scalar: ScalarRef, num_rows: usize, ty: D
     let col = builder.build();
     assert_eq!(
         scalar.estimated_scalar_repeat_size(num_rows, &ty),
-        col.memory_size(false)
+        col.memory_size(true)
     );
 }
 
@@ -643,20 +643,34 @@ fn test_estimated_scalar_repeat_size() {
         assert_estimated_scalar_repeat_size(scalar, num_rows, ty);
     }
 
-    // string
+    // string short
     {
         let scalar = ScalarRef::String("abc");
         let ty = DataType::String;
         assert_estimated_scalar_repeat_size(scalar, num_rows, ty);
     }
 
-    // string
+    // string short, single row
+    {
+        let scalar = ScalarRef::String("abc");
+        let ty = DataType::String;
+        assert_estimated_scalar_repeat_size(scalar, 1, ty);
+    }
+
+    // long string
     {
         let scalar = ScalarRef::String("abcdefghijklmn123");
         let ty = DataType::String;
         assert_estimated_scalar_repeat_size(scalar, num_rows, ty);
     }
 
+    // nullable(string)
+    {
+        let scalar = ScalarRef::String("abc");
+        let ty = DataType::Nullable(Box::new(DataType::String));
+        assert_estimated_scalar_repeat_size(scalar, num_rows, ty);
+    }
+
     // binary
     {
         let scalar = ScalarRef::Binary(&[1, 133, 244, 123]);
@@ -697,4 +711,17 @@ fn test_estimated_scalar_repeat_size() {
         ])));
         assert_estimated_scalar_repeat_size(scalar, num_rows, ty);
     }
+
+    // tuple
+    {
+        let scalar = ScalarRef::Tuple(vec![
+            ScalarRef::Number(NumberScalar::UInt8(1)),
+            ScalarRef::String("abc"),
+        ]);
+        let ty = DataType::Tuple(vec![
+            DataType::Number(NumberDataType::UInt8),
+            DataType::String,
+        ]);
+        assert_estimated_scalar_repeat_size(scalar, num_rows, ty);
+    }
 }
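The substantive change above is the `ScalarRef::String` arm of `estimated_scalar_repeat_size` in values.rs: repeating a string scalar is now estimated at its full payload per row, where the old formula counted the payload at most once, and only for strings longer than 12 bytes (which looks like the inline threshold of a view-style string layout; that reading is inferred from the formula, not stated in the patch). A minimal standalone sketch of the before/after arithmetic, using plain free functions rather than the actual `ScalarRef` method:

```rust
// Sketch only: free functions mirroring the old and new `ScalarRef::String`
// arms of `estimated_scalar_repeat_size`, for a scalar repeated `n` times.

fn old_estimate(s: &str, n: usize) -> usize {
    // 16 bytes of per-row metadata; payload counted once, and only for
    // strings too long to be inlined (> 12 bytes) -- inferred reading.
    n * 16 + if s.len() > 12 && n > 0 { s.len() } else { 0 }
}

fn new_estimate(s: &str, n: usize) -> usize {
    // 16 bytes of per-row metadata plus the full payload for every row,
    // which the updated test asserts equals `col.memory_size(true)` on the
    // materialized column.
    n * 16 + s.len() * n
}

fn main() {
    // Short string "abc" (3 bytes), repeated 4 times:
    assert_eq!(old_estimate("abc", 4), 64); // 4 * 16, payload ignored
    assert_eq!(new_estimate("abc", 4), 76); // 4 * 16 + 3 * 4

    // Long string (17 bytes), repeated 4 times:
    assert_eq!(old_estimate("abcdefghijklmn123", 4), 81); // 4 * 16 + 17
    assert_eq!(new_estimate("abcdefghijklmn123", 4), 132); // 4 * 16 + 17 * 4
}
```

The higher per-row estimates, together with `memory_size(true)` now being used for `Array`/`Map` repeats and `memory_size_with_options(true)` for non-const block entries, are what move the `bytes` stats in the streaming-load `.result` expectations below.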
diff --git a/tests/suites/1_stateful/01_streaming_load/01_0006_streaming_load_parquet.result b/tests/suites/1_stateful/01_streaming_load/01_0006_streaming_load_parquet.result
index 03049019faee1..fb5af87e01899 100755
--- a/tests/suites/1_stateful/01_streaming_load/01_0006_streaming_load_parquet.result
+++ b/tests/suites/1_stateful/01_streaming_load/01_0006_streaming_load_parquet.result
@@ -5,7 +5,7 @@
 q1.parquet 637 1
 >>>> streaming load: q1.parquet error :
 + curl -sS -H x-databend-query-id:load-q1 -H 'X-Databend-SQL:insert into streaming_load_parquet(c2,c3) from @_databend_load file_format = (type='\''parquet'\'', missing_field_as=error, null_if=())' -F upload=@/tmp/streaming_load_parquet/q1.parquet -u root: -XPUT http://localhost:8000/v1/streaming_load
-{"id":"load-q1","stats":{"rows":1,"bytes":27}}
+{"id":"load-q1","stats":{"rows":1,"bytes":29}}
 <<<<
 >>>> select * from streaming_load_parquet;
 ok 1 2021-01-01
@@ -26,7 +26,7 @@ q2.parquet 431 1
 q3.parquet 431 1
 >>>> streaming load: q3.parquet field_default :
 + curl -sS -H x-databend-query-id:load-q3 -H 'X-Databend-SQL:insert into streaming_load_parquet(c2,c3) from @_databend_load file_format = (type='\''parquet'\'', missing_field_as=field_default, null_if=())' -F upload=@/tmp/streaming_load_parquet/q3.parquet -u root: -XPUT http://localhost:8000/v1/streaming_load
-{"id":"load-q3","stats":{"rows":1,"bytes":23}}
+{"id":"load-q3","stats":{"rows":1,"bytes":25}}
 <<<<
 >>>> select * from streaming_load_parquet;
 ok NULL 2021-01-01
@@ -37,7 +37,7 @@ ok NULL 2021-01-01
 q4.parquet 655 1
 >>>> streaming load: q4.parquet error :
 + curl -sS -H x-databend-query-id:load-q4 -H 'X-Databend-SQL:insert into streaming_load_parquet(c1,c3) from @_databend_load file_format = (type='\''parquet'\'', missing_field_as=error, null_if=())' -F upload=@/tmp/streaming_load_parquet/q4.parquet -u root: -XPUT http://localhost:8000/v1/streaming_load
-{"id":"load-q4","stats":{"rows":1,"bytes":34}}
+{"id":"load-q4","stats":{"rows":1,"bytes":30}}
 <<<<
 >>>> select * from streaming_load_parquet;
 my_null NULL 2021-01-01
diff --git a/tests/suites/1_stateful/01_streaming_load/01_0007_streaming_load_placeholder.result b/tests/suites/1_stateful/01_streaming_load/01_0007_streaming_load_placeholder.result
index 86e95e71b0cda..195c2e58666fc 100755
--- a/tests/suites/1_stateful/01_streaming_load/01_0007_streaming_load_placeholder.result
+++ b/tests/suites/1_stateful/01_streaming_load/01_0007_streaming_load_placeholder.result
@@ -4,7 +4,7 @@
 >>>> copy into @streaming_load_07/data.csv from (select '2020-01-02' as c4, 110 as c2) file_format=(type='csv') single=true include_query_id=false use_raw_path=true detailed_output=true overwrite=true;
 data.csv 17 1
 + curl -sS -H x-databend-query-id:load-csv -H 'X-Databend-SQL:insert into streaming_load_07(c3, c4, c2) values ('\''a'\'', ?, ?) from @_databend_load file_format = (type=csv)' -F upload=@/tmp/streaming_load_07/data.csv -u root: -XPUT http://localhost:8000/v1/streaming_load
-{"id":"load-csv","stats":{"rows":1,"bytes":44}}
+{"id":"load-csv","stats":{"rows":1,"bytes":47}}
 <<<<
 >>>> select * from streaming_load_07;
 ok 110 a 2020-01-02
@@ -14,7 +14,7 @@ ok 110 a 2020-01-02
 >>>> copy into @streaming_load_07/data.text from (select '2020-01-02' as c4, 110 as c2) file_format=(type='text') single=true include_query_id=false use_raw_path=true detailed_output=true overwrite=true;
 data.text 15 1
 + curl -sS -H x-databend-query-id:load-text -H 'X-Databend-SQL:insert into streaming_load_07(c3, c4, c2) values ('\''a'\'', ?, ?) from @_databend_load file_format = (type=text)' -F upload=@/tmp/streaming_load_07/data.text -u root: -XPUT http://localhost:8000/v1/streaming_load
-{"id":"load-text","stats":{"rows":1,"bytes":44}}
+{"id":"load-text","stats":{"rows":1,"bytes":47}}
 <<<<
 >>>> select * from streaming_load_07;
 ok 110 a 2020-01-02
@@ -24,7 +24,7 @@ ok 110 a 2020-01-02
 >>>> copy into @streaming_load_07/data.ndjson from (select '2020-01-02' as c4, 110 as c2) file_format=(type='ndjson') single=true include_query_id=false use_raw_path=true detailed_output=true overwrite=true;
 data.ndjson 29 1
 + curl -sS -H x-databend-query-id:load-ndjson -H 'X-Databend-SQL:insert into streaming_load_07(c3, c4, c2) values ('\''a'\'', ?, ?) from @_databend_load file_format = (type=ndjson)' -F upload=@/tmp/streaming_load_07/data.ndjson -u root: -XPUT http://localhost:8000/v1/streaming_load
-{"id":"load-ndjson","stats":{"rows":1,"bytes":44}}
+{"id":"load-ndjson","stats":{"rows":1,"bytes":47}}
 <<<<
 >>>> select * from streaming_load_07;
 ok 110 a 2020-01-02
@@ -34,7 +34,7 @@ ok 110 a 2020-01-02
 >>>> copy into @streaming_load_07/data.parquet from (select '2020-01-02' as c4, 110 as c2) file_format=(type='parquet') single=true include_query_id=false use_raw_path=true detailed_output=true overwrite=true;
 data.parquet 678 1
 + curl -sS -H x-databend-query-id:load-parquet -H 'X-Databend-SQL:insert into streaming_load_07(c3, c4, c2) values ('\''a'\'', ?, ?) from @_databend_load file_format = (type=parquet)' -F upload=@/tmp/streaming_load_07/data.parquet -u root: -XPUT http://localhost:8000/v1/streaming_load
-{"id":"load-parquet","stats":{"rows":1,"bytes":44}}
+{"id":"load-parquet","stats":{"rows":1,"bytes":47}}
 <<<<
 >>>> select * from streaming_load_07;
 ok 110 a 2020-01-02
diff --git a/tests/suites/1_stateful/01_streaming_load/01_0008_streaming_load_session.result b/tests/suites/1_stateful/01_streaming_load/01_0008_streaming_load_session.result
index 33df07dfb197e..8a9e68aca03e9 100644
--- a/tests/suites/1_stateful/01_streaming_load/01_0008_streaming_load_session.result
+++ b/tests/suites/1_stateful/01_streaming_load/01_0008_streaming_load_session.result
@@ -3,5 +3,5 @@
 >>>> copy into @streaming_load_08/data.csv from (select '2020-01-02', 110) file_format=(type=csv) single=true include_query_id=false use_raw_path=true detailed_output=true overwrite=true;
 data.csv 17 1
 {"need_sticky":false,"need_keep_alive":false,"internal":"{\"last_query_ids\":[\"load-csv\"]}"}
-{"id":"load-csv","stats":{"rows":1,"bytes":22}}
-{"id":"load-csv","stats":{"rows":1,"bytes":22}}
\ No newline at end of file
+{"id":"load-csv","stats":{"rows":1,"bytes":32}}
+{"id":"load-csv","stats":{"rows":1,"bytes":32}}
\ No newline at end of file