diff --git a/cpp/src/common/tablet.cc b/cpp/src/common/tablet.cc index f2d09e53b..afe62ab11 100644 --- a/cpp/src/common/tablet.cc +++ b/cpp/src/common/tablet.cc @@ -404,6 +404,12 @@ int Tablet::add_value(uint32_t row_index, const std::string& measurement_name, return add_value(row_index, measurement_name, String(val)); } +template <> +int Tablet::add_value(uint32_t row_index, const std::string& measurement_name, + std::string val) { + return add_value(row_index, measurement_name, String(val)); +} + template int Tablet::add_value(uint32_t row_index, uint32_t schema_index, bool val); template int Tablet::add_value(uint32_t row_index, uint32_t schema_index, diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc index 620b8392f..8cf7b6223 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.cc +++ b/cpp/src/cwrapper/tsfile_cwrapper.cc @@ -373,6 +373,61 @@ ResultSet tsfile_query_table_on_tree(TsFileReader reader, char** columns, return table_result_set; } +ResultSet tsfile_reader_query_tree_by_row(TsFileReader reader, + char** device_ids, int device_ids_len, + char** measurement_names, + int measurement_names_len, int offset, + int limit, ERRNO* err_code) { + auto* r = static_cast<storage::TsFileReader*>(reader); + storage::ResultSet* result_set = nullptr; + + std::vector<std::string> path_list; + if (device_ids_len > 0 && measurement_names_len > 0) { + path_list.reserve(static_cast<size_t>(device_ids_len) * + static_cast<size_t>(measurement_names_len)); + } + + for (int i = 0; i < device_ids_len; i++) { + const char* device_id = device_ids[i]; + if (device_id == nullptr) { + continue; + } + for (int j = 0; j < measurement_names_len; j++) { + const char* measurement_name = measurement_names[j]; + if (measurement_name == nullptr) { + continue; + } + path_list.emplace_back(std::string(device_id) + "." 
+ + std::string(measurement_name)); + } + } + + *err_code = r->queryByRow(path_list, offset, limit, result_set); + return result_set; +} + +ResultSet tsfile_reader_query_table_by_row(TsFileReader reader, + const char* table_name, + char** column_names, + int column_names_len, int offset, + int limit, ERRNO* err_code) { + auto* r = static_cast<storage::TsFileReader*>(reader); + storage::ResultSet* result_set = nullptr; + + std::vector<std::string> columns; + if (column_names_len > 0) { + columns.reserve(static_cast<size_t>(column_names_len)); + } + for (int i = 0; i < column_names_len; i++) { + const char* name = column_names[i]; + columns.emplace_back(name == nullptr ? "" : std::string(name)); + } + + *err_code = r->queryByRow(table_name == nullptr ? "" : table_name, columns, + offset, limit, result_set); + return result_set; +} + ResultSet tsfile_query_table_batch(TsFileReader reader, const char* table_name, char** columns, uint32_t column_num, Timestamp start_time, Timestamp end_time, diff --git a/cpp/src/cwrapper/tsfile_cwrapper.h b/cpp/src/cwrapper/tsfile_cwrapper.h index 49de2b5b2..4f4ce8d6e 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.h +++ b/cpp/src/cwrapper/tsfile_cwrapper.h @@ -478,6 +478,50 @@ ResultSet tsfile_query_table(TsFileReader reader, const char* table_name, ResultSet tsfile_query_table_on_tree(TsFileReader reader, char** columns, uint32_t column_num, Timestamp start_time, Timestamp end_time, ERRNO* err_code); +/** + * @brief Query time series (tree model) by row with offset/limit. + * + * For tree model, each (device_id, measurement_name) pair maps to a full path + * "device_id.measurement_name". The result set merges multiple paths by + * timestamp, applies the global offset/limit at merge layer, and returns + * at most @p limit rows. < 0 limit means unlimited. + * + * @param reader [in] Valid TsFileReader handle obtained from + * tsfile_reader_new(). + * @param device_ids [in] Array of device identifiers. + * @param device_ids_len [in] Device id count. 
+ * @param measurement_names [in] Array of measurement (sensor) names. + * @param measurement_names_len [in] Measurement name count. + * @param offset [in] Number of leading rows to skip (>= 0). + * @param limit [in] Maximum rows to return. < 0 means unlimited. + * @param err_code [out] Error code. E_OK(0) on success. + * @return ResultSet handle on success; NULL on failure. + */ +ResultSet tsfile_reader_query_tree_by_row(TsFileReader reader, + char** device_ids, int device_ids_len, + char** measurement_names, + int measurement_names_len, int offset, + int limit, ERRNO* err_code); + +/** + * @brief Query table-model data by row with offset/limit pushdown. + * + * @param reader [in] Valid TsFileReader handle obtained from + * tsfile_reader_new(). + * @param table_name [in] Target table name. + * @param column_names [in] Array of requested column names. + * @param column_names_len [in] Requested column count. + * @param offset [in] Number of leading rows to skip (>= 0). + * @param limit [in] Maximum rows to return. < 0 means unlimited. + * @param err_code [out] Error code. E_OK(0) on success. + * @return ResultSet handle on success; NULL on failure. 
+ */ +ResultSet tsfile_reader_query_table_by_row(TsFileReader reader, + const char* table_name, + char** column_names, + int column_names_len, int offset, + int limit, ERRNO* err_code); + ResultSet tsfile_query_table_batch(TsFileReader reader, const char* table_name, char** columns, uint32_t column_num, Timestamp start_time, Timestamp end_time, diff --git a/cpp/src/file/tsfile_io_reader.cc b/cpp/src/file/tsfile_io_reader.cc index 405c55534..69e12e45a 100644 --- a/cpp/src/file/tsfile_io_reader.cc +++ b/cpp/src/file/tsfile_io_reader.cc @@ -298,12 +298,15 @@ int TsFileIOReader::load_device_index_entry( if (device_id_comparable == nullptr) { return E_INVALID_DATA_POINT; } - auto index_node = tsfile_meta_.table_metadata_index_node_map_ - [device_id_comparable->device_id_->get_table_name()]; - assert(tsfile_meta_.table_metadata_index_node_map_.find( - device_id_comparable->device_id_->get_table_name()) != - tsfile_meta_.table_metadata_index_node_map_.end()); - assert(index_node != nullptr); + std::string table_name = device_id_comparable->device_id_->get_table_name(); + auto it = tsfile_meta_.table_metadata_index_node_map_.find(table_name); + if (it == tsfile_meta_.table_metadata_index_node_map_.end()) { + return E_DEVICE_NOT_EXIST; + } + auto index_node = it->second; + if (index_node == nullptr) { + return E_DEVICE_NOT_EXIST; + } if (index_node->node_type_ == LEAF_DEVICE) { // FIXME ret = index_node->binary_search_children( diff --git a/cpp/src/reader/aligned_chunk_reader.cc b/cpp/src/reader/aligned_chunk_reader.cc index 14250e7f8..51da63e84 100644 --- a/cpp/src/reader/aligned_chunk_reader.cc +++ b/cpp/src/reader/aligned_chunk_reader.cc @@ -19,6 +19,8 @@ #include "aligned_chunk_reader.h" +#include <limits> + #include "compress/compressor_factory.h" #include "encoding/decoder_factory.h" @@ -67,6 +69,16 @@ void AlignedChunkReader::reset() { } void AlignedChunkReader::destroy() { + if (time_uncompressed_buf_ != nullptr && time_compressor_ != nullptr) { + 
time_compressor_->after_uncompress(time_uncompressed_buf_); + time_uncompressed_buf_ = nullptr; + } + if (value_uncompressed_buf_ != nullptr && value_compressor_ != nullptr) { + value_compressor_->after_uncompress(value_uncompressed_buf_); + value_uncompressed_buf_ = nullptr; + } + value_page_col_notnull_bitmap_.clear(); + value_page_col_notnull_bitmap_.shrink_to_fit(); if (time_decoder_ != nullptr) { time_decoder_->~Decoder(); DecoderFactory::free(time_decoder_); @@ -711,4 +723,92 @@ int AlignedChunkReader::STRING_DECODE_TYPED_TV_INTO_TSBLOCK( return ret; } +bool AlignedChunkReader::should_skip_page_by_time(int64_t min_time_hint) { + if (min_time_hint == std::numeric_limits<int64_t>::min()) { + return false; + } + // Use time page statistic for time-based skipping. + if (cur_time_page_header_.statistic_ != nullptr) { + return cur_time_page_header_.statistic_->end_time_ < min_time_hint; + } + if (cur_value_page_header_.statistic_ != nullptr) { + return cur_value_page_header_.statistic_->end_time_ < min_time_hint; + } + return false; +} + +bool AlignedChunkReader::should_skip_page_by_offset(int& row_offset) { + if (row_offset <= 0) { + return false; + } + // Use time page statistic for count. + Statistic* stat = cur_time_page_header_.statistic_; + if (stat == nullptr) { + stat = cur_value_page_header_.statistic_; + } + if (stat == nullptr || stat->count_ == 0) { + return false; + } + int32_t count = stat->count_; + if (row_offset >= count) { + row_offset -= count; + return true; + } + return false; +} + +int AlignedChunkReader::get_next_page(TsBlock* ret_tsblock, + Filter* oneshoot_filter, PageArena& pa, + int64_t min_time_hint, int& row_offset, + int& row_limit) { + int ret = E_OK; + Filter* filter = + (oneshoot_filter != nullptr ? 
oneshoot_filter : time_filter_); + + if (row_limit == 0) { + return E_NO_MORE_DATA; + } + + if (prev_time_page_not_finish() && prev_value_page_not_finish()) { + ret = decode_time_value_buf_into_tsblock(ret_tsblock, oneshoot_filter, + &pa); + return ret; + } + if (!prev_time_page_not_finish() && !prev_value_page_not_finish()) { + while (IS_SUCC(ret)) { + if (RET_FAIL(get_cur_page_header( + time_chunk_meta_, time_in_stream_, cur_time_page_header_, + time_chunk_visit_offset_, time_chunk_header_))) { + } else if (RET_FAIL(get_cur_page_header( + value_chunk_meta_, value_in_stream_, + cur_value_page_header_, value_chunk_visit_offset_, + value_chunk_header_))) { + } else if (!cur_page_statisify_filter(filter)) { + if (RET_FAIL(skip_cur_page())) { + } + } else if (should_skip_page_by_time(min_time_hint)) { + if (RET_FAIL(skip_cur_page())) { + } + } else if (should_skip_page_by_offset(row_offset)) { + if (RET_FAIL(skip_cur_page())) { + } + } else { + break; + } + if (!has_more_data()) { + ret = E_NO_MORE_DATA; + break; + } + } + if (IS_SUCC(ret)) { + ret = decode_cur_time_page_data() || decode_cur_value_page_data(); + } + } + if (IS_SUCC(ret)) { + ret = decode_time_value_buf_into_tsblock(ret_tsblock, oneshoot_filter, + &pa); + } + return ret; +} + } // end namespace storage \ No newline at end of file diff --git a/cpp/src/reader/aligned_chunk_reader.h b/cpp/src/reader/aligned_chunk_reader.h index 12e0b9289..91281215e 100644 --- a/cpp/src/reader/aligned_chunk_reader.h +++ b/cpp/src/reader/aligned_chunk_reader.h @@ -79,7 +79,13 @@ class AlignedChunkReader : public IChunkReader { int get_next_page(common::TsBlock* tsblock, Filter* oneshoot_filter, common::PageArena& pa) override; + int get_next_page(common::TsBlock* tsblock, Filter* oneshoot_filter, + common::PageArena& pa, int64_t min_time_hint, + int& row_offset, int& row_limit) override; + private: + bool should_skip_page_by_time(int64_t min_time_hint); + bool should_skip_page_by_offset(int& row_offset); FORCE_INLINE bool 
chunk_has_only_one_page( const ChunkHeader& chunk_header) const { return (chunk_header.chunk_type_ & ONLY_ONE_PAGE_CHUNK_HEADER_MARKER) == diff --git a/cpp/src/reader/block/device_ordered_tsblock_reader.cc b/cpp/src/reader/block/device_ordered_tsblock_reader.cc index 6fb540954..5b3834aed 100644 --- a/cpp/src/reader/block/device_ordered_tsblock_reader.cc +++ b/cpp/src/reader/block/device_ordered_tsblock_reader.cc @@ -23,15 +23,34 @@ namespace storage { int DeviceOrderedTsBlockReader::has_next(bool& has_next) { int ret = common::E_OK; + + if (remaining_limit_ == 0) { + has_next = false; + return common::E_OK; + } + if (current_reader_ != nullptr && IS_SUCC(current_reader_->has_next(has_next)) && has_next) { return common::E_OK; } if (current_reader_ != nullptr) { + remaining_offset_ = current_reader_->get_remaining_offset(); + remaining_limit_ = current_reader_->get_remaining_limit(); delete current_reader_; current_reader_ = nullptr; } - while (device_task_iterator_->has_next()) { + if (remaining_limit_ == 0) { + has_next = false; + return common::E_OK; + } + while (true) { + if (remaining_limit_ == 0) { + has_next = false; + return common::E_OK; + } + if (!device_task_iterator_->has_next()) { + break; + } DeviceQueryTask* task = nullptr; if (IS_FAIL(device_task_iterator_->next(task))) { return ret; @@ -47,7 +66,8 @@ int DeviceOrderedTsBlockReader::has_next(bool& has_next) { return common::E_OOM; } if (RET_FAIL(current_reader_->init(task, block_size_, time_filter_, - field_filter_))) { + field_filter_, remaining_offset_, + remaining_limit_))) { delete current_reader_; current_reader_ = nullptr; return ret; @@ -56,13 +76,12 @@ int DeviceOrderedTsBlockReader::has_next(bool& has_next) { if (RET_FAIL(current_reader_->has_next(has_next))) { return ret; } - // If current device has data, just return. if (has_next) { return ret; } - // If current device does not have data, get next device. - // Free current device reader. 
+ remaining_offset_ = current_reader_->get_remaining_offset(); + remaining_limit_ = current_reader_->get_remaining_limit(); if (current_reader_) { delete current_reader_; current_reader_ = nullptr; @@ -86,6 +105,9 @@ void DeviceOrderedTsBlockReader::close() { delete current_reader_; current_reader_ = nullptr; } + if (device_task_iterator_) { + device_task_iterator_->flush_remaining_device_meta_cache(); + } if (time_filter_ != nullptr) { delete time_filter_; time_filter_ = nullptr; diff --git a/cpp/src/reader/block/device_ordered_tsblock_reader.h b/cpp/src/reader/block/device_ordered_tsblock_reader.h index b00d751a1..8018725c9 100644 --- a/cpp/src/reader/block/device_ordered_tsblock_reader.h +++ b/cpp/src/reader/block/device_ordered_tsblock_reader.h @@ -33,13 +33,15 @@ class DeviceOrderedTsBlockReader : public TsBlockReader { std::unique_ptr device_task_iterator, IMetadataQuerier* metadata_querier, int32_t block_size, TsFileIOReader* tsfile_io_reader, Filter* time_filter, - Filter* field_filter) + Filter* field_filter, int row_offset = 0, int row_limit = -1) : device_task_iterator_(std::move(device_task_iterator)), metadata_querier_(metadata_querier), block_size_(block_size), tsfile_io_reader_(tsfile_io_reader), time_filter_(time_filter), - field_filter_(field_filter) {} + field_filter_(field_filter), + remaining_offset_(row_offset), + remaining_limit_(row_limit) {} ~DeviceOrderedTsBlockReader() override { close(); } int has_next(bool& has_next) override; @@ -54,6 +56,8 @@ class DeviceOrderedTsBlockReader : public TsBlockReader { TsFileIOReader* tsfile_io_reader_; Filter* time_filter_ = nullptr; Filter* field_filter_ = nullptr; + int remaining_offset_ = 0; + int remaining_limit_ = -1; }; } // namespace storage diff --git a/cpp/src/reader/block/single_device_tsblock_reader.cc b/cpp/src/reader/block/single_device_tsblock_reader.cc index 836ab6956..9b21e41b1 100644 --- a/cpp/src/reader/block/single_device_tsblock_reader.cc +++ 
b/cpp/src/reader/block/single_device_tsblock_reader.cc @@ -34,6 +34,88 @@ SingleDeviceTsBlockReader::SingleDeviceTsBlockReader( int SingleDeviceTsBlockReader::init(DeviceQueryTask* device_query_task, uint32_t block_size, Filter* time_filter, Filter* field_filter) { + remaining_offset_ = 0; + remaining_limit_ = -1; + dense_row_count_ = -1; + return init_internal(device_query_task, block_size, time_filter, + field_filter); +} + +int SingleDeviceTsBlockReader::init(DeviceQueryTask* device_query_task, + uint32_t block_size, Filter* time_filter, + Filter* field_filter, int row_offset, + int row_limit) { + remaining_offset_ = row_offset; + remaining_limit_ = row_limit; + dense_row_count_ = -1; + return init_internal(device_query_task, block_size, time_filter, + field_filter); +} + +int32_t SingleDeviceTsBlockReader::compute_dense_row_count( + const std::vector& ts_indexes) { + int64_t reference_time_count = -1; + for (const auto* ts_index : ts_indexes) { + if (ts_index == nullptr) { + continue; + } + + int64_t time_count = 0; + int64_t value_count = 0; + + if (ts_index->get_data_type() == common::VECTOR) { + auto* time_list = ts_index->get_time_chunk_meta_list(); + auto* value_list = ts_index->get_value_chunk_meta_list(); + if (time_list == nullptr || value_list == nullptr) { + return -1; + } + + for (auto it = time_list->begin(); it != time_list->end(); it++) { + if (it.get()->statistic_) { + time_count += it.get()->statistic_->count_; + } + } + for (auto it = value_list->begin(); it != value_list->end(); it++) { + if (it.get()->statistic_) { + value_count += it.get()->statistic_->count_; + } + } + } else { + auto* list = ts_index->get_chunk_meta_list(); + if (list == nullptr) { + return -1; + } + for (auto it = list->begin(); it != list->end(); it++) { + if (it.get()->statistic_) { + time_count += it.get()->statistic_->count_; + } + } + value_count = time_count; + } + + if (time_count == 0 || value_count == 0) { + return -1; + } + if (reference_time_count < 0) { + 
reference_time_count = time_count; + } else if (time_count != reference_time_count) { + return -1; + } + if (value_count != reference_time_count) { + return -1; + } + } + + if (reference_time_count < 0) { + return -1; + } + return static_cast(reference_time_count); +} + +int SingleDeviceTsBlockReader::init_internal(DeviceQueryTask* device_query_task, + uint32_t block_size, + Filter* time_filter, + Filter* field_filter) { int ret = common::E_OK; pa_.init(512, common::AllocModID::MOD_TSFILE_READER); tuple_desc_.reset(); @@ -67,11 +149,34 @@ int SingleDeviceTsBlockReader::init(DeviceQueryTask* device_query_task, time_series_indexs, pa_))) { return ret; } + + dense_row_count_ = compute_dense_row_count(time_series_indexs); + + if (dense_row_count_ >= 0 && remaining_offset_ >= dense_row_count_) { + remaining_offset_ -= dense_row_count_; + delete current_block_; + current_block_ = nullptr; + return common::E_OK; + } + + int ssi_offset = 0; + int ssi_limit = -1; + if (dense_row_count_ >= 0) { + ssi_offset = remaining_offset_; + ssi_limit = remaining_limit_; + } + for (const auto& time_series_index : time_series_indexs) { - construct_column_context(time_series_index, time_filter); + construct_column_context(time_series_index, time_filter, ssi_offset, + ssi_limit); + } + + if (dense_row_count_ >= 0 && !field_column_contexts_.empty()) { + auto* first_ctx = field_column_contexts_.begin()->second; + remaining_offset_ = first_ctx->get_ssi_row_offset(); + remaining_limit_ = first_ctx->get_ssi_row_limit(); } - // There is no data in this single device tsblock reader. 
if (field_column_contexts_.empty()) { delete current_block_; current_block_ = nullptr; @@ -102,17 +207,28 @@ int SingleDeviceTsBlockReader::has_next(bool& has_next) { return common::E_OK; } + if (remaining_limit_ == 0) { + has_next = false; + return common::E_OK; + } + for (auto col_appender : col_appenders_) { col_appender->reset(); } current_block_->reset(); + uint32_t effective_block_size = block_size_; + if (remaining_limit_ > 0) { + effective_block_size = + std::min(block_size_, static_cast(remaining_limit_)); + } + bool next_time_set = false; next_time_ = -1; std::vector min_time_columns; - while (current_block_->get_row_count() < block_size_) { + while (current_block_->get_row_count() < effective_block_size) { for (auto& column_context : field_column_contexts_) { int64_t time; if (IS_FAIL(column_context.second->get_current_time(time))) { @@ -127,6 +243,27 @@ int SingleDeviceTsBlockReader::has_next(bool& has_next) { min_time_columns.push_back(column_context.second); } } + + if (!next_time_set) { + break; + } + + if (remaining_offset_ > 0) { + for (auto* col_ctx : min_time_columns) { + if (IS_FAIL(advance_column(col_ctx))) { + break; + } + } + remaining_offset_--; + min_time_columns.clear(); + next_time_set = false; + next_time_ = -1; + if (field_column_contexts_.empty()) { + break; + } + continue; + } + if (IS_FAIL(fill_measurements(min_time_columns))) { has_next = false; return common::E_OK; @@ -139,6 +276,9 @@ int SingleDeviceTsBlockReader::has_next(bool& has_next) { break; } } + if (remaining_limit_ > 0 && current_block_->get_row_count() > 0) { + remaining_limit_ -= current_block_->get_row_count(); + } int ret = common::E_OK; if (current_block_->get_row_count() > 0) { if (RET_FAIL(fill_ids())) { @@ -150,7 +290,7 @@ int SingleDeviceTsBlockReader::has_next(bool& has_next) { return ret; } has_next = false; - return ret; // return value is not used + return ret; } int SingleDeviceTsBlockReader::fill_measurements( @@ -283,7 +423,8 @@ void 
SingleDeviceTsBlockReader::close() { } int SingleDeviceTsBlockReader::construct_column_context( - const ITimeseriesIndex* time_series_index, Filter* time_filter) { + const ITimeseriesIndex* time_series_index, Filter* time_filter, + int ssi_offset, int ssi_limit) { int ret = common::E_OK; if (time_series_index == nullptr || (time_series_index->get_data_type() != common::TSDataType::VECTOR && @@ -294,17 +435,13 @@ int SingleDeviceTsBlockReader::construct_column_context( if (aligned_time_series_index == nullptr) { assert(false); } - // Todo: when multi value index is supported in aligned time series - // index, we need to change the column context to - // VectorMeasurementColumnContext SingleMeasurementColumnContext* column_context = new SingleMeasurementColumnContext(tsfile_io_reader_); - // May no more data. just return to avoid null pointer. if (RET_FAIL(column_context->init( device_query_task_, time_series_index, time_filter, device_query_task_->get_column_mapping()->get_column_pos( time_series_index->get_measurement_name().to_std_string()), - pa_))) { + pa_, ssi_offset, ssi_limit))) { delete column_context; return ret; } @@ -318,7 +455,7 @@ int SingleDeviceTsBlockReader::construct_column_context( device_query_task_, time_series_index, time_filter, device_query_task_->get_column_mapping()->get_column_pos( time_series_index->get_measurement_name().to_std_string()), - pa_))) { + pa_, ssi_offset, ssi_limit))) { delete column_context; return ret; } @@ -333,7 +470,8 @@ int SingleDeviceTsBlockReader::construct_column_context( int SingleMeasurementColumnContext::init( DeviceQueryTask* device_query_task, const ITimeseriesIndex* time_series_index, Filter* time_filter, - const std::vector& pos_in_result, common::PageArena& pa) { + const std::vector& pos_in_result, common::PageArena& pa, + int ssi_offset, int ssi_limit) { int ret = common::E_OK; pos_in_result_ = pos_in_result; column_name_ = time_series_index->get_measurement_name().to_std_string(); @@ -341,7 +479,10 @@ int 
SingleMeasurementColumnContext::init( device_query_task->get_device_id(), time_series_index->get_measurement_name().to_std_string(), ssi_, pa, time_filter))) { - } else if (RET_FAIL(get_next_tsblock(true))) { + } else { + ssi_->set_row_range(ssi_offset, ssi_limit); + if (RET_FAIL(get_next_tsblock(true))) { + } } return ret; } diff --git a/cpp/src/reader/block/single_device_tsblock_reader.h b/cpp/src/reader/block/single_device_tsblock_reader.h index 46ac8c417..07d16860c 100644 --- a/cpp/src/reader/block/single_device_tsblock_reader.h +++ b/cpp/src/reader/block/single_device_tsblock_reader.h @@ -44,15 +44,27 @@ class SingleDeviceTsBlockReader : public TsBlockReader { int next(common::TsBlock*& ret_block) override; int init(DeviceQueryTask* device_query_task, uint32_t block_size, Filter* time_filter, Filter* field_filter); + int init(DeviceQueryTask* device_query_task, uint32_t block_size, + Filter* time_filter, Filter* field_filter, int row_offset, + int row_limit); void close() override; + int get_remaining_offset() const { return remaining_offset_; } + int get_remaining_limit() const { return remaining_limit_; } + int32_t get_dense_row_count() const { return dense_row_count_; } + private: + int init_internal(DeviceQueryTask* device_query_task, uint32_t block_size, + Filter* time_filter, Filter* field_filter); int construct_column_context(const ITimeseriesIndex* time_series_index, - Filter* time_filter); + Filter* time_filter, int ssi_offset, + int ssi_limit); int fill_measurements( std::vector& column_contexts); int fill_ids(); int advance_column(MeasurementColumnContext* column_context); + int32_t compute_dense_row_count( + const std::vector& ts_indexes); DeviceQueryTask* device_query_task_; Filter* field_filter_; @@ -68,6 +80,9 @@ class SingleDeviceTsBlockReader : public TsBlockReader { int64_t time_column_index_ = 0; TsFileIOReader* tsfile_io_reader_; common::PageArena pa_; + int remaining_offset_ = 0; + int remaining_limit_ = -1; + int32_t dense_row_count_ = 
-1; }; class MeasurementColumnContext { @@ -91,6 +106,16 @@ class MeasurementColumnContext { virtual int move_iter() = 0; + virtual void set_ssi_row_range(int offset, int limit) { + if (ssi_) ssi_->set_row_range(offset, limit); + } + virtual int get_ssi_row_offset() const { + return ssi_ ? ssi_->get_row_offset() : 0; + } + virtual int get_ssi_row_limit() const { + return ssi_ ? ssi_->get_row_limit() : -1; + } + protected: TsFileIOReader* tsfile_io_reader_; TsFileSeriesScanIterator* ssi_ = nullptr; @@ -124,7 +149,8 @@ class SingleMeasurementColumnContext final : public MeasurementColumnContext { column_context_map) override; int init(DeviceQueryTask* device_query_task, const ITimeseriesIndex* time_series_index, Filter* time_filter, - const std::vector& pos_in_result, common::PageArena& pa); + const std::vector& pos_in_result, common::PageArena& pa, + int ssi_offset = 0, int ssi_limit = -1); int get_next_tsblock(bool alloc_mem) override; int get_current_time(int64_t& time) override; int get_current_value(char*& value, uint32_t& len) override; diff --git a/cpp/src/reader/chunk_reader.cc b/cpp/src/reader/chunk_reader.cc index 1b3160b72..b150f7851 100644 --- a/cpp/src/reader/chunk_reader.cc +++ b/cpp/src/reader/chunk_reader.cc @@ -19,6 +19,8 @@ #include "chunk_reader.h" +#include <limits> + #include "compress/compressor_factory.h" #include "encoding/decoder_factory.h" @@ -45,6 +47,13 @@ void ChunkReader::reset() { chunk_header_.reset(); cur_page_header_.reset(); + if (uncompressed_buf_ != nullptr && compressor_ != nullptr) { + compressor_->after_uncompress(uncompressed_buf_); + uncompressed_buf_ = nullptr; + } + time_in_.reset(); + value_in_.reset(); + char* file_data_buf = in_stream_.get_wrapped_buf(); if (file_data_buf != nullptr) { mem_free(file_data_buf); @@ -55,6 +64,13 @@ } void ChunkReader::destroy() { + if (uncompressed_buf_ != nullptr && compressor_ != nullptr) { + compressor_->after_uncompress(uncompressed_buf_); + uncompressed_buf_ = 
nullptr; + } + time_in_.reset(); + value_in_.reset(); + + if (time_decoder_ != nullptr) { time_decoder_->~Decoder(); DecoderFactory::free(time_decoder_); @@ -490,4 +506,82 @@ int ChunkReader::decode_tv_buf_into_tsblock_by_datatype(ByteStream& time_in, return ret; } +bool ChunkReader::should_skip_page_by_time(int64_t min_time_hint) { + if (min_time_hint == std::numeric_limits<int64_t>::min()) { + return false; + } + if (cur_page_header_.statistic_ == nullptr) { + return false; + } + return cur_page_header_.statistic_->end_time_ < min_time_hint; +} + +bool ChunkReader::should_skip_page_by_offset(int& row_offset) { + if (row_offset <= 0) { + return false; + } + if (cur_page_header_.statistic_ == nullptr || + cur_page_header_.statistic_->count_ == 0) { + return false; + } + int32_t count = cur_page_header_.statistic_->count_; + if (row_offset >= count) { + row_offset -= count; + return true; + } + return false; +} + +int ChunkReader::get_next_page(TsBlock* ret_tsblock, Filter* oneshoot_filter, + PageArena& pa, int64_t min_time_hint, + int& row_offset, int& row_limit) { + int ret = E_OK; + Filter* filter = + (oneshoot_filter != nullptr ? 
oneshoot_filter : time_filter_); + + if (row_limit == 0) { + return E_NO_MORE_DATA; + } + + if (prev_page_not_finish()) { + ret = decode_tv_buf_into_tsblock_by_datatype(time_in_, value_in_, + ret_tsblock, filter, &pa); + if (ret == E_OVERFLOW) { + ret = E_OK; + } else { + if (uncompressed_buf_ != nullptr) { + compressor_->after_uncompress(uncompressed_buf_); + uncompressed_buf_ = nullptr; + } + time_in_.reset(); + value_in_.reset(); + } + return ret; + } + + while (IS_SUCC(ret)) { + if (!has_more_data()) { + return E_NO_MORE_DATA; + } + if (RET_FAIL(get_cur_page_header())) { + } else if (!cur_page_statisify_filter(filter)) { + if (RET_FAIL(skip_cur_page())) { + } + } else if (should_skip_page_by_time(min_time_hint)) { + if (RET_FAIL(skip_cur_page())) { + } + } else if (should_skip_page_by_offset(row_offset)) { + if (RET_FAIL(skip_cur_page())) { + } + } else { + break; + } + } + + if (IS_SUCC(ret)) { + ret = decode_cur_page_data(ret_tsblock, filter, pa); + } + return ret; +} + } // end namespace storage \ No newline at end of file diff --git a/cpp/src/reader/chunk_reader.h b/cpp/src/reader/chunk_reader.h index 106b8648b..3acd9c3cf 100644 --- a/cpp/src/reader/chunk_reader.h +++ b/cpp/src/reader/chunk_reader.h @@ -70,7 +70,13 @@ class ChunkReader : public IChunkReader { int get_next_page(common::TsBlock* tsblock, Filter* oneshoot_filter, common::PageArena& pa) override; + int get_next_page(common::TsBlock* tsblock, Filter* oneshoot_filter, + common::PageArena& pa, int64_t min_time_hint, + int& row_offset, int& row_limit) override; + private: + bool should_skip_page_by_time(int64_t min_time_hint); + bool should_skip_page_by_offset(int& row_offset); FORCE_INLINE bool chunk_has_only_one_page() const { return (chunk_header_.chunk_type_ & ONLY_ONE_PAGE_CHUNK_HEADER_MARKER) == diff --git a/cpp/src/reader/device_meta_iterator.cc b/cpp/src/reader/device_meta_iterator.cc index b1fc939fc..a41a29e6c 100644 --- a/cpp/src/reader/device_meta_iterator.cc +++ 
b/cpp/src/reader/device_meta_iterator.cc @@ -22,6 +22,22 @@ #include "filter/tag_filter.h" namespace storage { + +void DeviceMetaIterator::destroy_remaining_cached_devices() { + while (!result_cache_.empty()) { + auto p = result_cache_.front(); + result_cache_.pop(); + if (p.second != nullptr) { + p.second->~MetaIndexNode(); + } + } +} + +DeviceMetaIterator::~DeviceMetaIterator() { + destroy_remaining_cached_devices(); + pa_.destroy(); +} + bool DeviceMetaIterator::has_next() { if (!result_cache_.empty()) { return true; diff --git a/cpp/src/reader/device_meta_iterator.h b/cpp/src/reader/device_meta_iterator.h index e79c35a05..704098b4d 100644 --- a/cpp/src/reader/device_meta_iterator.h +++ b/cpp/src/reader/device_meta_iterator.h @@ -50,7 +50,9 @@ class DeviceMetaIterator { pa_.init(512, common::MOD_DEVICE_META_ITER); } - ~DeviceMetaIterator() { pa_.destroy(); } + ~DeviceMetaIterator(); + + void destroy_remaining_cached_devices(); bool has_next(); diff --git a/cpp/src/reader/ichunk_reader.h b/cpp/src/reader/ichunk_reader.h index ffb841fab..32985cfd2 100644 --- a/cpp/src/reader/ichunk_reader.h +++ b/cpp/src/reader/ichunk_reader.h @@ -52,6 +52,12 @@ class IChunkReader { return common::E_OK; } + virtual int get_next_page(common::TsBlock* tsblock, Filter* oneshoot_filter, + common::PageArena& pa, int64_t min_time_hint, + int& row_offset, int& row_limit) { + return get_next_page(tsblock, oneshoot_filter, pa); + } + virtual ChunkHeader& get_chunk_header() { return chunk_header_; } protected: diff --git a/cpp/src/reader/qds_without_timegenerator.cc b/cpp/src/reader/qds_without_timegenerator.cc index 90c782131..d8129ce0e 100644 --- a/cpp/src/reader/qds_without_timegenerator.cc +++ b/cpp/src/reader/qds_without_timegenerator.cc @@ -27,6 +27,22 @@ namespace storage { int QDSWithoutTimeGenerator::init(TsFileIOReader* io_reader, QueryExpression* qe) { + remaining_offset_ = 0; + remaining_limit_ = -1; + is_single_path_ = false; + return init_internal(io_reader, qe); +} + +int 
QDSWithoutTimeGenerator::init(TsFileIOReader* io_reader, + QueryExpression* qe, int offset, int limit) { + remaining_offset_ = offset; + remaining_limit_ = limit; + is_single_path_ = false; + return init_internal(io_reader, qe); +} + +int QDSWithoutTimeGenerator::init_internal(TsFileIOReader* io_reader, + QueryExpression* qe) { int ret = E_OK; // cppcheck-suppress unreadVariable pa_.reset(); pa_.init(512, common::MOD_TSFILE_READER); @@ -54,6 +70,9 @@ int QDSWithoutTimeGenerator::init(TsFileIOReader* io_reader, return ret; } else { index_lookup_.insert({paths[i].measurement_, i + 1}); + if (paths[i].full_path_ != paths[i].measurement_) { + index_lookup_.insert({paths[i].full_path_, i + 1}); + } ssi_vec_.push_back(ssi); valid_paths.push_back(paths[i]); column_names.push_back(paths[i].full_path_); @@ -61,6 +80,13 @@ int QDSWithoutTimeGenerator::init(TsFileIOReader* io_reader, } size_t path_count = valid_paths.size(); + is_single_path_ = (path_count == 1); + // Only push offset/limit to SSI for single-path; multi-path applies at + // merge. + for (size_t i = 0; i < path_count; i++) { + ssi_vec_[i]->set_row_range(is_single_path_ ? remaining_offset_ : 0, + is_single_path_ ? remaining_limit_ : -1); + } row_record_ = new RowRecord(path_count + 1); tsblocks_.resize(path_count); time_iters_.resize(path_count); @@ -72,6 +98,12 @@ int QDSWithoutTimeGenerator::init(TsFileIOReader* io_reader, ? value_iters_[i]->get_data_type() : TSDataType::NULL_TYPE); } + // Single-path: SSI may have consumed offset/limit by skipping chunks/pages + // during first get_next_tsblock(); sync so QDS does not double-apply. 
+ if (is_single_path_) { + remaining_offset_ = ssi_vec_[0]->get_row_offset(); + remaining_limit_ = ssi_vec_[0]->get_row_limit(); + } result_set_metadata_ = std::make_shared(column_names, data_types); return E_OK; // ignore invalid timeseries @@ -111,37 +143,116 @@ void QDSWithoutTimeGenerator::close() { } int QDSWithoutTimeGenerator::next(bool& has_next) { - row_record_->reset(); - if (heap_time_.size() == 0) { - has_next = false; - return E_OK; + // For single path, apply offset/limit at row level. + if (is_single_path_) { + while (true) { + row_record_->reset(); + if (heap_time_.size() == 0) { + has_next = false; + return E_OK; + } + if (remaining_limit_ == 0) { + has_next = false; + return E_OK; + } + int64_t time = heap_time_.begin()->first; + bool skip_row = remaining_offset_ > 0; + if (skip_row) { + remaining_offset_--; + } else { + row_record_->set_timestamp(time); + row_record_->get_field(0)->set_value(INT64, &time, + get_len(INT64), pa_); + } + + uint32_t len = 0; + uint32_t idx = heap_time_.begin()->second; + auto val_datatype = value_iters_[idx]->get_data_type(); + void* val_ptr = value_iters_[idx]->read(&len); + if (!skip_row) { + row_record_->get_field(idx + 1)->set_value(val_datatype, + val_ptr, len, pa_); + } + value_iters_[idx]->next(); + + heap_time_.erase(heap_time_.begin()); + + if (!time_iters_[idx]->end()) { + int64_t timev = *(int64_t*)(time_iters_[idx]->read(&len)); + heap_time_.insert(std::make_pair(timev, idx)); + time_iters_[idx]->next(); + } else { + get_next_tsblock(idx, false); + } + + if (skip_row) { + continue; + } + + // Emit this row and decrement limit. 
+ if (remaining_limit_ > 0) { + remaining_limit_--; + } + has_next = true; + return E_OK; + } } - int64_t time = heap_time_.begin()->first; - row_record_->set_timestamp(time); - row_record_->get_field(0)->set_value(INT64, &time, get_len(INT64), pa_); - uint32_t count = heap_time_.count(time); - std::multimap::iterator iter = heap_time_.find(time); - for (uint32_t i = 0; i < count; ++i) { - uint32_t len = 0; - auto val_datatype = value_iters_[iter->second]->get_data_type(); - void* val_ptr = value_iters_[iter->second]->read(&len); - row_record_->get_field(iter->second + 1) - ->set_value(val_datatype, val_ptr, len, pa_); - value_iters_[iter->second]->next(); - if (!time_iters_[iter->second]->end()) { - int64_t timev = *(int64_t*)(time_iters_[iter->second]->read(&len)); - heap_time_.insert(std::make_pair(timev, iter->second)); - time_iters_[iter->second]->next(); - } else { - get_next_tsblock(iter->second, false); + // Multi-path: apply offset/limit at merge layer. + while (true) { + row_record_->reset(); + if (heap_time_.size() == 0) { + has_next = false; + return E_OK; + } + // Check limit (limit == 0 means no more rows needed). 
+ if (remaining_limit_ == 0) { + has_next = false; + return E_OK; + } + + int64_t time = heap_time_.begin()->first; + row_record_->set_timestamp(time); + row_record_->get_field(0)->set_value(INT64, &time, get_len(INT64), pa_); + + uint32_t count = heap_time_.count(time); + std::multimap::iterator iter = heap_time_.find(time); + for (uint32_t i = 0; i < count; ++i) { + uint32_t len = 0; + auto val_datatype = value_iters_[iter->second]->get_data_type(); + void* val_ptr = value_iters_[iter->second]->read(&len); + row_record_->get_field(iter->second + 1) + ->set_value(val_datatype, val_ptr, len, pa_); + value_iters_[iter->second]->next(); + if (!time_iters_[iter->second]->end()) { + int64_t timev = + *(int64_t*)(time_iters_[iter->second]->read(&len)); + heap_time_.insert(std::make_pair(timev, iter->second)); + time_iters_[iter->second]->next(); + } else { + // Pass merge_cursor (current time) as min_time_hint + // to help SSI skip chunks/pages that are entirely before + // the current merge position. + get_next_tsblock_with_hint(iter->second, false, time); + } + std::multimap::iterator cur = iter; + iter++; // cppcheck-suppress postfixOperator + heap_time_.erase(cur); + } + + // Apply offset: skip this row. + if (remaining_offset_ > 0) { + remaining_offset_--; + continue; + } + + // Emit this row and decrement limit. 
+ if (remaining_limit_ > 0) { + remaining_limit_--; } - std::multimap::iterator cur = iter; - iter++; // cppcheck-suppress postfixOperator - heap_time_.erase(cur); + has_next = true; + return E_OK; } - has_next = true; - return E_OK; } bool QDSWithoutTimeGenerator::is_null(const std::string& column_name) { @@ -181,7 +292,45 @@ int QDSWithoutTimeGenerator::get_next_tsblock(uint32_t index, bool alloc_mem) { time_iters_[index]->next(); heap_time_.insert(std::pair(time, index)); value_iters_[index] = new ColIterator(1, tsblocks_[index]); - } else { + } else if (ret == E_NO_MORE_DATA) { + if (time_iters_[index]) { + delete time_iters_[index]; + time_iters_[index] = nullptr; + } + if (value_iters_[index]) { + delete value_iters_[index]; + value_iters_[index] = nullptr; + } + if (tsblocks_[index]) { + ssi_vec_[index]->destroy(); + tsblocks_[index] = nullptr; + } + ret = E_OK; + } + return ret; +} + +int QDSWithoutTimeGenerator::get_next_tsblock_with_hint(uint32_t index, + bool alloc_mem, + int64_t min_time_hint) { + if (tsblocks_[index] != nullptr) { + delete time_iters_[index]; + time_iters_[index] = nullptr; + delete value_iters_[index]; + value_iters_[index] = nullptr; + tsblocks_[index]->reset(); + } + + int ret = ssi_vec_[index]->get_next(tsblocks_[index], alloc_mem, nullptr, + min_time_hint); + if (IS_SUCC(ret)) { + time_iters_[index] = new ColIterator(0, tsblocks_[index]); + uint32_t len = 0; + int64_t time = *(int64_t*)(time_iters_[index]->read(&len)); + time_iters_[index]->next(); + heap_time_.insert(std::pair(time, index)); + value_iters_[index] = new ColIterator(1, tsblocks_[index]); + } else if (ret == E_NO_MORE_DATA) { if (time_iters_[index]) { delete time_iters_[index]; time_iters_[index] = nullptr; @@ -194,7 +343,7 @@ int QDSWithoutTimeGenerator::get_next_tsblock(uint32_t index, bool alloc_mem) { ssi_vec_[index]->destroy(); tsblocks_[index] = nullptr; } - ret = E_OK; // TODO + ret = E_OK; } return ret; } diff --git 
a/cpp/src/reader/qds_without_timegenerator.h b/cpp/src/reader/qds_without_timegenerator.h index 0619fa673..1d929e575 100644 --- a/cpp/src/reader/qds_without_timegenerator.h +++ b/cpp/src/reader/qds_without_timegenerator.h @@ -39,9 +39,14 @@ class QDSWithoutTimeGenerator : public ResultSet { tsblocks_(), time_iters_(), value_iters_(), - heap_time_() {} + heap_time_(), + remaining_offset_(0), + remaining_limit_(-1), + is_single_path_(false) {} ~QDSWithoutTimeGenerator() { close(); } int init(TsFileIOReader* io_reader, QueryExpression* qe); + int init(TsFileIOReader* io_reader, QueryExpression* qe, int offset, + int limit); void close(); int next(bool& has_next); bool is_null(const std::string& column_name); @@ -50,7 +55,10 @@ class QDSWithoutTimeGenerator : public ResultSet { std::shared_ptr get_metadata(); private: + int init_internal(TsFileIOReader* io_reader, QueryExpression* qe); int get_next_tsblock(uint32_t index, bool alloc_mem); + int get_next_tsblock_with_hint(uint32_t index, bool alloc_mem, + int64_t min_time_hint); private: std::shared_ptr result_set_metadata_; @@ -62,6 +70,9 @@ class QDSWithoutTimeGenerator : public ResultSet { std::vector value_iters_; std::multimap heap_time_; // key-->time, value-->path_index + int remaining_offset_; + int remaining_limit_; + bool is_single_path_; }; } // namespace storage diff --git a/cpp/src/reader/table_query_executor.cc b/cpp/src/reader/table_query_executor.cc index 30fce4a75..c23ffc0f4 100644 --- a/cpp/src/reader/table_query_executor.cc +++ b/cpp/src/reader/table_query_executor.cc @@ -24,7 +24,7 @@ namespace storage { int TableQueryExecutor::query(const std::string& table_name, const std::vector& columns, - Filter* time_filter, Filter* id_filter, + Filter* time_filter, Filter* tag_filter, Filter* field_filter, ResultSet*& ret_qds) { int ret = common::E_OK; TsFileMeta* file_metadata = nullptr; @@ -41,6 +41,7 @@ int TableQueryExecutor::query(const std::string& table_name, if (IS_FAIL(ret)) { ret_qds = nullptr; + 
delete time_filter; return ret; } std::vector lower_case_column_names(columns); @@ -68,7 +69,7 @@ int TableQueryExecutor::query(const std::string& table_name, auto device_task_iterator = std::unique_ptr(new DeviceTaskIterator( lower_case_column_names, table_root, column_mapping, - meta_data_querier_, id_filter, table_schema)); + meta_data_querier_, tag_filter, table_schema)); std::unique_ptr tsblock_reader; switch (table_query_ordering_) { @@ -80,7 +81,9 @@ int TableQueryExecutor::query(const std::string& table_name, break; case TableQueryOrdering::TIME: default: - ret = common::E_UNSUPPORTED_ORDER; + delete time_filter; + ret_qds = nullptr; + return common::E_UNSUPPORTED_ORDER; } assert(tsblock_reader != nullptr); ret_qds = @@ -89,6 +92,76 @@ int TableQueryExecutor::query(const std::string& table_name, return ret; } +int TableQueryExecutor::query(const std::string& table_name, + const std::vector& columns, + Filter* time_filter, Filter* tag_filter, + Filter* field_filter, int offset, int limit, + ResultSet*& ret_qds) { + int ret = common::E_OK; + TsFileMeta* file_metadata = nullptr; + file_metadata = tsfile_io_reader_->get_tsfile_meta(); + common::PageArena pa; + pa.init(512, common::MOD_TSFILE_READER); + MetaIndexNode* table_root = nullptr; + std::shared_ptr table_schema; + if (RET_FAIL( + file_metadata->get_table_metaindex_node(table_name, table_root))) { + } else if (RET_FAIL( + file_metadata->get_table_schema(table_name, table_schema))) { + } + + if (IS_FAIL(ret)) { + ret_qds = nullptr; + delete time_filter; + return ret; + } + std::vector lower_case_column_names(columns); + for (auto& column : lower_case_column_names) { + to_lowercase_inplace(column); + } + std::shared_ptr column_mapping = + std::make_shared(); + for (size_t i = 0; i < lower_case_column_names.size(); ++i) { + column_mapping->add(lower_case_column_names[i], static_cast(i), + *table_schema); + } + std::vector data_types; + data_types.reserve(lower_case_column_names.size()); + for (size_t i = 0; 
i < lower_case_column_names.size(); ++i) { + auto ind = table_schema->find_column_index(lower_case_column_names[i]); + if (ind < 0) { + delete time_filter; + return common::E_COLUMN_NOT_EXIST; + } + data_types.push_back(table_schema->get_data_types()[ind]); + } + + auto device_task_iterator = + std::unique_ptr(new DeviceTaskIterator( + lower_case_column_names, table_root, column_mapping, + meta_data_querier_, tag_filter, table_schema)); + + std::unique_ptr tsblock_reader; + switch (table_query_ordering_) { + case TableQueryOrdering::DEVICE: + tsblock_reader = std::unique_ptr( + new DeviceOrderedTsBlockReader(std::move(device_task_iterator), + meta_data_querier_, block_size_, + tsfile_io_reader_, time_filter, + field_filter, offset, limit)); + break; + case TableQueryOrdering::TIME: + default: + delete time_filter; + ret_qds = nullptr; + return common::E_UNSUPPORTED_ORDER; + } + assert(tsblock_reader != nullptr); + ret_qds = new TableResultSet(std::move(tsblock_reader), + lower_case_column_names, data_types); + return ret; +} + int TableQueryExecutor::query_on_tree( const std::vector>& devices, const std::vector& tag_columns, diff --git a/cpp/src/reader/table_query_executor.h b/cpp/src/reader/table_query_executor.h index 718947e5a..4cbd8ea3d 100644 --- a/cpp/src/reader/table_query_executor.h +++ b/cpp/src/reader/table_query_executor.h @@ -71,7 +71,11 @@ class TableQueryExecutor { } int query(const std::string& table_name, const std::vector& columns, Filter* time_filter, - Filter* id_filter, Filter* field_filter, ResultSet*& ret_qds); + Filter* tag_filter, Filter* field_filter, ResultSet*& ret_qds); + int query(const std::string& table_name, + const std::vector& columns, Filter* time_filter, + Filter* tag_filter, Filter* field_filter, int offset, int limit, + ResultSet*& ret_qds); int query_on_tree(const std::vector>& devices, const std::vector& tag_columns, const std::vector& field_columns, diff --git a/cpp/src/reader/task/device_task_iterator.cc 
b/cpp/src/reader/task/device_task_iterator.cc index 86a1fe363..dbe763303 100644 --- a/cpp/src/reader/task/device_task_iterator.cc +++ b/cpp/src/reader/task/device_task_iterator.cc @@ -20,6 +20,11 @@ #include "reader/task/device_task_iterator.h" namespace storage { + +void DeviceTaskIterator::flush_remaining_device_meta_cache() { + device_meta_iterator_->destroy_remaining_cached_devices(); +} + bool DeviceTaskIterator::has_next() const { return device_meta_iterator_->has_next(); } diff --git a/cpp/src/reader/task/device_task_iterator.h b/cpp/src/reader/task/device_task_iterator.h index 728e407d2..061711c17 100644 --- a/cpp/src/reader/task/device_task_iterator.h +++ b/cpp/src/reader/task/device_task_iterator.h @@ -60,6 +60,8 @@ class DeviceTaskIterator { ~DeviceTaskIterator() { pa_.destroy(); } + void flush_remaining_device_meta_cache(); + bool has_next() const; int next(DeviceQueryTask*& task); diff --git a/cpp/src/reader/tsfile_executor.cc b/cpp/src/reader/tsfile_executor.cc index 223d4cac3..f2d34167d 100644 --- a/cpp/src/reader/tsfile_executor.cc +++ b/cpp/src/reader/tsfile_executor.cc @@ -86,6 +86,22 @@ int TsFileExecutor::execute(QueryExpression* query_expr, ResultSet*& ret_qds) { } } +int TsFileExecutor::execute(QueryExpression* query_expr, ResultSet*& ret_qds, + int offset, int limit) { + ASSERT(is_inited_); + query_exprs_ = query_expr; + + int ret = E_OK; + QDSWithoutTimeGenerator* qds = new QDSWithoutTimeGenerator; + ret = qds->init(&io_reader_, query_expr, offset, limit); + if (ret != E_OK) { + delete qds; + qds = nullptr; + } + ret_qds = qds; + return ret; +} + int TsFileExecutor::execute_may_with_global_timefilter(QueryExpression* qe, ResultSet*& ret_qds) { int ret = E_OK; diff --git a/cpp/src/reader/tsfile_executor.h b/cpp/src/reader/tsfile_executor.h index 1c370246c..335134c89 100644 --- a/cpp/src/reader/tsfile_executor.h +++ b/cpp/src/reader/tsfile_executor.h @@ -35,6 +35,8 @@ class TsFileExecutor // : public QueryExecutor int init(ReadFile* 
read_file); int init(const std::string& file_path); int execute(QueryExpression* query_expr, ResultSet*& ret_qds); + int execute(QueryExpression* query_expr, ResultSet*& ret_qds, int offset, + int limit); void destroy_query_data_set(ResultSet* qds); TsFileMeta* get_tsfile_meta() { return io_reader_.get_tsfile_meta(); } TsFileIOReader* get_tsfile_io_reader() { return &io_reader_; } diff --git a/cpp/src/reader/tsfile_reader.cc b/cpp/src/reader/tsfile_reader.cc index 03f4bf1d1..196189f0f 100644 --- a/cpp/src/reader/tsfile_reader.cc +++ b/cpp/src/reader/tsfile_reader.cc @@ -101,7 +101,7 @@ int TsFileReader::query(const std::string& table_name, int ret = E_OK; TsFileMeta* tsfile_meta = tsfile_executor_->get_tsfile_meta(); if (tsfile_meta == nullptr) { - return E_TSFILE_WRITER_META_ERR; + return E_FILE_READ_ERR; } std::shared_ptr table_schema = tsfile_meta->table_schemas_.at(to_lower(table_name)); @@ -119,13 +119,51 @@ int TsFileReader::query(const std::string& table_name, return ret; } +int TsFileReader::queryByRow(std::vector& path_list, int offset, + int limit, ResultSet*& result_set) { + int ret = E_OK; + std::vector path_list_vec; + for (const auto& path : path_list) { + path_list_vec.emplace_back(Path(path, true)); + } + QueryExpression* query_expression = + QueryExpression::create(path_list_vec, nullptr); + ret = + tsfile_executor_->execute(query_expression, result_set, offset, limit); + return ret; +} + +int TsFileReader::queryByRow(const std::string& table_name, + const std::vector& column_names, + int offset, int limit, ResultSet*& result_set) { + int ret = E_OK; + TsFileMeta* tsfile_meta = tsfile_executor_->get_tsfile_meta(); + if (tsfile_meta == nullptr) { + return E_FILE_READ_ERR; + } + auto it = tsfile_meta->table_schemas_.find(to_lower(table_name)); + if (it == tsfile_meta->table_schemas_.end() || it->second == nullptr) { + return E_TABLE_NOT_EXIST; + } + + if (table_query_executor_ == nullptr) { + table_query_executor_ = new 
TableQueryExecutor(read_file_); + } + ret = table_query_executor_->query(to_lower(table_name), column_names, + /*time_filter=*/nullptr, + /*tag_filter=*/nullptr, + /*field_filter=*/nullptr, offset, limit, + result_set); + return ret; +} + int TsFileReader::query_table_on_tree( const std::vector& measurement_names, int64_t star_time, int64_t end_time, ResultSet*& result_set) { int ret = E_OK; TsFileMeta* tsfile_meta = tsfile_executor_->get_tsfile_meta(); if (tsfile_meta == nullptr) { - return E_TSFILE_WRITER_META_ERR; + return E_FILE_READ_ERR; } auto device_ids = this->get_all_device_ids(); std::vector> satisfied_device_ids; @@ -211,9 +249,11 @@ std::vector> TsFileReader::get_all_devices( PageArena pa; pa.init(512, MOD_TSFILE_READER); to_lowercase_inplace(table_name); - auto index_node = - tsfile_meta->table_metadata_index_node_map_[table_name]; - get_all_devices(device_ids, index_node, pa); + auto it = tsfile_meta->table_metadata_index_node_map_.find(table_name); + if (it != tsfile_meta->table_metadata_index_node_map_.end() && + it->second != nullptr) { + get_all_devices(device_ids, it->second, pa); + } } return device_ids; } diff --git a/cpp/src/reader/tsfile_reader.h b/cpp/src/reader/tsfile_reader.h index 9f5e38ec5..324d202d3 100644 --- a/cpp/src/reader/tsfile_reader.h +++ b/cpp/src/reader/tsfile_reader.h @@ -116,6 +116,39 @@ class TsFileReader { int64_t end_time, ResultSet*& result_set, Filter* tag_filter, int batch_size = 0); + /** + * @brief Query tree-model time series by row with offset and limit. + * + * @param path_list Full paths (device.measurement) to query. + * @param offset Number of leading rows to skip (>= 0). + * @param limit Maximum rows to return. < 0 means unlimited. + * @param[out] result_set The result set containing query results. + * @return Returns 0 on success, or a non-zero error code on failure. 
+ */ + int queryByRow(std::vector& path_list, int offset, int limit, + ResultSet*& result_set); + + /** + * @brief Query table-model data by row with offset/limit pushdown. + * + * For dense devices (all columns have the same row count), + * offset/limit is pushed down to chunk/page level via SSI, + * skipping entire chunks/pages without decoding. + * For sparse devices, offset/limit is applied at the row-merge level. + * Entire devices can be skipped when their total row count + * falls within the offset range. + * + * @param table_name Table to query. + * @param column_names Columns to select. + * @param offset Number of leading rows to skip (>= 0). + * @param limit Maximum rows to return. < 0 means unlimited. + * @param[out] result_set The result set containing query results. + * @return Returns 0 on success, or a non-zero error code on failure. + */ + int queryByRow(const std::string& table_name, + const std::vector& column_names, int offset, + int limit, ResultSet*& result_set); + int query_table_on_tree(const std::vector& measurement_names, int64_t star_time, int64_t end_time, ResultSet*& result_set); diff --git a/cpp/src/reader/tsfile_series_scan_iterator.cc b/cpp/src/reader/tsfile_series_scan_iterator.cc index 8130bd8ba..c363d0a4d 100644 --- a/cpp/src/reader/tsfile_series_scan_iterator.cc +++ b/cpp/src/reader/tsfile_series_scan_iterator.cc @@ -36,52 +36,104 @@ void TsFileSeriesScanIterator::destroy() { } } +bool TsFileSeriesScanIterator::should_skip_chunk_by_time( + ChunkMeta* cm, int64_t min_time_hint) { + if (min_time_hint == std::numeric_limits::min() || + cm->statistic_ == nullptr) { + return false; + } + return cm->statistic_->end_time_ < min_time_hint; +} + +bool TsFileSeriesScanIterator::should_skip_chunk_by_offset(ChunkMeta* cm) { + if (row_offset_ <= 0) { + return false; + } + if (cm->statistic_ == nullptr || cm->statistic_->count_ == 0) { + return false; + } + int32_t count = cm->statistic_->count_; + if (row_offset_ >= count) { + row_offset_ -= 
count; + return true; + } + return false; +} + int TsFileSeriesScanIterator::get_next(TsBlock*& ret_tsblock, bool alloc, - Filter* oneshoot_filter) { - // TODO @filter + Filter* oneshoot_filter, + int64_t min_time_hint) { int ret = E_OK; Filter* filter = (oneshoot_filter != nullptr) ? oneshoot_filter : time_filter_; - if (!chunk_reader_->has_more_data()) { - while (true) { - if (!has_next_chunk()) { - return E_NO_MORE_DATA; - } else { - if (!is_aligned_) { - ChunkMeta* cm = get_current_chunk_meta(); - advance_to_next_chunk(); - if (filter != nullptr && cm->statistic_ != nullptr && - !filter->satisfy(cm->statistic_)) { - continue; - } - chunk_reader_->reset(); - if (RET_FAIL(chunk_reader_->load_by_meta(cm))) { - } - break; + + while (true) { + if (!chunk_reader_->has_more_data()) { + while (true) { + if (!has_next_chunk()) { + return E_NO_MORE_DATA; } else { - ChunkMeta* value_cm = value_chunk_meta_cursor_.get(); - ChunkMeta* time_cm = time_chunk_meta_cursor_.get(); - advance_to_next_chunk(); - if (filter != nullptr && value_cm->statistic_ != nullptr && - !filter->satisfy(value_cm->statistic_)) { - continue; - } - chunk_reader_->reset(); - if (RET_FAIL(chunk_reader_->load_by_aligned_meta( - time_cm, value_cm))) { + if (!is_aligned_) { + ChunkMeta* cm = get_current_chunk_meta(); + advance_to_next_chunk(); + // Skip by time filter. + if (filter != nullptr && cm->statistic_ != nullptr && + !filter->satisfy(cm->statistic_)) { + continue; + } + // Skip by min_time_hint (merge cursor). + if (should_skip_chunk_by_time(cm, min_time_hint)) { + continue; + } + // Single-path: skip entire chunk by offset using count. 
+ if (should_skip_chunk_by_offset(cm)) { + continue; + } + chunk_reader_->reset(); + if (RET_FAIL(chunk_reader_->load_by_meta(cm))) { + } + break; + } else { + ChunkMeta* value_cm = value_chunk_meta_cursor_.get(); + ChunkMeta* time_cm = time_chunk_meta_cursor_.get(); + advance_to_next_chunk(); + if (filter != nullptr && + value_cm->statistic_ != nullptr && + !filter->satisfy(value_cm->statistic_)) { + continue; + } + if (should_skip_chunk_by_time(value_cm, + min_time_hint)) { + continue; + } + if (should_skip_chunk_by_offset(value_cm)) { + continue; + } + chunk_reader_->reset(); + if (RET_FAIL(chunk_reader_->load_by_aligned_meta( + time_cm, value_cm))) { + } + break; } - break; } } } - } - if (IS_SUCC(ret)) { - if (alloc) { - ret_tsblock = alloc_tsblock(); + if (IS_SUCC(ret)) { + if (alloc && ret_tsblock == nullptr) { + ret_tsblock = alloc_tsblock(); + } + ret = chunk_reader_->get_next_page(ret_tsblock, filter, *data_pa_, + min_time_hint, row_offset_, + row_limit_); } - ret = chunk_reader_->get_next_page(ret_tsblock, filter, *data_pa_); + // When current chunk is exhausted (e.g. all pages skipped by offset) + // but there are more chunks, load next chunk and retry. 
+ if (ret == common::E_NO_MORE_DATA && has_next_chunk()) { + ret = E_OK; + continue; + } + return ret; } - return ret; } void TsFileSeriesScanIterator::revert_tsblock() { @@ -100,14 +152,9 @@ int TsFileSeriesScanIterator::init_chunk_reader() { common::mem_alloc(sizeof(ChunkReader), common::MOD_CHUNK_READER); chunk_reader_ = new (buf) ChunkReader; chunk_meta_cursor_ = itimeseries_index_->get_chunk_meta_list()->begin(); - ChunkMeta* cm = chunk_meta_cursor_.get(); - ASSERT(!chunk_reader_->has_more_data()); if (RET_FAIL(chunk_reader_->init( read_file_, itimeseries_index_->get_measurement_name(), itimeseries_index_->get_data_type(), time_filter_))) { - } else if (RET_FAIL(chunk_reader_->load_by_meta(cm))) { - } else { - chunk_meta_cursor_++; } } else { void* buf = common::mem_alloc(sizeof(AlignedChunkReader), @@ -117,17 +164,9 @@ int TsFileSeriesScanIterator::init_chunk_reader() { itimeseries_index_->get_time_chunk_meta_list()->begin(); value_chunk_meta_cursor_ = itimeseries_index_->get_value_chunk_meta_list()->begin(); - ChunkMeta* time_cm = time_chunk_meta_cursor_.get(); - ChunkMeta* value_cm = value_chunk_meta_cursor_.get(); - ASSERT(!chunk_reader_->has_more_data()); if (RET_FAIL(chunk_reader_->init( read_file_, itimeseries_index_->get_measurement_name(), itimeseries_index_->get_data_type(), time_filter_))) { - } else if (RET_FAIL(chunk_reader_->load_by_aligned_meta(time_cm, - value_cm))) { - } else { - time_chunk_meta_cursor_++; - value_chunk_meta_cursor_++; } } diff --git a/cpp/src/reader/tsfile_series_scan_iterator.h b/cpp/src/reader/tsfile_series_scan_iterator.h index ad6fe8d94..06b35ba16 100644 --- a/cpp/src/reader/tsfile_series_scan_iterator.h +++ b/cpp/src/reader/tsfile_series_scan_iterator.h @@ -20,6 +20,7 @@ #ifndef READER_TSFILE_SERIES_SCAN_ITERATOR_H #define READER_TSFILE_SERIES_SCAN_ITERATOR_H +#include #include #include "aligned_chunk_reader.h" @@ -48,7 +49,9 @@ class TsFileSeriesScanIterator { tuple_desc_(), tsblock_(nullptr), time_filter_(nullptr), - 
is_aligned_(false) {} + is_aligned_(false), + row_offset_(0), + row_limit_(-1) {} ~TsFileSeriesScanIterator() { destroy(); } int init(std::shared_ptr device_id, const std::string& measurement_name, ReadFile* read_file, @@ -62,11 +65,32 @@ class TsFileSeriesScanIterator { return common::E_OK; } void destroy(); + + /** + * Set row-level offset and limit for single-path optimization. + * When set, the SSI uses chunk/page statistics (count) to skip + * entire chunks/pages without decoding. + */ + void set_row_range(int offset, int limit) { + row_offset_ = offset; + row_limit_ = limit; + } + + /** Current row offset/limit after chunk/page skip; used to sync with QDS + * for single-path. */ + int get_row_offset() const { return row_offset_; } + int get_row_limit() const { return row_limit_; } + /* - * If oneshoot filter specified, use it instead of this->time_filter_ + * If oneshoot filter specified, use it instead of this->time_filter_. + * @param min_time_hint When not INT64_MIN, chunks whose end_time + * < min_time_hint are skipped without loading. + * Used by merge layer to push down the current + * merge cursor. 
*/ int get_next(common::TsBlock*& ret_tsblock, bool alloc_tsblock, - Filter* oneshoot_filter = nullptr); + Filter* oneshoot_filter = nullptr, + int64_t min_time_hint = std::numeric_limits::min()); void revert_tsblock(); friend class TsFileIOReader; @@ -93,6 +117,8 @@ class TsFileSeriesScanIterator { FORCE_INLINE ChunkMeta* get_current_chunk_meta() { return chunk_meta_cursor_.get(); } + bool should_skip_chunk_by_time(ChunkMeta* cm, int64_t min_time_hint); + bool should_skip_chunk_by_offset(ChunkMeta* cm); common::TsBlock* alloc_tsblock(); private: @@ -112,6 +138,8 @@ class TsFileSeriesScanIterator { common::TsBlock* tsblock_; Filter* time_filter_; bool is_aligned_ = false; + int row_offset_; + int row_limit_; }; } // end namespace storage diff --git a/cpp/src/reader/tsfile_tree_reader.cc b/cpp/src/reader/tsfile_tree_reader.cc index 1b58c359d..6d7b77e1b 100644 --- a/cpp/src/reader/tsfile_tree_reader.cc +++ b/cpp/src/reader/tsfile_tree_reader.cc @@ -47,6 +47,20 @@ int TsFileTreeReader::query(const std::vector& device_ids, return tsfile_reader_->query(path_list, start_time, end_time, result_set); } +int TsFileTreeReader::queryByRow( + const std::vector& device_ids, + const std::vector& measurement_names, int offset, int limit, + ResultSet*& result_set) { + std::vector path_list; + for (auto& device_id : device_ids) { + for (auto& measurement : measurement_names) { + path_list.emplace_back(device_id + PATH_SEPARATOR_CHAR + + measurement); + } + } + return tsfile_reader_->queryByRow(path_list, offset, limit, result_set); +} + void TsFileTreeReader::destroy_query_data_set(ResultSet* qds) { tsfile_reader_->destroy_query_data_set(qds); } diff --git a/cpp/src/reader/tsfile_tree_reader.h b/cpp/src/reader/tsfile_tree_reader.h index 535180409..9550fc12d 100644 --- a/cpp/src/reader/tsfile_tree_reader.h +++ b/cpp/src/reader/tsfile_tree_reader.h @@ -67,6 +67,28 @@ class TsFileTreeReader { const std::vector& measurement_names, int64_t start_time, int64_t end_time, ResultSet*& 
result_set); + /** + * @brief Query time series data by row with offset and limit. + * + * Merges multiple paths by time, skips the first @p offset rows, + * and returns at most @p limit rows. When only a single path is + * selected, chunk/page statistics are used to skip entire blocks + * without decoding. Once @p limit rows have been returned, no + * further data is loaded from storage. + * + * @param device_ids List of device identifiers to query. + * @param measurement_names List of measurement names to query. + * @param offset Number of leading rows to skip (>= 0). + * @param limit Maximum rows to return. < 0 means unlimited. + * @param[out] result_set The result set containing query results. + * @return Returns 0 on success, or a non-zero error code on failure. + * The caller is responsible for destroying the result set using + * destroy_query_data_set(). + */ + int queryByRow(const std::vector& device_ids, + const std::vector& measurement_names, + int offset, int limit, ResultSet*& result_set); + /** * @brief Destroy and deallocate the query result set * diff --git a/cpp/test/cwrapper/query_by_row_cwrapper_test.cc b/cpp/test/cwrapper/query_by_row_cwrapper_test.cc new file mode 100644 index 000000000..a84625975 --- /dev/null +++ b/cpp/test/cwrapper/query_by_row_cwrapper_test.cc @@ -0,0 +1,254 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include + +#include +#include +#include +#include +#include + +#include "common/global.h" +#include "common/record.h" +#include "common/schema.h" +#include "file/write_file.h" +#include "reader/tsfile_reader.h" +#include "writer/tsfile_table_writer.h" +#include "writer/tsfile_tree_writer.h" + +extern "C" { +#include "cwrapper/errno_define_c.h" +#include "cwrapper/tsfile_cwrapper.h" +} + +class CWrapperQueryByRowTest : public ::testing::Test { + protected: + static void write_tree_tsfile(const char* filename, + const std::vector& device_ids, + const std::vector& measurements, + int num_rows) { + storage::WriteFile write_file; + int flags = O_WRONLY | O_CREAT | O_TRUNC; +#ifdef _WIN32 + flags |= O_BINARY; +#endif + mode_t mode = 0666; + ASSERT_EQ(common::E_OK, write_file.create(filename, flags, mode)); + + storage::TsFileTreeWriter writer(&write_file); + for (size_t d = 0; d < device_ids.size(); d++) { + std::string device_id = device_ids[d]; + for (size_t m = 0; m < measurements.size(); m++) { + auto* schema = new storage::MeasurementSchema( + measurements[m], common::TSDataType::INT64); + ASSERT_EQ(common::E_OK, + writer.register_timeseries(device_id, schema)); + delete schema; + } + } + + for (int t = 0; t < num_rows; t++) { + for (size_t d = 0; d < device_ids.size(); d++) { + storage::TsRecord record(device_ids[d], + static_cast(t)); + for (size_t m = 0; m < measurements.size(); m++) { + int64_t value = static_cast(t) * 100 + + static_cast(m) + + static_cast(d) * 10000; + record.add_point(measurements[m], value); + } + 
ASSERT_EQ(common::E_OK, writer.write(record)); + } + } + ASSERT_EQ(common::E_OK, writer.flush()); + ASSERT_EQ(common::E_OK, writer.close()); + } + + static void write_table_tsfile(const char* filename, + const std::string& table_name, + const std::vector& columns, + int num_rows) { + storage::WriteFile write_file; + int flags = O_WRONLY | O_CREAT | O_TRUNC; +#ifdef _WIN32 + flags |= O_BINARY; +#endif + mode_t mode = 0666; + ASSERT_EQ(common::E_OK, write_file.create(filename, flags, mode)); + + std::vector col_schemas = { + common::ColumnSchema(columns[0], common::TSDataType::STRING, + common::CompressionType::UNCOMPRESSED, + common::TSEncoding::PLAIN, + common::ColumnCategory::TAG), + common::ColumnSchema(columns[1], common::TSDataType::INT64, + common::CompressionType::UNCOMPRESSED, + common::TSEncoding::PLAIN, + common::ColumnCategory::FIELD), + }; + auto* schema = new storage::TableSchema(table_name, col_schemas); + auto* writer = new storage::TsFileTableWriter(&write_file, schema); + + storage::Tablet tablet( + table_name, {columns[0], columns[1]}, + {common::TSDataType::STRING, common::TSDataType::INT64}, + {common::ColumnCategory::TAG, common::ColumnCategory::FIELD}, + num_rows); + + for (int t = 0; t < num_rows; t++) { + tablet.add_timestamp(t, static_cast(t)); + tablet.add_value(t, columns[0], + std::string("device_") + std::to_string(t)); + tablet.add_value(t, columns[1], static_cast(t) * 10); + } + + ASSERT_EQ(common::E_OK, writer->write_table(tablet)); + ASSERT_EQ(common::E_OK, writer->flush()); + ASSERT_EQ(common::E_OK, writer->close()); + delete writer; + delete schema; + } +}; + +TEST_F(CWrapperQueryByRowTest, TreeByRowOffsetLimit) { + storage::libtsfile_init(); + + const char* file_name = "cwrapper_tree_query_by_row_test.tsfile"; + remove(file_name); + + std::vector device_ids = {"root.d1", "root.d2"}; + std::vector measurements = {"s1", "s2"}; + const int num_rows = 10; + write_tree_tsfile(file_name, device_ids, measurements, num_rows); + + ERRNO 
code = 0; + TsFileReader reader = tsfile_reader_new(file_name, &code); + ASSERT_EQ(code, RET_OK); + ASSERT_NE(reader, nullptr); + + char* device_ids_c[2]; + device_ids_c[0] = strdup(device_ids[0].c_str()); + device_ids_c[1] = strdup(device_ids[1].c_str()); + + char* measurement_ids_c[2]; + measurement_ids_c[0] = strdup(measurements[0].c_str()); + measurement_ids_c[1] = strdup(measurements[1].c_str()); + + const int offset = 3; + const int limit = 5; + ResultSet rs = tsfile_reader_query_tree_by_row( + reader, device_ids_c, 2, measurement_ids_c, 2, offset, limit, &code); + ASSERT_EQ(code, RET_OK); + ASSERT_NE(rs, nullptr); + + bool has_next = tsfile_result_set_next(rs, &code); + ASSERT_EQ(code, RET_OK); + int row = 0; + while (has_next) { + int64_t ts = tsfile_result_set_get_value_by_index_int64_t(rs, 1); + ASSERT_EQ(ts, static_cast(offset + row)); + + ASSERT_EQ(tsfile_result_set_get_value_by_index_int64_t(rs, 2), + ts * 100 + 0 + 0 * 10000); + ASSERT_EQ(tsfile_result_set_get_value_by_index_int64_t(rs, 3), + ts * 100 + 1 + 0 * 10000); + ASSERT_EQ(tsfile_result_set_get_value_by_index_int64_t(rs, 4), + ts * 100 + 0 + 1 * 10000); + ASSERT_EQ(tsfile_result_set_get_value_by_index_int64_t(rs, 5), + ts * 100 + 1 + 1 * 10000); + + row++; + has_next = tsfile_result_set_next(rs, &code); + ASSERT_EQ(code, RET_OK); + } + + ASSERT_EQ(row, limit); + + free_tsfile_result_set(&rs); + ASSERT_EQ(tsfile_reader_close(reader), RET_OK); + + free(device_ids_c[0]); + free(device_ids_c[1]); + free(measurement_ids_c[0]); + free(measurement_ids_c[1]); + remove(file_name); + + storage::libtsfile_destroy(); +} + +TEST_F(CWrapperQueryByRowTest, TableByRowOffsetLimit) { + storage::libtsfile_init(); + + const char* file_name = "cwrapper_table_query_by_row_test.tsfile"; + remove(file_name); + + std::string table_name = "t1"; + std::vector columns = {"device", "s1"}; + const int num_rows = 10; + write_table_tsfile(file_name, table_name, columns, num_rows); + + ERRNO code = 0; + TsFileReader reader = 
tsfile_reader_new(file_name, &code); + ASSERT_EQ(code, RET_OK); + ASSERT_NE(reader, nullptr); + + char* column_names_c[2]; + column_names_c[0] = strdup(columns[0].c_str()); + column_names_c[1] = strdup(columns[1].c_str()); + + const int offset = 3; + const int limit = 5; + ResultSet rs = tsfile_reader_query_table_by_row( + reader, table_name.c_str(), column_names_c, 2, offset, limit, &code); + ASSERT_EQ(code, RET_OK); + ASSERT_NE(rs, nullptr); + + bool has_next = tsfile_result_set_next(rs, &code); + ASSERT_EQ(code, RET_OK); + + int row = 0; + while (has_next) { + int64_t ts = tsfile_result_set_get_value_by_index_int64_t(rs, 1); + ASSERT_EQ(ts, static_cast(offset + row)); + + char* device = tsfile_result_set_get_value_by_index_string(rs, 2); + ASSERT_NE(device, nullptr); + ASSERT_EQ(std::string(device), + std::string("device_") + std::to_string(ts)); + free(device); + + ASSERT_EQ(tsfile_result_set_get_value_by_index_int64_t(rs, 3), ts * 10); + + row++; + has_next = tsfile_result_set_next(rs, &code); + ASSERT_EQ(code, RET_OK); + } + + ASSERT_EQ(row, limit); + + free_tsfile_result_set(&rs); + ASSERT_EQ(tsfile_reader_close(reader), RET_OK); + + free(column_names_c[0]); + free(column_names_c[1]); + remove(file_name); + + storage::libtsfile_destroy(); +} diff --git a/cpp/test/reader/query_by_row_performance_test.cc b/cpp/test/reader/query_by_row_performance_test.cc new file mode 100644 index 000000000..4caf26f71 --- /dev/null +++ b/cpp/test/reader/query_by_row_performance_test.cc @@ -0,0 +1,703 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * Performance comparison (4 groups): + * - Tree model: query single time series / multi time series + * - Table model: query single time series / multi time series + * + * Dataset: + * - points per time series = 50000 (a time series = device_id + measurement_id) + * - offset default = 25000, limit default = 1000 + * - random "none" (NULL / missing values) to make the data non-dense + * + * Run (suite is DISABLED by default; omit DISABLED_ in filter when using + * --gtest_also_run_disabled_tests): + * TsFile_Test --gtest_also_run_disabled_tests + * --gtest_filter=DISABLED_QueryByRowPerformance* + * + * Dynamic offset (optional): + * QUERY_BY_ROW_PERF_OFFSET= + * QUERY_BY_ROW_PERF_OFFSET_RATIO= + * + * Optional output aggregation: + * QUERY_BY_ROW_PERF_RESULT=/path/to/result.md (append) + * + * More iterations for stabler avg (default 5): + * QUERY_BY_ROW_PERF_ITERS=30 + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/global.h" +#include "common/record.h" +#include "common/schema.h" +#include "common/tablet.h" +#include "file/write_file.h" +#include "reader/tsfile_reader.h" +#include "reader/tsfile_tree_reader.h" +#include "writer/tsfile_table_writer.h" +#include "writer/tsfile_tree_writer.h" + +using namespace storage; +using namespace common; + +static bool get_env_int(const char* name, int& out) { + const char* v = std::getenv(name); + if (v == nullptr || v[0] == '\0') return false; + char* end = nullptr; + long x = std::strtol(v, &end, 10); + if (end == v) return 
false; + out = static_cast(x); + return true; +} + +/** Average latency iterations per (offset,limit) case; clamp [1,200]. */ +static int query_by_row_perf_iters() { + int n = 100; + if (get_env_int("QUERY_BY_ROW_PERF_ITERS", n)) { + if (n < 1) n = 1; + if (n > 200) n = 200; + } + return n; +} + +static int compute_offset_with_env(int num_rows, int default_offset) { + int offset = default_offset; + int abs = 0; + if (get_env_int("QUERY_BY_ROW_PERF_OFFSET", abs)) { + offset = abs; + } else { + int ratio = 0; + if (get_env_int("QUERY_BY_ROW_PERF_OFFSET_RATIO", ratio)) { + if (ratio < 0) ratio = 0; + if (ratio > 100) ratio = 100; + offset = + static_cast(static_cast(num_rows) * ratio / 100); + } + } + + if (num_rows <= 0) return 0; + if (offset < 0) offset = 0; + if (offset >= num_rows) offset = num_rows - 1; + return offset; +} + +static void write_result_if_needed(const std::string& md) { + const char* result_path = std::getenv("QUERY_BY_ROW_PERF_RESULT"); + if (result_path == nullptr || result_path[0] == '\0') return; + std::ofstream f(result_path, std::ios::app); + if (f) f << md << "\n"; +} + +// Entire suite skipped in default runs +class DISABLED_QueryByRowPerformanceTest : public ::testing::Test { + protected: + void SetUp() override { + libtsfile_init(); + file_name_ = std::string("query_by_row_perf_") + + generate_random_string(8) + std::string(".tsfile"); + remove(file_name_.c_str()); + int flags = O_WRONLY | O_CREAT | O_TRUNC; +#ifdef _WIN32 + flags |= O_BINARY; +#endif + mode_t mode = 0666; + write_file_.create(file_name_, flags, mode); + } + + void TearDown() override { + remove(file_name_.c_str()); + libtsfile_destroy(); + } + + static std::string generate_random_string(int length) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis(0, 61); + const std::string chars = + "0123456789" + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + std::string result; + for (int i = 0; i < length; ++i) { + result += 
chars[dis(gen)]; + } + return result; + } + + void write_tree_multi_device_file( + int num_rows_total, int device_count, + const std::vector& measurement_ids, double none_prob_s1, + double none_prob_s2, uint32_t seed) { + TsFileTreeWriter writer(&write_file_); + + // Register all selected measurements (only those in measurement_ids). + std::vector device_ids; + for (int d = 0; d < device_count; ++d) { + device_ids.push_back("d" + std::to_string(d)); + } + for (auto& device_id : device_ids) { + for (const auto& measurement_id : measurement_ids) { + auto* schema = + new MeasurementSchema(measurement_id, TSDataType::INT64); + std::string device_id_nc = + device_id; // register_timeseries needs std::string& + ASSERT_EQ(E_OK, + writer.register_timeseries(device_id_nc, schema)); + delete schema; + } + } + + std::mt19937 rng(seed); + std::uniform_real_distribution u01(0.0, 1.0); + + // Ensure every (device_id + measurement_id) time series has exactly + // num_rows_total points at timestamps [0, num_rows_total). + for (int64_t timestamp = 0; timestamp < num_rows_total; ++timestamp) { + for (const auto& device_id : device_ids) { + TsRecord record(timestamp, device_id, + static_cast(measurement_ids.size())); + for (size_t m = 0; m < measurement_ids.size(); ++m) { + const auto& measurement_id = measurement_ids[m]; + bool make_null = false; + if (measurement_id == "s1") { + make_null = (u01(rng) < none_prob_s1); + } else if (measurement_id == "s2") { + make_null = (u01(rng) < none_prob_s2); + } + + if (make_null) { + // DataPoint(measurement_name) creates a NULL point. + record.points_.emplace_back(DataPoint(measurement_id)); + } else { + // Make values unique per (timestamp, measurement) to + // prevent constant folding. 
+ int64_t value = + timestamp * 100 + static_cast(m); + record.add_point(measurement_id, value); + } + } + + ASSERT_EQ(E_OK, writer.write(record)); + } + } + writer.flush(); + writer.close(); + } + + void write_table_multi_device_file(int num_rows_total, int device_count, + double none_prob_s1, double none_prob_s2, + uint32_t seed) { + // Schema always contains both s1 and s2; single-sequence tests will + // only query s1. + std::vector col_schemas = { + ColumnSchema("id1", TSDataType::STRING, + CompressionType::UNCOMPRESSED, TSEncoding::PLAIN, + ColumnCategory::TAG), + ColumnSchema("s1", TSDataType::INT64, CompressionType::UNCOMPRESSED, + TSEncoding::PLAIN, ColumnCategory::FIELD), + ColumnSchema("s2", TSDataType::INT64, CompressionType::UNCOMPRESSED, + TSEncoding::PLAIN, ColumnCategory::FIELD), + }; + auto* schema = new TableSchema("t1", col_schemas); + auto* writer = new TsFileTableWriter(&write_file_, schema); + + // Each device has num_rows_total points, and timestamps are aligned + // across devices. 
+ const int num_total_rows = num_rows_total * device_count; + Tablet tablet( + "t1", {"id1", "s1", "s2"}, + {TSDataType::STRING, TSDataType::INT64, TSDataType::INT64}, + {ColumnCategory::TAG, ColumnCategory::FIELD, ColumnCategory::FIELD}, + num_total_rows); + std::mt19937 rng(seed); + std::uniform_real_distribution u01(0.0, 1.0); + + for (int device_index = 0; device_index < device_count; + ++device_index) { + for (int row_in_device = 0; row_in_device < num_rows_total; + ++row_in_device) { + const int row = device_index * num_rows_total + row_in_device; + const int64_t timestamp = static_cast(row_in_device); + + tablet.add_timestamp(row, timestamp); + tablet.add_value(row, "id1", + "device_" + std::to_string(device_index)); + + if (u01(rng) >= none_prob_s1) { + tablet.add_value(row, "s1", timestamp * 10); + } + if (u01(rng) >= none_prob_s2) { + tablet.add_value(row, "s2", timestamp * 100); + } + } + } + + ASSERT_EQ(writer->write_table(tablet), E_OK); + ASSERT_EQ(writer->flush(), E_OK); + ASSERT_EQ(writer->close(), E_OK); + delete writer; + delete schema; + } + + std::string file_name_; + WriteFile write_file_; +}; + +static const int kNumRowsTotal = 50000; // points per time series +static const int kDeviceCount = + 1; // keep "offset/limit" aligned to a single time series + +struct OffsetLimitCase { + int offset; + int limit; + const char* label; +}; + +// Multiple (offset, limit) groups for performance analysis. +// Notes: +// - The dataset has offsets in [0, kNumRowsTotal-1]. (50000, 1000) means +// "offset beyond end" and should return empty results for that series. 
+static const OffsetLimitCase kOffsetLimitCases[] = { + {0, 1000, "(0,1000)"}, {12500, 1000, "(12500,1000)"}, + {25000, 1000, "(25000,1000)"}, {37500, 1000, "(37500,1000)"}, + {50000, 1000, "(50000,1000)"}, {25000, 10, "(25000,10)"}, + {25000, 100, "(25000,100)"}, {25000, 1000, "(25000,1000)"}, + {25000, 10000, "(25000,10000)"}, +}; + +static const int kOffsetLimitCaseCount = + static_cast(sizeof(kOffsetLimitCases) / sizeof(kOffsetLimitCases[0])); + +// Keep dataset fully dense (no NULL) to maximize offset/limit pushdown effects. +static const double kNoneProbSingle = 0.0; // s1 in single-sequence +static const double kNoneProbS2 = 0.0; // s2 in multi-sequence +static const double kNoneProbMultiS1 = 0.0; // s1 in multi-sequence + +template +static void compute_avg_times(RunByRowFn&& run_by_row, RunManualFn&& run_manual, + int iters, double& avg_by_row, double& avg_manual, + int& valid_iters) { + double sum_by_row = 0.0; + double sum_manual = 0.0; + valid_iters = 0; + for (int i = 0; i < iters; i++) { + const double t1 = run_by_row(); + const double t2 = run_manual(); + if (t1 > 0 && t2 > 0) { + sum_by_row += t1; + sum_manual += t2; + valid_iters++; + } + } + avg_by_row = (valid_iters > 0) ? (sum_by_row / valid_iters) : -1.0; + avg_manual = (valid_iters > 0) ? 
(sum_manual / valid_iters) : -1.0; +} + +TEST_F(DISABLED_QueryByRowPerformanceTest, TreeModel_SingleSequence) { + const std::vector measurement_ids = {"s1"}; + write_tree_multi_device_file(kNumRowsTotal, kDeviceCount, measurement_ids, + kNoneProbSingle, /*none_prob_s2=*/0.0, 123); + std::vector devices{"d0"}; + + const int perf_iters = query_by_row_perf_iters(); + std::ostringstream out; + out << "# QueryByRow (Tree, single) vs Manual Next – Performance Result\n\n" + << "Avg iterations per cell: **" << perf_iters + << "** (`QUERY_BY_ROW_PERF_ITERS`)\n\n" + << "| Case | Offset | Limit | queryByRow(avg ms) | Manual(avg ms) | " + "Speedup |\n" + << "|------|--------|-------|-------------------|--------------|-------" + "--|\n"; + + double best_speedup = 0.0; + for (int c = 0; c < kOffsetLimitCaseCount; ++c) { + const OffsetLimitCase& cs = kOffsetLimitCases[c]; + const int offset = cs.offset; + const int limit = cs.limit; + + auto run_query_by_row = [this, &devices, &measurement_ids, offset, + limit]() { + TsFileTreeReader reader; + if (reader.open(file_name_) != E_OK) return -1.0; + ResultSet* rs = nullptr; + if (reader.queryByRow(devices, measurement_ids, offset, limit, + rs) != E_OK) { + reader.close(); + return -1.0; + } + auto start = std::chrono::steady_clock::now(); + bool has_next = false; + while (IS_SUCC(rs->next(has_next)) && has_next) { + (void)rs->get_row_record()->get_timestamp(); + } + auto end = std::chrono::steady_clock::now(); + reader.destroy_query_data_set(rs); + reader.close(); + return std::chrono::duration(end - start) + .count(); + }; + + auto run_manual_next = [this, &devices, &measurement_ids, offset, + limit]() { + TsFileTreeReader reader; + if (reader.open(file_name_) != E_OK) return -1.0; + ResultSet* rs = nullptr; + if (reader.query(devices, measurement_ids, INT64_MIN, INT64_MAX, + rs) != E_OK) { + reader.close(); + return -1.0; + } + auto start = std::chrono::steady_clock::now(); + bool has_next = false; + int skipped = 0; + int taken = 
0; + while (IS_SUCC(rs->next(has_next)) && has_next) { + if (skipped < offset) { + skipped++; + continue; + } + if (taken >= limit) break; + (void)rs->get_row_record()->get_timestamp(); + taken++; + } + auto end = std::chrono::steady_clock::now(); + reader.destroy_query_data_set(rs); + reader.close(); + return std::chrono::duration(end - start) + .count(); + }; + + double avg_by_row = -1.0; + double avg_manual = -1.0; + int valid_iters = 0; + compute_avg_times(run_query_by_row, run_manual_next, perf_iters, + avg_by_row, avg_manual, valid_iters); + ASSERT_GT(valid_iters, 0); + ASSERT_GT(avg_manual, 0.0); + + const double speedup = + (avg_by_row > 0.0) ? (avg_manual / avg_by_row) : 0.0; + best_speedup = std::max(best_speedup, speedup); + + out << "| " << cs.label << " | " << offset << " | " << limit << " | " + << avg_by_row << " | " << avg_manual << " | " << speedup << "x |\n"; + } + + out << "\n"; + std::cout << "\n" << out.str() << "\n"; + write_result_if_needed(out.str()); + EXPECT_GT(best_speedup, 1.0); +} + +TEST_F(DISABLED_QueryByRowPerformanceTest, TreeModel_MultiSequence) { + const std::vector measurement_ids = {"s1", "s2"}; + write_tree_multi_device_file(kNumRowsTotal, kDeviceCount, measurement_ids, + kNoneProbMultiS1, kNoneProbS2, 456); + std::vector devices; + for (int d = 0; d < kDeviceCount; ++d) + devices.push_back("d" + std::to_string(d)); + + const int perf_iters = query_by_row_perf_iters(); + std::ostringstream out; + out << "# QueryByRow (Tree, multi) vs Manual Next – Performance Result\n\n" + << "Avg iterations per cell: **" << perf_iters + << "** (`QUERY_BY_ROW_PERF_ITERS`)\n\n" + << "| Case | Offset | Limit | queryByRow(avg ms) | Manual(avg ms) | " + "Speedup |\n" + << "|------|--------|-------|-------------------|--------------|-------" + "--|\n"; + + double best_speedup = 0.0; + for (int c = 0; c < kOffsetLimitCaseCount; ++c) { + const OffsetLimitCase& cs = kOffsetLimitCases[c]; + const int offset = cs.offset; + const int limit = cs.limit; + + 
auto run_query_by_row = [this, &devices, &measurement_ids, offset, + limit]() { + TsFileTreeReader reader; + if (reader.open(file_name_) != E_OK) return -1.0; + ResultSet* rs = nullptr; + if (reader.queryByRow(devices, measurement_ids, offset, limit, + rs) != E_OK) { + reader.close(); + return -1.0; + } + auto start = std::chrono::steady_clock::now(); + bool has_next = false; + while (IS_SUCC(rs->next(has_next)) && has_next) { + (void)rs->get_row_record()->get_timestamp(); + } + auto end = std::chrono::steady_clock::now(); + reader.destroy_query_data_set(rs); + reader.close(); + return std::chrono::duration(end - start) + .count(); + }; + + auto run_manual_next = [this, &devices, &measurement_ids, offset, + limit]() { + TsFileTreeReader reader; + if (reader.open(file_name_) != E_OK) return -1.0; + ResultSet* rs = nullptr; + if (reader.query(devices, measurement_ids, INT64_MIN, INT64_MAX, + rs) != E_OK) { + reader.close(); + return -1.0; + } + auto start = std::chrono::steady_clock::now(); + bool has_next = false; + int skipped = 0; + int taken = 0; + while (IS_SUCC(rs->next(has_next)) && has_next) { + if (skipped < offset) { + skipped++; + continue; + } + if (taken >= limit) break; + (void)rs->get_row_record()->get_timestamp(); + taken++; + } + auto end = std::chrono::steady_clock::now(); + reader.destroy_query_data_set(rs); + reader.close(); + return std::chrono::duration(end - start) + .count(); + }; + + double avg_by_row = -1.0; + double avg_manual = -1.0; + int valid_iters = 0; + compute_avg_times(run_query_by_row, run_manual_next, perf_iters, + avg_by_row, avg_manual, valid_iters); + ASSERT_GT(valid_iters, 0); + ASSERT_GT(avg_manual, 0.0); + + const double speedup = + (avg_by_row > 0.0) ? 
(avg_manual / avg_by_row) : 0.0; + best_speedup = std::max(best_speedup, speedup); + + out << "| " << cs.label << " | " << offset << " | " << limit << " | " + << avg_by_row << " | " << avg_manual << " | " << speedup << "x |\n"; + } + + out << "\n"; + std::cout << "\n" << out.str() << "\n"; + write_result_if_needed(out.str()); + EXPECT_GT(best_speedup, 1.0); +} + +TEST_F(DISABLED_QueryByRowPerformanceTest, TableModel_SingleSequence) { + write_table_multi_device_file(kNumRowsTotal, kDeviceCount, kNoneProbSingle, + 0.0, 789); + const std::vector cols = {"id1", "s1"}; + + const int perf_iters = query_by_row_perf_iters(); + std::ostringstream out; + out << "# QueryByRow (Table, single) vs Manual Next – Performance " + "Result\n\n" + << "Avg iterations per cell: **" << perf_iters + << "** (`QUERY_BY_ROW_PERF_ITERS`)\n\n" + << "| Case | Offset | Limit | queryByRow(avg ms) | Manual(avg ms) | " + "Speedup |\n" + << "|------|--------|-------|-------------------|--------------|-------" + "--|\n"; + + double best_speedup = 0.0; + for (int c = 0; c < kOffsetLimitCaseCount; ++c) { + const OffsetLimitCase& cs = kOffsetLimitCases[c]; + const int offset = cs.offset; + const int limit = cs.limit; + + auto run_query_by_row = [this, cols, offset, limit]() { + TsFileReader reader; + if (reader.open(file_name_) != E_OK) return -1.0; + ResultSet* rs = nullptr; + if (reader.queryByRow("t1", cols, offset, limit, rs) != E_OK) { + reader.close(); + return -1.0; + } + auto start = std::chrono::steady_clock::now(); + bool has_next = false; + while (IS_SUCC(rs->next(has_next)) && has_next) { + if (!rs->is_null("s1")) { + (void)rs->get_value("s1"); + } + } + auto end = std::chrono::steady_clock::now(); + reader.destroy_query_data_set(rs); + reader.close(); + return std::chrono::duration(end - start) + .count(); + }; + + auto run_manual_next = [this, cols, offset, limit]() { + TsFileReader reader; + if (reader.open(file_name_) != E_OK) return -1.0; + ResultSet* rs = nullptr; + if 
(reader.query("t1", cols, INT64_MIN, INT64_MAX, rs) != E_OK) { + reader.close(); + return -1.0; + } + auto start = std::chrono::steady_clock::now(); + bool has_next = false; + int skipped = 0; + int taken = 0; + while (IS_SUCC(rs->next(has_next)) && has_next) { + if (skipped < offset) { + skipped++; + continue; + } + if (taken >= limit) break; + if (!rs->is_null("s1")) { + (void)rs->get_value("s1"); + } + taken++; + } + auto end = std::chrono::steady_clock::now(); + reader.destroy_query_data_set(rs); + reader.close(); + return std::chrono::duration(end - start) + .count(); + }; + + double avg_by_row = -1.0; + double avg_manual = -1.0; + int valid_iters = 0; + compute_avg_times(run_query_by_row, run_manual_next, perf_iters, + avg_by_row, avg_manual, valid_iters); + ASSERT_GT(valid_iters, 0); + ASSERT_GT(avg_manual, 0.0); + + const double speedup = + (avg_by_row > 0.0) ? (avg_manual / avg_by_row) : 0.0; + best_speedup = std::max(best_speedup, speedup); + + out << "| " << cs.label << " | " << offset << " | " << limit << " | " + << avg_by_row << " | " << avg_manual << " | " << speedup << "x |\n"; + } + + out << "\n"; + std::cout << "\n" << out.str() << "\n"; + write_result_if_needed(out.str()); + EXPECT_GT(best_speedup, 1.0); +} + +TEST_F(DISABLED_QueryByRowPerformanceTest, TableModel_MultiSequence) { + write_table_multi_device_file(kNumRowsTotal, kDeviceCount, kNoneProbMultiS1, + kNoneProbS2, 101); + const std::vector cols = {"id1", "s1", "s2"}; + + const int perf_iters = query_by_row_perf_iters(); + std::ostringstream out; + out << "# QueryByRow (Table, multi) vs Manual Next – Performance Result\n\n" + << "Avg iterations per cell: **" << perf_iters + << "** (`QUERY_BY_ROW_PERF_ITERS`)\n\n" + << "| Case | Offset | Limit | queryByRow(avg ms) | Manual(avg ms) | " + "Speedup |\n" + << "|------|--------|-------|-------------------|--------------|-------" + "--|\n"; + + double best_speedup = 0.0; + for (int c = 0; c < kOffsetLimitCaseCount; ++c) { + const OffsetLimitCase& 
cs = kOffsetLimitCases[c]; + const int offset = cs.offset; + const int limit = cs.limit; + + auto run_query_by_row = [this, cols, offset, limit]() { + TsFileReader reader; + if (reader.open(file_name_) != E_OK) return -1.0; + ResultSet* rs = nullptr; + if (reader.queryByRow("t1", cols, offset, limit, rs) != E_OK) { + reader.close(); + return -1.0; + } + auto start = std::chrono::steady_clock::now(); + bool has_next = false; + while (IS_SUCC(rs->next(has_next)) && has_next) { + if (!rs->is_null("s1")) (void)rs->get_value("s1"); + (void)rs->is_null("s2"); + } + auto end = std::chrono::steady_clock::now(); + reader.destroy_query_data_set(rs); + reader.close(); + return std::chrono::duration(end - start) + .count(); + }; + + auto run_manual_next = [this, cols, offset, limit]() { + TsFileReader reader; + if (reader.open(file_name_) != E_OK) return -1.0; + ResultSet* rs = nullptr; + if (reader.query("t1", cols, INT64_MIN, INT64_MAX, rs) != E_OK) { + reader.close(); + return -1.0; + } + auto start = std::chrono::steady_clock::now(); + bool has_next = false; + int skipped = 0; + int taken = 0; + while (IS_SUCC(rs->next(has_next)) && has_next) { + if (skipped < offset) { + skipped++; + continue; + } + if (taken >= limit) break; + if (!rs->is_null("s1")) (void)rs->get_value("s1"); + (void)rs->is_null("s2"); + taken++; + } + auto end = std::chrono::steady_clock::now(); + reader.destroy_query_data_set(rs); + reader.close(); + return std::chrono::duration(end - start) + .count(); + }; + + double avg_by_row = -1.0; + double avg_manual = -1.0; + int valid_iters = 0; + compute_avg_times(run_query_by_row, run_manual_next, perf_iters, + avg_by_row, avg_manual, valid_iters); + ASSERT_GT(valid_iters, 0); + ASSERT_GT(avg_manual, 0.0); + + const double speedup = + (avg_by_row > 0.0) ? 
(avg_manual / avg_by_row) : 0.0; + best_speedup = std::max(best_speedup, speedup); + + out << "| " << cs.label << " | " << offset << " | " << limit << " | " + << avg_by_row << " | " << avg_manual << " | " << speedup << "x |\n"; + } + + out << "\n"; + std::cout << "\n" << out.str() << "\n"; + write_result_if_needed(out.str()); + EXPECT_GT(best_speedup, 1.0); +} diff --git a/cpp/test/reader/table_view/tsfile_table_query_by_row_test.cc b/cpp/test/reader/table_view/tsfile_table_query_by_row_test.cc new file mode 100644 index 000000000..13a0257d3 --- /dev/null +++ b/cpp/test/reader/table_view/tsfile_table_query_by_row_test.cc @@ -0,0 +1,726 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +#include + +#include +#include +#include + +#include "common/global.h" +#include "common/record.h" +#include "common/schema.h" +#include "common/tablet.h" +#include "file/write_file.h" +#include "reader/table_result_set.h" +#include "reader/tsfile_reader.h" +#include "writer/tsfile_table_writer.h" + +using namespace storage; +using namespace common; + +class TableQueryByRowTest : public ::testing::Test { + protected: + void SetUp() override { + libtsfile_init(); + file_name_ = std::string("table_query_by_row_test_") + + generate_random_string(10) + std::string(".tsfile"); + remove(file_name_.c_str()); + int flags = O_WRONLY | O_CREAT | O_TRUNC; +#ifdef _WIN32 + flags |= O_BINARY; +#endif + mode_t mode = 0666; + write_file_.create(file_name_, flags, mode); + } + + void TearDown() override { + remove(file_name_.c_str()); + libtsfile_destroy(); + } + + static std::string generate_random_string(int length) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis(0, 61); + const std::string chars = + "0123456789" + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + std::string result; + for (int i = 0; i < length; ++i) { + result += chars[dis(gen)]; + } + return result; + } + + void write_single_device_file(int num_rows) { + std::vector col_schemas = { + ColumnSchema("id1", TSDataType::STRING, + CompressionType::UNCOMPRESSED, TSEncoding::PLAIN, + ColumnCategory::TAG), + ColumnSchema("s1", TSDataType::INT64, CompressionType::UNCOMPRESSED, + TSEncoding::PLAIN, ColumnCategory::FIELD), + ColumnSchema("s2", TSDataType::INT64, CompressionType::UNCOMPRESSED, + TSEncoding::PLAIN, ColumnCategory::FIELD), + }; + auto* schema = new TableSchema("t1", col_schemas); + auto* writer = new TsFileTableWriter(&write_file_, schema); + + Tablet tablet( + "t1", {"id1", "s1", "s2"}, + {TSDataType::STRING, TSDataType::INT64, TSDataType::INT64}, + {ColumnCategory::TAG, ColumnCategory::FIELD, ColumnCategory::FIELD}, + num_rows); + + for (int i 
= 0; i < num_rows; i++) { + tablet.add_timestamp(i, static_cast(i)); + tablet.add_value(i, "id1", "device_a"); + tablet.add_value(i, "s1", static_cast(i * 10)); + tablet.add_value(i, "s2", static_cast(i * 100)); + } + + ASSERT_EQ(writer->write_table(tablet), E_OK); + ASSERT_EQ(writer->flush(), E_OK); + ASSERT_EQ(writer->close(), E_OK); + delete writer; + delete schema; + } + + void write_multi_device_file(int rows_per_device, int device_count) { + std::vector col_schemas = { + ColumnSchema("id1", TSDataType::STRING, + CompressionType::UNCOMPRESSED, TSEncoding::PLAIN, + ColumnCategory::TAG), + ColumnSchema("s1", TSDataType::INT64, CompressionType::UNCOMPRESSED, + TSEncoding::PLAIN, ColumnCategory::FIELD), + }; + auto* schema = new TableSchema("t1", col_schemas); + auto* writer = new TsFileTableWriter(&write_file_, schema); + + int total = rows_per_device * device_count; + Tablet tablet("t1", {"id1", "s1"}, + {TSDataType::STRING, TSDataType::INT64}, + {ColumnCategory::TAG, ColumnCategory::FIELD}, total); + + int row = 0; + for (int d = 0; d < device_count; d++) { + std::string device_id = "dev" + std::to_string(d); + for (int t = 0; t < rows_per_device; t++) { + tablet.add_timestamp(row, static_cast(t)); + tablet.add_value(row, "id1", device_id); + tablet.add_value(row, "s1", static_cast(d * 1000 + t)); + row++; + } + } + + ASSERT_EQ(writer->write_table(tablet), E_OK); + ASSERT_EQ(writer->flush(), E_OK); + ASSERT_EQ(writer->close(), E_OK); + delete writer; + delete schema; + } + + // Writes single-device dense data in multiple batches with flush each time, + // so the file has multiple ChunkGroups (multiple Chunks per column). Used + // to exercise SSI-level pushdown where set_row_range causes + // whole-Chunk/Page skip by count. memory_threshold_bytes should be small to + // trigger flush. 
+ void write_single_device_dense_multi_chunk( + int rows_per_batch, int num_batches, uint64_t memory_threshold_bytes) { + std::vector col_schemas = { + ColumnSchema("id1", TSDataType::STRING, + CompressionType::UNCOMPRESSED, TSEncoding::PLAIN, + ColumnCategory::TAG), + ColumnSchema("s1", TSDataType::INT64, CompressionType::UNCOMPRESSED, + TSEncoding::PLAIN, ColumnCategory::FIELD), + ColumnSchema("s2", TSDataType::INT64, CompressionType::UNCOMPRESSED, + TSEncoding::PLAIN, ColumnCategory::FIELD), + }; + auto* schema = new TableSchema("t1", col_schemas); + auto* writer = + new TsFileTableWriter(&write_file_, schema, memory_threshold_bytes); + + for (int b = 0; b < num_batches; b++) { + Tablet tablet( + "t1", {"id1", "s1", "s2"}, + {TSDataType::STRING, TSDataType::INT64, TSDataType::INT64}, + {ColumnCategory::TAG, ColumnCategory::FIELD, + ColumnCategory::FIELD}, + rows_per_batch); + int base = b * rows_per_batch; + for (int i = 0; i < rows_per_batch; i++) { + int row_idx = base + i; + tablet.add_timestamp(i, static_cast(row_idx)); + tablet.add_value(i, "id1", "device_a"); + tablet.add_value(i, "s1", static_cast(row_idx * 10)); + tablet.add_value(i, "s2", static_cast(row_idx * 100)); + } + ASSERT_EQ(writer->write_table(tablet), E_OK); + ASSERT_EQ(writer->flush(), E_OK); + } + ASSERT_EQ(writer->close(), E_OK); + delete writer; + delete schema; + } + + void write_single_device_sparse_multi_chunk_with_equal_missing( + int rows_per_batch, int num_batches, uint64_t memory_threshold_bytes, + int64_t null_start, int64_t null_end) { + std::vector col_schemas = { + ColumnSchema("id1", TSDataType::STRING, + CompressionType::UNCOMPRESSED, TSEncoding::PLAIN, + ColumnCategory::TAG), + ColumnSchema("s1", TSDataType::INT64, CompressionType::UNCOMPRESSED, + TSEncoding::PLAIN, ColumnCategory::FIELD), + ColumnSchema("s2", TSDataType::INT64, CompressionType::UNCOMPRESSED, + TSEncoding::PLAIN, ColumnCategory::FIELD), + }; + auto* schema = new TableSchema("t1", col_schemas); + auto* writer 
= + new TsFileTableWriter(&write_file_, schema, memory_threshold_bytes); + + // Make s1/s2 have the same amount of missing points, but missing + // positions differ across columns. + const int64_t total_rows = static_cast(rows_per_batch) * + static_cast(num_batches); + const int64_t missing_len = null_end - null_start; + ASSERT_GT(missing_len, 0); + + // Pick a shifted missing window of the same length for s2. + int64_t null2_start = null_start + missing_len / 2; + if (null2_start < 0) null2_start = 0; + if (null2_start + missing_len > total_rows) { + null2_start = total_rows - missing_len; + } + ASSERT_GE(null2_start, 0); + ASSERT_LE(null2_start + missing_len, total_rows); + const int64_t null2_end = null2_start + missing_len; + + for (int b = 0; b < num_batches; b++) { + Tablet tablet( + "t1", {"id1", "s1", "s2"}, + {TSDataType::STRING, TSDataType::INT64, TSDataType::INT64}, + {ColumnCategory::TAG, ColumnCategory::FIELD, + ColumnCategory::FIELD}, + rows_per_batch); + int64_t base = static_cast(b) * rows_per_batch; + for (int i = 0; i < rows_per_batch; i++) { + int64_t row_idx = base + i; + tablet.add_timestamp(i, row_idx); + tablet.add_value(i, "id1", "device_a"); + + const bool s1_missing = + (row_idx >= null_start && row_idx < null_end); + const bool s2_missing = + (row_idx >= null2_start && row_idx < null2_end); + + if (!s1_missing) { + tablet.add_value(i, "s1", row_idx * 10); + } + if (!s2_missing) { + tablet.add_value(i, "s2", row_idx * 100); + } + } + + ASSERT_EQ(writer->write_table(tablet), E_OK); + ASSERT_EQ(writer->flush(), E_OK); + } + + ASSERT_EQ(writer->close(), E_OK); + delete writer; + delete schema; + } + + std::vector query_all_s1(const std::string& table_name, + const std::vector& columns) { + TsFileReader reader; + EXPECT_EQ(reader.open(file_name_), E_OK); + ResultSet* rs = nullptr; + EXPECT_EQ(reader.query(table_name, columns, INT64_MIN, INT64_MAX, rs), + E_OK); + std::vector result; + bool has_next = false; + while (IS_SUCC(rs->next(has_next)) 
&& has_next) { + result.push_back(rs->get_value("s1")); + } + reader.destroy_query_data_set(rs); + reader.close(); + return result; + } + + std::vector query_by_row_s1(const std::string& table_name, + const std::vector& cols, + int offset, int limit) { + TsFileReader reader; + EXPECT_EQ(reader.open(file_name_), E_OK); + ResultSet* rs = nullptr; + EXPECT_EQ(reader.queryByRow(table_name, cols, offset, limit, rs), E_OK); + EXPECT_NE(rs, nullptr); + std::vector result; + bool has_next = false; + while (IS_SUCC(rs->next(has_next)) && has_next) { + result.push_back(rs->get_value("s1")); + } + reader.destroy_query_data_set(rs); + reader.close(); + return result; + } + + std::vector> query_by_row_time_and_s1( + const std::string& table_name, const std::vector& cols, + int offset, int limit) { + TsFileReader reader; + EXPECT_EQ(reader.open(file_name_), E_OK); + ResultSet* rs = nullptr; + EXPECT_EQ(reader.queryByRow(table_name, cols, offset, limit, rs), E_OK); + EXPECT_NE(rs, nullptr); + + std::vector> result; + bool has_next = false; + while (IS_SUCC(rs->next(has_next)) && has_next) { + int64_t time = rs->get_value("time"); + // s1 is INT64, use sentinel -1 for NULL. + int64_t s1_val = + rs->is_null("s1") ? -1 : rs->get_value("s1"); + result.emplace_back(time, s1_val); + } + + reader.destroy_query_data_set(rs); + reader.close(); + return result; + } + + std::vector> query_manual_time_and_s1( + const std::string& table_name, const std::vector& cols, + int offset, int limit) { + TsFileReader reader; + EXPECT_EQ(reader.open(file_name_), E_OK); + + ResultSet* rs = nullptr; + EXPECT_EQ(reader.query(table_name, cols, INT64_MIN, INT64_MAX, rs), + E_OK); + + std::vector> manual; + bool has_next = false; + int skipped = 0; + int taken = 0; + while (IS_SUCC(rs->next(has_next)) && has_next) { + if (skipped < offset) { + skipped++; + continue; + } + if (taken >= limit) { + break; + } + int64_t time = rs->get_value("time"); + int64_t s1_val = + rs->is_null("s1") ? 
-1 : rs->get_value("s1"); + manual.emplace_back(time, s1_val); + taken++; + } + + reader.destroy_query_data_set(rs); + reader.close(); + return manual; + } + + std::string file_name_; + WriteFile write_file_; +}; + +// No offset or limit: queryByRow(0, -1) returns the same rows as full query. +TEST_F(TableQueryByRowTest, NoOffsetNoLimit) { + int num_rows = 50; + write_single_device_file(num_rows); + + auto all = query_all_s1("t1", {"id1", "s1", "s2"}); + auto result = query_by_row_s1("t1", {"id1", "s1", "s2"}, 0, -1); + ASSERT_EQ(result.size(), all.size()); + ASSERT_EQ(result, all); +} + +// Offset only: skip first N rows, return the rest; limit=-1 means no cap. +TEST_F(TableQueryByRowTest, OffsetOnly) { + int num_rows = 50; + write_single_device_file(num_rows); + + auto all = query_all_s1("t1", {"id1", "s1", "s2"}); + int offset = 20; + auto result = query_by_row_s1("t1", {"id1", "s1", "s2"}, offset, -1); + ASSERT_EQ(result.size(), static_cast(num_rows - offset)); + for (size_t i = 0; i < result.size(); i++) { + ASSERT_EQ(result[i], all[i + offset]); + } +} + +// Limit only: return at most M rows from the start; offset=0. +TEST_F(TableQueryByRowTest, LimitOnly) { + int num_rows = 50; + write_single_device_file(num_rows); + + auto all = query_all_s1("t1", {"id1", "s1", "s2"}); + int limit = 10; + auto result = query_by_row_s1("t1", {"id1", "s1", "s2"}, 0, limit); + ASSERT_EQ(result.size(), static_cast(limit)); + for (size_t i = 0; i < result.size(); i++) { + ASSERT_EQ(result[i], all[i]); + } +} + +// Both offset and limit: skip first N rows, then return at most M rows. 
+TEST_F(TableQueryByRowTest, OffsetAndLimit) { + int num_rows = 100; + write_single_device_file(num_rows); + + auto all = query_all_s1("t1", {"id1", "s1", "s2"}); + int offset = 30; + int limit = 25; + auto result = query_by_row_s1("t1", {"id1", "s1", "s2"}, offset, limit); + ASSERT_EQ(result.size(), static_cast(limit)); + for (size_t i = 0; i < result.size(); i++) { + ASSERT_EQ(result[i], all[i + offset]); + } +} + +// Offset beyond total row count: returns empty result. +TEST_F(TableQueryByRowTest, OffsetBeyondData) { + int num_rows = 30; + write_single_device_file(num_rows); + + auto result = query_by_row_s1("t1", {"id1", "s1", "s2"}, 100, -1); + ASSERT_EQ(result.size(), 0u); +} + +// Limit zero: returns no rows (no data read). +TEST_F(TableQueryByRowTest, LimitZero) { + int num_rows = 30; + write_single_device_file(num_rows); + + auto result = query_by_row_s1("t1", {"id1", "s1", "s2"}, 0, 0); + ASSERT_EQ(result.size(), 0u); +} + +// Offset + limit exceeds total: returns all rows after offset (less than +// limit). +TEST_F(TableQueryByRowTest, OffsetPlusLimitExceedsTotal) { + int num_rows = 50; + write_single_device_file(num_rows); + + auto all = query_all_s1("t1", {"id1", "s1", "s2"}); + int offset = 40; + int limit = 100; + auto result = query_by_row_s1("t1", {"id1", "s1", "s2"}, offset, limit); + ASSERT_EQ(result.size(), static_cast(num_rows - offset)); + for (size_t i = 0; i < result.size(); i++) { + ASSERT_EQ(result[i], all[i + offset]); + } +} + +// Multi-device, no offset/limit: queryByRow(0, -1) matches full query order. +TEST_F(TableQueryByRowTest, MultiDeviceNoOffset) { + int rows_per_device = 20; + int device_count = 3; + write_multi_device_file(rows_per_device, device_count); + + auto all = query_all_s1("t1", {"id1", "s1"}); + auto result = query_by_row_s1("t1", {"id1", "s1"}, 0, -1); + ASSERT_EQ(result.size(), all.size()); + ASSERT_EQ(result, all); +} + +// Multi-device, offset within first device: skip applies to global row order. 
+TEST_F(TableQueryByRowTest, MultiDeviceOffsetWithinFirstDevice) { + int rows_per_device = 20; + int device_count = 3; + write_multi_device_file(rows_per_device, device_count); + + auto all = query_all_s1("t1", {"id1", "s1"}); + int offset = 5; + auto result = query_by_row_s1("t1", {"id1", "s1"}, offset, -1); + ASSERT_EQ(result.size(), all.size() - offset); + for (size_t i = 0; i < result.size(); i++) { + ASSERT_EQ(result[i], all[i + offset]); + } +} + +// Multi-device, offset skips entire first device(s): verifies device-level +// skip. +TEST_F(TableQueryByRowTest, MultiDeviceOffsetSkipsEntireDevice) { + int rows_per_device = 20; + int device_count = 3; + write_multi_device_file(rows_per_device, device_count); + + auto all = query_all_s1("t1", {"id1", "s1"}); + int offset = 25; + int limit = 10; + auto result = query_by_row_s1("t1", {"id1", "s1"}, offset, limit); + ASSERT_EQ(result.size(), static_cast(limit)); + for (size_t i = 0; i < result.size(); i++) { + ASSERT_EQ(result[i], all[i + offset]); + } +} + +// Multi-device, offset and limit span device boundary: correct cross-device +// slice. +TEST_F(TableQueryByRowTest, MultiDeviceOffsetSpansDeviceBoundary) { + int rows_per_device = 20; + int device_count = 3; + write_multi_device_file(rows_per_device, device_count); + + auto all = query_all_s1("t1", {"id1", "s1"}); + int offset = 18; + int limit = 15; + auto result = query_by_row_s1("t1", {"id1", "s1"}, offset, limit); + ASSERT_EQ(result.size(), static_cast(limit)); + for (size_t i = 0; i < result.size(); i++) { + ASSERT_EQ(result[i], all[i + offset]); + } +} + +// Multi-device, offset beyond all data: returns empty. 
+TEST_F(TableQueryByRowTest, MultiDeviceOffsetSkipsAllDevices) { + int rows_per_device = 10; + int device_count = 3; + write_multi_device_file(rows_per_device, device_count); + + auto result = query_by_row_s1("t1", {"id1", "s1"}, 100, 10); + ASSERT_EQ(result.size(), 0u); +} + +// Single device: queryByRow(offset, limit) equals full query + manual +// skip/limit in app. +TEST_F(TableQueryByRowTest, EquivalenceWithManualSkip) { + int num_rows = 200; + write_single_device_file(num_rows); + + int offset = 73; + int limit = 42; + + auto by_row = query_by_row_s1("t1", {"id1", "s1", "s2"}, offset, limit); + + TsFileReader reader; + ASSERT_EQ(reader.open(file_name_), E_OK); + ResultSet* rs = nullptr; + ASSERT_EQ(reader.query("t1", {"id1", "s1", "s2"}, INT64_MIN, INT64_MAX, rs), + E_OK); + std::vector manual; + bool has_next = false; + int skipped = 0; + int collected = 0; + while (IS_SUCC(rs->next(has_next)) && has_next) { + if (skipped < offset) { + skipped++; + continue; + } + if (collected >= limit) break; + manual.push_back(rs->get_value("s1")); + collected++; + } + reader.destroy_query_data_set(rs); + reader.close(); + + ASSERT_EQ(by_row.size(), manual.size()); + ASSERT_EQ(by_row, manual); +} + +// Multi-device: queryByRow(offset, limit) equals full query + manual skip/limit +// in app. 
+TEST_F(TableQueryByRowTest, MultiDeviceEquivalenceWithManualSkip) { + int rows_per_device = 30; + int device_count = 4; + write_multi_device_file(rows_per_device, device_count); + + int offset = 50; + int limit = 40; + + auto by_row = query_by_row_s1("t1", {"id1", "s1"}, offset, limit); + + TsFileReader reader; + ASSERT_EQ(reader.open(file_name_), E_OK); + ResultSet* rs = nullptr; + ASSERT_EQ(reader.query("t1", {"id1", "s1"}, INT64_MIN, INT64_MAX, rs), + E_OK); + std::vector manual; + bool has_next = false; + int skipped = 0; + int collected = 0; + while (IS_SUCC(rs->next(has_next)) && has_next) { + if (skipped < offset) { + skipped++; + continue; + } + if (collected >= limit) break; + manual.push_back(rs->get_value("s1")); + collected++; + } + reader.destroy_query_data_set(rs); + reader.close(); + + ASSERT_EQ(by_row.size(), manual.size()); + ASSERT_EQ(by_row, manual); +} + +// Large single-device dataset: offset and limit correctness with many rows. +TEST_F(TableQueryByRowTest, LargeDatasetOffsetLimit) { + int num_rows = 5000; + write_single_device_file(num_rows); + + auto all = query_all_s1("t1", {"id1", "s1", "s2"}); + int offset = 2500; + int limit = 1000; + auto result = query_by_row_s1("t1", {"id1", "s1", "s2"}, offset, limit); + ASSERT_EQ(result.size(), static_cast(limit)); + for (size_t i = 0; i < result.size(); i++) { + ASSERT_EQ(result[i], all[i + offset]); + } +} + +TEST_F(TableQueryByRowTest, DenseAlignedNullsMustUseTimeRowCount) { + const int rows_per_batch = 200; + const int num_batches = 4; + write_single_device_sparse_multi_chunk_with_equal_missing( + rows_per_batch, num_batches, /*memory_threshold_bytes=*/8 * 1024, + /*null_start=*/250, /*null_end=*/550); + + const int offset = 260; + const int limit = 100; + + const std::vector cols = {"id1", "s1", "s2"}; + auto by_row = query_by_row_time_and_s1("t1", cols, offset, limit); + auto manual = query_manual_time_and_s1("t1", cols, offset, limit); + + ASSERT_EQ(by_row.size(), manual.size()); + 
ASSERT_EQ(by_row, manual); +} + +// SSI-level pushdown: dense single-device data with multiple Chunks per column. +// set_row_range(offset, limit) is applied to each column's SSI; SSI skips whole +// Chunks/Pages by ChunkMeta/PageHeader count without decoding. Multi-chunk +// file is produced by small memory_threshold and multiple flush; offset/limit +// are chosen so that at least one Chunk is skipped and result is correct. +TEST_F(TableQueryByRowTest, DenseSingleDeviceSsiLevelPushdown) { + const int rows_per_batch = 300; + const int num_batches = 4; + write_single_device_dense_multi_chunk(rows_per_batch, num_batches, + 8 * 1024); + + int offset = 400; + int limit = 200; + auto by_row = query_by_row_s1("t1", {"id1", "s1", "s2"}, offset, limit); + + TsFileReader reader; + ASSERT_EQ(reader.open(file_name_), E_OK); + ResultSet* rs = nullptr; + ASSERT_EQ(reader.query("t1", {"id1", "s1", "s2"}, INT64_MIN, INT64_MAX, rs), + E_OK); + std::vector manual; + bool has_next = false; + int skipped = 0; + int collected = 0; + while (IS_SUCC(rs->next(has_next)) && has_next) { + if (skipped < offset) { + skipped++; + continue; + } + if (collected >= limit) break; + manual.push_back(rs->get_value("s1")); + collected++; + } + reader.destroy_query_data_set(rs); + reader.close(); + + ASSERT_EQ(by_row.size(), manual.size()); + ASSERT_EQ(by_row, manual); +} + +// Pushdown is faster than full query + manual next: queryByRow(offset, limit) +// skips at device/SSI/Chunk level; old query then manual next decodes every +// row. Timing tolerance 10% to allow measurement noise. 
+TEST_F(TableQueryByRowTest, QueryByRowFasterThanManualNext) { + const int num_rows = 8000; + const int offset = 3000; + const int limit = 1000; + write_single_device_file(num_rows); + + const int num_iters = 5; + const double tolerance = 0.1; // 10% tolerance to allow for timing noise + + auto run_query_by_row = [this, offset, limit]() { + TsFileReader reader; + if (reader.open(file_name_) != E_OK) return -1.0; + ResultSet* rs = nullptr; + if (reader.queryByRow("t1", {"id1", "s1", "s2"}, offset, limit, rs) != + E_OK) { + reader.close(); + return -1.0; + } + auto start = std::chrono::steady_clock::now(); + bool has_next = false; + while (IS_SUCC(rs->next(has_next)) && has_next) { + (void)rs->get_value("s1"); + } + auto end = std::chrono::steady_clock::now(); + reader.destroy_query_data_set(rs); + reader.close(); + return std::chrono::duration(end - start).count(); + }; + + auto run_manual_next = [this, offset, limit]() { + TsFileReader reader; + if (reader.open(file_name_) != E_OK) return -1.0; + ResultSet* rs = nullptr; + if (reader.query("t1", {"id1", "s1", "s2"}, INT64_MIN, INT64_MAX, rs) != + E_OK) { + reader.close(); + return -1.0; + } + auto start = std::chrono::steady_clock::now(); + bool has_next = false; + int skipped = 0; + int taken = 0; + while (IS_SUCC(rs->next(has_next)) && has_next) { + if (skipped < offset) { + skipped++; + continue; + } + if (taken >= limit) break; + (void)rs->get_value("s1"); + taken++; + } + auto end = std::chrono::steady_clock::now(); + reader.destroy_query_data_set(rs); + reader.close(); + return std::chrono::duration(end - start).count(); + }; + + double min_by_row = 1e9; + double min_manual = 1e9; + for (int i = 0; i < num_iters; i++) { + double t1 = run_query_by_row(); + double t2 = run_manual_next(); + if (t1 > 0 && t1 < min_by_row) min_by_row = t1; + if (t2 > 0 && t2 < min_manual) min_manual = t2; + } + ASSERT_GT(min_manual, 0.0) << "manual next timed run failed"; + ASSERT_GT(min_by_row, 0.0) << "queryByRow timed run 
failed"; + EXPECT_LT(min_by_row, min_manual * (1.0 + tolerance)) + << "queryByRow (pushdown) should be faster than query+manual next " + "(min_by_row=" + << min_by_row << " ms, min_manual=" << min_manual << " ms)"; +} diff --git a/cpp/test/reader/tree_view/tsfile_tree_query_by_row_test.cc b/cpp/test/reader/tree_view/tsfile_tree_query_by_row_test.cc new file mode 100644 index 000000000..56f8c113a --- /dev/null +++ b/cpp/test/reader/tree_view/tsfile_tree_query_by_row_test.cc @@ -0,0 +1,1179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +#include + +#include +#include + +#include "common/global.h" +#include "common/record.h" +#include "common/schema.h" +#include "file/write_file.h" +#include "reader/tsfile_reader.h" +#include "reader/tsfile_tree_reader.h" +#include "writer/tsfile_tree_writer.h" + +using namespace storage; +using namespace common; + +class TreeQueryByRowTest : public ::testing::Test { + protected: + void SetUp() override { + libtsfile_init(); + file_name_ = std::string("tree_query_by_row_test_") + + generate_random_string(10) + std::string(".tsfile"); + remove(file_name_.c_str()); + int flags = O_WRONLY | O_CREAT | O_TRUNC; +#ifdef _WIN32 + flags |= O_BINARY; +#endif + mode_t mode = 0666; + write_file_.create(file_name_, flags, mode); + } + + void TearDown() override { + remove(file_name_.c_str()); + libtsfile_destroy(); + } + + static std::string generate_random_string(int length) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis(0, 61); + const std::string chars = + "0123456789" + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + std::string result; + for (int i = 0; i < length; ++i) { + result += chars[dis(gen)]; + } + return result; + } + + // Write a simple tsfile with given devices, measurements, and row count. + // Each device has all measurements, timestamps are 0, 1, ..., num_rows-1. 
+ void write_test_file(const std::vector& device_ids, + const std::vector& measurement_ids, + int num_rows) { + TsFileTreeWriter writer(&write_file_); + for (auto device_id : device_ids) { + for (auto& measurement : measurement_ids) { + auto* schema = + new MeasurementSchema(measurement, TSDataType::INT64); + ASSERT_EQ(E_OK, writer.register_timeseries(device_id, schema)); + delete schema; + } + } + for (int row = 0; row < num_rows; ++row) { + for (const auto& device_id : device_ids) { + TsRecord record(device_id, static_cast(row)); + for (size_t m = 0; m < measurement_ids.size(); ++m) { + record.add_point(measurement_ids[m], + static_cast(row * 100 + m)); + } + ASSERT_EQ(E_OK, writer.write(record)); + } + } + writer.flush(); + writer.close(); + } + + // Helper: collect all rows from a result set into a vector of timestamps. + std::vector collect_timestamps(ResultSet* result_set) { + std::vector timestamps; + bool has_next = false; + while (IS_SUCC(result_set->next(has_next)) && has_next) { + timestamps.push_back(result_set->get_row_record()->get_timestamp()); + } + return timestamps; + } + + std::string file_name_; + WriteFile write_file_; +}; + +// Basic test: queryByRow returns correct total count with no offset/limit. +TEST_F(TreeQueryByRowTest, NoOffsetNoLimit) { + std::vector devices = {"d1"}; + std::vector measurements = {"s1"}; + int num_rows = 10; + write_test_file(devices, measurements, num_rows); + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow(devices, measurements, 0, -1, result)); + ASSERT_NE(result, nullptr); + + auto timestamps = collect_timestamps(result); + EXPECT_EQ(timestamps.size(), static_cast(num_rows)); + for (int i = 0; i < num_rows; ++i) { + EXPECT_EQ(timestamps[i], i); + } + + reader.destroy_query_data_set(result); + reader.close(); +} + +// Test: offset skips leading rows. 
+TEST_F(TreeQueryByRowTest, OffsetOnly) { + std::vector devices = {"d1"}; + std::vector measurements = {"s1"}; + int num_rows = 10; + write_test_file(devices, measurements, num_rows); + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + ResultSet* result = nullptr; + int offset = 3; + ASSERT_EQ(E_OK, + reader.queryByRow(devices, measurements, offset, -1, result)); + ASSERT_NE(result, nullptr); + + auto timestamps = collect_timestamps(result); + EXPECT_EQ(timestamps.size(), static_cast(num_rows - offset)); + for (size_t i = 0; i < timestamps.size(); ++i) { + EXPECT_EQ(timestamps[i], static_cast(i + offset)); + } + + reader.destroy_query_data_set(result); + reader.close(); +} + +// Test: limit caps the number of rows returned. +TEST_F(TreeQueryByRowTest, LimitOnly) { + std::vector devices = {"d1"}; + std::vector measurements = {"s1"}; + int num_rows = 10; + write_test_file(devices, measurements, num_rows); + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + ResultSet* result = nullptr; + int limit = 5; + ASSERT_EQ(E_OK, reader.queryByRow(devices, measurements, 0, limit, result)); + ASSERT_NE(result, nullptr); + + auto timestamps = collect_timestamps(result); + EXPECT_EQ(timestamps.size(), static_cast(limit)); + for (int i = 0; i < limit; ++i) { + EXPECT_EQ(timestamps[i], i); + } + + reader.destroy_query_data_set(result); + reader.close(); +} + +// Test: offset + limit combined. 
+TEST_F(TreeQueryByRowTest, OffsetAndLimit) { + std::vector devices = {"d1"}; + std::vector measurements = {"s1"}; + int num_rows = 20; + write_test_file(devices, measurements, num_rows); + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + ResultSet* result = nullptr; + int offset = 5; + int limit = 7; + ASSERT_EQ(E_OK, + reader.queryByRow(devices, measurements, offset, limit, result)); + ASSERT_NE(result, nullptr); + + auto timestamps = collect_timestamps(result); + EXPECT_EQ(timestamps.size(), static_cast(limit)); + for (int i = 0; i < limit; ++i) { + EXPECT_EQ(timestamps[i], static_cast(i + offset)); + } + + reader.destroy_query_data_set(result); + reader.close(); +} + +// Test: offset exceeds total rows → empty result. +TEST_F(TreeQueryByRowTest, OffsetExceedsTotalRows) { + std::vector devices = {"d1"}; + std::vector measurements = {"s1"}; + int num_rows = 5; + write_test_file(devices, measurements, num_rows); + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow(devices, measurements, 100, -1, result)); + ASSERT_NE(result, nullptr); + + auto timestamps = collect_timestamps(result); + EXPECT_EQ(timestamps.size(), 0u); + + reader.destroy_query_data_set(result); + reader.close(); +} + +// Test: limit=0 → empty result. +TEST_F(TreeQueryByRowTest, LimitZero) { + std::vector devices = {"d1"}; + std::vector measurements = {"s1"}; + int num_rows = 10; + write_test_file(devices, measurements, num_rows); + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow(devices, measurements, 0, 0, result)); + ASSERT_NE(result, nullptr); + + auto timestamps = collect_timestamps(result); + EXPECT_EQ(timestamps.size(), 0u); + + reader.destroy_query_data_set(result); + reader.close(); +} + +// Test: multi-path (multiple devices, same measurement) merged by time. 
+// All devices write at same timestamps, so merged row count = num_rows. +TEST_F(TreeQueryByRowTest, MultiPathMerge) { + std::vector devices = {"d1", "d2", "d3"}; + std::vector measurements = {"s1"}; + int num_rows = 10; + write_test_file(devices, measurements, num_rows); + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + // Without offset/limit, should get num_rows rows (merged by time). + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow(devices, measurements, 0, -1, result)); + ASSERT_NE(result, nullptr); + + auto timestamps = collect_timestamps(result); + // Timestamps 0..9, each appearing once (all devices have same timestamps). + EXPECT_EQ(timestamps.size(), static_cast(num_rows)); + for (int i = 0; i < num_rows; ++i) { + EXPECT_EQ(timestamps[i], i); + } + + reader.destroy_query_data_set(result); + reader.close(); +} + +// Test: multi-path with offset and limit. +TEST_F(TreeQueryByRowTest, MultiPathOffsetLimit) { + std::vector devices = {"d1", "d2"}; + std::vector measurements = {"s1"}; + int num_rows = 20; + write_test_file(devices, measurements, num_rows); + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + int offset = 5; + int limit = 8; + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, + reader.queryByRow(devices, measurements, offset, limit, result)); + ASSERT_NE(result, nullptr); + + auto timestamps = collect_timestamps(result); + EXPECT_EQ(timestamps.size(), static_cast(limit)); + for (int i = 0; i < limit; ++i) { + EXPECT_EQ(timestamps[i], static_cast(i + offset)); + } + + reader.destroy_query_data_set(result); + reader.close(); +} + +// Test: single path with multiple measurements. 
+TEST_F(TreeQueryByRowTest, SingleDeviceMultipleMeasurements) { + std::vector devices = {"d1"}; + std::vector measurements = {"s1", "s2", "s3"}; + int num_rows = 15; + write_test_file(devices, measurements, num_rows); + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + int offset = 3; + int limit = 5; + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, + reader.queryByRow(devices, measurements, offset, limit, result)); + ASSERT_NE(result, nullptr); + + auto timestamps = collect_timestamps(result); + // Multiple measurements from same device at same time → merged row count. + EXPECT_EQ(timestamps.size(), static_cast(limit)); + for (int i = 0; i < limit; ++i) { + EXPECT_EQ(timestamps[i], static_cast(i + offset)); + } + + // Verify values in the returned rows. + reader.destroy_query_data_set(result); + + // Re-query with the same offset/limit to verify the returned values. + ResultSet* result2 = nullptr; + ASSERT_EQ(E_OK, + reader.queryByRow(devices, measurements, offset, limit, result2)); + ASSERT_NE(result2, nullptr); + + bool has_next = false; + int row_count = 0; + while (IS_SUCC(result2->next(has_next)) && has_next) { + RowRecord* rr = result2->get_row_record(); + int64_t ts = rr->get_timestamp(); + int expected_row = static_cast(ts); + // Check first measurement value (s1). + if (!result2->is_null(1)) { + // Column 0 is time, column 1 is first path + Field* f = rr->get_field(1); + if (f != nullptr && f->type_ == INT64) { + EXPECT_EQ(f->get_value(), + static_cast(expected_row * 100 + 0)); + } + } + row_count++; + } + EXPECT_EQ(row_count, limit); + + reader.destroy_query_data_set(result2); + reader.close(); +} + +// Test: limit larger than available rows → returns all rows. 
+TEST_F(TreeQueryByRowTest, LimitLargerThanAvailable) { + std::vector devices = {"d1"}; + std::vector measurements = {"s1"}; + int num_rows = 5; + write_test_file(devices, measurements, num_rows); + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow(devices, measurements, 0, 100, result)); + ASSERT_NE(result, nullptr); + + auto timestamps = collect_timestamps(result); + EXPECT_EQ(timestamps.size(), static_cast(num_rows)); + + reader.destroy_query_data_set(result); + reader.close(); +} + +// Test: larger dataset to exercise chunk/page boundaries. +TEST_F(TreeQueryByRowTest, LargeDatasetOffsetLimit) { + std::vector devices = {"d1"}; + std::vector measurements = {"s1"}; + int num_rows = 5000; + write_test_file(devices, measurements, num_rows); + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + int offset = 1000; + int limit = 500; + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, + reader.queryByRow(devices, measurements, offset, limit, result)); + ASSERT_NE(result, nullptr); + + auto timestamps = collect_timestamps(result); + EXPECT_EQ(timestamps.size(), static_cast(limit)); + for (int i = 0; i < limit; ++i) { + EXPECT_EQ(timestamps[i], static_cast(i + offset)); + } + + reader.destroy_query_data_set(result); + reader.close(); +} + +// Test: multi-device multi-measurement with interleaved timestamps. +TEST_F(TreeQueryByRowTest, MultiDeviceMultiMeasurementInterleaved) { + // Device d1 has timestamps 0,2,4,6,... + // Device d2 has timestamps 1,3,5,7,... + // After merge, rows are 0,1,2,3,... 
+ TsFileTreeWriter writer(&write_file_); + std::string device1 = "d1"; + std::string device2 = "d2"; + std::string measurement = "s1"; + + auto* schema1 = new MeasurementSchema(measurement, TSDataType::INT64); + auto* schema2 = new MeasurementSchema(measurement, TSDataType::INT64); + ASSERT_EQ(E_OK, writer.register_timeseries(device1, schema1)); + ASSERT_EQ(E_OK, writer.register_timeseries(device2, schema2)); + delete schema1; + delete schema2; + + int num_per_device = 10; + for (int i = 0; i < num_per_device; ++i) { + TsRecord r1(device1, static_cast(i * 2)); + r1.add_point(measurement, static_cast(i * 2)); + ASSERT_EQ(E_OK, writer.write(r1)); + + TsRecord r2(device2, static_cast(i * 2 + 1)); + r2.add_point(measurement, static_cast(i * 2 + 1)); + ASSERT_EQ(E_OK, writer.write(r2)); + } + writer.flush(); + writer.close(); + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + std::vector devices = {device1, device2}; + std::vector measurements = {measurement}; + + // Total merged rows = 20, offset=5, limit=8 → rows 5..12 + int offset = 5; + int limit = 8; + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, + reader.queryByRow(devices, measurements, offset, limit, result)); + ASSERT_NE(result, nullptr); + + auto timestamps = collect_timestamps(result); + EXPECT_EQ(timestamps.size(), static_cast(limit)); + for (int i = 0; i < limit; ++i) { + EXPECT_EQ(timestamps[i], static_cast(i + offset)); + } + + reader.destroy_query_data_set(result); + reader.close(); +} + +// ============================================================ +// Helpers for chunk/page boundary tests +// ============================================================ + +// PageGuard: RAII wrapper that temporarily sets page_writer_max_point_num_ +// so that every `page_size` written data points a new page is flushed. 
+struct PageGuard { + uint32_t saved_; + explicit PageGuard(uint32_t page_size) { + saved_ = g_config_value_.page_writer_max_point_num_; + g_config_value_.page_writer_max_point_num_ = page_size; + } + ~PageGuard() { g_config_value_.page_writer_max_point_num_ = saved_; } +}; + +// Write `num_rows` rows to `device`/`measurement` starting at timestamp +// `t_start`. Flushes the writer after every `flush_every` rows to force a +// new Chunk boundary. All values are INT64 = timestamp. +// Pass flush_every=0 to disable mid-write flushes. +static void write_single_path_multi_chunk(TsFileTreeWriter& writer, + const std::string& device, + const std::string& measurement, + int64_t t_start, int num_rows, + int flush_every) { + auto* schema = new MeasurementSchema(measurement, TSDataType::INT64); + auto device_name = device; + ASSERT_EQ(E_OK, writer.register_timeseries(device_name, schema)); + delete schema; + for (int r = 0; r < num_rows; ++r) { + int64_t ts = t_start + r; + TsRecord rec(device, ts); + rec.add_point(measurement, ts); + ASSERT_EQ(E_OK, writer.write(rec)); + if (flush_every > 0 && (r + 1) % flush_every == 0 && r + 1 < num_rows) { + ASSERT_EQ(E_OK, writer.flush()); + } + } + ASSERT_EQ(E_OK, writer.flush()); +} + +// ============================================================ +// Single-path: skip entire Chunks via count-based offset pushdown +// ============================================================ +// +// Layout: page_size=10, 3 chunks x 30 rows each (forced by flush). +// Each Chunk has Statistic.count=30. +// should_skip_chunk_by_offset fires when remaining_offset >= chunk.count. +// +// Chunk1 [t=0..29, count=30] +// Chunk2 [t=30..59, count=30] +// Chunk3 [t=60..89, count=30] + +// offset exactly equals one chunk: Chunk1 is skipped wholesale. 
+TEST_F(TreeQueryByRowTest, SinglePath_SkipChunk_OffsetEqualsOneChunk) { + PageGuard pg(10); // 10 pts/page -> 3 pages/chunk + { + TsFileTreeWriter writer(&write_file_); + write_single_path_multi_chunk(writer, "d1", "s1", 0, 90, 30); + writer.close(); + } + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + // offset=30 -> count(Chunk1)=30 == offset -> skip Chunk1. + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow({"d1"}, {"s1"}, 30, 10, result)); + ASSERT_NE(result, nullptr); + + auto ts = collect_timestamps(result); + ASSERT_EQ(ts.size(), 10u); + for (int i = 0; i < 10; ++i) EXPECT_EQ(ts[i], 30 + i); + + reader.destroy_query_data_set(result); + reader.close(); +} + +// offset equals two chunk counts: both Chunk1 and Chunk2 are skipped. +TEST_F(TreeQueryByRowTest, SinglePath_SkipChunk_OffsetEqualsTwoChunks) { + PageGuard pg(10); + { + TsFileTreeWriter writer(&write_file_); + write_single_path_multi_chunk(writer, "d1", "s1", 0, 90, 30); + writer.close(); + } + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + // offset=60 -> skip Chunk1 (offset->30) then Chunk2 (offset->0). + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow({"d1"}, {"s1"}, 60, 5, result)); + ASSERT_NE(result, nullptr); + + auto ts = collect_timestamps(result); + ASSERT_EQ(ts.size(), 5u); + for (int i = 0; i < 5; ++i) EXPECT_EQ(ts[i], 60 + i); + + reader.destroy_query_data_set(result); + reader.close(); +} + +// offset = chunk_count - 1: Chunk1 cannot be skipped (count=30 > 29); +// 29 rows consumed inside Chunk1, then result spans into Chunk2. 
+TEST_F(TreeQueryByRowTest, SinglePath_OffsetJustBeforeChunkBoundary) { + PageGuard pg(10); + { + TsFileTreeWriter writer(&write_file_); + write_single_path_multi_chunk(writer, "d1", "s1", 0, 60, 30); + writer.close(); + } + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow({"d1"}, {"s1"}, 29, 10, result)); + ASSERT_NE(result, nullptr); + + auto ts = collect_timestamps(result); + ASSERT_EQ(ts.size(), 10u); + for (int i = 0; i < 10; ++i) EXPECT_EQ(ts[i], 29 + i); + + reader.destroy_query_data_set(result); + reader.close(); +} + +// offset = chunk_count + 1: Chunk1 is skipped; 1 row consumed inside +// Chunk2; result starts at t=31. +TEST_F(TreeQueryByRowTest, SinglePath_OffsetJustAfterChunkBoundary) { + PageGuard pg(10); + { + TsFileTreeWriter writer(&write_file_); + write_single_path_multi_chunk(writer, "d1", "s1", 0, 60, 30); + writer.close(); + } + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow({"d1"}, {"s1"}, 31, 5, result)); + ASSERT_NE(result, nullptr); + + auto ts = collect_timestamps(result); + ASSERT_EQ(ts.size(), 5u); + for (int i = 0; i < 5; ++i) EXPECT_EQ(ts[i], 31 + i); + + reader.destroy_query_data_set(result); + reader.close(); +} + +// ============================================================ +// Single-path: skip entire Pages via count-based offset pushdown +// ============================================================ +// +// Layout: page_size=10, 30 rows in ONE chunk -> 3 pages. +// Each Page has Statistic.count=10. +// should_skip_page_by_offset fires when remaining_offset >= page.count. +// +// Page1 [t=0..9, count=10] +// Page2 [t=10..19, count=10] +// Page3 [t=20..29, count=10] + +// offset exactly equals one page: Page1 is skipped wholesale. 
+TEST_F(TreeQueryByRowTest, SinglePath_SkipPage_OffsetEqualsOnePage) { + PageGuard pg(10); + { + TsFileTreeWriter writer(&write_file_); + write_single_path_multi_chunk(writer, "d1", "s1", 0, 30, 0); + writer.close(); + } + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + // offset=10 -> count(Page1)=10 == offset -> skip Page1. + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow({"d1"}, {"s1"}, 10, 5, result)); + ASSERT_NE(result, nullptr); + + auto ts = collect_timestamps(result); + ASSERT_EQ(ts.size(), 5u); + for (int i = 0; i < 5; ++i) EXPECT_EQ(ts[i], 10 + i); + + reader.destroy_query_data_set(result); + reader.close(); +} + +// offset equals two page counts: Page1 + Page2 are both skipped. +TEST_F(TreeQueryByRowTest, SinglePath_SkipPage_OffsetEqualsTwoPages) { + PageGuard pg(10); + { + TsFileTreeWriter writer(&write_file_); + write_single_path_multi_chunk(writer, "d1", "s1", 0, 30, 0); + writer.close(); + } + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + // offset=20 -> skip Page1 (offset->10) then Page2 (offset->0). + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow({"d1"}, {"s1"}, 20, 5, result)); + ASSERT_NE(result, nullptr); + + auto ts = collect_timestamps(result); + ASSERT_EQ(ts.size(), 5u); + for (int i = 0; i < 5; ++i) EXPECT_EQ(ts[i], 20 + i); + + reader.destroy_query_data_set(result); + reader.close(); +} + +// offset = page_count - 1: Page1 cannot be skipped (count=10 > 9); +// 9 rows consumed row-by-row inside Page1, then result spans Page2. 
+TEST_F(TreeQueryByRowTest, SinglePath_SkipPage_OffsetJustBeforePageBoundary) { + PageGuard pg(10); + { + TsFileTreeWriter writer(&write_file_); + write_single_path_multi_chunk(writer, "d1", "s1", 0, 30, 0); + writer.close(); + } + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow({"d1"}, {"s1"}, 9, 5, result)); + ASSERT_NE(result, nullptr); + + auto ts = collect_timestamps(result); + ASSERT_EQ(ts.size(), 5u); + for (int i = 0; i < 5; ++i) EXPECT_EQ(ts[i], 9 + i); + + reader.destroy_query_data_set(result); + reader.close(); +} + +// ============================================================ +// Single-path: early termination via row_limit_ = 0 +// ============================================================ + +// limit < page_size: stop inside the first page. +// row_limit_ reaches 0 mid-page; subsequent pages/chunks must not load. +TEST_F(TreeQueryByRowTest, SinglePath_LimitStopsMidPage) { + PageGuard pg(10); + { + TsFileTreeWriter writer(&write_file_); + write_single_path_multi_chunk(writer, "d1", "s1", 0, 30, 0); + writer.close(); + } + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow({"d1"}, {"s1"}, 0, 3, result)); + ASSERT_NE(result, nullptr); + + auto ts = collect_timestamps(result); + ASSERT_EQ(ts.size(), 3u); + for (int i = 0; i < 3; ++i) EXPECT_EQ(ts[i], i); + + reader.destroy_query_data_set(result); + reader.close(); +} + +// limit = exactly one page: stop at the page boundary. 
+TEST_F(TreeQueryByRowTest, SinglePath_LimitEqualsOnePage) { + PageGuard pg(10); + { + TsFileTreeWriter writer(&write_file_); + write_single_path_multi_chunk(writer, "d1", "s1", 0, 30, 0); + writer.close(); + } + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow({"d1"}, {"s1"}, 0, 10, result)); + ASSERT_NE(result, nullptr); + + auto ts = collect_timestamps(result); + ASSERT_EQ(ts.size(), 10u); + for (int i = 0; i < 10; ++i) EXPECT_EQ(ts[i], i); + + reader.destroy_query_data_set(result); + reader.close(); +} + +// limit = exactly one chunk (3 pages): stop at the chunk boundary. +TEST_F(TreeQueryByRowTest, SinglePath_LimitEqualsOneChunk) { + PageGuard pg(10); + { + TsFileTreeWriter writer(&write_file_); + write_single_path_multi_chunk(writer, "d1", "s1", 0, 90, 30); + writer.close(); + } + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + // limit=30 -> consume exactly Chunk1, row_limit_=0 prevents Chunk2. + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow({"d1"}, {"s1"}, 0, 30, result)); + ASSERT_NE(result, nullptr); + + auto ts = collect_timestamps(result); + ASSERT_EQ(ts.size(), 30u); + for (int i = 0; i < 30; ++i) EXPECT_EQ(ts[i], i); + + reader.destroy_query_data_set(result); + reader.close(); +} + +// offset skips 2 chunks; limit stops mid-page inside the 3rd chunk. 
+TEST_F(TreeQueryByRowTest, SinglePath_SkipTwoChunksThenLimitMidPage) { + PageGuard pg(10); + { + TsFileTreeWriter writer(&write_file_); + write_single_path_multi_chunk(writer, "d1", "s1", 0, 90, 30); + writer.close(); + } + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow({"d1"}, {"s1"}, 60, 7, result)); + ASSERT_NE(result, nullptr); + + auto ts = collect_timestamps(result); + ASSERT_EQ(ts.size(), 7u); + for (int i = 0; i < 7; ++i) EXPECT_EQ(ts[i], 60 + i); + + reader.destroy_query_data_set(result); + reader.close(); +} + +// ============================================================ +// Multi-path: offset/limit pushdown with multiple chunks per path +// ============================================================ +// +// For multi-path, per-path chunk/page count skip is disabled because +// merged row order does not equal per-path row order. Offset/limit +// are consumed at the merge layer row-by-row. These tests verify +// correctness while exercising the multi-chunk merge path including +// get_next_tsblock_with_hint (min_time_hint forwarding). + +TEST_F(TreeQueryByRowTest, MultiPath_OffsetLimitWithMultipleChunks) { + PageGuard pg(10); + { + TsFileTreeWriter writer(&write_file_); + for (auto dev : {"d1", "d2"}) { + auto* s = new MeasurementSchema("s1", TSDataType::INT64); + std::string device_name = dev; + ASSERT_EQ(E_OK, writer.register_timeseries(device_name, s)); + delete s; + } + // 60 timestamps, flush every 20 -> 3 chunks per device. 
+ for (int r = 0; r < 60; ++r) { + for (auto& dev : {"d1", "d2"}) { + TsRecord rec(dev, static_cast(r)); + rec.add_point("s1", static_cast(r)); + ASSERT_EQ(E_OK, writer.write(rec)); + } + if ((r + 1) % 20 == 0 && r + 1 < 60) { + ASSERT_EQ(E_OK, writer.flush()); + } + } + ASSERT_EQ(E_OK, writer.flush()); + writer.close(); + } + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + // offset=25, limit=10 -> merged rows t=25..34 + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow({"d1", "d2"}, {"s1"}, 25, 10, result)); + ASSERT_NE(result, nullptr); + auto ts = collect_timestamps(result); + ASSERT_EQ(ts.size(), 10u); + for (int i = 0; i < 10; ++i) EXPECT_EQ(ts[i], 25 + i); + reader.destroy_query_data_set(result); + + // offset=40 (crosses chunk boundary) -> rows t=40..49 + ASSERT_EQ(E_OK, reader.queryByRow({"d1", "d2"}, {"s1"}, 40, 10, result)); + ASSERT_NE(result, nullptr); + ts = collect_timestamps(result); + ASSERT_EQ(ts.size(), 10u); + for (int i = 0; i < 10; ++i) EXPECT_EQ(ts[i], 40 + i); + reader.destroy_query_data_set(result); + + reader.close(); +} + +// Two devices with interleaved timestamps (d1=even, d2=odd), multiple +// chunks each. Merged stream is t=0,1,2,...,79 (80 rows). +TEST_F(TreeQueryByRowTest, MultiPath_InterleavedTimestamps_MultipleChunks) { + PageGuard pg(5); + { + std::string d1 = "d1", d2 = "d2"; + TsFileTreeWriter writer(&write_file_); + auto* s1 = new MeasurementSchema("s1", TSDataType::INT64); + auto* s2 = new MeasurementSchema("s1", TSDataType::INT64); + ASSERT_EQ(E_OK, writer.register_timeseries(d1, s1)); + ASSERT_EQ(E_OK, writer.register_timeseries(d2, s2)); + delete s1; + delete s2; + + // d1: t=0,2,4,...,78 d2: t=1,3,5,...,79 flush every 20 pairs. 
+ for (int i = 0; i < 40; ++i) { + TsRecord r1("d1", static_cast(i * 2)); + r1.add_point("s1", static_cast(i * 2)); + ASSERT_EQ(E_OK, writer.write(r1)); + + TsRecord r2("d2", static_cast(i * 2 + 1)); + r2.add_point("s1", static_cast(i * 2 + 1)); + ASSERT_EQ(E_OK, writer.write(r2)); + + if ((i + 1) % 20 == 0 && i + 1 < 40) { + ASSERT_EQ(E_OK, writer.flush()); + } + } + ASSERT_EQ(E_OK, writer.flush()); + writer.close(); + } + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + // offset=30, limit=20 -> t=30..49 + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow({"d1", "d2"}, {"s1"}, 30, 20, result)); + ASSERT_NE(result, nullptr); + + auto ts = collect_timestamps(result); + ASSERT_EQ(ts.size(), 20u); + for (int i = 0; i < 20; ++i) EXPECT_EQ(ts[i], 30 + i); + + reader.destroy_query_data_set(result); + reader.close(); +} + +// Three devices, offset at exact chunk boundary, limit cuts mid-chunk. +TEST_F(TreeQueryByRowTest, MultiPath_OffsetAtMergedChunkBoundary) { + PageGuard pg(10); + { + TsFileTreeWriter writer(&write_file_); + for (auto& dev : {"d1", "d2", "d3"}) { + auto* s = new MeasurementSchema("s1", TSDataType::INT64); + std::string device_name = dev; + ASSERT_EQ(E_OK, writer.register_timeseries(device_name, s)); + delete s; + } + for (int r = 0; r < 60; ++r) { + for (auto& dev : {"d1", "d2", "d3"}) { + TsRecord rec(dev, static_cast(r)); + rec.add_point("s1", static_cast(r)); + ASSERT_EQ(E_OK, writer.write(rec)); + } + if ((r + 1) % 20 == 0 && r + 1 < 60) { + ASSERT_EQ(E_OK, writer.flush()); + } + } + ASSERT_EQ(E_OK, writer.flush()); + writer.close(); + } + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + // offset=20 -> skip first 20 merged rows, get t=20..26 + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, + reader.queryByRow({"d1", "d2", "d3"}, {"s1"}, 20, 7, result)); + ASSERT_NE(result, nullptr); + + auto ts = collect_timestamps(result); + ASSERT_EQ(ts.size(), 7u); + for (int i = 0; i < 7; ++i) 
EXPECT_EQ(ts[i], 20 + i); + + reader.destroy_query_data_set(result); + reader.close(); +} + +// ============================================================ +// Multi-path: min_time_hint skips stale (out-of-order) chunks +// ============================================================ +// +// TsFile allows multiple flushes for the same path. A later flush can +// produce a chunk whose time range is entirely BEFORE the current merge +// cursor. The min_time_hint pushdown (should_skip_chunk_by_time) detects +// end_time < hint and skips that chunk to avoid re-inserting already-passed +// timestamps into the heap. +// +// File layout: +// d2 flush-1: t=50..59 -> chunk-d2-1 (end_time=59) +// d1 flush-2: t=0..99 -> chunk-d1-1 +// d2 flush-3: t=0..9 -> chunk-d2-2 (end_time=9) <- stale! +// +// When the merge exhausts chunk-d2-1 (cursor ~59), it calls +// get_next_tsblock_with_hint(d2, hint=59). chunk-d2-2 has end_time=9 < 59 +// -> should_skip_chunk_by_time returns true -> chunk-d2-2 is skipped. +// +// Observable effect: +// - Total merged rows = 100 (all from d1, t=0..99). +// - d2 is non-null only at t=50..59 (the 10 rows from chunk-d2-1). +// - No out-of-order or duplicate timestamps. 
+TEST_F(TreeQueryByRowTest, MultiPath_TimeHint_SkipsStaleChunk) { + { + TsFileTreeWriter writer(&write_file_); + std::string d1 = "d1", d2 = "d2"; + auto* sd1 = new MeasurementSchema("s1", TSDataType::INT64); + auto* sd2 = new MeasurementSchema("s1", TSDataType::INT64); + ASSERT_EQ(E_OK, writer.register_timeseries(d1, sd1)); + ASSERT_EQ(E_OK, writer.register_timeseries(d2, sd2)); + delete sd1; + delete sd2; + + // d2 chunk1: t=50..59 + for (int64_t t = 50; t < 60; ++t) { + TsRecord rec("d2", t); + rec.add_point("s1", t * 10); + ASSERT_EQ(E_OK, writer.write(rec)); + } + ASSERT_EQ(E_OK, writer.flush()); + + // d1: t=0..99 + for (int64_t t = 0; t < 100; ++t) { + TsRecord rec("d1", t); + rec.add_point("s1", t); + ASSERT_EQ(E_OK, writer.write(rec)); + } + ASSERT_EQ(E_OK, writer.flush()); + + // d2 chunk2 (stale): t=0..9 + for (int64_t t = 0; t < 10; ++t) { + TsRecord rec("d2", t); + rec.add_point("s1", t * 10); + ASSERT_EQ(E_OK, writer.write(rec)); + } + ASSERT_EQ(E_OK, writer.flush()); + writer.close(); + } + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow({"d1", "d2"}, {"s1"}, 0, -1, result)); + ASSERT_NE(result, nullptr); + + int d2_nonnull_count = 0; + int64_t prev_ts = INT64_MIN; + size_t total_rows = 0; + bool has_next = false; + while (IS_SUCC(result->next(has_next)) && has_next) { + RowRecord* rr = result->get_row_record(); + int64_t t = rr->get_timestamp(); + + EXPECT_GT(t, prev_ts) + << "Non-monotonic timestamp: " << t << " after " << prev_ts; + prev_ts = t; + total_rows++; + + if (!result->is_null("d2.s1")) { + EXPECT_GE(t, 50) << "d2 non-null at unexpected time " << t; + EXPECT_LT(t, 60) << "d2 non-null at unexpected time " << t; + d2_nonnull_count++; + } + } + + EXPECT_EQ(total_rows, 100u); // d1 drives 100 rows + EXPECT_EQ(d2_nonnull_count, 10); // only chunk-d2-1 (t=50..59) emitted + + reader.destroy_query_data_set(result); + reader.close(); +} + +// Same 
stale-chunk scenario but with offset/limit applied on top. +// offset=60, limit=10 -> rows t=60..69; d2 is null for all of them. +// Verifies that offset counting is not confused by the skipped stale chunk. +TEST_F(TreeQueryByRowTest, MultiPath_TimeHint_SkipsStaleChunk_WithOffset) { + { + TsFileTreeWriter writer(&write_file_); + std::string d1 = "d1", d2 = "d2"; + auto* sd1 = new MeasurementSchema("s1", TSDataType::INT64); + auto* sd2 = new MeasurementSchema("s1", TSDataType::INT64); + ASSERT_EQ(E_OK, writer.register_timeseries(d1, sd1)); + ASSERT_EQ(E_OK, writer.register_timeseries(d2, sd2)); + delete sd1; + delete sd2; + + for (int64_t t = 50; t < 60; ++t) { + TsRecord rec("d2", t); + rec.add_point("s1", t * 10); + ASSERT_EQ(E_OK, writer.write(rec)); + } + ASSERT_EQ(E_OK, writer.flush()); + + for (int64_t t = 0; t < 100; ++t) { + TsRecord rec("d1", t); + rec.add_point("s1", t); + ASSERT_EQ(E_OK, writer.write(rec)); + } + ASSERT_EQ(E_OK, writer.flush()); + + for (int64_t t = 0; t < 10; ++t) { + TsRecord rec("d2", t); + rec.add_point("s1", t * 10); + ASSERT_EQ(E_OK, writer.write(rec)); + } + ASSERT_EQ(E_OK, writer.flush()); + writer.close(); + } + + TsFileTreeReader reader; + ASSERT_EQ(E_OK, reader.open(file_name_)); + + ResultSet* result = nullptr; + ASSERT_EQ(E_OK, reader.queryByRow({"d1", "d2"}, {"s1"}, 60, 10, result)); + ASSERT_NE(result, nullptr); + + auto ts = collect_timestamps(result); + ASSERT_EQ(ts.size(), 10u); + for (int i = 0; i < 10; ++i) EXPECT_EQ(ts[i], 60 + i); + + reader.destroy_query_data_set(result); + reader.close(); +} + +// Pushdown is faster than full query + manual next: queryByRow(offset, limit) +// skips at Chunk/Page level; old query then manual next decodes every row. +// Timing tolerance 5% to allow measurement noise. 
+TEST_F(TreeQueryByRowTest, QueryByRowFasterThanManualNext) { + std::vector devices = {"d1"}; + std::vector measurements = {"s1"}; + const int num_rows = 8000; + const int offset = 3000; + const int limit = 1000; + write_test_file(devices, measurements, num_rows); + + const int num_iters = 5; + const double tolerance = 0.05; + + auto run_query_by_row = [this, &devices, &measurements, offset, limit]() { + TsFileTreeReader reader; + if (reader.open(file_name_) != E_OK) return -1.0; + ResultSet* rs = nullptr; + if (reader.queryByRow(devices, measurements, offset, limit, rs) != + E_OK) { + reader.close(); + return -1.0; + } + auto start = std::chrono::steady_clock::now(); + bool has_next = false; + while (IS_SUCC(rs->next(has_next)) && has_next) { + (void)rs->get_row_record()->get_timestamp(); + } + auto end = std::chrono::steady_clock::now(); + reader.destroy_query_data_set(rs); + reader.close(); + return std::chrono::duration(end - start).count(); + }; + + auto run_manual_next = [this, &devices, &measurements, offset, limit]() { + TsFileTreeReader reader; + if (reader.open(file_name_) != E_OK) return -1.0; + ResultSet* rs = nullptr; + if (reader.query(devices, measurements, INT64_MIN, INT64_MAX, rs) != + E_OK) { + reader.close(); + return -1.0; + } + auto start = std::chrono::steady_clock::now(); + bool has_next = false; + int skipped = 0; + int taken = 0; + while (IS_SUCC(rs->next(has_next)) && has_next) { + if (skipped < offset) { + skipped++; + continue; + } + if (taken >= limit) break; + (void)rs->get_row_record()->get_timestamp(); + taken++; + } + auto end = std::chrono::steady_clock::now(); + reader.destroy_query_data_set(rs); + reader.close(); + return std::chrono::duration(end - start).count(); + }; + + double min_by_row = 1e9; + double min_manual = 1e9; + for (int i = 0; i < num_iters; i++) { + double t1 = run_query_by_row(); + double t2 = run_manual_next(); + if (t1 > 0 && t1 < min_by_row) min_by_row = t1; + if (t2 > 0 && t2 < min_manual) min_manual = t2; + 
} + ASSERT_GT(min_manual, 0.0) << "manual next timed run failed"; + ASSERT_GT(min_by_row, 0.0) << "queryByRow timed run failed"; + EXPECT_LT(min_by_row, min_manual * (1.0 + tolerance)) + << "queryByRow (pushdown) should be faster than query+manual next " + "(min_by_row=" + << min_by_row << " ms, min_manual=" << min_manual << " ms)"; +} diff --git a/python/tests/test_query_by_row.py b/python/tests/test_query_by_row.py new file mode 100644 index 000000000..e45cd1b20 --- /dev/null +++ b/python/tests/test_query_by_row.py @@ -0,0 +1,112 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +import os + +import pytest + +from tsfile import ColumnCategory, ColumnSchema, Field, RowRecord, TableSchema, TSDataType +from tsfile import TimeseriesSchema, TsFileReader, TsFileTableWriter, TsFileWriter, Tablet + + +def test_query_tree_by_row_offset_limit(): + file_path = "python_tree_query_by_row_test.tsfile" + if os.path.exists(file_path): + os.remove(file_path) + + try: + device_ids = ["root.d1", "root.d2"] + measurement_names = ["s1", "s2"] + num_rows = 10 + + writer = TsFileWriter(file_path) + for device_id in device_ids: + for measurement in measurement_names: + writer.register_timeseries(device_id, TimeseriesSchema(measurement, TSDataType.INT64)) + + for t in range(num_rows): + for dev_idx, device_id in enumerate(device_ids): + fields = [] + for meas_idx, measurement in enumerate(measurement_names): + value = t * 100 + meas_idx + dev_idx * 10000 + fields.append(Field(measurement, value, TSDataType.INT64)) + writer.write_row_record(RowRecord(device_id, t, fields)) + + writer.close() + + reader = TsFileReader(file_path) + offset = 3 + limit = 5 + with reader.query_tree_by_row(device_ids, measurement_names, offset, limit) as result: + row = 0 + while result.next(): + ts = result.get_value_by_index(1) + assert ts == offset + row + # Column order follows (device_ids outer loop) + (measurement_names inner loop). 
+ assert result.get_value_by_index(2) == ts * 100 + 0 + 0 * 10000 + assert result.get_value_by_index(3) == ts * 100 + 1 + 0 * 10000 + assert result.get_value_by_index(4) == ts * 100 + 0 + 1 * 10000 + assert result.get_value_by_index(5) == ts * 100 + 1 + 1 * 10000 + row += 1 + assert row == limit + reader.close() + finally: + if os.path.exists(file_path): + os.remove(file_path) + + +def test_query_table_by_row_offset_limit(): + file_path = "python_table_query_by_row_test.tsfile" + if os.path.exists(file_path): + os.remove(file_path) + + try: + table_name = "t1" + schema = TableSchema( + table_name, + [ + ColumnSchema("device", TSDataType.STRING, ColumnCategory.TAG), + ColumnSchema("s1", TSDataType.INT64, ColumnCategory.FIELD), + ], + ) + + num_rows = 10 + with TsFileTableWriter(file_path, schema) as writer: + tablet = Tablet(["device", "s1"], [TSDataType.STRING, TSDataType.INT64], num_rows) + for t in range(num_rows): + tablet.add_timestamp(t, t) + tablet.add_value_by_name("device", t, f"device_{t}") + tablet.add_value_by_name("s1", t, t * 10) + writer.write_table(tablet) + + reader = TsFileReader(file_path) + offset = 3 + limit = 5 + with reader.query_table_by_row(table_name, ["device", "s1"], offset, limit) as result: + row = 0 + while result.next(): + ts = result.get_value_by_index(1) + assert ts == offset + row + assert result.get_value_by_index(2) == f"device_{ts}" + assert result.get_value_by_index(3) == ts * 10 + row += 1 + assert row == limit + reader.close() + finally: + if os.path.exists(file_path): + os.remove(file_path) + diff --git a/python/tsfile/tsfile_cpp.pxd b/python/tsfile/tsfile_cpp.pxd index 65681257a..29008148d 100644 --- a/python/tsfile/tsfile_cpp.pxd +++ b/python/tsfile/tsfile_cpp.pxd @@ -184,6 +184,21 @@ cdef extern from "cwrapper/tsfile_cwrapper.h": int64_t start_time, int64_t end_time, ErrorCode* err_code); + ResultSet tsfile_reader_query_tree_by_row(TsFileReader reader, + char** device_ids, + int device_ids_len, + char** measurement_names, 
+ int measurement_names_len, + int offset, int limit, + ErrorCode* err_code); + + ResultSet tsfile_reader_query_table_by_row(TsFileReader reader, + const char* table_name, + char** column_names, + int column_names_len, + int offset, int limit, + ErrorCode* err_code); + ResultSet tsfile_query_table_batch(TsFileReader reader, const char * table_name, char** columns, uint32_t column_num, diff --git a/python/tsfile/tsfile_py_cpp.pxd b/python/tsfile/tsfile_py_cpp.pxd index cdc4f5c62..197a4ec87 100644 --- a/python/tsfile/tsfile_py_cpp.pxd +++ b/python/tsfile/tsfile_py_cpp.pxd @@ -56,6 +56,14 @@ cdef public api ResultSet tsfile_reader_query_table_batch_c(TsFileReader reader, int64_t start_time, int64_t end_time, int batch_size) cdef public api ResultSet tsfile_reader_query_paths_c(TsFileReader reader, object device_name, object sensor_list, int64_t start_time, int64_t end_time) + +cdef public api ResultSet tsfile_reader_query_tree_by_row_c(TsFileReader reader, object device_ids, + object measurement_names, int offset, + int limit) + +cdef public api ResultSet tsfile_reader_query_table_by_row_c(TsFileReader reader, object table_name, + object column_list, int offset, + int limit) cdef public api object get_table_schema(TsFileReader reader, object table_name) cdef public api object get_all_table_schema(TsFileReader reader) cdef public api object get_all_timeseries_schema(TsFileReader reader) diff --git a/python/tsfile/tsfile_py_cpp.pyx b/python/tsfile/tsfile_py_cpp.pyx index 374a56eb7..4febeb731 100644 --- a/python/tsfile/tsfile_py_cpp.pyx +++ b/python/tsfile/tsfile_py_cpp.pyx @@ -749,6 +749,88 @@ cdef ResultSet tsfile_reader_query_table_on_tree_c(TsFileReader reader, object c free( columns) columns = NULL +cdef ResultSet tsfile_reader_query_tree_by_row_c(TsFileReader reader, + object device_ids, + object measurement_names, + int offset, int limit): + cdef ResultSet result + cdef int device_num = len(device_ids) + cdef int measurement_num = len(measurement_names) + cdef 
char** device_ids_c = malloc(sizeof(char *) * device_num) + cdef char** measurement_names_c = malloc(sizeof(char *) * measurement_num) + cdef int i + cdef int j + cdef ErrorCode code = 0 + + if device_ids_c == NULL or measurement_names_c == NULL: + raise MemoryError("Failed to allocate memory for tree by-row query arrays") + + try: + for i in range(device_num): + device_ids_c[i] = strdup(( device_ids[i]).encode('utf-8')) + if device_ids_c[i] == NULL: + raise MemoryError("Failed to allocate memory for device id") + for j in range(measurement_num): + measurement_names_c[j] = strdup(( measurement_names[j]).encode('utf-8')) + if measurement_names_c[j] == NULL: + raise MemoryError("Failed to allocate memory for measurement name") + + result = tsfile_reader_query_tree_by_row(reader, + device_ids_c, device_num, + measurement_names_c, measurement_num, + offset, limit, &code) + check_error(code) + return result + finally: + if device_ids_c != NULL: + for i in range(device_num): + if device_ids_c[i] != NULL: + free( device_ids_c[i]) + device_ids_c[i] = NULL + free( device_ids_c) + device_ids_c = NULL + if measurement_names_c != NULL: + for j in range(measurement_num): + if measurement_names_c[j] != NULL: + free( measurement_names_c[j]) + measurement_names_c[j] = NULL + free( measurement_names_c) + measurement_names_c = NULL + +cdef ResultSet tsfile_reader_query_table_by_row_c(TsFileReader reader, + object table_name, + object column_list, + int offset, int limit): + cdef ResultSet result + cdef int column_num = len(column_list) + cdef char** columns = malloc(sizeof(char *) * column_num) + cdef int i + cdef bytes table_name_bytes = PyUnicode_AsUTF8String(table_name) + cdef const char * table_name_c = table_name_bytes + cdef ErrorCode code = 0 + + if columns == NULL: + raise MemoryError("Failed to allocate memory for table by-row query columns") + try: + for i in range(column_num): + columns[i] = strdup(( column_list[i]).encode('utf-8')) + if columns[i] == NULL: + raise 
MemoryError("Failed to allocate memory for column name") + + result = tsfile_reader_query_table_by_row(reader, + table_name_c, columns, column_num, + offset, limit, &code) + check_error(code) + return result + finally: + if columns != NULL: + for i in range(column_num): + if columns[i] != NULL: + free( columns[i]) + columns[i] = NULL + free( columns) + columns = NULL + cdef ResultSet tsfile_reader_query_table_batch_c(TsFileReader reader, object table_name, object column_list, int64_t start_time, int64_t end_time, int batch_size): cdef ResultSet result @@ -765,14 +847,17 @@ cdef ResultSet tsfile_reader_query_table_batch_c(TsFileReader reader, object tab columns[i] = strdup(( column_list[i]).encode('utf-8')) if columns[i] == NULL: raise MemoryError("Failed to allocate memory for column name") - result = tsfile_query_table_batch(reader, table_name_c, columns, column_num, start_time, end_time, batch_size, &code) + result = tsfile_query_table_batch(reader, table_name_c, columns, + column_num, start_time, end_time, + batch_size, &code) check_error(code) return result finally: if columns != NULL: for i in range(column_num): - free( columns[i]) - columns[i] = NULL + if columns[i] != NULL: + free( columns[i]) + columns[i] = NULL free( columns) columns = NULL diff --git a/python/tsfile/tsfile_reader.pyx b/python/tsfile/tsfile_reader.pyx index 44ffd8604..3a1a15d4d 100644 --- a/python/tsfile/tsfile_reader.pyx +++ b/python/tsfile/tsfile_reader.pyx @@ -357,6 +357,38 @@ cdef class TsFileReaderPy: self.activate_result_set_list.add(pyresult) return pyresult + def query_tree_by_row(self, device_ids : List[str], measurement_names : List[str], + offset : int = 0, limit : int = -1) -> ResultSetPy: + """ + Execute tree-model query by row with offset/limit. 
+ """ + if len(device_ids) == 0: + raise ValueError("device_ids must not be empty") + if len(measurement_names) == 0: + raise ValueError("measurement_names must not be empty") + + cdef ResultSet result + result = tsfile_reader_query_tree_by_row_c(self.reader, device_ids, + measurement_names, offset, limit) + pyresult = ResultSetPy(self, True) + pyresult.init_c(result, device_ids[0]) + self.activate_result_set_list.add(pyresult) + return pyresult + + def query_table_by_row(self, table_name : str, column_names : List[str], + offset : int = 0, limit : int = -1) -> ResultSetPy: + """ + Execute table-model query by row with offset/limit. + """ + cdef ResultSet result + result = tsfile_reader_query_table_by_row_c(self.reader, table_name.lower(), + [column_name.lower() for column_name in column_names], + offset, limit) + pyresult = ResultSetPy(self) + pyresult.init_c(result, table_name) + self.activate_result_set_list.add(pyresult) + return pyresult + def query_timeseries(self, device_name : str, sensor_list : List[str], start_time : int = 0, end_time : int = 0) -> ResultSetPy: """