diff --git a/tests/core/framework/config/config_json_test.cpp b/tests/core/framework/config/config_json_test.cpp index ffde378a5..d22515138 100644 --- a/tests/core/framework/config/config_json_test.cpp +++ b/tests/core/framework/config/config_json_test.cpp @@ -17,6 +17,7 @@ limitations under the License. #include #include +#include #include #include @@ -58,12 +59,63 @@ class ConfigJsonFileFlagGuard final { std::string old_config_json_file_; }; +class DumpConfigJsonFlagGuard final { + public: + explicit DumpConfigJsonFlagGuard(const std::string& dump_config_json_file) + : old_enable_dump_config_json_(FLAGS_enable_dump_config_json), + old_dump_config_json_file_(FLAGS_dump_config_json_file) { + FLAGS_dump_config_json_file = dump_config_json_file; + } + + ~DumpConfigJsonFlagGuard() { + FLAGS_enable_dump_config_json = old_enable_dump_config_json_; + FLAGS_dump_config_json_file = old_dump_config_json_file_; + } + + private: + bool old_enable_dump_config_json_; + std::string old_dump_config_json_file_; +}; + +class StartupConfigGuard final { + public: + StartupConfigGuard() + : kv_cache_config_(KVCacheConfig::get_instance()), + scheduler_config_(SchedulerConfig::get_instance()), + old_block_size_(kv_cache_config_.block_size()), + old_enable_prefix_cache_(kv_cache_config_.enable_prefix_cache()), + old_max_tokens_per_batch_(scheduler_config_.max_tokens_per_batch()), + old_enable_chunked_prefill_( + scheduler_config_.enable_chunked_prefill()) {} + + ~StartupConfigGuard() { + kv_cache_config_.block_size(old_block_size_) + .enable_prefix_cache(old_enable_prefix_cache_); + scheduler_config_.max_tokens_per_batch(old_max_tokens_per_batch_) + .enable_chunked_prefill(old_enable_chunked_prefill_); + } + + private: + KVCacheConfig& kv_cache_config_; + SchedulerConfig& scheduler_config_; + int32_t old_block_size_; + bool old_enable_prefix_cache_; + int32_t old_max_tokens_per_batch_; + bool old_enable_chunked_prefill_; +}; + void write_config_file(const std::filesystem::path& config_path, std::string_view config_json) { std::ofstream config_file(config_path); config_file << config_json; } +nlohmann::ordered_json read_json_file(const std::filesystem::path& file_path) { + std::ifstream input_file(file_path); + EXPECT_TRUE(input_file.is_open()) << file_path; + return nlohmann::ordered_json::parse(input_file); +} + std::filesystem::path config_test_file_path() { const std::filesystem::path source_config_path = std::filesystem::path(__FILE__).parent_path() / "config_test.json"; @@ -222,5 +274,49 @@ TEST(ConfigJsonTest, MissingJsonFileKeepsFlagDefaults) { EXPECT_EQ(scheduler_config.max_seqs_per_batch(), 1024); } +TEST(ConfigJsonTest, DumpStartupConfigSkipsWhenDisabled) { + const std::filesystem::path dump_path = + std::filesystem::temp_directory_path() / + "xllm_dump_config_json_test_disabled.json"; + std::filesystem::remove(dump_path); + DumpConfigJsonFlagGuard flag_guard(dump_path.string()); + FLAGS_enable_dump_config_json = false; + + config::dump_startup_config(); + + EXPECT_FALSE(std::filesystem::exists(dump_path)); +} + +TEST(ConfigJsonTest, DumpStartupConfigWritesNonDefaultValuesOnly) { + const std::filesystem::path dump_path = + std::filesystem::temp_directory_path() / + "xllm_dump_config_json_test_non_default.json"; + std::filesystem::remove(dump_path); + DumpConfigJsonFlagGuard flag_guard(dump_path.string()); + StartupConfigGuard startup_config_guard; + + KVCacheConfig::get_instance().block_size(256).enable_prefix_cache(false); + SchedulerConfig::get_instance() + .max_tokens_per_batch(2048) + .enable_chunked_prefill(false); + FLAGS_enable_dump_config_json = true; + + config::dump_startup_config(); + + ASSERT_TRUE(std::filesystem::exists(dump_path)); + const nlohmann::ordered_json config_json = read_json_file(dump_path); + EXPECT_EQ(config_json.at("block_size").get(), 256); + EXPECT_FALSE(config_json.at("enable_prefix_cache").get()); + EXPECT_EQ(config_json.at("max_tokens_per_batch").get(), 2048); + EXPECT_FALSE(config_json.at("enable_chunked_prefill").get()); + + EXPECT_FALSE(config_json.contains("max_cache_size")); + EXPECT_FALSE(config_json.contains("kv_cache_dtype")); + EXPECT_FALSE(config_json.contains("max_seqs_per_batch")); + EXPECT_FALSE(config_json.contains("priority_strategy")); + + std::filesystem::remove(dump_path); +} + } // namespace } // namespace xllm diff --git a/xllm/core/common/global_flags.h b/xllm/core/common/global_flags.h index 322f8d8ba..bfe092e30 100644 --- a/xllm/core/common/global_flags.h +++ b/xllm/core/common/global_flags.h @@ -19,6 +19,10 @@ limitations under the License. DECLARE_string(config_json_file); +DECLARE_bool(enable_dump_config_json); + +DECLARE_string(dump_config_json_file); + DECLARE_string(host); DECLARE_int32(port); diff --git a/xllm/core/framework/config/CMakeLists.txt b/xllm/core/framework/config/CMakeLists.txt index 6b4341473..ab01ef45c 100644 --- a/xllm/core/framework/config/CMakeLists.txt +++ b/xllm/core/framework/config/CMakeLists.txt @@ -46,5 +46,6 @@ cc_library( DEPS gflags::gflags glog::glog + nlohmann_json::nlohmann_json :util ) diff --git a/xllm/core/framework/config/beam_search_config.cpp b/xllm/core/framework/config/beam_search_config.cpp index efc840de2..843ce5e30 100644 --- a/xllm/core/framework/config/beam_search_config.cpp +++ b/xllm/core/framework/config/beam_search_config.cpp @@ -57,6 +57,19 @@ void BeamSearchConfig::from_json(const JsonReader& json) { json.value_or("enable_topk_sorted", enable_topk_sorted())); } +void BeamSearchConfig::append_config_json( + nlohmann::ordered_json& config_json) const { + const BeamSearchConfig default_config; + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_beam_search_kernel); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, beam_width); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_block_copy_kernel); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_topk_sorted); +} + BeamSearchConfig& BeamSearchConfig::get_instance() { static BeamSearchConfig config; return config; diff --git a/xllm/core/framework/config/beam_search_config.h b/xllm/core/framework/config/beam_search_config.h index 7bdd99289..e7b0c8bf4 100644 --- a/xllm/core/framework/config/beam_search_config.h +++ b/xllm/core/framework/config/beam_search_config.h @@ -16,6 +16,7 @@ limitations under the License. #pragma once #include +#include #include "core/common/macros.h" #include "core/framework/config/option_category.h" @@ -33,6 +34,7 @@ class BeamSearchConfig final { void from_flags(); void from_json(const JsonReader& json); + void append_config_json(nlohmann::ordered_json& config_json) const; void initialize(); [[nodiscard]] static const OptionCategory& option_category() { @@ -48,7 +50,11 @@ class BeamSearchConfig final { PROPERTY(int32_t, beam_width) = 1; +#if defined(USE_NPU) || defined(USE_CUDA) + PROPERTY(bool, enable_block_copy_kernel) = true; +#else PROPERTY(bool, enable_block_copy_kernel) = false; +#endif PROPERTY(bool, enable_topk_sorted) = true; }; diff --git a/xllm/core/framework/config/config_json_utils.cpp b/xllm/core/framework/config/config_json_utils.cpp index 4ec398412..7e393609c 100644 --- a/xllm/core/framework/config/config_json_utils.cpp +++ b/xllm/core/framework/config/config_json_utils.cpp @@ -19,16 +19,46 @@ limitations under the License. #include #include +#include +#include #include #include #include #include +#include + +#include "core/framework/config/beam_search_config.h" +#include "core/framework/config/disagg_pd_config.h" +#include "core/framework/config/distributed_config.h" +#include "core/framework/config/dit_config.h" +#include "core/framework/config/eplb_config.h" +#include "core/framework/config/execution_config.h" +#include "core/framework/config/kernel_config.h" +#include "core/framework/config/kv_cache_config.h" +#include "core/framework/config/kv_cache_store_config.h" +#include "core/framework/config/load_config.h" +#include "core/framework/config/model_config.h" +#include "core/framework/config/parallel_config.h" +#include "core/framework/config/profile_config.h" +#include "core/framework/config/rec_config.h" +#include "core/framework/config/scheduler_config.h" +#include "core/framework/config/service_config.h" +#include "core/framework/config/speculative_config.h" DEFINE_string(config_json_file, "", "Path to a JSON config file. Values in the file override " "command-line flag values."); +DEFINE_bool(enable_dump_config_json, + false, + "Whether to dump the resolved startup config as JSON."); + +DEFINE_string(dump_config_json_file, + "xllm_config.json", + "Path to write the resolved startup config as JSON. Used only " + "when enable_dump_config_json is true."); + namespace xllm::config { namespace { @@ -84,6 +114,30 @@ void reset_parsed_json_config_if_path_changed() { parsed_json_config_once() = std::make_unique(); } +nlohmann::ordered_json build_startup_config_json() { + nlohmann::ordered_json config_json = nlohmann::ordered_json::object(); + + ServiceConfig::get_instance().append_config_json(config_json); + ModelConfig::get_instance().append_config_json(config_json); + LoadConfig::get_instance().append_config_json(config_json); + KVCacheConfig::get_instance().append_config_json(config_json); + KVCacheStoreConfig::get_instance().append_config_json(config_json); + BeamSearchConfig::get_instance().append_config_json(config_json); + SchedulerConfig::get_instance().append_config_json(config_json); + ParallelConfig::get_instance().append_config_json(config_json); + EPLBConfig::get_instance().append_config_json(config_json); + DistributedConfig::get_instance().append_config_json(config_json); + DisaggPDConfig::get_instance().append_config_json(config_json); + SpeculativeConfig::get_instance().append_config_json(config_json); + ProfileConfig::get_instance().append_config_json(config_json); + ExecutionConfig::get_instance().append_config_json(config_json); + KernelConfig::get_instance().append_config_json(config_json); + DiTConfig::get_instance().append_config_json(config_json); + RecConfig::get_instance().append_config_json(config_json); + + return config_json; +} + } // namespace JsonReader load_json_file(const std::string& config_path) { @@ -109,4 +163,38 @@ const std::optional& get_parsed_json_config() { return parsed_json_config(); } +void dump_startup_config() { + if (!FLAGS_enable_dump_config_json) { + return; + } + + const std::filesystem::path dump_path = + std::filesystem::path(FLAGS_dump_config_json_file).lexically_normal(); + if (dump_path.has_parent_path()) { + std::error_code error_code; + std::filesystem::create_directories(dump_path.parent_path(), error_code); + if (error_code) { + LOG(FATAL) << "Failed to create startup config dump directory: " + << dump_path.parent_path().string() + << ", error: " << error_code.message(); + } + } + + std::ofstream output_stream(dump_path); + if (!output_stream.is_open()) { + LOG(FATAL) << "Failed to open startup config dump file: " + << dump_path.string(); + } + + const nlohmann::ordered_json config_json = build_startup_config_json(); + output_stream << config_json.dump(2) << "\n"; + output_stream.close(); + if (!output_stream.good()) { + LOG(FATAL) << "Failed to write startup config dump file: " + << dump_path.string(); + } + + LOG(INFO) << "Dumped startup config to " << dump_path.string(); +} + } // namespace xllm::config diff --git a/xllm/core/framework/config/config_json_utils.h b/xllm/core/framework/config/config_json_utils.h index a8c7e396a..7f34fc1d7 100644 --- a/xllm/core/framework/config/config_json_utils.h +++ b/xllm/core/framework/config/config_json_utils.h @@ -15,6 +15,7 @@ limitations under the License. #pragma once +#include #include #include #include @@ -29,4 +30,20 @@ JsonReader parse_json_string(std::string_view config_json); const std::optional& get_parsed_json_config(); +void dump_startup_config(); + } // namespace xllm::config + +#define APPEND_JSON_VALUE_IF_NOT_DEFAULT( \ + config_json, key, value, default_value) \ + do { \ + const auto& config_json_value = (value); \ + if (config_json_value != (default_value)) { \ + (config_json)[key] = config_json_value; \ + } \ + } while (false) + +#define APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( \ + config_json, default_config, property) \ + APPEND_JSON_VALUE_IF_NOT_DEFAULT( \ + config_json, #property, property(), (default_config).property()) diff --git a/xllm/core/framework/config/disagg_pd_config.cpp b/xllm/core/framework/config/disagg_pd_config.cpp index 6ae829b9d..e0be4b00a 100644 --- a/xllm/core/framework/config/disagg_pd_config.cpp +++ b/xllm/core/framework/config/disagg_pd_config.cpp @@ -95,6 +95,25 @@ void DisaggPDConfig::from_json(const JsonReader& json) { transfer_listen_port())); } +void DisaggPDConfig::append_config_json( + nlohmann::ordered_json& config_json) const { + const DisaggPDConfig default_config; + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_disagg_pd); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_pd_ooc); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, disagg_pd_port); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, instance_role); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, kv_cache_transfer_type); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, kv_cache_transfer_mode); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, transfer_listen_port); +} + DisaggPDConfig& DisaggPDConfig::get_instance() { static DisaggPDConfig config; return config; diff --git a/xllm/core/framework/config/disagg_pd_config.h b/xllm/core/framework/config/disagg_pd_config.h index bc349e329..781eb7b56 100644 --- a/xllm/core/framework/config/disagg_pd_config.h +++ b/xllm/core/framework/config/disagg_pd_config.h @@ -16,6 +16,7 @@ limitations under the License. #pragma once #include +#include #include #include "core/common/macros.h" @@ -36,6 +37,7 @@ class DisaggPDConfig final { void from_flags(); void from_json(const JsonReader& json); + void append_config_json(nlohmann::ordered_json& config_json) const; void initialize(); void normalize_mlu(KVCacheConfig& kv_cache_config, SchedulerConfig& scheduler_config); diff --git a/xllm/core/framework/config/distributed_config.cpp b/xllm/core/framework/config/distributed_config.cpp index 3d4c3e25a..0f626e9b0 100644 --- a/xllm/core/framework/config/distributed_config.cpp +++ b/xllm/core/framework/config/distributed_config.cpp @@ -81,6 +81,30 @@ void DistributedConfig::from_json(const JsonReader& json) { .etcd_ttl(json.value_or("etcd_ttl", etcd_ttl())); } +void DistributedConfig::append_config_json( + nlohmann::ordered_json& config_json) const { + const DistributedConfig default_config; + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, master_node_addr); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, xtensor_master_node_addr); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT(config_json, default_config, nnodes); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, node_rank); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, device_ip); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, etcd_addr); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, etcd_namespace); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_service_routing); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, heart_beat_interval); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, etcd_ttl); +} + DistributedConfig& DistributedConfig::get_instance() { static DistributedConfig config; return config; diff --git a/xllm/core/framework/config/distributed_config.h b/xllm/core/framework/config/distributed_config.h index 6418c12e3..a2dd3f9a7 100644 --- a/xllm/core/framework/config/distributed_config.h +++ b/xllm/core/framework/config/distributed_config.h @@ -16,6 +16,7 @@ limitations under the License. #pragma once #include +#include #include #include "core/common/macros.h" @@ -34,6 +35,7 @@ class DistributedConfig final { void from_flags(); void from_json(const JsonReader& json); + void append_config_json(nlohmann::ordered_json& config_json) const; void initialize(); [[nodiscard]] static const OptionCategory& option_category() { diff --git a/xllm/core/framework/config/dit_config.cpp b/xllm/core/framework/config/dit_config.cpp index 3f3321edf..729ab5392 100644 --- a/xllm/core/framework/config/dit_config.cpp +++ b/xllm/core/framework/config/dit_config.cpp @@ -115,6 +115,36 @@ void DiTConfig::from_json(const JsonReader& json) { "dit_generation_image_area_max", dit_generation_image_area_max())); } +void DiTConfig::append_config_json(nlohmann::ordered_json& config_json) const { + const DiTConfig default_config; + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, max_requests_per_batch); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, dit_cache_policy); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, dit_cache_warmup_steps); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, dit_cache_n_derivatives); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, dit_cache_skip_interval_steps); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, dit_cache_residual_diff_threshold); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, dit_cache_start_steps); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, dit_cache_end_steps); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, dit_cache_start_blocks); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, dit_cache_end_blocks); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, dit_sp_communication_overlap); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, dit_debug_print); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, dit_generation_image_area_max); +} + DiTConfig& DiTConfig::get_instance() { static DiTConfig config; return config; diff --git a/xllm/core/framework/config/dit_config.h b/xllm/core/framework/config/dit_config.h index f495e73ec..5aa6e9f3c 100644 --- a/xllm/core/framework/config/dit_config.h +++ b/xllm/core/framework/config/dit_config.h @@ -16,6 +16,7 @@ limitations under the License. #pragma once #include +#include #include #include "core/common/macros.h" @@ -34,6 +35,7 @@ class DiTConfig final { void from_flags(); void from_json(const JsonReader& json); + void append_config_json(nlohmann::ordered_json& config_json) const; void initialize(); [[nodiscard]] static const OptionCategory& option_category() { diff --git a/xllm/core/framework/config/eplb_config.cpp b/xllm/core/framework/config/eplb_config.cpp index f2c3d339d..c4b0aad36 100644 --- a/xllm/core/framework/config/eplb_config.cpp +++ b/xllm/core/framework/config/eplb_config.cpp @@ -57,6 +57,22 @@ void EPLBConfig::from_json(const JsonReader& json) { json.value_or("rank_tablefile", rank_tablefile())); } +void EPLBConfig::append_config_json(nlohmann::ordered_json& config_json) const { + const EPLBConfig default_config; + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_eplb); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, redundant_experts_num); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, eplb_update_interval); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, eplb_update_threshold); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, expert_parallel_degree); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, rank_tablefile); +} + EPLBConfig& EPLBConfig::get_instance() { static EPLBConfig config; return config; diff --git a/xllm/core/framework/config/eplb_config.h b/xllm/core/framework/config/eplb_config.h index bd3a307b9..9d5802b66 100644 --- a/xllm/core/framework/config/eplb_config.h +++ b/xllm/core/framework/config/eplb_config.h @@ -16,6 +16,7 @@ limitations under the License. #pragma once #include +#include #include #include "core/common/macros.h" @@ -34,6 +35,7 @@ class EPLBConfig final { void from_flags(); void from_json(const JsonReader& json); + void append_config_json(nlohmann::ordered_json& config_json) const; void initialize(); [[nodiscard]] static const OptionCategory& option_category() { diff --git a/xllm/core/framework/config/execution_config.cpp b/xllm/core/framework/config/execution_config.cpp index 6a01468be..f9d6bb947 100644 --- a/xllm/core/framework/config/execution_config.cpp +++ b/xllm/core/framework/config/execution_config.cpp @@ -104,6 +104,31 @@ void ExecutionConfig::from_json(const JsonReader& json) { .random_seed(json.value_or("random_seed", random_seed())); } +void ExecutionConfig::append_config_json( + nlohmann::ordered_json& config_json) const { + const ExecutionConfig default_config; + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_graph); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_graph_mode_decode_no_padding); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_prefill_piecewise_graph); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_graph_vmm_pool); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, max_tokens_for_graph_mode); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_shm); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, use_contiguous_input_buffer); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, input_shm_size); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, output_shm_size); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, random_seed); +} + ExecutionConfig& ExecutionConfig::get_instance() { static ExecutionConfig config; return config; diff --git a/xllm/core/framework/config/execution_config.h b/xllm/core/framework/config/execution_config.h index 1544dd441..eeb4c1a8c 100644 --- a/xllm/core/framework/config/execution_config.h +++ b/xllm/core/framework/config/execution_config.h @@ -16,6 +16,7 @@ limitations under the License. #pragma once #include +#include #include "core/common/macros.h" #include "core/framework/config/option_category.h" @@ -33,6 +34,7 @@ class ExecutionConfig final { void from_flags(); void from_json(const JsonReader& json); + void append_config_json(nlohmann::ordered_json& config_json) const; void initialize(); [[nodiscard]] static const OptionCategory& option_category() { diff --git a/xllm/core/framework/config/help_formatter.h b/xllm/core/framework/config/help_formatter.h index 32fe3e89f..2b707195a 100644 --- a/xllm/core/framework/config/help_formatter.h +++ b/xllm/core/framework/config/help_formatter.h @@ -46,8 +46,11 @@ namespace xllm { class HelpFormatter { public: [[nodiscard]] static const std::vector& option_categories() { - static const OptionCategory kConfigOptionCategory = {"CONFIG OPTIONS", - {"config_json_file"}}; + static const OptionCategory kConfigOptionCategory = { + "CONFIG OPTIONS", + {"config_json_file", + "enable_dump_config_json", + "dump_config_json_file"}}; static const std::vector kOptionCategories = { kConfigOptionCategory, ServiceConfig::option_category(), diff --git a/xllm/core/framework/config/kernel_config.cpp b/xllm/core/framework/config/kernel_config.cpp index 96ab56ea4..bd3a9080e 100644 --- a/xllm/core/framework/config/kernel_config.cpp +++ b/xllm/core/framework/config/kernel_config.cpp @@ -52,6 +52,19 @@ void KernelConfig::from_json(const JsonReader& json) { #endif } +void KernelConfig::append_config_json( + nlohmann::ordered_json& config_json) const { +#if defined(USE_NPU) + const KernelConfig default_config; + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_customize_mla_kernel); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, npu_kernel_backend); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_intralayer_addnorm); +#endif +} + KernelConfig& KernelConfig::get_instance() { static KernelConfig config; return config; diff --git a/xllm/core/framework/config/kernel_config.h b/xllm/core/framework/config/kernel_config.h index 7355dc746..3e71a1cc8 100644 --- a/xllm/core/framework/config/kernel_config.h +++ b/xllm/core/framework/config/kernel_config.h @@ -15,6 +15,7 @@ limitations under the License. #pragma once +#include #include #include "core/common/macros.h" @@ -33,6 +34,7 @@ class KernelConfig final { void from_flags(); void from_json(const JsonReader& json); + void append_config_json(nlohmann::ordered_json& config_json) const; void initialize(); [[nodiscard]] static const OptionCategory& option_category() { diff --git a/xllm/core/framework/config/kv_cache_config.cpp b/xllm/core/framework/config/kv_cache_config.cpp index bcf088b68..9456126ff 100644 --- a/xllm/core/framework/config/kv_cache_config.cpp +++ b/xllm/core/framework/config/kv_cache_config.cpp @@ -86,6 +86,27 @@ void KVCacheConfig::from_json(const JsonReader& json) { "phy_page_granularity_size", phy_page_granularity_size())); } +void KVCacheConfig::append_config_json( + nlohmann::ordered_json& config_json) const { + const KVCacheConfig default_config; + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, block_size); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, max_cache_size); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, max_memory_utilization); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, kv_cache_dtype); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_prefix_cache); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, xxh3_128bits_seed); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_xtensor); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, phy_page_granularity_size); +} + KVCacheConfig& KVCacheConfig::get_instance() { static KVCacheConfig config; return config; diff --git a/xllm/core/framework/config/kv_cache_config.h b/xllm/core/framework/config/kv_cache_config.h index 621181677..670cb778e 100644 --- a/xllm/core/framework/config/kv_cache_config.h +++ b/xllm/core/framework/config/kv_cache_config.h @@ -16,6 +16,7 @@ limitations under the License. #pragma once #include +#include #include #include "core/common/macros.h" @@ -34,6 +35,7 @@ class KVCacheConfig final { void from_flags(); void from_json(const JsonReader& json); + void append_config_json(nlohmann::ordered_json& config_json) const; void initialize(); [[nodiscard]] static const OptionCategory& option_category() { diff --git a/xllm/core/framework/config/kv_cache_store_config.cpp b/xllm/core/framework/config/kv_cache_store_config.cpp index b019d0012..ccf243935 100644 --- a/xllm/core/framework/config/kv_cache_store_config.cpp +++ b/xllm/core/framework/config/kv_cache_store_config.cpp @@ -101,6 +101,33 @@ void KVCacheStoreConfig::from_json(const JsonReader& json) { "enable_control_h2d_block_num", enable_control_h2d_block_num())); } +void KVCacheStoreConfig::append_config_json( + nlohmann::ordered_json& config_json) const { + const KVCacheStoreConfig default_config; + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, prefetch_timeout); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, prefetch_batch_size); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, layers_wise_copy_batchs); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, host_blocks_factor); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_kvcache_store); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_cache_upload); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, store_protocol); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, store_master_server_address); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, store_metadata_server); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, store_local_hostname); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_control_h2d_block_num); +} + KVCacheStoreConfig& KVCacheStoreConfig::get_instance() { static KVCacheStoreConfig config; return config; diff --git a/xllm/core/framework/config/kv_cache_store_config.h b/xllm/core/framework/config/kv_cache_store_config.h index 537e2115c..88d5ddb93 100644 --- a/xllm/core/framework/config/kv_cache_store_config.h +++ b/xllm/core/framework/config/kv_cache_store_config.h @@ -16,6 +16,7 @@ limitations under the License. #pragma once #include +#include #include #include "core/common/macros.h" @@ -34,6 +35,7 @@ class KVCacheStoreConfig final { void from_flags(); void from_json(const JsonReader& json); + void append_config_json(nlohmann::ordered_json& config_json) const; void initialize(); [[nodiscard]] static const OptionCategory& option_category() { diff --git a/xllm/core/framework/config/load_config.cpp b/xllm/core/framework/config/load_config.cpp index e6c109157..298891bd1 100644 --- a/xllm/core/framework/config/load_config.cpp +++ b/xllm/core/framework/config/load_config.cpp @@ -73,6 +73,20 @@ void LoadConfig::from_json(const JsonReader& json) { enable_prefetch_weight())); } +void LoadConfig::append_config_json(nlohmann::ordered_json& config_json) const { + const LoadConfig default_config; + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_manual_loader); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_rolling_load); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, rolling_load_num_cached_layers); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, rolling_load_num_rolling_slots); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_prefetch_weight); +} + LoadConfig& LoadConfig::get_instance() { static LoadConfig config; return config; diff --git a/xllm/core/framework/config/load_config.h b/xllm/core/framework/config/load_config.h index 0cf12c977..60b628b43 100644 --- a/xllm/core/framework/config/load_config.h +++ b/xllm/core/framework/config/load_config.h @@ -16,6 +16,7 @@ limitations under the License. #pragma once #include +#include #include "core/common/macros.h" #include "core/framework/config/option_category.h" @@ -33,6 +34,7 @@ class LoadConfig final { void from_flags(); void from_json(const JsonReader& json); + void append_config_json(nlohmann::ordered_json& config_json) const; void initialize(); [[nodiscard]] static const OptionCategory& option_category() { diff --git a/xllm/core/framework/config/model_config.cpp b/xllm/core/framework/config/model_config.cpp index 8133e7740..3515ab2a3 100644 --- a/xllm/core/framework/config/model_config.cpp +++ b/xllm/core/framework/config/model_config.cpp @@ -144,6 +144,33 @@ void ModelConfig::from_json(const JsonReader& json) { use_cpp_chat_template())); } +void ModelConfig::append_config_json( + nlohmann::ordered_json& config_json) const { + const ModelConfig default_config; + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, model_id); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT(config_json, default_config, model); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT(config_json, default_config, backend); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT(config_json, default_config, task); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT(config_json, default_config, devices); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, limit_image_per_prompt); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, reasoning_parser); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, tool_call_parser); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_qwen3_reranker); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_return_mm_full_embeddings); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, flashinfer_workspace_buffer_size); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, use_audio_in_video); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, use_cpp_chat_template); +} + ModelConfig& ModelConfig::get_instance() { static ModelConfig config; return config; diff --git a/xllm/core/framework/config/model_config.h b/xllm/core/framework/config/model_config.h index 7e2d245ae..1be95f3a1 100644 --- a/xllm/core/framework/config/model_config.h +++ b/xllm/core/framework/config/model_config.h @@ -16,6 +16,7 @@ limitations under the License. #pragma once #include +#include #include #include "core/common/macros.h" @@ -34,6 +35,7 @@ class ModelConfig final { void from_flags(); void from_json(const JsonReader& json); + void append_config_json(nlohmann::ordered_json& config_json) const; void initialize(); void normalize_cpp_chat_template(const std::string& model_type); diff --git a/xllm/core/framework/config/parallel_config.cpp b/xllm/core/framework/config/parallel_config.cpp index fa3a8eb1c..815cddbde 100644 --- a/xllm/core/framework/config/parallel_config.cpp +++ b/xllm/core/framework/config/parallel_config.cpp @@ -109,6 +109,28 @@ void ParallelConfig::from_json(const JsonReader& json) { json.value_or("enable_dp_balance", enable_dp_balance())); } +void ParallelConfig::append_config_json( + nlohmann::ordered_json& config_json) const { + const ParallelConfig default_config; + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT(config_json, default_config, dp_size); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT(config_json, default_config, ep_size); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT(config_json, default_config, cp_size); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT(config_json, default_config, tp_size); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT(config_json, default_config, sp_size); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, cfg_size); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, communication_backend); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_prefill_sp); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_multi_stream_parallel); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, micro_batch_num); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_dp_balance); +} + ParallelConfig& ParallelConfig::get_instance() { static ParallelConfig config; return config; diff --git a/xllm/core/framework/config/parallel_config.h b/xllm/core/framework/config/parallel_config.h index 36f4798cd..0bcd375ec 100644 --- a/xllm/core/framework/config/parallel_config.h +++ b/xllm/core/framework/config/parallel_config.h @@ -16,6 +16,7 @@ limitations under the License. #pragma once #include +#include #include #include "core/common/macros.h" @@ -34,6 +35,7 @@ class ParallelConfig final { void from_flags(); void from_json(const JsonReader& json); + void append_config_json(nlohmann::ordered_json& config_json) const; void initialize(); [[nodiscard]] static const OptionCategory& option_category() { diff --git a/xllm/core/framework/config/profile_config.cpp b/xllm/core/framework/config/profile_config.cpp index ce0e6315e..bbd41aba5 100644 --- a/xllm/core/framework/config/profile_config.cpp +++ b/xllm/core/framework/config/profile_config.cpp @@ -91,6 +91,29 @@ void ProfileConfig::from_json(const JsonReader& json) { "enable_forward_interruption", enable_forward_interruption())); } +void ProfileConfig::append_config_json( + nlohmann::ordered_json& config_json) const { + const ProfileConfig default_config; + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_profile_step_time); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_profile_token_budget); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_latency_aware_schedule); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, profile_max_prompt_length); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, max_global_ttft_ms); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, max_global_tpot_ms); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_profile_kv_blocks); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, disable_ttft_profiling); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_forward_interruption); +} + ProfileConfig& ProfileConfig::get_instance() { static ProfileConfig config; return config; diff --git a/xllm/core/framework/config/profile_config.h b/xllm/core/framework/config/profile_config.h index 74e09fad0..2a71012ff 100644 --- a/xllm/core/framework/config/profile_config.h +++ b/xllm/core/framework/config/profile_config.h @@ -17,6 +17,7 @@ limitations under the License. #include #include +#include #include "core/common/macros.h" #include "core/framework/config/option_category.h" @@ -34,6 +35,7 @@ class ProfileConfig final { void from_flags(); void from_json(const JsonReader& json); + void append_config_json(nlohmann::ordered_json& config_json) const; void initialize(); [[nodiscard]] static const OptionCategory& option_category() { diff --git a/xllm/core/framework/config/rec_config.cpp b/xllm/core/framework/config/rec_config.cpp index 6a30e4a76..8dce4d139 100644 --- a/xllm/core/framework/config/rec_config.cpp +++ b/xllm/core/framework/config/rec_config.cpp @@ -127,6 +127,36 @@ void RecConfig::from_json(const JsonReader& json) { "rec_worker_max_concurrency", rec_worker_max_concurrency())); } +void RecConfig::append_config_json(nlohmann::ordered_json& config_json) const { + const RecConfig default_config; + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_rec_fast_sampler); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_rec_prefill_only); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_xattention_one_stage); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, max_decode_rounds); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_constrained_decoding); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, output_rec_logprobs); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_convert_tokens_to_item); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_output_sku_logprobs); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_extended_item_info); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, each_conversion_threshold); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, total_conversion_threshold); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, request_queue_size); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, rec_worker_max_concurrency); +} + RecConfig& RecConfig::get_instance() { static RecConfig config; return config; diff --git a/xllm/core/framework/config/rec_config.h b/xllm/core/framework/config/rec_config.h index 0ec01d749..99ec7bd0e 100644 --- a/xllm/core/framework/config/rec_config.h +++ b/xllm/core/framework/config/rec_config.h @@ -16,6 +16,7 @@ limitations under the License. #pragma once #include +#include #include "core/common/macros.h" #include "core/framework/config/option_category.h" @@ -33,6 +34,7 @@ class RecConfig final { void from_flags(); void from_json(const JsonReader& json); + void append_config_json(nlohmann::ordered_json& config_json) const; void initialize(); [[nodiscard]] static const OptionCategory& option_category() { diff --git a/xllm/core/framework/config/scheduler_config.cpp b/xllm/core/framework/config/scheduler_config.cpp index 85c7dc9cd..4ef3987f3 100644 --- a/xllm/core/framework/config/scheduler_config.cpp +++ b/xllm/core/framework/config/scheduler_config.cpp @@ -128,6 +128,41 @@ void SchedulerConfig::from_json(const JsonReader& json) { enable_starve_prevent())); } +void SchedulerConfig::append_config_json( + nlohmann::ordered_json& config_json) const { + const SchedulerConfig default_config; + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, max_tokens_per_batch); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, max_seqs_per_batch); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_schedule_overlap); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, prefill_scheduling_memory_usage_threshold); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_chunked_prefill); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, max_tokens_per_chunk_for_prefill); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, chunked_match_frequency); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, use_zero_evict); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, max_decode_token_per_sequence); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, priority_strategy); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, use_mix_scheduler); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_online_preempt_offline); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, aggressive_coeff); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, starve_threshold); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_starve_prevent); +} + SchedulerConfig& SchedulerConfig::get_instance() { static SchedulerConfig config; return config; diff --git a/xllm/core/framework/config/scheduler_config.h b/xllm/core/framework/config/scheduler_config.h index cace4457f..01a60ab22 100644 --- a/xllm/core/framework/config/scheduler_config.h +++ b/xllm/core/framework/config/scheduler_config.h @@ -16,6 +16,7 @@ limitations under the License. #pragma once #include +#include #include #include "core/common/macros.h" @@ -34,6 +35,7 @@ class SchedulerConfig final { void from_flags(); void from_json(const JsonReader& json); + void append_config_json(nlohmann::ordered_json& config_json) const; void initialize(); [[nodiscard]] static const OptionCategory& option_category() { diff --git a/xllm/core/framework/config/service_config.cpp b/xllm/core/framework/config/service_config.cpp index d5cdf4612..27b3b2199 100644 --- a/xllm/core/framework/config/service_config.cpp +++ b/xllm/core/framework/config/service_config.cpp @@ -90,6 +90,29 @@ void ServiceConfig::from_json(const JsonReader& json) { "health_check_interval_ms", health_check_interval_ms())); } +void ServiceConfig::append_config_json( + nlohmann::ordered_json& config_json) const { + const ServiceConfig default_config; + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT(config_json, default_config, host); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT(config_json, default_config, port); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, rpc_idle_timeout_s); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, rpc_channel_timeout_ms); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, max_reconnect_count); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, num_threads); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, max_concurrent_requests); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, num_request_handling_threads); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, num_response_handling_threads); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, health_check_interval_ms); +} + ServiceConfig& ServiceConfig::get_instance() { static ServiceConfig config; return config; diff --git a/xllm/core/framework/config/service_config.h b/xllm/core/framework/config/service_config.h index 939fd7889..c6c2c1629 100644 --- a/xllm/core/framework/config/service_config.h +++ b/xllm/core/framework/config/service_config.h @@ -16,6 +16,7 @@ limitations under the License. #pragma once #include +#include #include #include "core/common/macros.h" @@ -34,6 +35,7 @@ class ServiceConfig final { void from_flags(); void from_json(const JsonReader& json); + void append_config_json(nlohmann::ordered_json& config_json) const; void initialize(); [[nodiscard]] static const OptionCategory& option_category() { diff --git a/xllm/core/framework/config/speculative_config.cpp b/xllm/core/framework/config/speculative_config.cpp index 5a03261c2..1b2b57985 100644 --- a/xllm/core/framework/config/speculative_config.cpp +++ b/xllm/core/framework/config/speculative_config.cpp @@ -121,6 +121,35 @@ void SpeculativeConfig::from_json(const JsonReader& json) { enable_atb_spec_kernel())); } +void SpeculativeConfig::append_config_json( + nlohmann::ordered_json& config_json) const { + const SpeculativeConfig default_config; + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, draft_model); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, draft_devices); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, num_speculative_tokens); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, speculative_algorithm); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, speculative_suffix_cache_max_depth); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, speculative_suffix_max_spec_factor); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, speculative_suffix_max_spec_offset); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, speculative_suffix_min_token_prob); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, speculative_suffix_max_cached_requests); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, speculative_suffix_use_tree_spec); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_opt_validate_probs); + APPEND_CONFIG_JSON_VALUE_IF_NOT_DEFAULT( + config_json, default_config, enable_atb_spec_kernel); +} + SpeculativeConfig& SpeculativeConfig::get_instance() { static SpeculativeConfig config; return config; diff --git a/xllm/core/framework/config/speculative_config.h b/xllm/core/framework/config/speculative_config.h index 1be7cbdcf..1a86a7a66 100644 --- a/xllm/core/framework/config/speculative_config.h +++ b/xllm/core/framework/config/speculative_config.h @@ -16,6 +16,7 @@ limitations under the License. #pragma once #include +#include #include #include "core/common/macros.h" @@ -34,6 +35,7 @@ class SpeculativeConfig final { void from_flags(); void from_json(const JsonReader& json); + void append_config_json(nlohmann::ordered_json& config_json) const; void initialize(); [[nodiscard]] static const OptionCategory& option_category() { diff --git a/xllm/xllm.cpp b/xllm/xllm.cpp index fc60d9496..6b718b60a 100644 --- a/xllm/xllm.cpp +++ b/xllm/xllm.cpp @@ -33,6 +33,7 @@ limitations under the License. #include "core/distributed_runtime/dit_master.h" #include "core/distributed_runtime/master.h" #include "core/framework/config/beam_search_config.h" +#include "core/framework/config/config_json_utils.h" #include "core/framework/config/disagg_pd_config.h" #include "core/framework/config/distributed_config.h" #include "core/framework/config/dit_config.h" @@ -384,6 +385,10 @@ int run() { execution_config.random_seed(std::random_device{}() % (1 << 30)); } + if (distributed_config.node_rank() == 0) { + config::dump_startup_config(); + } + // Create Master Options options = create_options( service_config.host() + ":" + std::to_string(service_config.port()),