From 00e4e10b9622f28c83a1be49e56780cb65d198bf Mon Sep 17 00:00:00 2001 From: xiongjun3 Date: Tue, 24 Mar 2026 14:57:45 +0800 Subject: [PATCH] feat: support chat_template_kwargs in chat completions. --- .../chat_template/jinja_chat_template.cpp | 25 ++++++++++++++++--- .../chat_template/jinja_chat_template.h | 10 ++++++++ .../jinja_chat_template_test.cpp | 20 +++++++++++++++ xllm_service/http_service/service.cpp | 4 +++ xllm_service/request/request.h | 3 +++ xllm_service/scheduler/scheduler.cpp | 3 ++- 6 files changed, 61 insertions(+), 4 deletions(-) diff --git a/xllm_service/chat_template/jinja_chat_template.cpp b/xllm_service/chat_template/jinja_chat_template.cpp index 6f240ed..4598c5e 100644 --- a/xllm_service/chat_template/jinja_chat_template.cpp +++ b/xllm_service/chat_template/jinja_chat_template.cpp @@ -39,19 +39,29 @@ JinjaChatTemplate::JinjaChatTemplate(const TokenizerArgs& args) : args_(args) { std::optional JinjaChatTemplate::apply( const ChatMessages& messages) const { const std::vector empty_tools; - return apply(messages, empty_tools); + const nlohmann::ordered_json chat_template_kwargs = nlohmann::json::object(); + return apply(messages, empty_tools, chat_template_kwargs); } std::optional JinjaChatTemplate::apply( nlohmann::ordered_json& messages) const { // Call the overloaded method with empty tools nlohmann::ordered_json empty_tools = nlohmann::json::array(); - return apply(messages, empty_tools); + const nlohmann::ordered_json chat_template_kwargs = nlohmann::json::object(); + return apply(messages, empty_tools, chat_template_kwargs); } std::optional JinjaChatTemplate::apply( const ChatMessages& messages, const std::vector& json_tools) const { + const nlohmann::ordered_json chat_template_kwargs = nlohmann::json::object(); + return apply(messages, json_tools, chat_template_kwargs); +} + +std::optional JinjaChatTemplate::apply( + const ChatMessages& messages, + const std::vector& json_tools, + const nlohmann::ordered_json& chat_template_kwargs) const { // convert the messages to json object nlohmann::ordered_json messages_json = nlohmann::json::array(); for (const auto& message : messages) { @@ -82,16 +92,25 @@ std::optional JinjaChatTemplate::apply( tools_json.push_back(tool_json); } // apply the template - return apply(messages_json, tools_json); + return apply(messages_json, tools_json, chat_template_kwargs); } std::optional JinjaChatTemplate::apply( nlohmann::ordered_json& messages, const nlohmann::ordered_json& tools) const { + const nlohmann::ordered_json chat_template_kwargs = nlohmann::json::object(); + return apply(messages, tools, chat_template_kwargs); +} + +std::optional JinjaChatTemplate::apply( + nlohmann::ordered_json& messages, + const nlohmann::ordered_json& tools, + const nlohmann::ordered_json& chat_template_kwargs) const { minja::chat_template_inputs input; input.messages = messages; input.tools = tools; input.add_generation_prompt = true; + input.extra_context = chat_template_kwargs; minja::chat_template_options options; return template_->apply(input, options); diff --git a/xllm_service/chat_template/jinja_chat_template.h b/xllm_service/chat_template/jinja_chat_template.h index d7e2282..07be3ff 100644 --- a/xllm_service/chat_template/jinja_chat_template.h +++ b/xllm_service/chat_template/jinja_chat_template.h @@ -72,6 +72,11 @@ class JinjaChatTemplate { const ChatMessages& messages, const std::vector& json_tools) const; + std::optional apply( + const ChatMessages& messages, + const std::vector& json_tools, + const nlohmann::ordered_json& chat_template_kwargs) const; + // expose this function for testing // apply the template to the values in the json object std::optional apply(nlohmann::ordered_json& messages) const; @@ -79,6 +84,11 @@ class JinjaChatTemplate { std::optional apply(nlohmann::ordered_json& messages, const nlohmann::ordered_json& tools) const; + std::optional apply( + nlohmann::ordered_json& messages, + const nlohmann::ordered_json& tools, + const nlohmann::ordered_json& chat_template_kwargs) const; + private: nlohmann::ordered_json get_mm_content(const Message::MMContentVec& vec) const; diff --git a/xllm_service/chat_template/jinja_chat_template_test.cpp b/xllm_service/chat_template/jinja_chat_template_test.cpp index de9fda2..f78a0f5 100644 --- a/xllm_service/chat_template/jinja_chat_template_test.cpp +++ b/xllm_service/chat_template/jinja_chat_template_test.cpp @@ -53,4 +53,24 @@ TEST(JinjaChatTemplate, OpenChatModel) { EXPECT_EQ(result.value(), expected); } +TEST(JinjaChatTemplate, ApplyChatTemplateKwargs) { + const std::string template_str = + "{% if enable_thinking %}{% endif %}" + "{% for message in messages %}{{ message['content'] }}{% endfor %}"; + + nlohmann::ordered_json messages = {{{"role", "user"}, {"content", "hello"}}}; + nlohmann::ordered_json chat_template_kwargs = {{"enable_thinking", false}}; + + TokenizerArgs args; + args.chat_template(template_str); + args.bos_token(""); + args.eos_token(""); + JinjaChatTemplate template_(args); + + auto result = template_.apply( + messages, nlohmann::ordered_json::array(), chat_template_kwargs); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), "hello"); +} + } // namespace xllm_service diff --git a/xllm_service/http_service/service.cpp b/xllm_service/http_service/service.cpp index ef5a609..72b1ae5 100644 --- a/xllm_service/http_service/service.cpp +++ b/xllm_service/http_service/service.cpp @@ -462,6 +462,10 @@ void XllmHttpServiceImpl::ChatCompletions( for (const auto& message : req_pb->messages()) { service_request->messages.emplace_back(message.role(), message.content()); } + if (req_pb->has_chat_template_kwargs()) { + service_request->chat_template_kwargs = + proto_struct_to_json(req_pb->chat_template_kwargs()); + } service_request->tools = parse_tools_from_proto(req_pb->tools()); if (req_pb->has_tool_choice()) { service_request->tool_choice = req_pb->tool_choice(); diff --git a/xllm_service/request/request.h b/xllm_service/request/request.h index 1ff7f15..962d9bf 100644 --- a/xllm_service/request/request.h +++ b/xllm_service/request/request.h @@ -52,6 +52,9 @@ struct Request { // controls tool usage behavior, e.g. auto/none/required std::string tool_choice = "auto"; + // extra template context such as {"enable_thinking": false} + nlohmann::json chat_template_kwargs = nlohmann::json::object(); + // token ids of prompt std::vector token_ids; diff --git a/xllm_service/scheduler/scheduler.cpp b/xllm_service/scheduler/scheduler.cpp index b660b2f..cd3313d 100644 --- a/xllm_service/scheduler/scheduler.cpp +++ b/xllm_service/scheduler/scheduler.cpp @@ -97,7 +97,8 @@ bool Scheduler::schedule(std::shared_ptr request) { const std::vector empty_tools; const std::vector& tools_for_template = request->tool_choice == "none" ? empty_tools : request->tools; - auto prompt = chat_template_->apply(request->messages, tools_for_template); + auto prompt = chat_template_->apply( + request->messages, tools_for_template, request->chat_template_kwargs); if (!prompt.has_value()) { LOG(ERROR) << "Failed to construct prompt from messages"; return false;