Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ find_package(GTest CONFIG REQUIRED)
find_package(benchmark CONFIG REQUIRED)
find_package(nlohmann_json CONFIG REQUIRED)
find_package(OpenCV CONFIG REQUIRED)
find_package(FFMPEG REQUIRED)
find_package(Python COMPONENTS Development REQUIRED)
find_package(pybind11 CONFIG REQUIRED)

Expand Down
5 changes: 5 additions & 0 deletions vcpkg.json
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@
"name": "snappy",
"version>=": "1.1.10"
},
{
"name": "ffmpeg",
"default-features": false,
"features": ["avcodec", "avformat", "swscale", "swresample", "x264"]
},
{
"name": "opencv4",
"version>=": "4.7.0",
Expand Down
2 changes: 1 addition & 1 deletion xllm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ else()
endif()

# link brpc
target_link_libraries(xllm PRIVATE glog::glog brpc leveldb::leveldb protobuf::libprotobuf ${OpenCV_LIBS})
target_link_libraries(xllm PRIVATE glog::glog brpc leveldb::leveldb protobuf::libprotobuf ${OpenCV_LIBS} ${FFMPEG_LIBRARIES})
add_dependencies(xllm brpc-static)

if(USE_NPU)
Expand Down
2 changes: 2 additions & 0 deletions xllm/api_service/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ cc_library(
sample_service_impl.h
embedding_service_impl.h
image_generation_service_impl.h
video_generation_service_impl.h
rerank_service_impl.h
qwen3_rerank_service_impl.h
non_stream_call.h
Expand All @@ -38,6 +39,7 @@ cc_library(
sample_service_impl.cpp
embedding_service_impl.cpp
image_generation_service_impl.cpp
video_generation_service_impl.cpp
models_service_impl.cpp
rerank_service_impl.cpp
stream_output_parser.cpp
Expand Down
47 changes: 47 additions & 0 deletions xllm/api_service/api_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ limitations under the License.
#include "image_generation.pb.h"
#include "models.pb.h"
#include "service_impl_factory.h"
#include "video_generation.pb.h"
#include "xllm_metrics.h"
namespace xllm {

Expand Down Expand Up @@ -486,6 +487,52 @@ void APIService::ImageGenerationHttp(
image_generation_service_impl_->process_async(call);
}

void APIService::VideoGeneration(::google::protobuf::RpcController* controller,
const proto::VideoGenerationRequest* request,
proto::VideoGenerationResponse* response,
::google::protobuf::Closure* done) {
// TODO with xllm-service
}

void APIService::VideoGenerationHttp(
::google::protobuf::RpcController* controller,
const proto::HttpRequest* request,
proto::HttpResponse* response,
::google::protobuf::Closure* done) {
xllm::ClosureGuard done_guard(
done,
std::bind(request_in_metric, nullptr),
std::bind(request_out_metric, (void*)controller));
if (!request || !response || !controller) {
LOG(ERROR) << "brpc request | respose | controller is null";
return;
}

auto arena = GetArenaWithCheck<VideoGenerationCall>(response);
auto req_pb =
google::protobuf::Arena::CreateMessage<proto::VideoGenerationRequest>(
arena);
auto resp_pb =
google::protobuf::Arena::CreateMessage<proto::VideoGenerationResponse>(
arena);

auto ctrl = reinterpret_cast<brpc::Controller*>(controller);
std::string error;
json2pb::Json2PbOptions options;
butil::IOBuf& buf = ctrl->request_attachment();
butil::IOBufAsZeroCopyInputStream iobuf_stream(buf);
auto st = json2pb::JsonToProtoMessage(&iobuf_stream, req_pb, options, &error);
if (!st) {
ctrl->SetFailed(error);
LOG(ERROR) << "parse json to proto failed: " << error;
return;
}
std::shared_ptr<VideoGenerationCall> call =
std::make_shared<VideoGenerationCall>(
ctrl, done_guard.release(), req_pb, resp_pb, arena != nullptr);
video_generation_service_impl_->process_async(call);
}

void APIService::Rerank(::google::protobuf::RpcController* controller,
const proto::RerankRequest* request,
proto::RerankResponse* response,
Expand Down
12 changes: 12 additions & 0 deletions xllm/api_service/api_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ limitations under the License.
#include "rec_completion_service_impl.h"
#include "rerank_service_impl.h"
#include "sample_service_impl.h"
#include "video_generation_service_impl.h"
#include "xllm_service.pb.h"

namespace xllm {
Expand Down Expand Up @@ -96,6 +97,16 @@ class APIService : public proto::XllmAPIService {
proto::HttpResponse* response,
::google::protobuf::Closure* done) override;

void VideoGeneration(::google::protobuf::RpcController* controller,
const proto::VideoGenerationRequest* request,
proto::VideoGenerationResponse* response,
::google::protobuf::Closure* done) override;

void VideoGenerationHttp(::google::protobuf::RpcController* controller,
const proto::HttpRequest* request,
proto::HttpResponse* response,
::google::protobuf::Closure* done) override;

void Rerank(::google::protobuf::RpcController* controller,
const proto::RerankRequest* request,
proto::RerankResponse* response,
Expand Down Expand Up @@ -204,6 +215,7 @@ class APIService : public proto::XllmAPIService {
std::unique_ptr<MMEmbeddingServiceImpl> mm_embedding_service_impl_;
std::unique_ptr<ModelsServiceImpl> models_service_impl_;
std::unique_ptr<ImageGenerationServiceImpl> image_generation_service_impl_;
std::unique_ptr<VideoGenerationServiceImpl> video_generation_service_impl_;
std::unique_ptr<RerankServiceImpl> rerank_service_impl_;
std::unique_ptr<RecCompletionServiceImpl> rec_completion_service_impl_;
};
Expand Down
2 changes: 1 addition & 1 deletion xllm/api_service/image_generation_service_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ bool send_result_to_client_brpc(std::shared_ptr<ImageGenerationCall> call,
auto* proto_result = proto_output->add_results();

image.clear();
butil::Base64Encode(output.image, &image);
butil::Base64Encode(output.data, &image);

proto_result->set_image(image);
proto_result->set_width(output.width);
Expand Down
3 changes: 3 additions & 0 deletions xllm/api_service/service_impl_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ void ServiceImplFactory::create(
self->image_generation_service_impl_ =
std::make_unique<ImageGenerationServiceImpl>(
dynamic_cast<DiTMaster*>(master), models);
self->video_generation_service_impl_ =
std::make_unique<VideoGenerationServiceImpl>(
dynamic_cast<DiTMaster*>(master), models);
}},
{static_cast<int8_t>(ServingMode::REC),
[](APIService* self,
Expand Down
103 changes: 103 additions & 0 deletions xllm/api_service/video_generation_service_impl.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/* Copyright 2026 The xLLM Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

https://github.com/jd-opensource/xllm/blob/main/LICENSE

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "video_generation_service_impl.h"

#include <butil/base64.h>

#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "api_service/utils.h"
#include "core/framework/request/dit_request_params.h"
#include "distributed_runtime/dit_master.h"

namespace xllm {

namespace {

bool send_result_to_client_brpc(std::shared_ptr<VideoGenerationCall> call,
const std::string& request_id,
int64_t created_time,
const std::string& model,
const DiTRequestOutput& req_output) {
auto& response = call->response();
response.set_object("list");
response.set_id(request_id);
response.set_created(created_time);
response.set_model(model);
auto* proto_output = response.mutable_output();
const std::vector<DiTGenerationOutput>& outputs = req_output.outputs;
proto_output->mutable_results()->Reserve(outputs.size());

std::string video;
for (const auto& output : outputs) {
auto* proto_result = proto_output->add_results();

video.clear();
butil::Base64Encode(output.data, &video);
proto_result->set_video(video);

proto_result->set_width(output.width);
proto_result->set_height(output.height);
proto_result->set_seed(output.seed);
proto_result->set_num_frames(output.num_frames);
proto_result->set_fps(output.video_fps);
}
return call->write_and_finish(response);
}

} // namespace

VideoGenerationServiceImpl::VideoGenerationServiceImpl(
DiTMaster* master,
const std::vector<std::string>& models)
: APIServiceImpl(models), master_{master} {
CHECK(master_ != nullptr);
}

void VideoGenerationServiceImpl::process_async_impl(
std::shared_ptr<VideoGenerationCall> call) {
const auto& rpc_request = call->request();
const auto& model = rpc_request.model();
if (!models_.contains(model)) {
call->finish_with_error(StatusCode::UNKNOWN, "Model not supported");
return;
}

DiTRequestParams request_params(
rpc_request, call->get_x_request_id(), call->get_x_request_time());

std::string saved_request_id = request_params.request_id;
master_->handle_request(
std::move(request_params),
call.get(),
[call,
model,
request_id = std::move(saved_request_id),
created_time = absl::ToUnixSeconds(absl::Now())](
const DiTRequestOutput& req_output) -> bool {
if (req_output.status.has_value()) {
const auto& status = req_output.status.value();
if (!status.ok()) {
return call->finish_with_error(status.code(), status.message());
}
}

return send_result_to_client_brpc(
call, request_id, created_time, model, req_output);
});
}

} // namespace xllm
42 changes: 42 additions & 0 deletions xllm/api_service/video_generation_service_impl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/* Copyright 2026 The xLLM Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

https://github.com/jd-opensource/xllm/blob/main/LICENSE

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#pragma once
#include <absl/container/flat_hash_set.h>

#include "api_service/api_service_impl.h"
#include "api_service/non_stream_call.h"
#include "video_generation.pb.h"

namespace xllm {

using VideoGenerationCall = NonStreamCall<proto::VideoGenerationRequest,
proto::VideoGenerationResponse>;
class DiTMaster;
// Handles /v1/video/generation requests
class VideoGenerationServiceImpl final
: public APIServiceImpl<VideoGenerationCall> {
public:
VideoGenerationServiceImpl(DiTMaster* master,
const std::vector<std::string>& models);

void process_async_impl(std::shared_ptr<VideoGenerationCall> call);

private:
DISALLOW_COPY_AND_ASSIGN(VideoGenerationServiceImpl);
DiTMaster* master_ = nullptr;
};

} // namespace xllm
14 changes: 14 additions & 0 deletions xllm/core/framework/batch/dit_batch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ DiTForwardInput DiTBatch::prepare_forward_input() {
std::vector<torch::Tensor> control_images;
std::vector<torch::Tensor> latents;
std::vector<torch::Tensor> masked_image_latents;
std::vector<torch::Tensor> last_images;
std::vector<torch::Tensor> image_embeds;
const auto batch_size = request_vec_.size();
prompt_embeds.reserve(batch_size);
pooled_prompt_embeds.reserve(batch_size);
Expand All @@ -76,6 +78,8 @@ DiTForwardInput DiTBatch::prepare_forward_input() {
control_images.reserve(batch_size);
latents.reserve(batch_size);
masked_image_latents.reserve(batch_size);
last_images.reserve(batch_size);
image_embeds.reserve(batch_size);
for (const auto& request : request_vec_) {
const auto& generation_params = request->state().generation_params();
if (input.generation_params != generation_params) {
Expand Down Expand Up @@ -109,6 +113,8 @@ DiTForwardInput DiTBatch::prepare_forward_input() {
mask_images.emplace_back(input_params.mask_image);
condition_images.emplace_back(input_params.condition_image);
control_images.emplace_back(input_params.control_image);
last_images.emplace_back(input_params.last_image);
image_embeds.emplace_back(input_params.image_embeds);
}

if (input.prompts.size() != request_vec_.size()) {
Expand Down Expand Up @@ -167,6 +173,14 @@ DiTForwardInput DiTBatch::prepare_forward_input() {
if (check_tensors_valid(masked_image_latents)) {
input.masked_image_latents = torch::stack(masked_image_latents);
}

if (check_tensors_valid(last_images)) {
input.last_images = torch::stack(last_images);
}

if (check_tensors_valid(image_embeds)) {
input.image_embeds = torch::stack(image_embeds);
}
return input;
}

Expand Down
1 change: 1 addition & 0 deletions xllm/core/framework/request/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,5 @@ cc_library(
proto::xllm_proto
torch
${OpenCV_LIBS}
${FFMPEG_LIBRARIES}
)
21 changes: 16 additions & 5 deletions xllm/core/framework/request/dit_request.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ void DiTRequest::log_statistic(double total_latency) {
}

void DiTRequest::handle_forward_output(torch::Tensor output) {
int count = state_.generation_params().num_images_per_prompt;
int count = state_.generation_params().num_images_per_prompt *
state_.generation_params().num_videos_per_prompt;
output_.tensors = torch::chunk(output, count);
}

Expand All @@ -69,12 +70,22 @@ const DiTRequestOutput DiTRequest::generate_output() {
result.width = state_.generation_params().width;
result.seed = state_.generation_params().seed;

OpenCVImageEncoder encoder;
int count = state_.generation_params().num_images_per_prompt;
OpenCVImageEncoder img_encoder;
FFmpegVideoEncoder vid_encoder;
int count = state_.generation_params().num_images_per_prompt *
state_.generation_params().num_videos_per_prompt;
for (size_t idx = 0; idx < count; ++idx) {
torch::Tensor image =
torch::Tensor tensor =
output_.tensors[idx].squeeze(0).cpu().to(torch::kFloat32).contiguous();
encoder.encode(image, result.image);
if (tensor.dim() == 4 || state_.generation_params().force_video_output) {
vid_encoder.encode(
tensor, state_.generation_params().video_fps, "mp4", result.data);
result.num_frames =
tensor.dim() == 4 ? static_cast<int32_t>(tensor.size(0)) : 0;
result.video_fps = state_.generation_params().video_fps;
} else {
img_encoder.encode(tensor, result.data);
}
output.outputs.push_back(result);
}

Expand Down
Loading
Loading