-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathvideo_depth_estimation.cpp
More file actions
140 lines (115 loc) · 5.14 KB
/
video_depth_estimation.cpp
File metadata and controls
140 lines (115 loc) · 5.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#include "depth_anything.hpp"
#include <opencv2/opencv.hpp>
#include <iostream>
#include <vector>
#include <string>
#include <chrono>
#include <exception> // Needed for std::exception
/// Writes one side-by-side output frame: the original frame on the left and a
/// JET-colorized rendering of its depth map on the right.
///
/// @param frame    Frame placed on the left half of the output.
/// @param depthMap Depth map to visualize; when empty, a warning is printed and
///                 nothing is written.
/// @param writer   Video writer that receives the concatenated frame.
void processAndWriteFrame(const cv::Mat& frame, const cv::Mat& depthMap, cv::VideoWriter& writer) {
    // Without a depth map there is nothing to visualize; leave the writer untouched.
    if (depthMap.empty()) {
        std::cerr << "Warning: Skipping empty depth map.\n";
        return;
    }

    // Min-max stretch the depth values onto 0..255 and convert to 8-bit.
    cv::Mat depth8u;
    cv::normalize(depthMap, depth8u, 0, 255, cv::NORM_MINMAX, CV_8U);

    // Map the 8-bit depth onto the JET color palette.
    cv::Mat heatmap;
    cv::applyColorMap(depth8u, heatmap, cv::COLORMAP_JET);

    // Bring the heatmap to the frame's dimensions when they differ.
    const cv::Size frameSize = frame.size();
    if (heatmap.size() != frameSize) {
        cv::resize(heatmap, heatmap, frameSize);
    }

    // Stitch frame and heatmap left-to-right and hand the result to the writer.
    cv::Mat sideBySide;
    cv::hconcat(frame, heatmap, sideBySide);
    writer.write(sideBySide);
}
int main(int argc, char* argv[]) {
if (argc < 4) {
std::cerr << "Usage: " << argv[0]
<< " <path_to_model.onnx> <path_to_input_video> <path_to_output_video>\n";
return -1;
}
std::string modelPath = argv[1];
std::string inputVideoPath = argv[2];
std::string outputVideoPath = argv[3];
// Ensure the output file has an .mp4 extension
if (outputVideoPath.substr(outputVideoPath.find_last_of(".") + 1) != "mp4") {
outputVideoPath += ".mp4";
std::cout << "Output video path adjusted to: " << outputVideoPath << std::endl;
}
try {
// === Initialize DepthAnything ===
bool useCuda = true; // Set to false to disable GPU
DepthAnything depthEstimator(modelPath, useCuda);
// === Open the input video ===
cv::VideoCapture cap(inputVideoPath);
if (!cap.isOpened()) {
std::cerr << "Error: Cannot open the video file: " << inputVideoPath << std::endl;
return -1;
}
// Define the target dimensions for processing (required by the depth model)
int targetWidth = 518;
int targetHeight = 518;
double fps = cap.get(cv::CAP_PROP_FPS);
// Choose codec: try H.264 first; fallback to mp4v if needed
int fourcc = cv::VideoWriter::fourcc('H', '2', '6', '4');
if (fourcc == -1) {
fourcc = cv::VideoWriter::fourcc('m', 'p', '4', 'v');
}
// Create the VideoWriter using the target dimensions.
// Note: the output frame is the horizontal concatenation of two frames.
cv::VideoWriter writer;
writer.open(outputVideoPath, fourcc, fps, cv::Size(targetWidth * 2, targetHeight), true);
if (!writer.isOpened()) {
std::cerr << "Error: Cannot open output writer: " << outputVideoPath << std::endl;
return -1;
}
writer.set(cv::VIDEOWRITER_PROP_QUALITY, 95); // Set quality (0-100)
std::cout << "Processing video: " << inputVideoPath << std::endl;
std::cout << "Output video: " << outputVideoPath << " (Codec: " << fourcc << ")" << std::endl;
// === Batch processing loop ===
const int batchSize = 16;
std::vector<cv::Mat> batchFrames;
std::vector<cv::Mat> batchDepths;
int frameCount = 0;
while (true) {
cv::Mat frame;
if (!cap.read(frame)) break;
// Resize the frame to the target dimensions required by the depth model
cv::Mat resized;
cv::resize(frame, resized, cv::Size(targetWidth, targetHeight));
batchFrames.push_back(resized);
if (batchFrames.size() == batchSize) {
auto start = std::chrono::high_resolution_clock::now();
batchDepths = depthEstimator.predictBatch(batchFrames);
auto end = std::chrono::high_resolution_clock::now();
std::cout << "Batch of " << batchSize << " processed in "
<< std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
<< " ms\n";
for (size_t i = 0; i < batchFrames.size(); ++i) {
frameCount++;
std::cout << "Writing frame " << frameCount << "\n";
processAndWriteFrame(batchFrames[i], batchDepths[i], writer);
}
batchFrames.clear();
batchDepths.clear();
}
}
// Process any leftover frames in the final batch
if (!batchFrames.empty()) {
batchDepths = depthEstimator.predictBatch(batchFrames);
for (size_t i = 0; i < batchFrames.size(); ++i) {
frameCount++;
std::cout << "Writing frame " << frameCount << "\n";
processAndWriteFrame(batchFrames[i], batchDepths[i], writer);
}
}
cap.release();
writer.release();
std::cout << "Done. Total frames processed: " << frameCount << std::endl;
std::cout << "Output saved to: " << outputVideoPath << std::endl;
} catch (const std::exception& e) {
std::cerr << "Exception during processing: " << e.what() << std::endl;
return -1;
}
return 0;
}