diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f9c059e..b88cc9b 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -39,6 +39,14 @@ jobs: make -j$(nproc --all) + - name: Download test data + run: | + cd build + python3 -m pip install --upgrade pip + python3 -m pip install gdown + gdown https://drive.google.com/uc?id=1tCgUEbdlLLad-ThZ5PO9gJyy9X5DCaUT + tar -xf test_data.tar.gz + - name: Generate code run: | export LD_LIBRARY_PATH=${{ github.workspace }}/halide-install/lib:$LD_LIBRARY_PATH diff --git a/include/algos.hpp b/include/algos.hpp index d6c9f0c..482d91b 100644 --- a/include/algos.hpp +++ b/include/algos.hpp @@ -29,6 +29,12 @@ void ascii_art_halide(uint8_t* src, uint8_t* dst, int input_height, int input_wi void julia_ref(uint8_t* dst, int height, int width); void halide_julia(uint8_t* dst, int height, int width); +void LaplacianFilter(cv::Mat src, cv::Mat dst, int height, int width); +void standartDeviation(cv::Mat src, cv::Mat dst, int height, int width); +void wellExp(cv::Mat src, cv::Mat dst, int height, int width); +void weightsImage(cv::Mat src1, cv::Mat src2, cv::Mat src3, cv::Mat dst, int height, int width); +void weight_sum(cv::Mat src1, cv::Mat src2, cv::Mat src3, cv::Mat src4, cv::Mat dst, int height, int width); + #ifdef HAVE_OPENCV_DNN void convolution_nchw_halide(float* src, float* kernel, float* dst, int inpChannels, int outChannels, int height, int width); diff --git a/perf/perf_hdr.cpp b/perf/perf_hdr.cpp new file mode 100644 index 0000000..40e09a9 --- /dev/null +++ b/perf/perf_hdr.cpp @@ -0,0 +1,145 @@ +#include + +#include "algos.hpp" + +using namespace cv; + +PERF_TEST(hdr, halide) { + + Mat image1 = imread("1.jpg"); + Mat image2 = imread("2.jpg"); + Mat image3 = imread("3.jpg"); + Mat image4 = imread("4.jpg"); + + image1.convertTo(image1, CV_32F, 1.0f/255.0f); + image2.convertTo(image2, CV_32F, 1.0f/255.0f); + image3.convertTo(image3, CV_32F, 1.0f/255.0f); + image4.convertTo(image4, CV_32F, 1.0f/255.0f); + + Mat image1Gray; + Mat image2Gray; + Mat image3Gray; + Mat image4Gray; + + cvtColor(image1, image1Gray, cv::COLOR_BGR2GRAY); + cvtColor(image2, image2Gray, cv::COLOR_BGR2GRAY); + cvtColor(image3, image3Gray, cv::COLOR_BGR2GRAY); + cvtColor(image4, image4Gray, cv::COLOR_BGR2GRAY); + + int imageWidth = image1Gray.cols; + int imageHeigth = image1Gray.rows; + + int size = imageHeigth * imageHeigth; + + Mat laplaced1(imageHeigth, imageWidth, CV_32F); + Mat laplaced2(imageHeigth, imageWidth, CV_32F); + Mat laplaced3(imageHeigth, imageWidth, CV_32F); + Mat laplaced4(imageHeigth, imageWidth, CV_32F); + + Mat stDev1(imageHeigth, imageWidth, CV_32F); + Mat stDev2(imageHeigth, imageWidth, CV_32F); + Mat stDev3(imageHeigth, imageWidth, CV_32F); + Mat stDev4(imageHeigth, imageWidth, CV_32F); + + Mat we1(imageHeigth, imageWidth, CV_32F); + Mat we2(imageHeigth, imageWidth, CV_32F); + Mat we3(imageHeigth, imageWidth, CV_32F); + Mat we4(imageHeigth, imageWidth, CV_32F); + + Mat weights1(imageHeigth, imageWidth, CV_32F); + Mat weights2(imageHeigth, imageWidth, CV_32F); + Mat weights3(imageHeigth, imageWidth, CV_32F); + Mat weights4(imageHeigth, imageWidth, CV_32F); + + Mat weights_sum(imageHeigth, imageWidth, CV_32F); + + PERF_SAMPLE_BEGIN() + + LaplacianFilter(image1Gray, laplaced1, imageHeigth, imageWidth); // LaplacianFilter - 1920x1080x1 as result + LaplacianFilter(image2Gray, laplaced2, imageHeigth, imageWidth); + LaplacianFilter(image3Gray, laplaced3, imageHeigth, imageWidth); + LaplacianFilter(image4Gray, laplaced4, imageHeigth, imageWidth); + + laplaced1 = abs(laplaced1); //absolute value + laplaced2 = abs(laplaced2); + laplaced3 = abs(laplaced3); + laplaced4 = abs(laplaced4); + + standartDeviation(image1, stDev1, imageHeigth, imageWidth); // Calculation of standrat deviation - 1920x1080x1 as result + standartDeviation(image2, stDev2, imageHeigth, imageWidth); + standartDeviation(image3, stDev3, imageHeigth, imageWidth); + standartDeviation(image4, stDev4, imageHeigth, imageWidth); + + wellExp(image1, we1, imageHeigth, imageWidth); // Calculation of well-exposedness - 1920x1080x1 as result + wellExp(image2, we2, imageHeigth, imageWidth); + wellExp(image3, we3, imageHeigth, imageWidth); + wellExp(image4, we4, imageHeigth, imageWidth); + + weightsImage(laplaced1, stDev1, we1, weights1, imageHeigth, imageWidth); // Calculation of weight map - 1920x1080x1 as result + weightsImage(laplaced2, stDev2, we2, weights2, imageHeigth, imageWidth); + weightsImage(laplaced3, stDev3, we3, weights3, imageHeigth, imageWidth); + weightsImage(laplaced4, stDev4, we4, weights4, imageHeigth, imageWidth); + + weight_sum(weights1, weights2, weights3, weights4, weights_sum, imageHeigth, imageWidth); + + PERF_SAMPLE_END() + + SANITY_CHECK_NOTHING(); + + imwrite("laplaced1.jpg", laplaced1 * 255); //write laplaced images + imwrite("laplaced2.jpg", laplaced2 * 255); + imwrite("laplaced3.jpg", laplaced3 * 255); + imwrite("laplaced4.jpg", laplaced4 * 255); + + std::cout << "check weights sum" << std::endl; + + std::vector weightsVec = {weights1, weights2, weights3, weights4}; + + std::vector imagesVec = {image1, image2, image3, image4}; + + //opencv code + + int maxlevel = static_cast(logf(static_cast(min(imageWidth, imageHeigth))) / logf(2.0f)); + std::vector res_pyr(maxlevel + 1); + std::vector res_pyr_mutexes(maxlevel + 1); + + parallel_for_(Range(0, static_cast(imagesVec.size())), [&](const Range& range) { + for(int i = range.start; i < range.end; i++) { + weightsVec[i] /= weights_sum; + + std::vector img_pyr, weight_pyr; + buildPyramid(imagesVec[i], img_pyr, maxlevel); + buildPyramid(weightsVec[i], weight_pyr, maxlevel); + + for(int lvl = 0; lvl < maxlevel; lvl++) { + Mat up; + pyrUp(img_pyr[lvl + 1], up, img_pyr[lvl].size()); + img_pyr[lvl] -= up; + } + for(int lvl = 0; lvl <= maxlevel; lvl++) { + std::vector splitted(3); + split(img_pyr[lvl], splitted); + for(int c = 0; c < 3; c++) { + splitted[c] = splitted[c].mul(weight_pyr[lvl]); + } + merge(splitted, img_pyr[lvl]); + + AutoLock lock(res_pyr_mutexes[lvl]); + if(res_pyr[lvl].empty()) { + res_pyr[lvl] = img_pyr[lvl]; + } else { + res_pyr[lvl] += img_pyr[lvl]; + } + } + } + }); + for(int lvl = maxlevel; lvl > 0; lvl--) { + Mat up; + pyrUp(res_pyr[lvl], up, res_pyr[lvl - 1].size()); + res_pyr[lvl - 1] += up; + } + + Mat dst = res_pyr[0]; + + imwrite("FinalImage.jpg", dst * 255); +} diff --git a/src/mertens.cpp b/src/mertens.cpp new file mode 100644 index 0000000..7ad5a02 --- /dev/null +++ b/src/mertens.cpp @@ -0,0 +1,285 @@ +#include +#include + +#ifdef __riscv + +#include +#include "halide_laplacianFilter_rv.h" +#include "halide_standartDeviation_rv.h" +#include "halide_wellExp_rv.h" +#include "halide_weightsImage_rv.h" +#include "halide_weight_sum_rv.h" + +using namespace cv; +using namespace Halide::Runtime; + +#else + +#include +#include +#include + +using namespace cv; +using namespace Halide; + +#endif + +void LaplacianFilter(Mat src, Mat dst, int height, int width) { + + float filter[3][3] = {{0, -1, 0}, {-1, 4, -1}, {0, -1, 0}}; + Buffer input(src.ptr(), {width, height}); + Buffer output(dst.ptr(), {width, height}); + Buffer weights(filter); + +#ifdef __riscv + halide_laplacianFilter_rv(input, output); +#else + static Func f("contrast"); +try { + if (!f.defined()) { + Var x("x"), y("y"), c("c"); + + RDom r(-1, 3, -1, 3); + + Func input1 = BoundaryConditions::constant_exterior(input, 0); + + f(x, y) = sum(input1(x + r.x, y + r.y) * weights(r.x + 1, r.y + 1)); + + f.vectorize(x, 4); + + Target target; + target.os = Target::OS::Linux; + target.arch = Target::Arch::RISCV; + target.bits = 64; + target.vector_bits = 128; + + // Tested XuanTie C906 has 128-bit vector unit + CV_Assert(target.vector_bits <= 128); + + std::vector features; + features.push_back(Target::RVV); + features.push_back(Target::NoAsserts); + features.push_back(Target::NoRuntime); + target.set_features(features); + + std::cout << target << std::endl; + f.print_loop_nest(); + + f.compile_to_header("halide_laplacianFilter_rv.h", {input}, "halide_laplacianFilter_rv", target); + f.compile_to_assembly("halide_laplacianFilter_rv.s", {input}, "halide_laplacianFilter_rv", target); + } +} catch (const Halide::Error& ex) { + std::cout << ex.what() << std::endl; + exit(1); +} +#endif +} + +void standartDeviation(Mat src, Mat dst, int height, int width) { + + Buffer input = Buffer::make_interleaved(src.ptr(), width, height, 3); + Buffer output(dst.ptr(), {width, height}); + +#ifdef __riscv + halide_standartDeviation_rv(input, output); +#else + static Func f("deviation"); + Func mean; + +try { + if (!f.defined()) { + Var x("x"), y("y"), c("c"); + + mean(x, y) = cast(input(x, y, 0) + input(x, y, 1) + input(x, y, 2))/3; + + Expr r = mean(x, y) - input(x, y, 0); + Expr g = mean(x, y) - input(x, y, 1); + Expr b = mean(x, y) - input(x, y, 2); + + f(x, y) = sqrt(r*r + g*g + b*b); + + f.vectorize(x, 4); + + Target target; + target.os = Target::OS::Linux; + target.arch = Target::Arch::RISCV; + target.bits = 64; + target.vector_bits = 128; + + // Tested XuanTie C906 has 128-bit vector unit + CV_Assert(target.vector_bits <= 128); + + std::vector features; + features.push_back(Target::RVV); + features.push_back(Target::NoAsserts); + features.push_back(Target::NoRuntime); + target.set_features(features); + + std::cout << target << std::endl; + f.print_loop_nest(); + + f.compile_to_header("halide_standartDeviation_rv.h", {input}, "halide_standartDeviation_rv", target); + f.compile_to_assembly("halide_standartDeviation_rv.s", {input}, "halide_standartDeviation_rv", target); + } +} catch (const Halide::Error& ex) { + std::cout << ex.what() << std::endl; + exit(1); +} +#endif +} + +void wellExp(Mat src, Mat dst, int height, int width) { + + Buffer input = Buffer::make_interleaved(src.ptr(), width, height, 3); + Buffer output(dst.ptr(), {width, height}); + +#ifdef __riscv + halide_wellExp_rv(input, output); + +#else + static Func f("well-exposedness"); +try { + if (!f.defined()) { + + Var x("x"), y("y"), c("c"); + + Expr r = pow(input(x, y, 0) - 0.5f, 2.0f); + Expr g = pow(input(x, y, 1) - 0.5f, 2.0f); + Expr b = pow(input(x, y, 2) - 0.5f, 2.0f); + + r = -r / 0.08f; + g = -g / 0.08f; + b = -b / 0.08f; + + r = exp(r); + g = exp(g); + b = exp(b); + + f(x, y) = r * g * b; + + f.vectorize(x, 4); + + Target target; + target.os = Target::OS::Linux; + target.arch = Target::Arch::RISCV; + target.bits = 64; + target.vector_bits = 128; + + // Tested XuanTie C906 has 128-bit vector unit + CV_Assert(target.vector_bits <= 128); + + std::vector features; + features.push_back(Target::RVV); + features.push_back(Target::NoAsserts); + features.push_back(Target::NoRuntime); + target.set_features(features); + + std::cout << target << std::endl; + f.print_loop_nest(); + + f.compile_to_header("halide_wellExp_rv.h", {input}, "halide_wellExp_rv", target); + f.compile_to_assembly("halide_wellExp_rv.s", {input}, "halide_wellExp_rv", target); + } +} catch (const Halide::Error& ex) { + std::cout << ex.what() << std::endl; + exit(1); +} +#endif +} + +void weightsImage(Mat contrast, Mat saturation, Mat wellexp, Mat dst, int height, int width) { + Buffer input1(contrast.ptr(), {width, height}); + Buffer input2(saturation.ptr(), {width, height}); + Buffer output(dst.ptr(), {width, height}); + +#ifdef __riscv + halide_weightsImage_rv(input1, input2, output); + +#else + static Func f("weights"); +try { + if (!f.defined()) { + + Var x("x"), y("y"), c("c"); + + f(x, y) = input1(x, y) * input2(x, y) + 1e-12f; + + f.vectorize(x, 4); + + Target target; + target.os = Target::OS::Linux; + target.arch = Target::Arch::RISCV; + target.bits = 64; + target.vector_bits = 128; + + // Tested XuanTie C906 has 128-bit vector unit + CV_Assert(target.vector_bits <= 128); + + std::vector features; + features.push_back(Target::RVV); + features.push_back(Target::NoAsserts); + features.push_back(Target::NoRuntime); + target.set_features(features); + + std::cout << target << std::endl; + f.print_loop_nest(); + + f.compile_to_header("halide_weightsImage_rv.h", {input1, input2}, "halide_weightsImage_rv", target); + f.compile_to_assembly("halide_weightsImage_rv.s", {input1, input2}, "halide_weightsImage_rv", target); + } +} catch (const Halide::Error& ex) { + std::cout << ex.what() << std::endl; + exit(1); +} +#endif +} + +void weight_sum(Mat src1, Mat src2, Mat src3, Mat src4, Mat dst, int height, int width) { + Buffer input1(src1.ptr(), {width, height}); + Buffer input2(src2.ptr(), {width, height}); + Buffer input3(src3.ptr(), {width, height}); + Buffer input4(src4.ptr(), {width, height}); + Buffer output(dst.ptr(), {width, height}); + +#ifdef __riscv + halide_weight_sum_rv(input1, input2, input3, input4, output); + +#else + static Func f("summary"); + +try { + if (!f.defined()) { + + Var x("x"), y("y"), c("c"); + + f(x, y) = input1(x, y) + input2(x, y) + input3(x, y) + input4(x, y); + + f.vectorize(x, 4); + + Target target; + target.os = Target::OS::Linux; + target.arch = Target::Arch::RISCV; + target.bits = 64; + target.vector_bits = 128; + + // Tested XuanTie C906 has 128-bit vector unit + CV_Assert(target.vector_bits <= 128); + + std::vector features; + features.push_back(Target::RVV); + features.push_back(Target::NoAsserts); + features.push_back(Target::NoRuntime); + target.set_features(features); + + std::cout << target << std::endl; + f.print_loop_nest(); + + f.compile_to_header("halide_weight_sum_rv.h", {input1, input2, input3, input4}, "halide_weight_sum_rv", target); + f.compile_to_assembly("halide_weight_sum_rv.s", {input1, input2, input3, input4}, "halide_weight_sum_rv", target); + } +} catch (const Halide::Error& ex) { + std::cout << ex.what() << std::endl; + exit(1); +} +#endif +} diff --git a/test/test_hdr.cpp b/test/test_hdr.cpp new file mode 100644 index 0000000..14473de --- /dev/null +++ b/test/test_hdr.cpp @@ -0,0 +1,148 @@ +#include + +#include "algos.hpp" + +using namespace cv; + +TEST(hdr, halide) { + + Mat image1 = imread("1.jpg"); + Mat image2 = imread("2.jpg"); + Mat image3 = imread("3.jpg"); + Mat image4 = imread("4.jpg"); + + image1.convertTo(image1, CV_32F, 1.0f/255.0f); + image2.convertTo(image2, CV_32F, 1.0f/255.0f); + image3.convertTo(image3, CV_32F, 1.0f/255.0f); + image4.convertTo(image4, CV_32F, 1.0f/255.0f); + + Mat image1Gray; + Mat image2Gray; + Mat image3Gray; + Mat image4Gray; + + cvtColor(image1, image1Gray, cv::COLOR_BGR2GRAY); + cvtColor(image2, image2Gray, cv::COLOR_BGR2GRAY); + cvtColor(image3, image3Gray, cv::COLOR_BGR2GRAY); + cvtColor(image4, image4Gray, cv::COLOR_BGR2GRAY); + + int imageWidth = image1Gray.cols; + int imageHeigth = image1Gray.rows; + + int size = imageHeigth * imageHeigth; + + Mat laplaced1(imageHeigth, imageWidth, CV_32F); + Mat laplaced2(imageHeigth, imageWidth, CV_32F); + Mat laplaced3(imageHeigth, imageWidth, CV_32F); + Mat laplaced4(imageHeigth, imageWidth, CV_32F); + + LaplacianFilter(image1Gray, laplaced1, imageHeigth, imageWidth); // LaplacianFilter - 1920x1080x1 as result + LaplacianFilter(image2Gray, laplaced2, imageHeigth, imageWidth); + LaplacianFilter(image3Gray, laplaced3, imageHeigth, imageWidth); + LaplacianFilter(image4Gray, laplaced4, imageHeigth, imageWidth); + + laplaced1 = abs(laplaced1); // Calculation of absolute values - 1920x1080x1 as result + laplaced2 = abs(laplaced2); + laplaced3 = abs(laplaced3); + laplaced4 = abs(laplaced4); + + imwrite("laplaced1.jpg", laplaced1 * 255); //write laplaced images + imwrite("laplaced2.jpg", laplaced2 * 255); + imwrite("laplaced3.jpg", laplaced3 * 255); + imwrite("laplaced4.jpg", laplaced4 * 255); + + std::cout << "check LaplacedFilter" << std::endl; + + Mat stDev1(imageHeigth, imageWidth, CV_32F); + Mat stDev2(imageHeigth, imageWidth, CV_32F); + Mat stDev3(imageHeigth, imageWidth, CV_32F); + Mat stDev4(imageHeigth, imageWidth, CV_32F); + + standartDeviation(image1, stDev1, imageHeigth, imageWidth); // Calculation of standrat deviation - 1920x1080x1 as result + standartDeviation(image2, stDev2, imageHeigth, imageWidth); + standartDeviation(image3, stDev3, imageHeigth, imageWidth); + standartDeviation(image4, stDev4, imageHeigth, imageWidth); + + std::cout << "check standartDeviation" << std::endl; + + Mat we1(imageHeigth, imageWidth, CV_32F); + Mat we2(imageHeigth, imageWidth, CV_32F); + Mat we3(imageHeigth, imageWidth, CV_32F); + Mat we4(imageHeigth, imageWidth, CV_32F); + + wellExp(image1, we1, imageHeigth, imageWidth); // Calculation of well-exposedness - 1920x1080x1 as result + wellExp(image2, we2, imageHeigth, imageWidth); + wellExp(image3, we3, imageHeigth, imageWidth); + wellExp(image4, we4, imageHeigth, imageWidth); + + std::cout << "check well-exposedness" << std::endl; + + Mat weights1(imageHeigth, imageWidth, CV_32F); + Mat weights2(imageHeigth, imageWidth, CV_32F); + Mat weights3(imageHeigth, imageWidth, CV_32F); + Mat weights4(imageHeigth, imageWidth, CV_32F); + + weightsImage(laplaced1, stDev1, we1, weights1, imageHeigth, imageWidth); // Calculation of weight map - 1920x1080x1 as result + weightsImage(laplaced2, stDev2, we2, weights2, imageHeigth, imageWidth); + weightsImage(laplaced3, stDev3, we3, weights3, imageHeigth, imageWidth); + weightsImage(laplaced4, stDev4, we4, weights4, imageHeigth, imageWidth); + + std::vector weightsVec = {weights1, weights2, weights3, weights4}; + + std::vector imagesVec = {image1, image2, image3, image4}; + + std::cout << "check weights" << std::endl; + + Mat weights_sum(imageHeigth, imageWidth, CV_32F); + + weight_sum(weights1, weights2, weights3, weights4, weights_sum, imageHeigth, imageWidth); + + std::cout << "check weights sum" << std::endl; + + //opencv code + + int maxlevel = static_cast(logf(static_cast(min(imageWidth, imageHeigth))) / logf(2.0f)); + std::vector res_pyr(maxlevel + 1); + std::vector res_pyr_mutexes(maxlevel + 1); + + parallel_for_(Range(0, static_cast(imagesVec.size())), [&](const Range& range) { + for(int i = range.start; i < range.end; i++) { + weightsVec[i] /= weights_sum; + + std::vector img_pyr, weight_pyr; + buildPyramid(imagesVec[i], img_pyr, maxlevel); + buildPyramid(weightsVec[i], weight_pyr, maxlevel); + + for(int lvl = 0; lvl < maxlevel; lvl++) { + Mat up; + pyrUp(img_pyr[lvl + 1], up, img_pyr[lvl].size()); + img_pyr[lvl] -= up; + } + for(int lvl = 0; lvl <= maxlevel; lvl++) { + std::vector splitted(3); + split(img_pyr[lvl], splitted); + for(int c = 0; c < 3; c++) { + splitted[c] = splitted[c].mul(weight_pyr[lvl]); + } + merge(splitted, img_pyr[lvl]); + + AutoLock lock(res_pyr_mutexes[lvl]); + if(res_pyr[lvl].empty()) { + res_pyr[lvl] = img_pyr[lvl]; + } else { + res_pyr[lvl] += img_pyr[lvl]; + } + } + } + }); + for(int lvl = maxlevel; lvl > 0; lvl--) { + Mat up; + pyrUp(res_pyr[lvl], up, res_pyr[lvl - 1].size()); + res_pyr[lvl - 1] += up; + } + + Mat dst = res_pyr[0]; + + imwrite("FinalImage.jpg", dst * 255); + +} \ No newline at end of file diff --git a/test/test_main.cpp b/test/test_main.cpp index 9fadfbd..d748d09 100644 --- a/test/test_main.cpp +++ b/test/test_main.cpp @@ -165,7 +165,7 @@ TEST(convolution_nhwc, halide) { TEST(idw, halide) { - Mat src(height, width, CV_8U); + Mat src(height, width, CV_8U); Mat dst(height, width, CV_8U), cl_dst(height, width, CV_8UC3), dst_h(height, width, CV_8U), cl_dst_h(height, width, CV_8UC3); // randu(src, 0, 256); @@ -185,4 +185,4 @@ TEST(idw, halide) { imwrite("cres.png", cl_dst); imwrite("res_halide.png", dst_h); imwrite("cres_halide.png", cl_dst_h); -} \ No newline at end of file +}