diff --git a/CMakeLists.txt b/CMakeLists.txt old mode 100755 new mode 100644 index 4e3a795..619c307 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,12 +1,16 @@ -cmake_minimum_required(VERSION 3.5) - -set(PROJECT_ID gesture_controller) -PROJECT (${PROJECT_ID}) +cmake_minimum_required(VERSION 3.28) set(CMAKE_C_STANDARD 11) set(CMAKE_CXX_STANDARD 17) -# add_compile_options(-Wall -Wextra -Werror -Wpedantic -pedantic-errors -Wconversion) +set(PROJECT_ID delta_robot_example) + +PROJECT (${PROJECT_ID}) + +set(PROJECT_BASE_PATH ${CMAKE_CURRENT_SOURCE_DIR}) +set(SOFTWARE_SRC_PATH ${PROJECT_BASE_PATH}/src) +file(GLOB_RECURSE SOFTWARE_SRC "${SOFTWARE_SRC_PATH}/*.*") + set(USE_ASAN ON) set(BUILD_TESTS ON) @@ -14,66 +18,20 @@ set(BUILD_TESTS ON) if ((DEFINED USE_ASAN) AND (USE_ASAN STREQUAL "ON")) message(STATUS "Using AddressSanitizer (ASan).") if (UNIX) - # message(STATUS "ASAN_OPTIONS = $ENV{ASAN_OPTIONS}") - # set(ENV{ASAN_OPTIONS} allocator_may_return_null=1) add_compile_options(-fsanitize=address -fno-omit-frame-pointer) add_link_options(-fsanitize=address) endif(UNIX) endif() -find_package(spdlog REQUIRED) -find_package(OpenCV REQUIRED PATHS /usr/local/debug/) - -set( MAIN_SRC - src/main.cpp - src/controller.cpp - src/device.cpp - src/face_detection.cpp - src/gesture_detection.cpp - src/detection.cpp) -set(MAIN_EXEC ${PROJECT_ID}__main) - -# cmrc_add_resource_library(models ALIAS models::rc NAMESPACE models -# resources/models/haarcascade_frontalface_default.xml -# resources/models/resnet18.onnx -# ) - -# cmrc_add_resource_library(test_res ALIAS test_res::rc NAMESPACE test_res -# test/data/test.png -# test/data/test_face.jpg -# resources/models/haarcascade_frontalface_default.xml -# resources/models/resnet18.onnx -# ) - -add_executable( ${MAIN_EXEC} ${MAIN_SRC} ) -target_link_libraries(${MAIN_EXEC} PUBLIC ${OpenCV_LIBS} spdlog::spdlog) -target_compile_options(${MAIN_EXEC} PRIVATE -Werror -Wall -Wextra) -INCLUDE_DIRECTORIES( ${CMAKE_CURRENT_SOURCE_DIR}/include 
${OpenCV_INCLUDE_DIRS}) -link_directories( ${CMAKE_BINARY_DIR}/bin) -set(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR}/bin) +find_package(OpenCV REQUIRED) +find_package(open62541 REQUIRED) -if ((DEFINED BUILD_TESTS) AND (BUILD_TESTS STREQUAL "ON")) - include(FetchContent) - FetchContent_Declare( - googletest - URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip - ) - FetchContent_MakeAvailable(googletest) - enable_testing() +include(FetchContent) +FetchContent_Declare( + spdlog + GIT_REPOSITORY https://github.com/gabime/spdlog.git + GIT_TAG v1.12.0 +) +FetchContent_MakeAvailable(spdlog) - set(SOURCES_FILES_TESTS - test/test_gesture_detection.cpp - test/test_face_detection.cpp - test/test_controller.cpp) - set( TEST_SRC - src/controller.cpp - src/device.cpp - src/face_detection.cpp - src/gesture_detection.cpp - src/detection.cpp) - set(EXEC_TEST ${PROJECT_ID}__test) - add_executable(${EXEC_TEST} ${TEST_SRC} ${SOURCES_FILES_TESTS} test/main_gtest.cpp) - target_link_libraries(${EXEC_TEST} PRIVATE GTest::gtest_main ${OpenCV_LIBS} spdlog::spdlog) - include(GoogleTest) - gtest_discover_tests(${EXEC_TEST}) -endif() \ No newline at end of file +add_subdirectory(examples) \ No newline at end of file diff --git a/README.md b/README.md index f7e0abb..44fba35 100755 --- a/README.md +++ b/README.md @@ -1,21 +1,52 @@ [![CMake](https://github.com/mboiar/gesture_controller/actions/workflows/cmake.yml/badge.svg?branch=main)](https://github.com/mboiar/gesture_controller/actions/workflows/cmake.yml) -# Gesture controller +# Gesture Controller -Modular, versatile gesture-based controller written in C++. +A modular, high‑performance gesture‑based controller written in C++. +It translates hand gestures captured by a camera into control commands for robots or drones, with built‑in support for **OPC UA** communication and real‑time video processing. 
-## Demo +## Examples -[![Watch the video](https://img.youtube.com/vi/OrVqN6P2TyY/hqdefault.jpg)](https://youtu.be/OrVqN6P2TyY) +| Drone Software‑in‑the‑Loop Simulation | Delta Robot PLC (OPC UA) | +|:-------------------------------------:|:-------------------------:| +| [![Watch the demo](https://img.youtube.com/vi/OrVqN6P2TyY/hqdefault.jpg)](https://youtu.be/OrVqN6P2TyY) | [![Watch the demo](doc/images/thumb.png)](https://youtube.com/shorts/yFY8yK7BDtM) | ## Features -- Easily remappable gesture commands -- Connect to any device over serial supporting camera video stream -- Good performance on resource constrained hardware -- Face detection with the HaarCascade model -- Gesture detection with the ResNet18 model trained on a custom dataset - -## Usage -Compile project with `cmake --build .` -Run tests with `bin/gesture_controller__test` -Run app with `bin/gesture_controller` + +- **Remappable gesture commands** – easily change which gesture triggers which action. +- **Multi‑device support** – connect to any device that provides a video stream and accepts commands (serial, OPC UA, etc.). +- **Resource‑efficient** – runs well on constrained hardware (e.g., Raspberry Pi, PLC Companion Computer). +- **Face detection** – using OpenCV’s Haar cascade to focus the gesture recognition region. +- **Gesture detection** – ResNet18 model trained on a custom dataset, executed with ONNX Runtime. +- **OPC UA integration** – read robot status (position, mode, servo OK) and send jog commands to industrial controllers. +- **Simulation mode** – test the control logic without hardware. +- **Asynchronous logging** – spdlog provides colour‑coded, thread‑safe logs. + +## Architecture + +The project is split into several modules: + +- **`Controller`** – gesture/face detection, and command dispatching. +- **`GenericDevice`** – abstract interface for any controllable device (simulated, serial, OPC UA). 
+- **`OPCUA_Device`** – concrete implementation that talks to an OPC UA server (e.g., a PLC controlling a delta robot). +- **Face & Gesture detectors** – wrappers around OpenCV and ONNX Runtime. + +## Dependencies + +- **C++17** compiler (gcc, clang) +- **CMake** 3.10+ +- **OpenCV** (≥4.5) +- **spdlog** -logging +- **open62541** – OPC UA client +- **argparse** – command‑line argument parsing + +## Building + +``` +git clone https://github.com/mboiar/gesture_controller.git +cd gesture_controller +mkdir build && cd build +cmake .. +make -j$(nproc) +``` + diff --git a/doc/images/thumb.png b/doc/images/thumb.png new file mode 100644 index 0000000..72abf05 Binary files /dev/null and b/doc/images/thumb.png differ diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 0000000..41cba19 --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(DeltaRobot) \ No newline at end of file diff --git a/examples/DeltaRobot/CMakeLists.txt b/examples/DeltaRobot/CMakeLists.txt new file mode 100755 index 0000000..390d2f0 --- /dev/null +++ b/examples/DeltaRobot/CMakeLists.txt @@ -0,0 +1,15 @@ +set(NAME DeltaRobotExample) + +add_executable(${NAME}) + +target_sources(${NAME} PRIVATE + ${SOFTWARE_SRC} +) + +target_include_directories(${NAME} PRIVATE ${SOFTWARE_SRC_PATH}) +target_compile_definitions(${NAME} PRIVATE UA_ENABLE_AMALGAMATION) + +target_include_directories(${NAME} PUBLIC ${OpenCV_INCLUDE_DIRS}) +link_directories( ${CMAKE_BINARY_DIR}/bin) +target_link_libraries(${NAME} PUBLIC ${OpenCV_LIBS} spdlog::spdlog) +set(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR}/bin) diff --git a/examples/DeltaRobot/main.cpp b/examples/DeltaRobot/main.cpp new file mode 100755 index 0000000..67681c0 --- /dev/null +++ b/examples/DeltaRobot/main.cpp @@ -0,0 +1,99 @@ +#include "spdlog/spdlog.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "core/controller.hpp" +#include + +using std::cerr; +using 
std::cout; +using std::endl; +using std::string; +using std::vector; + +int main(int argc, char *argv[]) { + + std::cout << "test" << std::endl; + + auto logger_ = spdlog::get("MAIN"); + + if (!logger_) { + logger_ = spdlog::stdout_color_mt("MAIN"); + } + logger_->set_level(spdlog::level::info); + + logger_->info("Parsing input arguments"); + + argparse::ArgumentParser parser("controller"); + parser.add_argument("-v", "--verbose") + .help("Display additional information during execution") + .default_value(false) + .implicit_value(true); + + parser.add_argument("--log-level") + .help("Choose logging level") + .scan<'d', int>() + .default_value(0); + + parser.add_argument("--save-video") + .help("Save video feed to a specified file") + .default_value(string{""}); + + parser.add_argument("mode") + .help("Choose operation mode") + .action([](const string &value) { + static const vector choices = {"SIM", "WEBCAM"}; + if (std::find(choices.begin(), choices.end(), value) != choices.end()) { + return value; + } + throw std::invalid_argument("Choose a valid mode option."); + }); + + parser.add_argument("--server-path") + .help("Save video feed to a specified file") + .default_value(string{"127.0.0.1:4840"}); + + parser.add_description("Control a delta robot with gestures."); + + try { + parser.parse_args(argc, argv); + } catch (const std::runtime_error &err) { + logger_->error(err.what()); + return EXIT_FAILURE; + } + + string mode = parser.get("mode"); + string server_addr = parser.get("--server-path"); + int log_level = parser.get("--log-level"); + string video_filepath = parser.get("--save-video"); + + spdlog::set_level(static_cast(log_level)); + + logger_->info("Connecting to device"); + std::string opc_ua_server_name = "opc.tcp://" + server_addr; + Device device = OPCUA_Device{}; + if (device.connect(opc_ua_server_name) < 0) { + logger_->info("Program exited with status {}", EXIT_FAILURE); + return EXIT_FAILURE; + } + device.streamon(); + + std::string 
gesture_detector_path = "../resources/models/resnet18.onnx"; + std::string face_detector_path = + "../resources/models/haarcascade_frontalface_default.xml"; + + Controller controller = + Controller(&device, true, face_detector_path, gesture_detector_path); + logger_->info("Running"); + controller.run(50); + + logger_->info("Program exited with status {}", EXIT_SUCCESS); + return EXIT_SUCCESS; +} diff --git a/include/controller.h b/include/controller.h deleted file mode 100755 index 1040bbc..0000000 --- a/include/controller.h +++ /dev/null @@ -1,146 +0,0 @@ -/** - * @file controller.hpp - * - * @brief Device controller class and its components. - * - * @author Maks Boiar - * - */ - -#ifndef CONTROLLER_H -#define CONTROLLER_H - -#include -#include -#include -#include -#include - -#include -#include "spdlog/spdlog.h" -#include "spdlog/sinks/stdout_color_sinks.h" - -#include "face_detection.h" -#include "gesture_detection.h" -#include "device.h" - -using std::string; -using std::vector; -using std::chrono::milliseconds; -using cv::Mat; -using std::atomic; -using TimePoint = std::chrono::time_point; -using AsyncLogger = std::shared_ptr; - -using interval_ms_t = unsigned long; -using class_id_t = unsigned long; - -class Buffer { - // TODO refactor - vector buffer_; - unsigned int max_count_; - size_t size_; - class_id_t default_class_id_; -public: - /** - * A constructor. 
- * @param max_count maximum count for a class_id before buffer is flushed - * @param size_ number of classes - * @param default_class_id to be returned when queried buffer is not full - */ - Buffer(unsigned int max_count, size_t size_, class_id_t default_class_id = 0) : max_count_(max_count), size_(size_), default_class_id_(default_class_id) { - buffer_ = vector(size_); - } - void add(class_id_t class_id); - class_id_t get(); - [[nodiscard]] size_t size() const { return buffer_.size(); } - unsigned int operator[](class_id_t class_id) const { return buffer_[class_id]; } -}; - -/** - * Device controller based on gesture recognition. - */ -class Controller { - constexpr static size_t buffer_len_ = 5; - constexpr static int speed_increment_[3] = { 10, 10, 10 }; - constexpr static milliseconds WAIT_RC_CONTROL_ = milliseconds(500); - constexpr static milliseconds WAIT_BATTERY_ = milliseconds(4000); - constexpr static milliseconds FACE_TIMEOUT_ = milliseconds(1000); - constexpr static milliseconds GESTURE_TIMEOUT_ = milliseconds(1000); - - Device *device_; - bool dry_run_; - FaceDetector face_detector_; - GestureDetector gesture_detector_; - Buffer buffer_; - atomic battery_stat_ = -1; - TimePoint last_gesture_ = TimePoint(); - TimePoint last_face_ = TimePoint(); - bool stop_device_ = false; - bool is_busy_ = false; - velocity_vector_ms_t velocity_ = { 0,0,0,0 }; - static const string cv_window_name_; - AsyncLogger logger_; - string name_; - - /** - * put additional information on the video frame. - * @param frame - * @param fps frames per second speed - */ - void put_info_on_frame_(Mat * frame, double fps/*, bool verbose = true*/); - - /** - * continuously query device's battery status and save value in `battery_stat` attribute. - */ - void update_battery_stat_(); - - /** - * Send a command from the buffer to the connected device. - */ - void send_command(); - -public: - /** - * A constructor. - * @param device pointer to a `Device` instance to be controlled. 
- * @param dry_run if true, commands are not being sent to the actual device. - * @param name controller instance name - */ - Controller( - Device* device, bool dry_run, const string& face_detector_path, const string& gesture_detector_path, - const string& name="CONTROLLER" - ) : - device_(device), - dry_run_(dry_run), - face_detector_(face_detector_path), - gesture_detector_(gesture_detector_path), - buffer_(buffer_len_, GestureCount), - name_(name) { - logger_ = spdlog::get(name_); - if (!logger_) { - logger_ = spdlog::stdout_color_mt(name_); - } - logger_->set_level(spdlog::level::info); - }; - - /** - * run inference on input stream and control device. - * @param frame_refresh_rate indicates how often to refresh application window (in ms). - */ - void run(interval_ms_t frame_refresh_rate = 25); - - /** - * Detect gesture in an input image. - * @param image a matrix containing an image where gesture will be detected. - */ - void detect(image_t* image); - - /** - * Stop control and try to land the device. - */ - void stop(); - -}; - -#endif \ No newline at end of file diff --git a/include/device.h b/include/device.h deleted file mode 100755 index 9e0783c..0000000 --- a/include/device.h +++ /dev/null @@ -1,82 +0,0 @@ -/** - * @file device.h - * - * @brief Generic controllable device interface. - * - * @author Maks Boiar - * - */ - -#ifndef DEVICE_H -#define DEVICE_H - -#include -#include -#include -#include - -#include -#include "spdlog/spdlog.h" -#include "spdlog/sinks/stdout_color_sinks.h" - -using std::string; -using std::vector; -using AsyncLogger = std::shared_ptr; - -using velocity_vector_ms_t = vector; - -/** - * List of command names for device control. - */ -enum Command { - NoGesture = 0, - Left, - Right, - Up, - Down, - Forward, - Back, - Stop, - Land, - GestureCount -}; - -/** - * Abstract controllable device with a camera. 
- */ -class Device { - AsyncLogger logger_; - bool simulate_; - static const char STREAM_URL_[]; -public: - explicit Device(bool simulate = true) : simulate_(simulate) { - string name("DEVICE"); - logger_ = spdlog::get(name); - if (!logger_) { - logger_ = spdlog::stdout_color_mt(name); - } - logger_->set_level(spdlog::level::info); - } - int get_battery(); - void send_rc_control(const velocity_vector_ms_t& velocity); - void land(); - - /** - * Capture video stream. - * - * @param camera_id id of the camera whose stream will be captured - */ - cv::VideoCapture get_video_stream(int camera_id); - - /** - * Enable video streaming. - */ - void streamon(){}; - - /** - * Connect to a device. - */ - void connect(){}; -}; - -#endif \ No newline at end of file diff --git a/include/face_detection.h b/include/face_detection.h deleted file mode 100755 index d5a746a..0000000 --- a/include/face_detection.h +++ /dev/null @@ -1,55 +0,0 @@ -/** - * @file face_detection.hpp - * - * @brief Face detection implementation. - * - * @author Maks Boiar - * - */ - -#ifndef FACE_DETECTION_H -#define FACE_DETECTION_H - -#include "detection.h" - -using std::string; -using std::vector; -using std::ostream; -using cv::dnn::Net; -using TimePoint = std::chrono::time_point; -using AsyncLogger = std::shared_ptr; - - -/** - * Face detection implementation using OpenCV Cascade Classifier. - */ -class FaceDetector { - cv::CascadeClassifier detector_; - AsyncLogger logger_; - double scale_ = 4; -public: - /** - * A constructor. - * @param detector_path path to a XML file with cascade classifier weights. - */ - FaceDetector(const string& detector_path); - - /** - * Detect a face with the maximum area in the input image and find its bounding box. - * @param image matrix containing an image where face is detected. 
- * @see visualize() - * @see generate_bounding_box() - * @return a `struct` containing a bounding box and confidence - */ - DetectionResult detect(const image_t& frame); - - /** - * Put detection score and box on the image. - * @param img - * @param detection - * @param color - */ - static void visualize(image_t* frame, const DetectionResult& detection, const color_t& color = cv::Scalar(0, 255, 255)); -}; - -#endif \ No newline at end of file diff --git a/include/gesture_detection.h b/include/gesture_detection.h deleted file mode 100755 index acda35e..0000000 --- a/include/gesture_detection.h +++ /dev/null @@ -1,51 +0,0 @@ -/** - * @file gesture_detection.hpp - * - * @brief Gesture detection implementation. - * - * @author Maks Boiar - * - */ - -#ifndef GESTURE_DETECTION_H -#define GESTURE_DETECTION_H - -#include "detection.h" -#include - -using AsyncLogger = std::shared_ptr; -using std::vector; -using std::string; -using landmarks_t = std::vector; - - -struct ClassifierOutput { - double score = 0; - int class_id = 0; - landmarks_t landmarks; - ClassifierOutput(double score, int class_id) : score(score), class_id(class_id) {} - ClassifierOutput() : score(0), class_id(0) {} -}; - -/** - * Gesture detection implementation based on a ResNet model. - */ -class GestureDetector { - AsyncLogger logger_; - cv::dnn::Net detector_; -public: - /** - * A constructor. - * @param detector_path path to a ONNX model. 
- */ - GestureDetector(const string& detector_path); - static void visualize( - image_t* image, const ClassifierOutput& classified_gesture, - const bounding_box_t& gesture_box, const color_t& color = cv::Scalar(0, 255, 255) - ); - ClassifierOutput detect(const cv::Mat&); - static bounding_box_t get_detection_area(const bounding_box_t& face_box, int img_width, int img_height, int w, int h); - static cv::Mat preprocess_image(const image_t& img); -}; - -#endif \ No newline at end of file diff --git a/lib/spdlogd.lib b/lib/spdlogd.lib deleted file mode 100755 index 2e2edb9..0000000 Binary files a/lib/spdlogd.lib and /dev/null differ diff --git a/src/controller.cpp b/src/controller.cpp deleted file mode 100755 index 8a7bbcd..0000000 --- a/src/controller.cpp +++ /dev/null @@ -1,193 +0,0 @@ -#include -#include -#include -#include -#include -#include "controller.h" - -using std::chrono::system_clock; -using std::chrono::milliseconds; -using std::string; -using std::vector; -using std::atomic; -using TimePoint = std::chrono::time_point; -using namespace std::chrono_literals; - - -const string Controller::cv_window_name_ = "Device camera"; - -void Controller::update_battery_stat_() { - while (true) { - battery_stat_ = device_->get_battery(); - std::this_thread::sleep_for(WAIT_BATTERY_); - } -} - -void Controller::run(interval_ms_t frame_refresh_rate) { - std::thread control_thread(&Controller::send_command, this); - control_thread.detach(); - std::thread battery_thread(&Controller::update_battery_stat_, this); - battery_thread.detach(); - - cv::VideoCapture cap = device_->get_video_stream(0); - cv::Mat frame; - cv::namedWindow(cv_window_name_); - - unsigned int frame_count = 0; - TimePoint start_time = system_clock::now(); - TimePoint end_time; - double fps = 0; - logger_->info("Starting detection"); - - while (true) { - cap >> frame; - if (frame.empty()) { - logger_->info("Skipping empty frame"); - continue; - } - - if (++frame_count >= 10) { - end_time = 
system_clock::now(); - fps = (double)frame_count / (double)((end_time - start_time)/1.0s); - start_time = end_time; - frame_count = 0; - } - detect(&frame); - - put_info_on_frame_(&frame, fps); - - cv::imshow(cv_window_name_, frame); - - char key = (char)cv::waitKey(frame_refresh_rate); - if (key == 27 || key == 'q' || (int)key == -29) { - // TODO clean-up - break; - } - } - // TODO catch Ctrl+C KeyboardInterrupt (?) -} - -void Controller::detect(cv::Mat* img) { - DetectionResult face_detection = face_detector_.detect(*img); - if (face_detection.score > 0) { - last_face_ = system_clock::now(); - color_t color = cv::Scalar(0, 0, 255); - - FaceDetector::visualize(img, face_detection); - bounding_box_t gesture_box = gesture_detector_.get_detection_area(face_detection.box, img->rows, img->cols, 256, 256); - cv::rectangle(*img, gesture_box, color, 2); - - cv::Mat gesture_detection_region = (*img)(gesture_box); - //cv::imshow("Gesture detection area", gesture_detection_area); - - ClassifierOutput classified_gesture = gesture_detector_.detect(gesture_detection_region); - - if (classified_gesture.score > 0) { - last_gesture_ = system_clock::now(); - stop_device_ = false; - buffer_.add(classified_gesture.class_id); - gesture_detector_.visualize(img, classified_gesture, gesture_box); - } - } -} - -void Controller::send_command() { - while (true) { - if (!stop_device_) { - if ((system_clock::now() - last_face_) > FACE_TIMEOUT_ || - (system_clock::now() - last_gesture_) > GESTURE_TIMEOUT_) { - logger_->info("No face or gesture: stopping drone"); - stop(); - } - else { - velocity_vector_ms_t velocity = { 0, 0, 0, -1 }; - auto command = static_cast(buffer_.get()); - - if (command != NoGesture) { - logger_->debug("Received command {}", static_cast(command)); - if (!is_busy_) { - switch (command) - { - case NoGesture: - break; - case Stop: - stop(); - break; - case Left: - velocity[0] = -1*speed_increment_[0]; - velocity[3] = 0; - break; - case Right: - velocity[0] = 
speed_increment_[0]; - velocity[3] = 0; - break; - case Up: - velocity[2] = speed_increment_[2]; - velocity[3] = 0; - break; - case Down: - velocity[2] = -1*speed_increment_[2]; - velocity[3] = 0; - break; - case Forward: - velocity[1] = speed_increment_[1]; - velocity[3] = 0; - break; - case Back: - velocity[1] = -1*speed_increment_[1]; - velocity[3] = 0; - break; - case Land: - device_->land(); - is_busy_ = true; - break; - default: - break; - } - } - - if (velocity[3] != -1 && velocity_ != velocity) { - velocity_ = velocity; - if (!dry_run_) { - device_->send_rc_control(velocity); - } - } - } - } - } - std::this_thread::sleep_for(WAIT_RC_CONTROL_); - } -} - -void Controller::stop() { - velocity_ = { 0, 0, 0, 0 }; - stop_device_ = true; - device_->send_rc_control(velocity_); -} - -void Controller::put_info_on_frame_(cv::Mat* frame, double fps/*, TODO bool verbose*/) { - string battery_text("No battery info"); - if (battery_stat_ > 0) { - battery_text = std::to_string(battery_stat_) + "%"; - } - cv::putText(*frame, battery_text, cv::Point(20, 100), 1, 2, cv::Scalar(0, 255, 255), 2); - - cv::putText(*frame, std::to_string((int)fps)+" fps", cv::Point(20, 50), 1, 2, cv::Scalar(0, 255, 255), 2); -} - - -void Buffer::add(class_id_t class_id) { - buffer_.at(class_id)++; -} - -class_id_t Buffer::get() { - auto curr_max_count_it = std::max_element(buffer_.begin(), buffer_.end()); - if (curr_max_count_it != buffer_.end() && *curr_max_count_it >= max_count_) { - class_id_t class_id = std::distance(buffer_.begin(), curr_max_count_it); - buffer_.assign(size_, 0); - return class_id; - } - else { - return default_class_id_; - } -} diff --git a/src/core/Commands.h b/src/core/Commands.h new file mode 100644 index 0000000..9de3267 --- /dev/null +++ b/src/core/Commands.h @@ -0,0 +1,16 @@ +#pragma once + +/** + * List of command names for device control. 
+ */ +enum Command { + Idle, + Up, // dislike + Down, // fist + Left, // four + Right, // like + Forth, // ok + Back, // mute + Stop, // palm + NoGesture = 18 +}; \ No newline at end of file diff --git a/src/core/controller.cpp b/src/core/controller.cpp new file mode 100755 index 0000000..839b626 --- /dev/null +++ b/src/core/controller.cpp @@ -0,0 +1,215 @@ +#include "controller.hpp" +#include "Commands.h" + +#include +#include +#include +#include +#include + +using std::atomic; +using std::string; +using std::vector; +using std::chrono::milliseconds; +using std::chrono::system_clock; +using TimePoint = std::chrono::time_point; +using namespace std::chrono_literals; + +const string Controller::cv_window_name_ = "Device camera"; + +void Controller::update_device_stat_() { + std::vector curPos; + while (true) { + servoOk = device_->get_status(); + curPos = device_->get_position(); + modeText = device_->get_mode(); + RobotPos[0].store(curPos[0]); + RobotPos[1].store(curPos[1]); + RobotPos[2].store(curPos[2]); + + std::this_thread::sleep_for(WAIT_DEVICE_STAT_); + } +} + +void Controller::run(interval_ms_t frame_refresh_rate) { + std::thread control_thread(&Controller::send_command, this); + control_thread.detach(); + std::thread stat_thread(&Controller::update_device_stat_, this); + stat_thread.detach(); + + cv::Mat frame; + cv::namedWindow(cv_window_name_); + + unsigned int frame_count = 0; + TimePoint start_time = system_clock::now(); + TimePoint end_time; + double fps = 0; + logger_->info("Starting detection"); + + while (true) { + device_->get_frame(&frame); + if (frame.empty()) { + logger_->info("Skipping empty frame"); + continue; + } + + if (++frame_count >= 10) { + end_time = system_clock::now(); + fps = (double)frame_count / (double)((end_time - start_time) / 1.0s); + start_time = end_time; + frame_count = 0; + } + if (modeText != "AUTO") { + try { + detect(&frame); + } catch (const std::exception &e) { + std::cerr << "[DETECT] Caught exception: " << e.what() 
<< std::endl; + } + } + try { + put_info_on_frame_(&frame, fps); + } catch (const std::exception &e) { + std::cerr << "[PUT INFO] Caught exception: " << e.what() << std::endl; + } + + try { + cv::imshow(cv_window_name_, frame); + } catch (const std::exception &e) { + std::cerr << "[IMSHOW] Caught exception: " << e.what() << std::endl; + } + + char key = (char)cv::waitKey(frame_refresh_rate); + if (key == 27 || key == 'q' || (int)key == -29) { + break; + } + } +} + +void Controller::detect(cv::Mat *img) { + DetectionResult face_detection = face_detector_.detect(*img); + if (face_detection.score > 0) { + last_face_ = system_clock::now(); + color_t color = cv::Scalar(0, 0, 255); + + FaceDetector::visualize(img, face_detection); + bounding_box_t gesture_box = gesture_detector_.get_detection_area( + face_detection.box, img->cols, img->rows, 256, 256); + cv::rectangle(*img, gesture_box, color, 2); + + cv::Mat gesture_detection_region = (*img)(gesture_box); + + ClassifierOutput classified_gesture = + gesture_detector_.detect(gesture_detection_region); + + if (classified_gesture.score > 0 && classified_gesture.class_id != 18) { + last_gesture_ = system_clock::now(); + stop_device_ = false; + buffer_.add(classified_gesture.class_id); + gesture_detector_.visualize(img, classified_gesture, gesture_box); + } + } +} + +void Controller::send_command() { + while (true) { + if (!stop_device_) { + if ((system_clock::now() - last_face_) > FACE_TIMEOUT_ || + (system_clock::now() - last_gesture_) > GESTURE_TIMEOUT_) { + logger_->info("No face or gesture: stopping"); + stop(); + } else { + velocity_vector_ms_t velocity = {0, 0, 0, -1}; + auto command = static_cast(buffer_.get()); + + if (command != NoGesture) { + logger_->info("Received command {}", static_cast(command)); + if (!is_busy_) { + switch (command) { + case Stop: + stop(); + break; + + case Up: + velocity[0] = -1 * speed_increment_[0]; + velocity[3] = 0; + break; + + case Down: + velocity[0] = speed_increment_[0]; + 
velocity[3] = 0; + break; + + case Left: + velocity[2] = speed_increment_[2]; + velocity[3] = 0; + break; + + case Right: + velocity[2] = -1 * speed_increment_[2]; + velocity[3] = 0; + break; + + case Forth: + velocity[1] = speed_increment_[1]; + velocity[3] = 0; + break; + + case Back: + velocity[1] = -1 * speed_increment_[1]; + velocity[3] = 0; + device_->stop(); + break; + + default: + break; + } + } + + if (velocity[3] != -1 && velocity_ != velocity) { + velocity_ = velocity; + if (!dry_run_) { + device_->send_rc_control(velocity); + } + } + } + } + } + std::this_thread::sleep_for(WAIT_RC_CONTROL_); + } +} + +void Controller::stop() { + + velocity_ = {0, 0, 0, 0}; + stop_device_ = true; + device_->send_rc_control(velocity_); +} + +void Controller::put_info_on_frame_(cv::Mat *frame, + double fps /*, TODO bool verbose*/) { + cv::putText(*frame, "ServoOK: " + std::to_string(servoOk), cv::Point(20, 100), + 1, 1, cv::Scalar(0, 0, 0), 2); + cv::putText(*frame, "Mode: " + modeText, cv::Point(20, 120), 1, 1, + cv::Scalar(0, 0, 0), 2); + cv::putText(*frame, "PosX: " + std::to_string(RobotPos[0].load()), + cv::Point(20, 200), 1, 1, cv::Scalar(0, 0, 0), 2); + cv::putText(*frame, "PosY: " + std::to_string(RobotPos[1].load()), + cv::Point(20, 220), 1, 1, cv::Scalar(0, 0, 0), 2); + cv::putText(*frame, "PosZ: " + std::to_string(RobotPos[2].load()), + cv::Point(20, 240), 1, 1, cv::Scalar(0, 0, 0), 2); + cv::putText(*frame, std::to_string((int)fps) + " fps", cv::Point(20, 80), 1, + 2, cv::Scalar(0, 0, 0), 2); +} + +void Buffer::add(class_id_t class_id) { buffer_.at(class_id)++; } + +class_id_t Buffer::get() { + auto curr_max_count_it = std::max_element(buffer_.begin(), buffer_.end()); + if (curr_max_count_it != buffer_.end() && *curr_max_count_it >= max_count_) { + class_id_t class_id = std::distance(buffer_.begin(), curr_max_count_it); + buffer_.assign(size_, 0); + return class_id; + } else { + return default_class_id_; + } +} diff --git a/src/core/controller.hpp 
b/src/core/controller.hpp new file mode 100755 index 0000000..6674d5d --- /dev/null +++ b/src/core/controller.hpp @@ -0,0 +1,151 @@ +/** + * @file controller.hpp + * + * @brief Device controller class and its components. + * + * @author Maks Boiar + * + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "spdlog/sinks/stdout_color_sinks.h" +#include "spdlog/spdlog.h" +#include + +#include "device.hpp" +#include "face_detection.hpp" +#include "gesture_detection.hpp" + +using cv::Mat; +using std::atomic; +using std::string; +using std::vector; +using std::chrono::milliseconds; +using TimePoint = std::chrono::time_point; +using AsyncLogger = std::shared_ptr; + +using interval_ms_t = unsigned long; +using class_id_t = unsigned long; + +class Buffer { + // TODO refactor + vector buffer_; + unsigned int max_count_; + size_t size_; + class_id_t default_class_id_; + +public: + /** + * A constructor. + * @param max_count maximum count for a class_id before buffer is flushed + * @param size_ number of classes + * @param default_class_id to be returned when queried buffer is not full + */ + Buffer(unsigned int max_count, size_t size_, class_id_t default_class_id = 0) + : max_count_(max_count), size_(size_), + default_class_id_(default_class_id) { + buffer_ = vector(size_); + } + void add(class_id_t class_id); + class_id_t get(); + [[nodiscard]] size_t size() const { return buffer_.size(); } + unsigned int operator[](class_id_t class_id) const { + return buffer_[class_id]; + } +}; + +/** + * Device controller based on gesture recognition. 
+ */ +class Controller { + constexpr static size_t buffer_len_ = 2; + constexpr static int speed_increment_[3] = {10, 10, 10}; + constexpr static milliseconds WAIT_RC_CONTROL_ = milliseconds(500); + constexpr static milliseconds WAIT_DEVICE_STAT_ = milliseconds(100); + constexpr static milliseconds FACE_TIMEOUT_ = milliseconds(1000); + constexpr static milliseconds GESTURE_TIMEOUT_ = milliseconds(1000); + + std::unique_ptr device_; + bool dry_run_; + FaceDetector face_detector_; + GestureDetector gesture_detector_; + Buffer buffer_; + atomic servoOk = false; + string modeText = ""; + atomic RobotPos[3] = {0, 0, 0}; + TimePoint last_gesture_ = TimePoint(); + TimePoint last_face_ = TimePoint(); + bool stop_device_ = false; + bool is_busy_ = false; + velocity_vector_ms_t velocity_ = {0, 0, 0, 0}; + static const string cv_window_name_; + AsyncLogger logger_; + string name_; + + /** + * put additional information on the video frame. + * @param frame + * @param fps frames per second speed + */ + void put_info_on_frame_(Mat *frame, double fps /*, bool verbose = true*/); + + /** + * continuously query device's battery status and save value in `battery_stat` + * attribute. + */ + void update_device_stat_(); + + /** + * Send a command from the buffer to the connected device. + */ + void send_command(); + +public: + /** + * A constructor. + * @param device pointer to a `Device` instance to be controlled. + * @param dry_run if true, commands are not being sent to the actual device. 
+ * @param name controller instance name + */ + Controller(std::unique_ptr device, bool dry_run, + const string &face_detector_path, + const string &gesture_detector_path, + const string &name = "CONTROLLER") + : device_(std::move(device)), dry_run_(dry_run), + face_detector_(face_detector_path), + gesture_detector_(gesture_detector_path), buffer_(buffer_len_, 19), + name_(name) { + logger_ = spdlog::get(name_); + if (!logger_) { + logger_ = spdlog::stdout_color_mt(name_); + } + logger_->set_level(spdlog::level::info); + }; + + /** + * run inference on input stream and control device. + * @param frame_refresh_rate indicates how often to refresh application window + * (in ms). + */ + void run(interval_ms_t frame_refresh_rate = 25); + + /** + * Detect gesture in an input image. + * @param image a matrix containing an image where gesture will be detected. + */ + void detect(image_t *image); + + /** + * Stop control and try to land the device. + */ + void stop(); +}; diff --git a/src/core/detection.cpp b/src/core/detection.cpp new file mode 100644 index 0000000..6f121fc --- /dev/null +++ b/src/core/detection.cpp @@ -0,0 +1,58 @@ + +#include "detection.hpp" + +void resize_and_pad(const image_t &src, image_t &dst, cv::Size new_shape, + int pad_color) { + int src_width = src.cols; + int src_height = src.rows; + int new_width = new_shape.width; + int new_height = new_shape.height; + int interpolation_method; + int pad_top, pad_bottom, pad_left, pad_right; + double aspect_ratio = double(src_width) / src_height; + double new_aspect_ratio = double(new_width) / new_height; + if (src_height > new_height || src_width > new_width) { + interpolation_method = cv::INTER_AREA; + } else { + interpolation_method = cv::INTER_CUBIC; + } + if ((new_aspect_ratio >= aspect_ratio) || + ((new_aspect_ratio == 1) && (aspect_ratio <= 1))) { + // new_height = new_height; + new_width = int(new_height * aspect_ratio); + pad_left = int(double(new_shape.width - new_width) / 2); + pad_right = 
int(double(new_shape.width - new_width) / 2); +    pad_top = 0; +    pad_bottom = 0; +  } else { +    new_height = int(new_width / aspect_ratio); +    pad_top = int(double(new_shape.height - new_height) / 2); +    pad_bottom = int(double(new_shape.height - new_height) / 2); +    pad_left = 0; +    pad_right = 0; +  } + +  cv::resize(src, dst, cv::Size(new_width, new_height), 0, 0, +             interpolation_method); + +  color_t color = cv::Scalar(pad_color, pad_color, pad_color); +  cv::copyMakeBorder(dst, dst, pad_top, pad_bottom, pad_left, pad_right, +                     cv::BORDER_CONSTANT | CV_HAL_BORDER_ISOLATED, color); +} + +void softmax(cv::InputArray inblob, cv::OutputArray outblob) { +  const cv::Mat input = inblob.getMat(); +  outblob.create(inblob.size(), inblob.type()); + +  cv::Mat exp; +  const float max = *std::max_element(input.begin(), input.end()); +  cv::exp((input - max), exp); +  outblob.getMat() = exp / cv::sum(exp)[0]; +} + +void rescale_box(const bounding_box_t &src, bounding_box_t &dst, double scale) { +  dst.x = (int)((double)src.x * scale); +  dst.y = (int)((double)src.y * scale); +  dst.width = (int)((double)src.width * scale); +  dst.height = (int)((double)src.height * scale); +} \ No newline at end of file diff --git a/include/detection.h b/src/core/detection.hpp similarity index 57% rename from include/detection.h rename to src/core/detection.hpp index 5490bc3..45f8e85 100755 --- a/include/detection.h +++ b/src/core/detection.hpp @@ -1,13 +1,12 @@ -#ifndef DETECTION_H -#define DETECTION_H +#pragma once #include -#include -#include +#include +#include -#include -#include "spdlog/spdlog.h" #include "spdlog/sinks/stdout_color_sinks.h" +#include "spdlog/spdlog.h" +#include using bounding_box_t = cv::Rect; using score_t = double; @@ -18,13 +17,13 @@ using color_t = cv::Scalar; * Contains bounding box and confidence score for a detection result. 
*/ struct DetectionResult { - bounding_box_t box = cv::Rect(); - score_t score = 0; - DetectionResult(bounding_box_t box, score_t score) : box(box), score(score) {}; - DetectionResult(): box(), score(0) {} + bounding_box_t box = cv::Rect(); + score_t score = 0; + DetectionResult(bounding_box_t box, score_t score) : box(box), score(score){}; + DetectionResult() : box(), score(0) {} }; -void rescale_box(const bounding_box_t& src, bounding_box_t& dst, double scale); +void rescale_box(const bounding_box_t &src, bounding_box_t &dst, double scale); /** * Evaluate softmax function on input array. @@ -34,14 +33,14 @@ void rescale_box(const bounding_box_t& src, bounding_box_t& dst, double scale); void softmax(cv::InputArray inblob, cv::OutputArray outblob); /** - * Resize an OpenCV image up or down keeping the aspect ratio of the original image constant and padding with specified - * color if necessary. See https://stackoverflow.com/a/72955620. + * Resize an OpenCV image up or down keeping the aspect ratio of the original + * image constant and padding with specified color if necessary. See + * https://stackoverflow.com/a/72955620. 
* * @param src input image * @param dst output image * @param new_shape shape of the image after resizing * @param pad_color color of the padding region -*/ -void resize_and_pad(const image_t& src, image_t& dst, cv::Size new_shape, int pad_color=0); - -#endif // !DETECTION_H + */ +void resize_and_pad(const image_t &src, image_t &dst, cv::Size new_shape, + int pad_color = 0); diff --git a/src/core/device.cpp b/src/core/device.cpp new file mode 100755 index 0000000..4412e22 --- /dev/null +++ b/src/core/device.cpp @@ -0,0 +1,27 @@ +#include "device.hpp" + +using std::string; +using std::vector; + +cv::VideoCapture GenericDevice::get_video_stream(int camera_id) { + + if (simulate_) { + logger_->info("Opening stream {}", camera_id); + // while (true) { + // if (cap.open(camera_id++)) { + // logger_->info("Opened {}", camera_id); + // } + // } + cap = cv::VideoCapture(camera_id, cv::CAP_MSMF); + logger_->info(cap.getBackendName()); + } else { + logger_->info("Opening stream... {}", STREAM_URL_); + cap = cv::VideoCapture(STREAM_URL_, cv::CAP_FFMPEG); + } + if (!cap.isOpened()) { + logger_->error("Unable to get video stream"); + // TODO handle error + } + logger_->info("Done"); + return cap; +} \ No newline at end of file diff --git a/src/core/device.hpp b/src/core/device.hpp new file mode 100755 index 0000000..48d5be3 --- /dev/null +++ b/src/core/device.hpp @@ -0,0 +1,71 @@ +/** + * @file device.h + * + * @brief Generic controllable device interface. + * + * @author Maks Boiar + * + */ + +#pragma once + +#include +#include +#include +#include + +#include "spdlog/sinks/stdout_color_sinks.h" +#include "spdlog/spdlog.h" +#include + +using std::string; +using std::vector; +using AsyncLogger = std::shared_ptr; + +using velocity_vector_ms_t = vector; + +/** + * Abstract controllable device with a camera. 
+ */ +class GenericDevice { +public: + GenericDevice(bool simulate = true) : simulate_(simulate) { + string name("DEVICE"); + logger_ = spdlog::get(name); + if (!logger_) { + logger_ = spdlog::stdout_color_mt(name); + } + } + + void send_rc_control(const velocity_vector_ms_t &velocity); + + virtual void stop(); + + // /** + // * Capture video stream. + // * + // * @param camera_id id of the camera whose stream will be captured + // */ + cv::VideoCapture get_video_stream(int camera_id); + + // /** + // * Connect to a device. + // */ + virtual int connect(const std::string &server_name); + + void get_frame(cv::Mat *frame) { cap >> *frame; } + + virtual bool get_status() const; + + virtual std::string get_mode() const; + + virtual std::vector get_position(); + + ~GenericDevice() = default; + +protected: + AsyncLogger logger_; + bool simulate_; + cv::VideoCapture cap; + std::string STREAM_URL_; +}; diff --git a/src/core/face_detection.cpp b/src/core/face_detection.cpp new file mode 100755 index 0000000..7acce3e --- /dev/null +++ b/src/core/face_detection.cpp @@ -0,0 +1,67 @@ +#include "face_detection.hpp" + +#include +#include + +using cv::dnn::Net; +using std::chrono::system_clock; +using TimePoint = std::chrono::time_point; + +FaceDetector::FaceDetector(const std::string &detector_path) { + string name("DETECTION"); + logger_ = spdlog::get(name); + if (!logger_) { + logger_ = spdlog::stdout_color_mt(name); + } + + detector_ = cv::CascadeClassifier(); + string detector_name = cv::samples::findFileOrKeep(detector_path); + logger_->info("Load face cascade classifier"); + if (!detector_.load(detector_name)) { + logger_->error("Error loading classifier"); + // TODO handle error + } +} + +DetectionResult FaceDetector::detect(const image_t &image) { + auto start = std::chrono::high_resolution_clock::now(); + + vector faces; + image_t gray; + cv::cvtColor(image, gray, cv::COLOR_RGBA2GRAY, 0); + + cv::resize(gray, gray, cv::Size(), 1 / scale_, 1 / scale_, cv::INTER_LINEAR); + 
cv::equalizeHist(gray, gray); + detector_.detectMultiScale(gray, faces, 1.1, 3, 0); + + if (faces.empty()) { + return {}; + } + bounding_box_t max_face = *std::max_element( + faces.begin(), faces.end(), [](cv::Rect face1, cv::Rect face2) { + return face1.area() < face2.area(); + }); + + // TODO get detection score + score_t score = 0.99; + DetectionResult detection = DetectionResult(max_face, score); + rescale_box(detection.box, detection.box, scale_); + + auto stop = std::chrono::high_resolution_clock::now(); + logger_->debug( + "Detected face: {:03.1f}% at ({} {} {} {}) duration: {}", score * 100, + max_face.x, max_face.y, max_face.x + max_face.width, + max_face.y + max_face.height, + std::chrono::duration_cast(stop - start) + .count()); + + return detection; +} + +void FaceDetector::visualize(image_t *img, const DetectionResult &detection, + const color_t &color) { + cv::rectangle(*img, detection.box, color, 1); + cv::putText(*img, std::to_string((int)(detection.score * 100)) + " %", + cv::Point(detection.box.x, detection.box.y - 20), 0, 0.5, color, + 1); +} diff --git a/src/core/face_detection.hpp b/src/core/face_detection.hpp new file mode 100755 index 0000000..7ab511a --- /dev/null +++ b/src/core/face_detection.hpp @@ -0,0 +1,54 @@ +/** + * @file face_detection.hpp + * + * @brief Face detection implementation. + * + * @author Maks Boiar + * + */ + +#pragma once + +#include "detection.hpp" + +using cv::dnn::Net; +using std::ostream; +using std::string; +using std::vector; +using TimePoint = std::chrono::time_point; +using AsyncLogger = std::shared_ptr; + +/** + * Face detection implementation using OpenCV Cascade Classifier. + */ +class FaceDetector { + cv::CascadeClassifier detector_; + AsyncLogger logger_; + double scale_ = 4; + +public: + /** + * A constructor. + * @param detector_path path to a XML file with cascade classifier weights. 
+ */ + FaceDetector(const string &detector_path); + + /** + * Detect a face with the maximum area in the input image and find its + * bounding box. + * @param image matrix containing an image where face is detected. + * @see visualize() + * @see generate_bounding_box() + * @return a `struct` containing a bounding box and confidence + */ + DetectionResult detect(const image_t &frame); + + /** + * Put detection score and box on the image. + * @param img + * @param detection + * @param color + */ + static void visualize(image_t *frame, const DetectionResult &detection, + const color_t &color = cv::Scalar(0, 0, 0)); +}; diff --git a/src/core/gesture_detection.cpp b/src/core/gesture_detection.cpp new file mode 100755 index 0000000..a607049 --- /dev/null +++ b/src/core/gesture_detection.cpp @@ -0,0 +1,105 @@ +#include "gesture_detection.hpp" +#include + +using cv::dnn::Net; +using std::string; + +std::map gesture_map = { + {1, "JogXUp"}, {2, "JogXDown"}, {3, "JogYUp"}, {4, "JogYDown"}, + {5, "JogZUp"}, {6, "JogZDown"}, {7, "Stop"}, {8, "ToolOn"}, +}; + +GestureDetector::GestureDetector(const string &detector_path) { + string name("DETECTION"); + logger_ = spdlog::get(name); + if (!logger_) { + logger_ = spdlog::stdout_color_mt(name); + } + + detector_ = cv::dnn::readNet(detector_path); +} + +ClassifierOutput GestureDetector::detect(const image_t &img) { + auto start = std::chrono::high_resolution_clock::now(); + + cv::Mat blob = preprocess_image(img); + detector_.setInput(blob); + std::vector outNames = detector_.getUnconnectedOutLayersNames(); + std::vector outs; + detector_.forward(outs, outNames); + for (auto i : outs) { + softmax(i, i); + } + cv::Point classIdPoint_leading_hand; + score_t confidence_leading_hand; + cv::Point classIdPoint_gesture; + score_t confidence_gesture; + + auto stop = std::chrono::high_resolution_clock::now(); + cv::minMaxLoc(outs.at(1).reshape(1, 1), nullptr, &confidence_gesture, nullptr, + &classIdPoint_gesture); + logger_->debug( + "Gesture 
class: {} conf: {:.2f} duration {}", classIdPoint_gesture.x, + confidence_gesture, + std::chrono::duration_cast(stop - start) + .count()); + cv::minMaxLoc(outs.at(0).reshape(1, 1), nullptr, &confidence_leading_hand, + nullptr, &classIdPoint_leading_hand); + logger_->debug("Leading hand: {} conf: {:.2f}", classIdPoint_leading_hand.x, + confidence_leading_hand); + + ClassifierOutput classified_gesture = + ClassifierOutput(confidence_gesture, classIdPoint_gesture.x); + return classified_gesture; +} + +cv::Mat GestureDetector::preprocess_image(const image_t &img) { + // rescale frame to [0, 1] than resize and pad to [224, 224, 3] + double scale = 1.0 / 255.0; + int inpWidth = 224; + int inpHeight = 224; + image_t resized_img = img; + resize_and_pad(resized_img, resized_img, cv::Size(inpWidth, inpHeight)); + return cv::dnn::blobFromImage(resized_img, scale, + cv::Size(inpWidth, inpHeight), 0, true, false); +} + +void GestureDetector::visualize(image_t *img, + const ClassifierOutput &classified_gesture, + const bounding_box_t &gesture_box, + const color_t &color) { + // draw gesture name + cv::putText(*img, gesture_map[classified_gesture.class_id], + cv::Point(gesture_box.x + 80, gesture_box.y - 20), 0, 0.5, color, + 1); + // draw landmarks + if (!classified_gesture.landmarks.empty()) { + // TODO draw gesture landmarks + } + // draw score + cv::putText(*img, + std::to_string((int)(classified_gesture.score * 100)) + " %", + cv::Point(gesture_box.x, gesture_box.y - 20), 0, 0.5, color, 1); +} + +/** + * Given a face detection box, define an area of the image where gestures will + * be detected.. 
+ * @param box area of the image where face has been detected in (x1, y1, x2, y2) + * format + * @param img_width image width + * @param img_height image height + * @param w new detection area width + * @param h new detection area height + * @return bounding box of an area where gestures will be detected + */ +bounding_box_t +GestureDetector::get_detection_area(const bounding_box_t &face_box, + int img_width, int img_height, int w, + int h) { + int x1 = face_box.x + face_box.width; + int y1 = face_box.y; + int x2 = std::min(x1 + w, img_width); + int y2 = std::min(y1 + h, img_height); + return {cv::Point(x1, y1), cv::Point(x2, y2)}; +} diff --git a/src/core/gesture_detection.hpp b/src/core/gesture_detection.hpp new file mode 100755 index 0000000..632c990 --- /dev/null +++ b/src/core/gesture_detection.hpp @@ -0,0 +1,51 @@ +/** + * @file gesture_detection.hpp + * + * @brief Gesture detection implementation. + * + * @author Maks Boiar + * + */ + +#pragma once + +#include "detection.hpp" +#include + +using AsyncLogger = std::shared_ptr; +using std::string; +using std::vector; +using landmarks_t = std::vector; + +struct ClassifierOutput { + double score = 0; + int class_id = 0; + landmarks_t landmarks; + ClassifierOutput(double score, int class_id) + : score(score), class_id(class_id) {} + ClassifierOutput() : score(0), class_id(0) {} +}; + +/** + * Gesture detection implementation based on a ResNet model. + */ +class GestureDetector { + AsyncLogger logger_; + cv::dnn::Net detector_; + +public: + /** + * A constructor. + * @param detector_path path to a ONNX model. 
+ */ + GestureDetector(const string &detector_path); + static void visualize(image_t *image, + const ClassifierOutput &classified_gesture, + const bounding_box_t &gesture_box, + const color_t &color = cv::Scalar(0, 0, 0)); + ClassifierOutput detect(const cv::Mat &); + static bounding_box_t get_detection_area(const bounding_box_t &face_box, + int img_width, int img_height, int w, + int h); + static cv::Mat preprocess_image(const image_t &img); +}; diff --git a/include/argparse.hpp b/src/dep/argparse.hpp similarity index 100% rename from include/argparse.hpp rename to src/dep/argparse.hpp diff --git a/src/detection.cpp b/src/detection.cpp deleted file mode 100644 index 7796daf..0000000 --- a/src/detection.cpp +++ /dev/null @@ -1,57 +0,0 @@ - -#include "detection.h" - - -void resize_and_pad(const image_t& src, image_t& dst, cv::Size new_shape, int pad_color) { - int src_width = src.cols; - int src_height = src.rows; - int new_width = new_shape.width; - int new_height = new_shape.height; - int interpolation_method; - int pad_top, pad_bottom, pad_left, pad_right; - double aspect_ratio = double(src_width) / src_height; - double new_aspect_ratio = double(new_width) / new_height; - if (src_height > new_height || src_width > new_width){ - interpolation_method = cv::INTER_AREA; - } else { - interpolation_method = cv::INTER_CUBIC; - } - if ( (new_aspect_ratio >= aspect_ratio) || ((new_aspect_ratio == 1) && (aspect_ratio <= 1)) ){ - // new_height = new_height; - new_width = int(new_height * aspect_ratio); - pad_left = int(double(new_shape.width - new_width) / 2); - pad_right = int(double(new_shape.width - new_width) / 2); - pad_top = 0; - pad_bottom = 0; - } - else { - new_height = int(new_width / aspect_ratio); - pad_top = int(double(new_shape.height - new_height) / 2); - pad_bottom = int(double(new_shape.height - new_width) / 2); - pad_left = 0; - pad_right = 0; - } - - cv::resize(src, dst, cv::Size(new_width, new_height), 0, 0, interpolation_method); - - color_t color = 
cv::Scalar(pad_color, pad_color, pad_color); - cv::copyMakeBorder(dst, dst, pad_top, pad_bottom, pad_left, pad_right, cv::BORDER_CONSTANT | CV_HAL_BORDER_ISOLATED, color); -} - -void softmax(cv::InputArray inblob, cv::OutputArray outblob) -{ - const cv::Mat input = inblob.getMat(); - outblob.create(inblob.size(), inblob.type()); - - cv::Mat exp; - const float max = *std::max_element(input.begin(), input.end()); - cv::exp((input - max), exp); - outblob.getMat() = exp / cv::sum(exp)[0]; -} - -void rescale_box(const bounding_box_t& src, bounding_box_t& dst, double scale){ - dst.x = (int)((double)src.x * scale); - dst.y = (int)((double)src.y * scale); - dst.width = (int)((double)src.width * scale); - dst.height = (int)((double)src.height * scale); -} \ No newline at end of file diff --git a/src/device.cpp b/src/device.cpp deleted file mode 100755 index 5cccd42..0000000 --- a/src/device.cpp +++ /dev/null @@ -1,36 +0,0 @@ -#include "device.h" - -using std::vector; -using std::string; - -const char Device::STREAM_URL_[] = "udp://0.0.0.0:11111"; - -void Device::send_rc_control(const velocity_vector_ms_t& vel) { - logger_->info("rc {} {} {} {}", vel.at(0), vel.at(1), vel.at(2), vel.at(3)); - // TODO -} - -void Device::land() { - logger_->info("land"); - // TODO -} - -int Device::get_battery() { - logger_->info("Battery: {}%", 100); - return 100; // TODO -} - -cv::VideoCapture Device::get_video_stream(int camera_id) { - cv::VideoCapture cap; - if (simulate_) { - cap = cv::VideoCapture(camera_id); - } - else { - cap = cv::VideoCapture(STREAM_URL_, cv::CAP_FFMPEG); - } - if (!cap.isOpened()) { - logger_->error("Unable to get video stream"); - // TODO handle error - } - return cap; -} \ No newline at end of file diff --git a/src/face_detection.cpp b/src/face_detection.cpp deleted file mode 100755 index 1a2e87d..0000000 --- a/src/face_detection.cpp +++ /dev/null @@ -1,57 +0,0 @@ -#include "face_detection.h" - -#include -#include - -using cv::dnn::Net; -using 
std::chrono::system_clock; -using TimePoint = std::chrono::time_point; - - -FaceDetector::FaceDetector(const std::string& detector_path) { - string name("DETECTION"); - logger_ = spdlog::get(name); - if (!logger_) { - logger_ = spdlog::stdout_color_mt(name); - } - logger_->set_level(spdlog::level::info); - - detector_ = cv::CascadeClassifier(); - string detector_name = cv::samples::findFileOrKeep(detector_path); - logger_->info("Load face cascade classifier"); - if (!detector_.load(detector_name)) { - logger_->error("Error loading classifier"); - // TODO handle error - } -} - - -DetectionResult FaceDetector::detect(const image_t &image) { - vector faces; - image_t gray; - cv::cvtColor(image, gray, cv::COLOR_RGBA2GRAY, 0); - - cv::resize(gray, gray, cv::Size(), 1 / scale_, 1 / scale_, cv::INTER_LINEAR); - cv::equalizeHist(gray, gray); - detector_.detectMultiScale(gray, faces, 1.1, 3, 0); - - if (faces.empty()){ - return {}; - } - bounding_box_t max_face = *std::max_element(faces.begin(), faces.end(), [](cv::Rect face1, cv::Rect face2){return face1.area() < face2.area();}); - - // TODO get detection score - score_t score = 0.99; - DetectionResult detection = DetectionResult(max_face, score); - rescale_box(detection.box, detection.box, scale_); - - logger_->debug("Detected face: {:03.1f}% at ({} {} {} {})", score*100, max_face.x, max_face.y, max_face.x+max_face.width, max_face.y+max_face.height); - - return detection; -} - - -void FaceDetector::visualize(image_t* img, const DetectionResult& detection, const color_t& color) { - cv::rectangle(*img, detection.box, color, 1); - cv::putText(*img, std::to_string((int)(detection.score*100))+" %", cv::Point(detection.box.x, detection.box.y - 20), 0, 0.5, color, 1); -} diff --git a/src/gesture_detection.cpp b/src/gesture_detection.cpp deleted file mode 100755 index 60c9c92..0000000 --- a/src/gesture_detection.cpp +++ /dev/null @@ -1,87 +0,0 @@ -#include "gesture_detection.h" -#include - -using cv::dnn::Net; -using std::string; 
- -std::map gesture_map = { - { 1, "Left"}, - { 2, "Right"}, - { 3, "Up"}, - { 4, "Down"}, - { 5, "Forward"}, - { 6, "Back"}, - { 7, "Stop"}, - { 8, "Land"}, -}; - -GestureDetector::GestureDetector(const string &detector_path) { - string name("DETECTION"); - logger_ = spdlog::get(name); - if (!logger_) { - logger_ = spdlog::stdout_color_mt(name); - } - logger_->set_level(spdlog::level::debug); - - detector_ = cv::dnn::readNet(detector_path); -} - -ClassifierOutput GestureDetector::detect(const image_t& img) { - cv::Mat blob = preprocess_image(img); - detector_.setInput(blob); - std::vector outNames = detector_.getUnconnectedOutLayersNames(); - std::vector outs; - detector_.forward(outs, outNames); - for (auto i : outs) { - softmax(i, i); - } - cv::Point classIdPoint_leading_hand; - score_t confidence_leading_hand; - cv::Point classIdPoint_gesture; - score_t confidence_gesture; - cv::minMaxLoc(outs.at(1).reshape(1, 1), nullptr, &confidence_gesture, nullptr, &classIdPoint_gesture); - logger_->info("Gesture class: {} conf: {:.2f}", classIdPoint_gesture.x, confidence_gesture); - cv::minMaxLoc(outs.at(0).reshape(1, 1), nullptr, &confidence_leading_hand, nullptr, &classIdPoint_leading_hand); - logger_->info("Leading hand: {} conf: {:.2f}", classIdPoint_leading_hand.x, confidence_leading_hand); - - ClassifierOutput classified_gesture = ClassifierOutput(confidence_gesture, classIdPoint_gesture.x); - return classified_gesture; -} - -cv::Mat GestureDetector::preprocess_image(const image_t& img) { - // rescale frame to [0, 1] than resize and pad to [224, 224, 3] - double scale = 1.0 / 255.0; - int inpWidth = 224; - int inpHeight = 224; - image_t resized_img = img; - resize_and_pad(resized_img, resized_img, cv::Size(inpWidth, inpHeight)); - return cv::dnn::blobFromImage(resized_img, scale, cv::Size(inpWidth, inpHeight), 0, true, false); -} - -void GestureDetector::visualize(image_t* img, const ClassifierOutput& classified_gesture, const bounding_box_t& gesture_box, const 
color_t& color) { - // draw gesture name - cv::putText(*img, gesture_map[classified_gesture.class_id], cv::Point(gesture_box.x + 80, gesture_box.y - 20), 0, 0.5, color, 1); - // draw landmarks - if (!classified_gesture.landmarks.empty()) { - // TODO draw gesture landmarks - } - // draw score - cv::putText(*img, std::to_string((int)(classified_gesture.score * 100)) + " %", cv::Point(gesture_box.x, gesture_box.y - 20), 0, 0.5, color, 1); -} - -/** - * Given a face detection box, define an area of the image where gestures will be detected.. - * @param box area of the image where face has been detected in (x1, y1, x2, y2) format - * @param img_width image width - * @param img_height image height - * @param w new detection area width - * @param h new detection area height - * @return bounding box of an area where gestures will be detected - */ -bounding_box_t GestureDetector::get_detection_area(const bounding_box_t& face_box, int img_width, int img_height, int w, int h) { - int x1 = face_box.x + face_box.width; - int y1 = face_box.y; - int x2 = std::min(x1 + w, img_width); - int y2 = std::min(y1 + h, img_height); - return {cv::Point(x1, y1), cv::Point(x2, y2)}; -} diff --git a/src/interfaces/opc_ua/dev_OPC_UA.cpp b/src/interfaces/opc_ua/dev_OPC_UA.cpp new file mode 100755 index 0000000..50ec783 --- /dev/null +++ b/src/interfaces/opc_ua/dev_OPC_UA.cpp @@ -0,0 +1,124 @@ +#include "dev_OPC_UA.hpp" + +using std::string; +using std::vector; + +void OPCUA_Device::send_rc_control(const velocity_vector_ms_t &vel) { + logger_->info("rc {} {} {} {}", vel.at(0), vel.at(1), vel.at(2), vel.at(3)); + + if (vel.at(0) < 0) { + set_opc_bool("::Manual:JogYUp", true); + set_opc_bool("::Manual:JogYDown", false); + + } else if (vel.at(0) > 0) { + set_opc_bool("::Manual:JogYUp", false); + set_opc_bool("::Manual:JogYDown", true); + } else { + set_opc_bool("::Manual:JogYUp", false); + set_opc_bool("::Manual:JogYDown", false); + } + + if (vel.at(1) < 0) { + set_opc_bool("::Manual:JogXUp", 
true); +    set_opc_bool("::Manual:JogXDown", false); + +  } else if (vel.at(1) > 0) { +    set_opc_bool("::Manual:JogXUp", false); +    set_opc_bool("::Manual:JogXDown", true); +  } else { +    set_opc_bool("::Manual:JogXUp", false); +    set_opc_bool("::Manual:JogXDown", false); +  } +  if (vel.at(2) < 0) { +    set_opc_bool("::Manual:JogZUp", true); +    set_opc_bool("::Manual:JogZDown", false); + +  } else if (vel.at(2) > 0) { +    set_opc_bool("::Manual:JogZUp", false); +    set_opc_bool("::Manual:JogZDown", true); +  } else { +    set_opc_bool("::Manual:JogZUp", false); +    set_opc_bool("::Manual:JogZDown", false); +  } +} + +void OPCUA_Device::stop() { +  set_opc_value("::Manual:JogYUp", false, &UA_TYPES[UA_TYPES_BOOLEAN]); +  set_opc_value("::Manual:JogYDown", false, &UA_TYPES[UA_TYPES_BOOLEAN]); +  set_opc_value("::Manual:JogXUp", false, &UA_TYPES[UA_TYPES_BOOLEAN]); +  set_opc_value("::Manual:JogXDown", false, &UA_TYPES[UA_TYPES_BOOLEAN]); +  set_opc_value("::Manual:JogZUp", false, &UA_TYPES[UA_TYPES_BOOLEAN]); +  set_opc_value("::Manual:JogZDown", false, &UA_TYPES[UA_TYPES_BOOLEAN]); +} + +std::string OPCUA_Device::get_mode() const { +  UA_String uastr = get_opc_value("::AsGlobalPV:gModeText"); +  char *modeTextVal = (char *)malloc(uastr.length + 1); +  memcpy(modeTextVal, uastr.data, uastr.length); +  modeTextVal[uastr.length] = '\0'; +  std::string modeText = modeTextVal; +  free(modeTextVal); +  return std::string(modeText); +} + +std::vector OPCUA_Device::get_position() const { +  std::vector RobotPos(3, 0.0); + +  RobotPos[0] = get_opc_value("::AsGlobalPV:MpDelta4Axis_0.X"); +  RobotPos[1] = get_opc_value("::AsGlobalPV:MpDelta4Axis_0.Y"); +  RobotPos[2] = get_opc_value("::AsGlobalPV:MpDelta4Axis_0.Z"); + +  return RobotPos; +} + +template +T OPCUA_Device::get_opc_value(const std::string &key) const { +  UA_Variant value; +  UA_Variant_init(&value); +  // logger_->info("Reading {}", key); +  auto status = UA_Client_readValueAttribute( +      client, UA_NODEID_STRING(6, const_cast(key.c_str())), &value); +  // 
logger_->info("Read {}", key); +  if (status == UA_STATUSCODE_GOOD /* && +      UA_Variant_hasScalarType(&value, &UA_TYPES[UA_TYPES_BOOLEAN])*/) { +    logger_->debug("Read: {}\n", key); +    return *(T *)value.data; +  } else { +    logger_->error("Unable to read value"); +  } +  return T{}; +} + +template +void OPCUA_Device::set_opc_value(const std::string &key, T value, +                                 const UA_DataType *type) { +  UA_Variant valueVar; +  UA_Variant_setScalar(&valueVar, &value, type); +  auto status = UA_Client_writeValueAttribute( +      client, UA_NODEID_STRING(6, const_cast(key.c_str())), &valueVar); +  if (status == UA_STATUSCODE_GOOD /* && +      UA_Variant_hasScalarType(&value, &UA_TYPES[UA_TYPES_BOOLEAN])*/) { +    logger_->debug("Written: {}\n", key); +  } else { +    logger_->error("Unable to write value"); +  } +} + +int OPCUA_Device::connect(const std::string &server_name) { +  std::string opc_ua_server_name(server_name); // 192.168.137.1 +  logger_->info("Connecting to OPC UA server {}", opc_ua_server_name); + +  client = UA_Client_new(); +  UA_ClientConfig_setDefault(UA_Client_getConfig(client)); +  UA_StatusCode retval = UA_Client_connect(client, opc_ua_server_name.c_str()); +  if (retval != UA_STATUSCODE_GOOD) { +    logger_->info("Unable to connect"); +    UA_Client_delete(client); +    return (int)retval; +  } +  logger_->info("Connected"); + +  get_video_stream(0); + +  return 0; +}; \ No newline at end of file diff --git a/src/interfaces/opc_ua/dev_OPC_UA.hpp b/src/interfaces/opc_ua/dev_OPC_UA.hpp new file mode 100755 index 0000000..c783b31 --- /dev/null +++ b/src/interfaces/opc_ua/dev_OPC_UA.hpp @@ -0,0 +1,60 @@ +/** + * @file dev_OPC_UA.hpp + * + * @brief OPC UA control interface. + * + * @author Maks Boiar + * + */ + +#pragma once + +#include +#include + +#include "core/device.hpp" + +extern "C" { +#include +} + +/** + * OPC UA controllable device with a camera. 
+ */ +class OPCUA_Device : public GenericDevice { + +public: + OPCUA_Device() : GenericDevice() { STREAM_URL_ = "udp://0.0.0.0:11111"; } + + void stop(); + + bool get_status() const { + return get_opc_value( + "::AsGlobalPV:gMainInterface.Robot.Status.ServoOK"); + } + + std::string get_mode() const; + + std::vector get_position() const; + + template T get_opc_value(const std::string &key) const; + + template + void set_opc_value(const std::string &key, T value, const UA_DataType *type); + + void set_opc_bool(const std::string &key, bool value) { + set_opc_value(key, value, &UA_TYPES[UA_TYPES_BOOLEAN]); + } + + void send_rc_control(const velocity_vector_ms_t &velocity); + + int connect(const std::string &server_name); + + ~OPCUA_Device() { + UA_Client_disconnect(client); + UA_Client_delete(client); + } + +private: + UA_Client *client; +}; diff --git a/src/main.cpp b/src/main.cpp deleted file mode 100755 index 292478f..0000000 --- a/src/main.cpp +++ /dev/null @@ -1,82 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include "spdlog/spdlog.h" -#include "controller.h" -#include -#include - -using std::cerr; -using std::cout; -using std::endl; -using std::vector; -using std::string; - - -int main(int argc, char* argv[]) { - argparse::ArgumentParser parser("controller"); - parser.add_argument("-v", "--verbose") - .help("Display additional information during execution") - .default_value(false) - .implicit_value(true); - - parser.add_argument("--log-level") - .help("Choose logging level") - .default_value(string("DEBUG")) - .action([](const string& value) { - static const vector choices = { "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL" }; - if (std::find(choices.begin(), choices.end(), value) != choices.end()) { - return value; - } - return string{ "DEBUG" }; - }); - - parser.add_argument("--save-video") - .help("Save video feed to a specified file") - .default_value(string{ "" }); - - parser.add_argument("mode") - .help("Choose operation 
mode") - .action([](const string& value) { - static const vector choices = { "SIM", "WEBCAM" }; - if (std::find(choices.begin(), choices.end(), value) != choices.end()) { - return value; - } - throw std::invalid_argument("Choose a valid mode option."); - }); - - try { - parser.parse_args(argc, argv); - } - catch (const std::runtime_error& err) { - cerr << err.what() << endl; - cerr << parser; - std::exit(1); - } - - - // auto verbose = parser.get("--verbose"); - auto mode = parser.get("mode"); - auto log_level = parser.get("--log-level"); - auto video_filepath = parser.get("--save-video"); - parser.add_description("Control a drone with gestures."); - - spdlog::set_level(spdlog::level::debug); - - Device device; - device.connect(); - device.streamon(); - - std::string gesture_detector_path = "/home/mbcious/copter-gesture/resources/models/resnet18.onnx"; - std::string face_detector_path = "/home/mbcious/copter-gesture/resources/models/haarcascade_frontalface_default.xml"; - - Controller controller = Controller(&device, true, face_detector_path, gesture_detector_path); - controller.run(100); - - return 0; -} - \ No newline at end of file