diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..697abde --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +onnx/out +onnx/.vs/ +onnx/libs/ +onnx/test/.vs/ +onnx/test/out/ +onnx/test/Audiofile/ +src/BeatNet/__pycache__/ diff --git a/onnx/BeatNet.cpp b/onnx/BeatNet.cpp index 2338fdd..68b9b24 100644 --- a/onnx/BeatNet.cpp +++ b/onnx/BeatNet.cpp @@ -70,9 +70,8 @@ BeatNet::BeatNet( env(nullptr), session(nullptr), session_options(nullptr), memory_info(nullptr), allocator(nullptr), run_options(nullptr), input_name(nullptr), output_name(nullptr), - signal_processor(FRAME_LENGTH, HOP_SIZE), - fft_processor(FRAME_LENGTH, FFT_SIZE, FRAME_SIZE_POW2), - filterbank_processor(BANKS_PER_OCTAVE, FFT_SIZE, SR_BEATNET, 30.0f, 11025.0f, true, true), + fft_processor(FRAME_LENGTH, FFT_SIZE, FRAME_LENGTH), + filterbank_processor(BANKS_PER_OCTAVE, FFT_SIZE, SR_BEATNET, 30.0f, 17000.0f, true, true), SR(0),bufferSize(0) { @@ -136,18 +135,38 @@ void BeatNet::setup(double sampleRate, int samplesPerBlock) { bool BeatNet::preprocess(const std::vector& raw_input, std::vector& preprocessed_input) { - std::vector resampled = resampler.resample(raw_input); - std::vector frame; - bool valid_frame = signal_processor.process(resampled,frame); - if (!valid_frame) { - // std::cout<<"invalid frame and will be invalid for the first ~"< resampledSignal = resampler.resample(raw_input); + + // slice original signal to Frames + const int nFrames = 4; + FramedSignal framedSignal{ resampledSignal , nFrames, FRAME_LENGTH, HOP_SIZE }; + + // spectral difference + // last frame + auto frame_3 = framedSignal[3]; + auto spectrum_3 = fft_processor.compute_fft(frame_3); + auto filters_3 = filterbank_processor.apply(spectrum_3); + auto log_compress_3 = log_compress(filters_3); + log_fb = std::move(log_compress_3); + + // frame before + auto frame_2 = framedSignal[2]; + auto spectrum_2 = fft_processor.compute_fft(frame_2); + auto filters_2 = filterbank_processor.apply(spectrum_2); + auto log_compress_2 = log_compress(filters_2); + prev_log_fb = std::move(log_compress_2); + + // diff = log_fb3 - log_fb2 + diff.assign(log_fb.size(), 0.0f); + std::transform(log_fb.begin(), log_fb.end(), prev_log_fb.begin(), + diff.begin(), std::minus()); + + // replace negative values with zero + std::replace_if(diff.begin(), diff.end(), + [](float x) {return x < 0.0f; }, + 0.0f); + + // stack log spectrum and spectral difference hstack(log_fb, diff, preprocessed_input); return true; } @@ -191,7 +210,7 @@ void BeatNet::inference(std::vector& output) { output[i] = output_data[i]; } - printOutputShape(output_tensor); + // printOutputShape(output_tensor); ReleaseValue(input_tensor); ReleaseValue(output_tensor); @@ -214,4 +233,4 @@ void BeatNet::printOutputShape(OrtValue* output_tensor) { std::cout << "]" << std::endl; ReleaseTensorTypeAndShapeInfo(shape_info); -} \ No newline at end of file +} diff --git a/onnx/BeatNet.h b/onnx/BeatNet.h index 71e2e15..45d8db5 100644 --- a/onnx/BeatNet.h +++ b/onnx/BeatNet.h @@ -5,7 +5,7 @@ #include #include "onnxruntime_c_api.h" #include "resampler.h" -#include "frameprocessor.h" +#include "framedSignal.h" #include "fftprocessor.h" #include "filterbankprocessor.h" #include "logspecutils.h" @@ -17,10 +17,10 @@ constexpr double MS_FR_GITHUB {0.064}; constexpr double MS_HOP_GITHUB {0.020}; constexpr int FRAME_LENGTH {static_cast(SR_BEATNET*MS_FR_GITHUB)}; // 1411 constexpr int HOP_SIZE {static_cast(SR_BEATNET*MS_HOP_GITHUB)}; // 441 -constexpr int FFT_SIZE { FRAME_LENGTH / 2 + 1}; // 706 +constexpr int FFT_SIZE {FRAME_LENGTH / 2}; // 705 constexpr int FRAME_SIZE_POW2 {2048}; // this is the minumum higher than FRAME_LENGTH (1411) that is a power-of-two value. constexpr int FBANK_SIZE {272}; -constexpr int BANKS_PER_OCTAVE {16}; // {24};; +constexpr int BANKS_PER_OCTAVE {24}; using OrtGetApiBaseFn = const OrtApiBase* (*)(); using OrtCreateTensorWithDataAsOrtValueFn = OrtStatus* (*) @@ -125,7 +125,7 @@ class BeatNet{ // Preprocessing Resampler resampler; - FramedSignalProcessor signal_processor; + FFTProcessor fft_processor; FilterBankProcessor filterbank_processor; std::vector preprocessed_input; @@ -133,8 +133,9 @@ class BeatNet{ std::vector spectrum; std::vector filters; std::vector log_fb; - std::vector diff; std::vector prev_log_fb; + std::vector diff; + // helper functions - preprocess for feature extraction and inference for model utilization bool preprocess(const std::vector& raw_input, std::vector& preprocessed_input); @@ -143,4 +144,4 @@ class BeatNet{ }; -#endif \ No newline at end of file +#endif diff --git a/onnx/CMakeLists.txt b/onnx/CMakeLists.txt index cbcc26f..8bacf9f 100644 --- a/onnx/CMakeLists.txt +++ b/onnx/CMakeLists.txt @@ -56,7 +56,7 @@ endif() set(LIB_SOURCE_FILES BeatNet.cpp resampler.cpp - frameprocessor.cpp + framedSignal.cpp fftprocessor.cpp filterbankprocessor.cpp logspecutils.cpp diff --git a/onnx/filterbankprocessor.cpp b/onnx/filterbankprocessor.cpp index c09530f..9cbf847 100644 --- a/onnx/filterbankprocessor.cpp +++ b/onnx/filterbankprocessor.cpp @@ -1,4 +1,5 @@ #include "filterbankprocessor.h" +#include FilterBankProcessor::FilterBankProcessor( int bands_per_octave, @@ -22,24 +23,76 @@ FilterBankProcessor::FilterBankProcessor( void FilterBankProcessor::buildFilters() { filters.clear(); float num_octaves = std::log2(fmax / fmin); - int num_filters = static_cast(std::floor(num_octaves * bands_per_octave)); - std::vector centers(num_filters + 2); + // centerfrequencies (219) + int num_filters = static_cast(std::floor(num_octaves * bands_per_octave)); // (219) - for (int i = 0; i < centers.size(); ++i) { - centers[i] = fmin * std::pow(2.0, (float)i / (float)bands_per_octave); - } + /* + # get the range + left = np.floor(np.log2(float(fmin) / fref) * bands_per_octave) + right = np.ceil(np.log2(float(fmax) / fref) * bands_per_octave) + # generate frequencies + frequencies = fref * 2. ** (np.arange(left, right) / + float(bands_per_octave)) + # filter frequencies + # needed, because range might be bigger because of the use of floor/ceil + frequencies = frequencies[np.searchsorted(frequencies, fmin):] + frequencies = frequencies[:np.searchsorted(frequencies, fmax, 'right')] + + */ + const float fref = 440.0; // 440Hz reference value in madmom python code + float left = std::floor(std::log2(fmin / fref) * bands_per_octave); + float right = std::ceil(std::log2(fmax / fref) * bands_per_octave); + + // centers + std::vector centers(num_filters); + float val = left + 1.0f; // left + 1 to skip the first value which is < fmin + std::generate(centers.begin(), centers.end(), + [&val, fref, this]() + { + return fref * std::pow(2.0f, val++ / (float) bands_per_octave); + }); + + // bins + std::vector bins = centersHzToBins(centers); + for (int i = 1; i < bins.size() - 1; ++i) { + + std::vector filt(fft_size, 0.0f); // std::vector filt(fft_size / 2 + 1, 0.0); + + int l = bins[i - 1]; // float l = hzToBin(centers[i - 1]); + int c = bins[i]; // float c = hzToBin(centers[i]); + int r = bins[i + 1]; // float r = hzToBin(centers[i + 1]); + + int start = l; + int center = c - l; // relative to start + int stop = r - l; // relative to start + + /* + data = np.zeros(stop) + # rising edge (without the center) + data[:center] = np.linspace(0, 1, center, endpoint=False) + # falling edge (including the center, but without the last bin) + data[center:] = np.linspace(1, 0, stop - center, endpoint=False) + */ + + int n = stop; + std::vector data(n, 0.0f); + + float dx = 1.0f / center; - for (int i = 1; i < centers.size() - 1; ++i) { - std::vector filt(fft_size / 2 + 1, 0.0); - float l = hzToBin(centers[i - 1]); - float c = hzToBin(centers[i]); - float r = hzToBin(centers[i + 1]); + // rising edge(without the center) + float x0 = 0.0f; + for (int i = 0; i < center; ++i) { + data[i] = x0 + (i * dx); + } + + // falling edge (including the center, but without the last bin) + x0 = 1.0f; + for (int i = center; i < stop; ++i) { + data[i] = x0 - ( (i - center) * dx); + } - for (int j = (int)std::ceil(l); j < (int)std::ceil(c) && j < filt.size(); ++j) - filt[j] = (j - l) / (c - l); + std::copy(data.begin(), data.end(), filt.begin() + start); - for (int j = (int)std::ceil(c); j < (int)std::ceil(r) && j < filt.size(); ++j) - filt[j] = (r - j) / (r - c); if (norm_filters) { float sum = std::accumulate(filt.begin(), filt.end(), 0.0); @@ -66,6 +119,26 @@ int FilterBankProcessor::numBands() const return (int)filters.size(); } -float FilterBankProcessor::hzToBin(float f) const { - return (f / (float)sample_rate) * fft_size; +std::vector FilterBankProcessor::centersHzToBins(const std::vector& centers) const { + + std::vector bins(centers.size()); + for (int i= 0; i < bins.size(); ++i) + { + const float value = std::round( centers[i] / ((float) sample_rate / 2.0f)* fft_size); + bins[i] = static_cast(value); + } + + // keep values unique + auto newend = std::unique(bins.begin(), bins.end()); + bins.erase(newend, bins.end()); + + // remove values higher than fft_size + const int size_max = fft_size - 1; + newend = std::remove_if(bins.begin(), bins.end(), [&size_max](int x) {return x > size_max;}); + bins.erase(newend, bins.end()); + + // add the size_max value at the end of the array + bins.push_back(size_max); + + return bins; } \ No newline at end of file diff --git a/onnx/filterbankprocessor.h b/onnx/filterbankprocessor.h index ce83ee1..2037245 100644 --- a/onnx/filterbankprocessor.h +++ b/onnx/filterbankprocessor.h @@ -29,7 +29,7 @@ class FilterBankProcessor { bool unique_filters; std::vector> filters; - float hzToBin(float f) const; + std::vector centersHzToBins(const std::vector& centers) const; }; diff --git a/onnx/framedSignal.cpp b/onnx/framedSignal.cpp new file mode 100644 index 0000000..d1da37e --- /dev/null +++ b/onnx/framedSignal.cpp @@ -0,0 +1,56 @@ +#include "framedSignal.h" +#include +#include +#include "iostream" + +FramedSignal::FramedSignal(const std::vector& inputSignal, int nFrames, int frameSize, int hopSize) + : original_signal(inputSignal), + nFrames(nFrames), + frameSize(frameSize), + hopSize(hopSize) +{ + int nMax = ((nFrames -1) * hopSize) + frameSize; + padded_signal.assign(nMax, 0.0f); + + { + auto s0 = original_signal.begin(); + auto sEnd = original_signal.end(); + auto destination = padded_signal.begin() + frameSize / 2; + + int i = frameSize / 2; + + std::copy_if(s0, sEnd, destination, + [&i, nMax](float x) + { + return i++ < nMax; + }); + } + + for (int iFrame = 0, index = 0; iFrame < nFrames; iFrame++, index += hopSize) + { + auto i0 = padded_signal.begin() + index; + + std::vector signal(i0, i0 + frameSize); + frames.push_back(signal); + } +} + +FramedSignal::~FramedSignal() +{ + +} + +std::vector FramedSignal::operator[](int i) +{ + return frames.at(i); +} + +std::vector FramedSignal::getOriginalSignal() +{ + return original_signal; +} + +int FramedSignal::get_nFrames() +{ + return nFrames; +} diff --git a/onnx/framedSignal.h b/onnx/framedSignal.h new file mode 100644 index 0000000..e76a854 --- /dev/null +++ b/onnx/framedSignal.h @@ -0,0 +1,26 @@ +#ifndef FRAMEDSIGNAL_H +#define FRAMEDSIGNAL_H + +#include + +class FramedSignal { +public: + + FramedSignal(const std::vector& inputSignal, int nFrames, int frameSize, int hopSize); + ~FramedSignal(); + + std::vector operator[](int i); + std::vector getOriginalSignal(); + int get_nFrames(); + +private: + std::vector original_signal; + int nFrames; + int frameSize; + int hopSize; + + std::vector padded_signal; + std::vector> frames; +}; + +#endif diff --git a/onnx/frameprocessor.cpp b/onnx/frameprocessor.cpp deleted file mode 100644 index d4f23ed..0000000 --- a/onnx/frameprocessor.cpp +++ /dev/null @@ -1,37 +0,0 @@ -#include "frameprocessor.h" -#include -#include - -FramedSignalProcessor::FramedSignalProcessor(int frameSize, int hopSize) - : frame_size(frameSize), - hop_size(hopSize), - ring_size(frame_size*1.1), // make it a litle bit longer - write_pos(0), - ring_buffer(ring_size, 0.0f), - total_samples_written(0){} - -void FramedSignalProcessor::reset() { - std::fill(ring_buffer.begin(), ring_buffer.end(), 0.0f); - write_pos = 0; - total_samples_written = 0; -} - -bool FramedSignalProcessor::process(const std::vector& input, std::vector& frame_out) { - - for (float sample : input) { - ring_buffer[write_pos] = sample; - write_pos = (write_pos + 1) % ring_size; - total_samples_written++; - } - - if (total_samples_written>=frame_size) - { - frame_out.resize(frame_size); - for (int i = 0; i < frame_size; ++i) { - int index = (write_pos + ring_size - frame_size + i) % ring_size; - frame_out[i] = ring_buffer[index]; - } - return true; - } - else return false; -} \ No newline at end of file diff --git a/onnx/frameprocessor.h b/onnx/frameprocessor.h deleted file mode 100644 index a39efbd..0000000 --- a/onnx/frameprocessor.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef FRAMEPROCESSOR_H -#define FRAMEPROCESSOR_H - -#include - -class FramedSignalProcessor { -public: - FramedSignalProcessor(int frameSize, int hopSize); - - bool process(const std::vector& input, std::vector& frame_out); - - void reset(); - -private: - int sample_rate; - int frame_size; - int hop_size; - int ring_size; - int write_pos; - size_t total_samples_written; - - std::vector ring_buffer; -}; - -#endif diff --git a/onnx/test/CMakeLists.txt b/onnx/test/CMakeLists.txt new file mode 100644 index 0000000..12d7b69 --- /dev/null +++ b/onnx/test/CMakeLists.txt @@ -0,0 +1,95 @@ +cmake_minimum_required(VERSION 3.22) +project(cpp_test) +set(CMAKE_CXX_STANDARD 17) + +include(cmake/cpm.cmake) + +set(BEATNET_ONNX_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..) +set(BEATNET_ONNX_TEMP_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/BeatNetOnnxBuild) +add_subdirectory(${BEATNET_ONNX_DIR} ${BEATNET_ONNX_TEMP_OUT_DIR}) + +# download adamstark/AudioFile +set(AUDIOFILE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/AudioFile) +if(NOT EXISTS ${AUDIOFILE_DIR}/CMakeLists.txt) + CPMAddPackage( + NAME AudioFile + GITHUB_REPOSITORY adamstark/AudioFile + GIT_TAG 1.1.4 + VERSION 1.1.4 + SOURCE_DIR ${AUDIOFILE_DIR} + ) +else() + add_subdirectory(${AUDIOFILE_DIR}) + message(STATUS "AudioFile already exists, skipping download.") +endif() + +set(ONNX_TEST_SOURCE_FILES + testCPP.cpp +) + +add_executable(${PROJECT_NAME} ${ONNX_TEST_SOURCE_FILES}) +target_link_libraries(${PROJECT_NAME} beatnetlib) + +# add_custom_target(CopyFiles ALL +# COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}/$" +# COMMAND ${CMAKE_COMMAND} -E copy_directory +# "${BEATNET_ONNX_TEMP_OUT_DIR}/$" +# "${CMAKE_CURRENT_BINARY_DIR}/$" +# ) + +function(copy_beatnet_deps target_name) + + set(LIBS_AND_WEIGHTS "") + + if(WIN32) + list(APPEND LIBS_AND_WEIGHTS + "${ORT_DIR}/lib/onnxruntime.dll" + "${SAMPLERATE_DIR}/bin/samplerate.dll" + ) + if(ENABLE_FFTW3) + list (APPEND LIBS_AND_WEIGHTS + "${FFTW3_DIR}/libfftw3f-3.dll") + endif() + + elseif(APPLE) + file(GLOB SAMPLERATE_LIBS "${SAMPLERATE_DIR}/lib/libsamplerate*.dylib*") + list(APPEND LIBS_AND_WEIGHTS ${SAMPLERATE_LIBS}) + + file(GLOB ORT_LIBS "${ORT_DIR}/lib/libonnxruntime*.dylib*") + list(APPEND LIBS_AND_WEIGHTS ${ORT_LIBS}) + + if(ENABLE_FFTW3) + file(GLOB FFTW3_LIBS "${FFTW3_DIR}/lib/libfftw3f*.dylib*") + list(APPEND LIBS_AND_WEIGHTS ${FFTW3_LIBS}) + endif() + elseif(UNIX) + file(GLOB SAMPLERATE_LIBS + "${SAMPLERATE_DIR}/lib/libsamplerate.so*" + "${SAMPLERATE_DIR}/lib64/libsamplerate.so*") + list(APPEND LIBS_AND_WEIGHTS ${SAMPLERATE_LIBS}) + + file(GLOB ORT_LIBS "${ORT_DIR}/lib/libonnxruntime.so*") + list(APPEND LIBS_AND_WEIGHTS ${ORT_LIBS}) + + if(ENABLE_FFTW3) + file(GLOB FFTW3_LIBS + "${FFTW3_DIR}/lib/libfftw3f.so*" + "${FFTW3_DIR}/lib64/libfftw3f.so*") + list(APPEND LIBS_AND_WEIGHTS ${FFTW3_LIBS}) + endif() + endif() + + list (APPEND LIBS_AND_WEIGHTS "${BEATNET_ONNX_ROOTDIR}/beatnet_bda.onnx") + + set(LIBRARY_NAME beatnetlib) + + foreach(DEP_FILE IN LISTS LIBS_AND_WEIGHTS) + add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${DEP_FILE}" + $) + endforeach() + +endfunction(copy_beatnet_deps target_name) + +copy_beatnet_deps(${PROJECT_NAME}) \ No newline at end of file diff --git a/onnx/test/README.md b/onnx/test/README.md new file mode 100644 index 0000000..ddc8432 --- /dev/null +++ b/onnx/test/README.md @@ -0,0 +1,27 @@ +This directory is used to test the porting of the BeatNet model into C++. +The following configuration is used: + +- Samples are loaded from the `samples` directory. +- Sample rate: 22050 Hz +- Buffer size: 512 +- BeatNet operates in `Streaming mode`. + +Two implementations are compared: +1. The official Python implementation. +2. The C++ ported implementation. + +The goal is to verify the correctness of the C++ implementation by comparing its results with the Python implementation. The comparison process involves the following steps: + +1. Load an audio file and divide it into buffers. +2. Feed each buffer into the model for predictions. +3. Annotate each buffer with the model's predictions. +4. Store the results in files. +5. Compare the alignment of predictions between the two implementations. + +# Results + +Results are yet to be defined. + +# Help + +- [condaEnvList](condaEnvList) file is provided for helping out in the python env setup. \ No newline at end of file diff --git a/onnx/test/cmake/cpm.cmake b/onnx/test/cmake/cpm.cmake new file mode 100644 index 0000000..9cb91b2 --- /dev/null +++ b/onnx/test/cmake/cpm.cmake @@ -0,0 +1,27 @@ +set(CPM_DOWNLOAD_VERSION 0.38.2) + +set(CPM_DOWNLOAD_LOCATION "${CMAKE_CURRENT_SOURCE_DIR}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") + +# Expand relative path. This is important if the provided path contains a tilde (~) +get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE) + +function(download_cpm) + message(STATUS "Downloading CPM.cmake to ${CPM_DOWNLOAD_LOCATION}") + file(DOWNLOAD + https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake + ${CPM_DOWNLOAD_LOCATION} + ) +endfunction() + +if(NOT (EXISTS ${CPM_DOWNLOAD_LOCATION})) + download_cpm() +else() + # resume download if it previously failed + file(READ ${CPM_DOWNLOAD_LOCATION} check) + if("${check}" STREQUAL "") + download_cpm() + endif() + unset(check) +endif() + +include(${CPM_DOWNLOAD_LOCATION}) \ No newline at end of file diff --git a/onnx/test/condaEnvList b/onnx/test/condaEnvList new file mode 100644 index 0000000..e564ce9 --- /dev/null +++ b/onnx/test/condaEnvList @@ -0,0 +1,78 @@ +# packages in environment at miniconda3\envs\bn: +# +# Name Version Build Channel +audioread 3.0.1 pypi_0 pypi +bzip2 1.0.8 h2bbff1b_6 +ca-certificates 2025.12.2 haa95532_0 +certifi 2026.1.4 pypi_0 pypi +cffi 2.0.0 pypi_0 pypi +charset-normalizer 3.4.4 pypi_0 pypi +coloredlogs 15.0.1 pypi_0 pypi +contourpy 1.0.7 pypi_0 pypi +cycler 0.12.1 pypi_0 pypi +cython 3.1.4 pypi_0 pypi +decorator 5.2.1 pypi_0 pypi +expat 2.7.3 h885b0b7_4 +filelock 3.19.1 pypi_0 pypi +flatbuffers 25.12.19 pypi_0 pypi +fonttools 4.60.2 pypi_0 pypi +fsspec 2025.10.0 pypi_0 pypi +humanfriendly 10.0 pypi_0 pypi +idna 3.11 pypi_0 pypi +importlib-resources 6.5.2 pypi_0 pypi +jinja2 3.1.6 pypi_0 pypi +joblib 1.5.3 pypi_0 pypi +kiwisolver 1.4.7 pypi_0 pypi +lazy-loader 0.4 pypi_0 pypi +libexpat 2.7.3 h885b0b7_4 +libffi 3.4.4 hd77b12b_1 +librosa 0.10.2.post1 pypi_0 pypi +libzlib 1.3.1 h02ab6af_0 +llvmlite 0.38.1 pypi_0 pypi +madmom 0.16.1 pypi_0 pypi +markupsafe 3.0.3 pypi_0 pypi +matplotlib 3.7.0 pypi_0 pypi +mido 1.3.3 pypi_0 pypi +mpmath 1.3.0 pypi_0 pypi +msgpack 1.1.2 pypi_0 pypi +networkx 3.2.1 pypi_0 pypi +numba 0.55.2 pypi_0 pypi +numpy 1.21.6 pypi_0 pypi +onnx 1.12.0 pypi_0 pypi +onnxruntime 1.19.2 pypi_0 pypi +openssl 3.0.18 h543e019_0 +packaging 25.0 pypi_0 pypi +pillow 11.3.0 pypi_0 pypi +pip 25.3 pyhc872135_0 +platformdirs 4.4.0 pypi_0 pypi +pooch 1.8.2 pypi_0 pypi +protobuf 3.20.1 pypi_0 pypi +pyaudio 0.2.14 pypi_0 pypi +pycparser 2.23 pypi_0 pypi +pyparsing 3.3.1 pypi_0 pypi +pyreadline3 3.5.4 pypi_0 pypi +python 3.9.25 h716150d_1 +python-dateutil 2.9.0.post0 pypi_0 pypi +requests 2.32.5 pypi_0 pypi +scikit-learn 1.6.1 pypi_0 pypi +scipy 1.10.1 pypi_0 pypi +setuptools 80.9.0 py39haa95532_0 +six 1.17.0 pypi_0 pypi +soundfile 0.13.1 pypi_0 pypi +soxr 1.0.0 pypi_0 pypi +sqlite 3.51.1 hda9a48d_0 +sympy 1.14.0 pypi_0 pypi +threadpoolctl 3.6.0 pypi_0 pypi +tk 8.6.15 hf199647_0 +torch 2.7.1 pypi_0 pypi +typing-extensions 4.15.0 pypi_0 pypi +tzdata 2025b h04d1e81_0 +ucrt 10.0.22621.0 haa95532_0 +urllib3 2.6.3 pypi_0 pypi +vc 14.3 h2df5915_10 +vc14_runtime 14.44.35208 h4927774_10 +vs2015_runtime 14.44.35208 ha6b5a95_10 +wheel 0.45.1 py39haa95532_0 +xz 5.6.4 h4754444_1 +zipp 3.23.0 pypi_0 pypi +zlib 1.3.1 h02ab6af_0 \ No newline at end of file diff --git a/onnx/test/results/108bpm_cpp b/onnx/test/results/108bpm_cpp new file mode 100644 index 0000000..f3f51e6 --- /dev/null +++ b/onnx/test/results/108bpm_cpp @@ -0,0 +1,59 @@ +0.508005,1 +0.528005,1 +0.668005,1 +0.788005,1 +0.808004,1 +1.068,1 +1.348,1 +1.368,1 +1.628,1 +1.648,1 +1.908,1 +1.928,1 +2.168,1 +2.188,1 +2.728,1 +2.748,1 +2.888,1 +3.008,1 +3.028,1 +3.048,1 +3.288,1 +3.308,1 +3.568,1 +3.588,1 +3.848,1 +3.868,1 +4.128,1 +4.148,1 +4.388,1 +4.408,1 +4.828,1 +4.948,1 +4.968,1 +5.108,1 +5.228,1 +5.248,1 +5.268,1 +5.508,1 +5.528,1 +5.808,1 +6.068,1 +6.088,1 +6.348,1 +6.368,1 +6.608,1 +6.628,1 +7.048,1 +7.168,1 +7.188,1 +7.328,1 +7.468,1 +7.488,1 +7.748,1 +8.028,1 +8.288,1 +8.308,1 +8.328,1 +8.568,1 +8.588,1 diff --git a/onnx/test/results/108bpm_py b/onnx/test/results/108bpm_py new file mode 100644 index 0000000..28d54fd --- /dev/null +++ b/onnx/test/results/108bpm_py @@ -0,0 +1,8 @@ +(0.6, 1.0) +(1.1400000000000001, 2.0) +(3.9, 1.0) +(4.44, 2.0) +(5.0200000000000005, 2.0) +(5.58, 1.0) +(6.16, 2.0) +(7.1000000000000005, 2.0) diff --git a/onnx/test/results/120bpm_cpp b/onnx/test/results/120bpm_cpp new file mode 100644 index 0000000..2324cb1 --- /dev/null +++ b/onnx/test/results/120bpm_cpp @@ -0,0 +1,102 @@ +0.0880045,1 +0.188005,1 +0.208005,1 +0.328005,1 +0.448005,1 +0.468005,1 +0.568005,1 +0.588005,1 +0.688004,1 +0.708005,1 +0.828005,1 +0.948005,1 +0.968005,1 +1.068,1 +1.088,1 +1.188,1 +1.208,1 +1.328,1 +1.448,1 +1.468,1 +1.568,1 +1.588,1 +1.708,1 +1.828,1 +1.948,1 +1.968,1 +2.088,1 +2.188,1 +2.208,1 +2.328,1 +2.448,1 +2.468,1 +2.568,1 +2.588,1 +2.688,1 +2.708,1 +2.828,1 +2.948,1 +2.968,1 +3.068,1 +3.088,1 +3.188,1 +3.208,1 +3.328,1 +3.448,1 +3.468,1 +3.568,1 +3.588,1 +3.708,1 +3.828,1 +3.948,1 +3.968,1 +4.088,1 +4.188,1 +4.208,1 +4.328,1 +4.448,1 +4.468,1 +4.568,1 +4.588,1 +4.688,1 +4.708,1 +4.828,1 +4.948,1 +4.968,1 +5.068,1 +5.088,1 +5.188,1 +5.208,1 +5.328,1 +5.448,1 +5.468,1 +5.568,1 +5.588,1 +5.708,1 +5.828,1 +5.948,1 +5.968,1 +6.088,1 +6.188,1 +6.208,1 +6.328,1 +6.448,1 +6.468,1 +6.568,1 +6.588,1 +6.688,1 +6.708,1 +6.828,1 +6.948,1 +6.968,1 +7.068,1 +7.088,1 +7.188,1 +7.208,1 +7.328,1 +7.448,1 +7.468,1 +7.568,1 +7.588,1 +7.708,1 +7.828,1 diff --git a/onnx/test/results/120bpm_py b/onnx/test/results/120bpm_py new file mode 100644 index 0000000..e69de29 diff --git a/onnx/test/results/128bpm_cpp b/onnx/test/results/128bpm_cpp new file mode 100644 index 0000000..e6040de --- /dev/null +++ b/onnx/test/results/128bpm_cpp @@ -0,0 +1,59 @@ +0.428005,1 +0.448005,1 +0.648005,1 +1.128,1 +1.148,1 +1.588,1 +1.608,1 +1.628,1 +1.828,1 +2.288,1 +2.308,1 +2.528,1 +2.548,1 +2.988,1 +3.008,1 +3.028,1 +3.708,1 +3.728,1 +4.168,1 +4.188,1 +4.208,1 +4.868,1 +4.888,1 +5.348,1 +5.368,1 +5.608,1 +6.048,1 +6.068,1 +6.288,1 +6.308,1 +6.748,1 +6.768,1 +7.008,1 +7.448,1 +7.468,1 +7.928,1 +7.948,1 +8.148,1 +8.188,1 +8.628,1 +8.648,1 +9.088,1 +9.788,1 +9.808,1 +9.828,1 +10.488,1 +10.508,1 +10.528,1 +11.208,1 +11.668,1 +11.688,1 +12.368,1 +12.388,1 +12.848,1 +13.548,1 +13.568,1 +14.248,1 +14.268,1 +14.488,1 diff --git a/onnx/test/results/128bpm_py b/onnx/test/results/128bpm_py new file mode 100644 index 0000000..84c1dba --- /dev/null +++ b/onnx/test/results/128bpm_py @@ -0,0 +1,14 @@ +(1.2, 2.0) +(1.6600000000000001, 2.0) +(2.6, 2.0) +(4.48, 2.0) +(5.42, 1.0) +(6.34, 2.0) +(8.24, 2.0) +(8.700000000000001, 2.0) +(9.16, 2.0) +(10.1, 2.0) +(10.58, 2.0) +(12.92, 2.0) +(13.38, 1.0) +(14.32, 2.0) diff --git a/onnx/test/samples/108bpm.wav b/onnx/test/samples/108bpm.wav new file mode 100644 index 0000000..e40957c Binary files /dev/null and b/onnx/test/samples/108bpm.wav differ diff --git a/onnx/test/samples/120bpm.wav b/onnx/test/samples/120bpm.wav new file mode 100644 index 0000000..ecd79d4 Binary files /dev/null and b/onnx/test/samples/120bpm.wav differ diff --git a/onnx/test/samples/128bpm.wav b/onnx/test/samples/128bpm.wav new file mode 100644 index 0000000..6e668e2 Binary files /dev/null and b/onnx/test/samples/128bpm.wav differ diff --git a/onnx/test/testCPP.cpp b/onnx/test/testCPP.cpp new file mode 100644 index 0000000..875d73e --- /dev/null +++ b/onnx/test/testCPP.cpp @@ -0,0 +1,96 @@ +#include +#include "AudioFile/AudioFile.h" //--> https://github.com/adamstark/AudioFile +#include +#include +#include "BeatNet.h" +#include + +static int buffersize = 2293; // in BeatNet at 22050 samples/sec +typedef float bitQuantization; + +namespace fs = std::filesystem; + +const fs::path currentFilePath = fs::path(__FILE__).parent_path(); +const fs::path samplesPath = currentFilePath / "samples"; +const fs::path resultsPath = currentFilePath / "results"; + +int main() { + + std::cout<<"Testing cpp implementation of BeatNet"< audioFile; + audioFile.load(audiopath); + + // values of parameters at the samplerate of the loaded file + const int numSamples = audioFile.getNumSamplesPerChannel(); + const float sr_inputWavfile = static_cast(audioFile.getSampleRate()); + const int sr_Ratio = static_cast(sr_inputWavfile / SR_BEATNET); // 44100 / 22050 = 2 + const float dt = 1.0f / sr_inputWavfile; + + const int buffersize_current = buffersize * sr_Ratio; + const int FRAME_LENGTH_current = FRAME_LENGTH * sr_Ratio; + const int HOP_SIZE_current = HOP_SIZE * sr_Ratio; + + // initialize BeatNet (note:processes float buffers) + BeatNet model; + model.setup(sr_inputWavfile, buffersize_current); + + // predict beats and downbeats + std::vector> beatPositions; + std::vector> downBeatPositions; + + for (int idx=0; idx + buffersize_current < numSamples; idx += HOP_SIZE_current) + { + std::vector audioBlockInput(audioFile.samples[0].begin() + idx, + audioFile.samples[0].begin() + idx + buffersize_current); + std::vector output; + + if (model.process(audioBlockInput, output)) + { + // output[0] : downbeat + // output[1] : beat + // output[2] : no beat + + float time_index = (float)idx + (-(FRAME_LENGTH_current / 2) + (3 * HOP_SIZE_current)); + float time_seconds = time_index * dt; + + int indexMaxProbability = std::max_element(output.begin(), output.end()) - output.begin(); + switch (indexMaxProbability) + { + case 0: // downbeat + downBeatPositions.push_back({ time_seconds, 0 }); // + break; + case 1: // beat + beatPositions.push_back({ time_seconds, 1 }); + break; + } + } + } + + // write beat time positions to file, one per line + if (beatPositions.size() > 0) + { + std::ofstream outFile(outputFilePath); + for (const auto& [time, beatValue] : beatPositions) + { + outFile << time << "," << beatValue << std::endl; + } + outFile.close(); + } + + std::cout << "Finished processing " << audiopath << " (" << numSamples << " samples)" << std::endl; + } + return 0; +} diff --git a/onnx/test/testPython.py b/onnx/test/testPython.py new file mode 100644 index 0000000..558e685 --- /dev/null +++ b/onnx/test/testPython.py @@ -0,0 +1,66 @@ +import sys +import os +sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..", "..", "src"))) +sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..", "..", "src", "BeatNet"))) +print(sys.path) +import numpy as np +from BeatNet.BeatNet import BeatNet +import librosa + +# config +samplerate = 22050 +buffersize = 512 + +wavDir = "samples" + +# class that replaces the microphone input to feed buffers of audio data instead +class BufferStream: + ''' + Class that replaces the pyaudio.PyAudio().open()->_Stream that is used from BeatNet in stream mode to read the microphone input. + @see BeatNet.py->line 87 + ''' + def __init__(self, audiopath): + self.audio, _ = librosa.load(audiopath, sr=samplerate, mono=True, dtype=np.float32) + self.active = True + self.pos = 0 + self.numSamples = len(self.audio) + + def getNumSamples(self): + return self.numSamples + + def read(self, size): + # if last buffer + if (self.numSamples < (self.pos+1)*buffersize): + self.active = False + + # read buffer + buffer = self.audio[self.pos*buffersize : (self.pos+1)*buffersize] + self.pos += 1 + return buffer.tobytes() + + def is_active(self): + return self.active + +filelist = os.listdir(wavDir) +for audiosample in filelist: + import pdb; pdb.set_trace() + + audiopath = os.path.join(wavDir, audiosample) + + print(f"Processing {audiopath}...") + + # init beatnet model + estimator = BeatNet(1, mode='stream', inference_model='PF', thread=False) + customBufferStream = BufferStream(audiopath) + estimator.stream = customBufferStream + + output = estimator.process() # read buffers internally until is_active() is false + + # store results + outputFile = os.path.join(os.getcwd(), "results" ,os.path.basename(audiopath).split(".wav")[0] + "_py") + with open(outputFile, "w") as f: + for beat in output: + # import pdb; pdb.set_trace() + f.write(f"{beat[0],beat[1]}\n") + + print(f"Finished processing {audiopath} ({customBufferStream.getNumSamples()} samples)") diff --git a/src/BeatNet/BeatNet.py b/src/BeatNet/BeatNet.py index a449fb9..92879bb 100644 --- a/src/BeatNet/BeatNet.py +++ b/src/BeatNet/BeatNet.py @@ -95,6 +95,7 @@ def process(self, audio_path=None): if self.inference_model != "PF": raise RuntimeError('The infernece model should be set to "PF" for the streaming mode!') self.counter = 0 + # self.beatPositions = [] while self.stream.is_active(): self.activation_extractor_stream() # Using BeatNet causal Neural network streaming mode to extract activations if self.thread: @@ -103,7 +104,10 @@ def process(self, audio_path=None): x.join() else: output = self.estimator.process(self.pred) + self.beatPositions = output self.counter += 1 + + return self.beatPositions elif self.mode == "realtime":