From 298157f2303e3f94737910de3814a7cf621bf5ce Mon Sep 17 00:00:00 2001 From: dr32644 Date: Thu, 5 Jun 2025 15:32:32 -0400 Subject: [PATCH 1/6] feat: initial memory hash plugin commit --- plugins/CMakeLists.txt | 1 + plugins/memory_hash/CMakeLists.txt | 22 +++++ plugins/memory_hash/memory_hash.cpp | 143 ++++++++++++++++++++++++++++ 3 files changed, 166 insertions(+) create mode 100644 plugins/memory_hash/CMakeLists.txt create mode 100644 plugins/memory_hash/memory_hash.cpp diff --git a/plugins/CMakeLists.txt b/plugins/CMakeLists.txt index 3f5a2cd..099dd76 100644 --- a/plugins/CMakeLists.txt +++ b/plugins/CMakeLists.txt @@ -68,6 +68,7 @@ add_subdirectory(syscall_tracer) add_subdirectory(bbstats) add_subdirectory(callstack) add_subdirectory(apicall_tracer) +add_subdirectory(memory_hash) add_subdirectory(memory_regions) add_subdirectory(pmemdump) #add_subdirectory(volatility) diff --git a/plugins/memory_hash/CMakeLists.txt b/plugins/memory_hash/CMakeLists.txt new file mode 100644 index 0000000..53858eb --- /dev/null +++ b/plugins/memory_hash/CMakeLists.txt @@ -0,0 +1,22 @@ +set(PANDA_PLUGIN_NAME "memory_hash") +set(PLUGIN_TARGET "panda_${PANDA_PLUGIN_NAME}") + +# Set flags, build and link the actual plugin +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=c++11") +set(SRC_FILES ${PANDA_PLUGIN_NAME}.cpp) + +set(LINK_LIBS_I386 ${LINK_LIBS}) +set(LINK_LIBS_X86_64 ${LINK_LIBS}) + +set(LINK_LIBS_I386 ${LINK_LIBS} panda_ipanda-i386 panda_apicall_tracer-i386) +set(LINK_LIBS_X86_64 ${LINK_LIBS} panda_ipanda-x86_64 panda_apicall_tracer-x86_64) + +set(TARGET_DEPS_I386 panda_ipanda-i386) +set(TARGET_DEPS_X86_64 panda_ipanda-x86_64) + +add_i386_plugin(${PLUGIN_TARGET} SRC_FILES LINK_LIBS_I386) +add_x86_64_plugin(${PLUGIN_TARGET} SRC_FILES LINK_LIBS_X86_64) +#add_dependencies(${PLUGIN_TARGET}-i386 ${TARGET_DEPS_I386}) +#add_dependencies(${PLUGIN_TARGET}-x86_64 ${TARGET_DEPS_X86_64}) diff --git a/plugins/memory_hash/memory_hash.cpp b/plugins/memory_hash/memory_hash.cpp new file mode 100644 index 0000000..c39effe --- /dev/null +++ b/plugins/memory_hash/memory_hash.cpp @@ -0,0 +1,143 @@ +/* + * Memory Hash -- Page hashing PANDA plugin + */ + +// This needs to be defined before anything is included in order to get +// the PRIx64 macro +#define __STDC_FORMAT_MACROS +#include +#include +#include + +#include "panda/plugin.h" +#include "panda/common.h" +#include "exec/cpu-defs.h" + +#include "ipanda/ipanda.h" +#include "ipanda/manager.h" +#include "ipanda/types.h" + +#include "apicall_tracer/trace_filter.h" + +// These need to be extern "C" so that the ABI is compatible with +// QEMU/PANDA, which is written in C +extern "C" { + void phys_mem_after_write(CPUState *env, target_ulong pc, target_ulong addr, target_ulong size, void *buf); + void phys_mem_after_read(CPUState *env, target_ulong pc, target_ulong addr, target_ulong size, void *buf); + + bool init_plugin(void*); + void uninit_plugin(void*); +} + +// Todo list +// [X] TODO: Basic plugin setup (build and print messages) +// [X] TODO: Count number of Writes / Reads per insn & bb -- decide what is too much / too slow +// [X] TODO: Get memory pages from physical (maybe virtual is fine) +// [X] TODO: Filter on a PID list +// [ ] TODO: Optimize filtering +// [ ] TODO: Decide how/what to store +// [ ] TODO: +// [ ] TODO: + +// ### Globals +bool s_memhash_initialized = false; + + +// ### Memory Hash Plugin Variables ### +static uint64_t phys_read_count = 0; +static uint64_t phys_write_count = 0; +static std::set phys_pages_read; +static std::set phys_pages_written; + +static std::shared_ptr os_manager; +static auto tracefilter = std::shared_ptr(); +static bool allowed = false; + +bool mh_check_allowlist(CPUState *env) { + // This check is very slow and expensive + ipanda_types::Process current_process; + os_manager->get_current_process(env, current_process); + return tracefilter->quickCheck(current_process.pid, current_process.asid); +} + +void mh_phys_mem_read(CPUState *env, target_ptr_t pc, target_ptr_t addr, size_t size, uint8_t *buf) +{ + if (!allowed || panda_in_kernel(env)) return; + if (!mh_check_allowlist(env)) return; + phys_read_count++; + phys_pages_read.insert(addr & ~(0xFFF)); +} + +void mh_phys_mem_write(CPUState *env, target_ptr_t pc, target_ptr_t addr, size_t size, uint8_t *buf) +{ + if (!allowed || panda_in_kernel(env)) return; + if (!mh_check_allowlist(env)) return; + phys_write_count++; + phys_pages_written.insert(addr & ~(0xFFF)); +} + +void init_memhash(CPUState* env) +{ + std::cout << "initializing memhash" << std::endl; + // ipanda must load on/after first instruction + if (!init_ipanda(env, os_manager)) { + fprintf(stderr, "Could not initialize the introspection library.\n"); + return; + } + std::cout << "initialized memhash" << std::endl; + //allowed = mh_check_allowlist(env); + s_memhash_initialized = true; +} + +bool mh_process_change(CPUState* env, target_ulong oldval, target_ulong newval) +{ + allowed = mh_check_allowlist(env); + return false; +} + +bool init_plugin(void* self) +{ + panda_cb pcb; + + // Tracer Filter args handling + // --panda-arg trace_filter:file=filter.json + panda_arg_list* filter_args = panda_get_args("filter"); + const char* filter_file = strdup(panda_parse_string(filter_args, "file", "")); + if (filter_file[0] == '\0') { + std::cerr << "ERROR: filter not provided" << std::endl; + return false; + } + tracefilter.reset(new TraceFilter(filter_file)); + panda_free_args(filter_args); + + // enable memory callbacks, turned off by defualt + panda_enable_memcb(); + + // Post vm load initialization + pcb.after_loadvm = (reinterpret_cast(init_memhash)); + panda_register_callback(self, PANDA_CB_AFTER_LOADVM, pcb); + + // Load Callbacks + //pcb.phys_mem_after_read = mh_phys_mem_after_read; + //panda_register_callback(self, PANDA_CB_PHYS_MEM_AFTER_READ, pcb); + + pcb.phys_mem_before_write = mh_phys_mem_write; + panda_register_callback(self, PANDA_CB_PHYS_MEM_BEFORE_WRITE, pcb); + + // Track process changes to optimize checks for target threads + pcb.asid_changed = mh_process_change; + panda_register_callback(self, PANDA_CB_ASID_CHANGED, pcb); + + std::cout << "loaded MEM_HASH" << std::endl; + + return true; +} + +void uninit_plugin(void* self) +{ + std::cout << "unloaded MEM_HASH" << std::endl; + std::cout << "PHYS READS: " << phys_read_count << std::endl; + std::cout << "PHYS PAGES RD: " << phys_pages_read.size() << std::endl; + std::cout << "PHYS WRITES: " << phys_write_count << std::endl; + std::cout << "PHYS PAGES WR: " << phys_pages_written.size() << std::endl; +} From 059b51a32585b21ce6efb80f3eca18498a97309a Mon Sep 17 00:00:00 2001 From: dr32644 Date: Fri, 20 Jun 2025 16:15:06 -0400 Subject: [PATCH 2/6] feat: memory_hash -- collects and hashes memory pages using a 61-bit polynomial rolling hash on every write --- plugins/memory_hash/CMakeLists.txt | 4 +- plugins/memory_hash/memory_hash.cpp | 154 ++++++++++++++++++++------ plugins/memory_hash/pr61hash.cpp | 59 ++++++++++ plugins/memory_hash/pr61hash.h | 13 +++ plugins/memory_hash/pr61hash_test.cpp | 55 +++++++++ 5 files changed, 247 insertions(+), 38 deletions(-) create mode 100644 plugins/memory_hash/pr61hash.cpp create mode 100644 plugins/memory_hash/pr61hash.h create mode 100644 plugins/memory_hash/pr61hash_test.cpp diff --git a/plugins/memory_hash/CMakeLists.txt b/plugins/memory_hash/CMakeLists.txt index 53858eb..4ec2952 100644 --- a/plugins/memory_hash/CMakeLists.txt +++ b/plugins/memory_hash/CMakeLists.txt @@ -2,10 +2,10 @@ set(PANDA_PLUGIN_NAME "memory_hash") set(PLUGIN_TARGET "panda_${PANDA_PLUGIN_NAME}") # Set flags, build and link the actual plugin -include_directories(${CMAKE_CURRENT_BINARY_DIR}) +Include_directories(${CMAKE_CURRENT_BINARY_DIR}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=c++11") -set(SRC_FILES ${PANDA_PLUGIN_NAME}.cpp) +set(SRC_FILES ${PANDA_PLUGIN_NAME}.cpp pr61hash.cpp) set(LINK_LIBS_I386 ${LINK_LIBS}) set(LINK_LIBS_X86_64 ${LINK_LIBS}) diff --git a/plugins/memory_hash/memory_hash.cpp b/plugins/memory_hash/memory_hash.cpp index c39effe..8b54ea2 100644 --- a/plugins/memory_hash/memory_hash.cpp +++ b/plugins/memory_hash/memory_hash.cpp @@ -6,8 +6,17 @@ // the PRIx64 macro #define __STDC_FORMAT_MACROS #include -#include +#include #include +#include +#include +#include +#include +#include + +#include +#include +#include #include "panda/plugin.h" #include "panda/common.h" @@ -19,6 +28,10 @@ #include "apicall_tracer/trace_filter.h" +#include "pr61hash.h" + +#define PREFIX "[memory_hash] " + // These need to be extern "C" so that the ABI is compatible with // QEMU/PANDA, which is written in C extern "C" { @@ -34,7 +47,8 @@ extern "C" { // [X] TODO: Count number of Writes / Reads per insn & bb -- decide what is too much / too slow // [X] TODO: Get memory pages from physical (maybe virtual is fine) // [X] TODO: Filter on a PID list -// [ ] TODO: Optimize filtering +// [X] TODO: Optimize filtering +// [X] TODO: Polynomial Rolling Hash // [ ] TODO: Decide how/what to store // [ ] TODO: // [ ] TODO: @@ -43,16 +57,52 @@ extern "C" { bool s_memhash_initialized = false; -// ### Memory Hash Plugin Variables ### -static uint64_t phys_read_count = 0; +// ### Memory Hash Plugin Variables +static FILE *output_fp; static uint64_t phys_write_count = 0; -static std::set phys_pages_read; -static std::set phys_pages_written; +static std::map> phys_pages_written; +static std::map hash_freq; static std::shared_ptr os_manager; static auto tracefilter = std::shared_ptr(); static bool allowed = false; + +void write_json() { + rapidjson::Document document; + document.SetObject(); + rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); + + // Handle Pages + rapidjson::Value pages_writtenj(rapidjson::kObjectType); + for (const auto& kv : phys_pages_written) { + std::stringstream ss; + ss << std::hex << std::uppercase << std::setw(10) << std::setfill('0') << kv.first; + std::string page_id = ss.str(); + + rapidjson::Value deltasj(rapidjson::kArrayType); + for (const auto& delta : kv.second) { + std::stringstream ss; + ss << std::hex << std::uppercase << std::setw(16) << std::setfill('0') << delta; + std::string hash = ss.str(); + + rapidjson::Value value(hash.c_str(), allocator); + deltasj.PushBack(value, allocator); + } + + rapidjson::Value key(page_id.c_str(), allocator); + rapidjson::Value value(deltasj, allocator); + pages_writtenj.AddMember(key, value, allocator); + } + document.AddMember("pages", pages_writtenj, allocator); + + // Output Json + char writeBuffer[65536]; // Buffer for writing + rapidjson::FileWriteStream os(output_fp, writeBuffer, sizeof(writeBuffer)); + rapidjson::Writer writer(os); + document.Accept(writer); +} + bool mh_check_allowlist(CPUState *env) { // This check is very slow and expensive ipanda_types::Process current_process; @@ -60,20 +110,51 @@ bool mh_check_allowlist(CPUState *env) { return tracefilter->quickCheck(current_process.pid, current_process.asid); } -void mh_phys_mem_read(CPUState *env, target_ptr_t pc, target_ptr_t addr, size_t size, uint8_t *buf) +void mh_phys_mem_write(CPUState *env, target_ptr_t pc, target_ptr_t addr, size_t size, uint8_t *buf) { if (!allowed || panda_in_kernel(env)) return; if (!mh_check_allowlist(env)) return; - phys_read_count++; - phys_pages_read.insert(addr & ~(0xFFF)); + + target_ptr_t page_id = (addr & ~(0xFFF)) >> 12; + target_ptr_t page_offset = (addr & (0xFFF)); + uint8_t buffer[PAGE_SIZE]; + + phys_write_count++; + + assert(page_offset + size <= PAGE_SIZE); // read/writes should per page + + // Check if first time seeing page + if (phys_pages_written.find(page_id) == phys_pages_written.end()) { + + // Get page to hash + if (panda_physical_memory_rw(page_id, buffer, PAGE_SIZE, false) != MEMTX_OK) { + std::cout << "ERROR: failed to read page: " << page_id << std::endl; + return; + } + + // Calculate the full hash and update structures + uint64_t hash = full_poly_hash(buffer); + phys_pages_written[page_id] = { hash }; + hash_freq[hash]++; + } + + // Get memory about to be changed + if (panda_physical_memory_rw(page_id, buffer, size, false) != MEMTX_OK) { + std::cout << "ERROR: failed to read page: " << page_id << std::endl; + return; + } + + // Calculate delta and update structures + uint64_t hash = phys_pages_written[page_id].back(); + uint64_t delta = apply_delta(hash, buffer, buf, page_offset, size); + phys_pages_written[page_id].push_back(delta); + hash_freq[hash]++; } -void mh_phys_mem_write(CPUState *env, target_ptr_t pc, target_ptr_t addr, size_t size, uint8_t *buf) +bool mh_process_change(CPUState* env, target_ulong oldval, target_ulong newval) { - if (!allowed || panda_in_kernel(env)) return; - if (!mh_check_allowlist(env)) return; - phys_write_count++; - phys_pages_written.insert(addr & ~(0xFFF)); + allowed = mh_check_allowlist(env); + return false; } void init_memhash(CPUState* env) @@ -84,32 +165,34 @@ void init_memhash(CPUState* env) fprintf(stderr, "Could not initialize the introspection library.\n"); return; } + std::cout << "initialized memhash" << std::endl; //allowed = mh_check_allowlist(env); s_memhash_initialized = true; } -bool mh_process_change(CPUState* env, target_ulong oldval, target_ulong newval) -{ - allowed = mh_check_allowlist(env); - return false; -} - bool init_plugin(void* self) { panda_cb pcb; + panda_arg_list* memhash_args = panda_get_args("memory_hash"); - // Tracer Filter args handling - // --panda-arg trace_filter:file=filter.json - panda_arg_list* filter_args = panda_get_args("filter"); - const char* filter_file = strdup(panda_parse_string(filter_args, "file", "")); + // --panda-arg memory_hash:filter=filter.json + const char* filter_file = strdup(panda_parse_string(memhash_args, "filter", "")); if (filter_file[0] == '\0') { std::cerr << "ERROR: filter not provided" << std::endl; return false; } tracefilter.reset(new TraceFilter(filter_file)); - panda_free_args(filter_args); - + + // --panda-arg memory_hash:output=output.json + const char* output_file = strdup(panda_parse_string(memhash_args, "output", "")); + if (output_file[0] == '\0') { + std::cerr << "ERROR: output not provided" << std::endl; + return false; + } + output_fp = fopen(output_file, "w"); + panda_free_args(memhash_args); + // enable memory callbacks, turned off by defualt panda_enable_memcb(); @@ -117,10 +200,6 @@ bool init_plugin(void* self) pcb.after_loadvm = (reinterpret_cast(init_memhash)); panda_register_callback(self, PANDA_CB_AFTER_LOADVM, pcb); - // Load Callbacks - //pcb.phys_mem_after_read = mh_phys_mem_after_read; - //panda_register_callback(self, PANDA_CB_PHYS_MEM_AFTER_READ, pcb); - pcb.phys_mem_before_write = mh_phys_mem_write; panda_register_callback(self, PANDA_CB_PHYS_MEM_BEFORE_WRITE, pcb); @@ -129,15 +208,18 @@ bool init_plugin(void* self) panda_register_callback(self, PANDA_CB_ASID_CHANGED, pcb); std::cout << "loaded MEM_HASH" << std::endl; - return true; } void uninit_plugin(void* self) { - std::cout << "unloaded MEM_HASH" << std::endl; - std::cout << "PHYS READS: " << phys_read_count << std::endl; - std::cout << "PHYS PAGES RD: " << phys_pages_read.size() << std::endl; - std::cout << "PHYS WRITES: " << phys_write_count << std::endl; - std::cout << "PHYS PAGES WR: " << phys_pages_written.size() << std::endl; + std::cout << PREFIX "unloading..." << std::endl; + std::cout << PREFIX "individual page writes: " << phys_write_count << std::endl; + std::cout << PREFIX "unique pages pritten to: " << phys_pages_written.size() << std::endl; + std::cout << PREFIX "unique hashes: " << hash_freq.size() << std::endl; + + std::cout << PREFIX "writing json output..." << std::endl; + write_json(); + fclose(output_fp); + std::cout << PREFIX "done." << std::endl; } diff --git a/plugins/memory_hash/pr61hash.cpp b/plugins/memory_hash/pr61hash.cpp new file mode 100644 index 0000000..bc38c50 --- /dev/null +++ b/plugins/memory_hash/pr61hash.cpp @@ -0,0 +1,59 @@ +#include +#include +#include +#include + +#include "pr61hash.h" + +// ### Polynomial Rolling 61-bit Hash -- unofficially calling pr61 + +constexpr std::array poly_hash_powers() +{ + std::array _powers = {1,}; + for (size_t i = 1; i < PAGE_SIZE; i++) { + _powers[i] = (_powers[i-1] * P) % MOD; + } + return _powers; +} + +static std::array powers = poly_hash_powers(); + +inline uint64_t mulmod(uint64_t a, uint64_t b) +{ + return (uint64_t)(((__uint128_t)a*b) % MOD); +} + +uint64_t full_poly_hash(uint8_t* page) +{ + uint64_t h = 0; + for (size_t i = 0; i < PAGE_SIZE; i++) { + //std::cout << powers[i] << std::endl; + h = (h + mulmod(page[i], powers[i])) % MOD; + } + return h; +} + +uint64_t delta_poly_hash(uint64_t h, size_t i, uint8_t oldv, uint8_t newv) +{ + if (i >= PAGE_SIZE) { + std::cerr << "ERROR index issue" << std::endl; + return h; + } + + uint64_t new_ = mulmod(newv, powers[i]); + uint64_t old_ = mulmod(oldv, powers[i]); + return (h + MOD + new_ - old_) % MOD; +} + +uint64_t apply_delta(uint64_t h, uint8_t *old_buffer, uint8_t *new_buffer, size_t size, size_t offset) +{ + uint64_t curr_h = h; + for (size_t i = 0; i < size; i++) { + curr_h = delta_poly_hash(curr_h, offset+i, old_buffer[i], new_buffer[i]); + } + return curr_h; +} + + + + diff --git a/plugins/memory_hash/pr61hash.h b/plugins/memory_hash/pr61hash.h new file mode 100644 index 0000000..04df579 --- /dev/null +++ b/plugins/memory_hash/pr61hash.h @@ -0,0 +1,13 @@ +#ifndef POLY_HASH_H +#define POLY_HASH_H + +constexpr size_t PAGE_SIZE = 4096; +constexpr uint64_t MOD = (1ULL << 61) - 1; +constexpr uint64_t P = 257; + +void init_poly_hash_powers(); +uint64_t full_poly_hash(uint8_t *page); +uint64_t delta_poly_hash(uint64_t h, size_t i, uint8_t oldv, uint8_t newv); +uint64_t apply_delta(uint64_t h, uint8_t *old_buffer, uint8_t *new_buffer, size_t size, size_t offset); + +#endif diff --git a/plugins/memory_hash/pr61hash_test.cpp b/plugins/memory_hash/pr61hash_test.cpp new file mode 100644 index 0000000..e5e4c3a --- /dev/null +++ b/plugins/memory_hash/pr61hash_test.cpp @@ -0,0 +1,55 @@ +#include +#include + +#include "pr61hash.cpp" + +const uint64_t PAGE_HASH = 0x1b1d8d0b72aebff6; + +void test1() +{ + uint8_t page1[PAGE_SIZE] = {}; + uint8_t page2[PAGE_SIZE] = {}; + + uint64_t page1_hash = full_poly_hash(page1); + uint64_t page2_hash = full_poly_hash(page2); + + std::cout << std::hex; + for (size_t i = 0; i < PAGE_SIZE; i++) { + page1[i] = i % 256; + page1_hash = delta_poly_hash(page1_hash, i, 0, i%256); + std::cout << "page1 - " << i << " - 0x"<< std::setw(16) << std::setfill('0') << page1_hash << std::endl; + } + std::cout << "full - xxx - 0x" << std::setw(16) << std::setfill('0') << full_poly_hash(page1) << std::endl; + assert(page1_hash == PAGE_HASH); + + for (int i = PAGE_SIZE-1; i >= 0; i--) { + page2[i] = i % 256; + page2_hash = delta_poly_hash(page2_hash, i, 0, i%256); + std::cout << "page2 - " << i << " - 0x"<< std::setw(16) << std::setfill('0') << page2_hash << std::endl; + } + std::cout << "full - xxx - 0x" << std::setw(16) << std::setfill('0') << full_poly_hash(page1) << std::endl; + assert(full_poly_hash(page2) == full_poly_hash(page1)); + assert(page2_hash == PAGE_HASH); + std::cout << std::dec; +} + +void test2() +{ + uint8_t page1[PAGE_SIZE] = {}; + uint64_t page1_hash = full_poly_hash(page1); + uint64_t page1_full = full_poly_hash(page1); + + std::cout << std::hex; + for (size_t i = 0; i < PAGE_SIZE; i++) { + page1[i] = i % 256; + page1_hash = delta_poly_hash(page1_hash, i, 0, i%256); + page1_full = full_poly_hash(page1); + if (i < 10 || i > 4086) { + std::cout << "page1 - " << i + << " - 0x" << std::setw(16) << std::setfill('0') << page1_hash + << " - 0x" << std::setw(16) << std::setfill('0') << page1_full + << std::endl; + } + } + + std::cout << "full - xxx - 0x" From e1caccff00dc84c4067433bce924e481d82dd4ba Mon Sep 17 00:00:00 2001 From: dr32644 Date: Tue, 24 Jun 2025 15:20:34 -0400 Subject: [PATCH 3/6] fix: updates, still WIP since page swapping is moving physical addresses --- plugins/memory_hash/memory_hash.cpp | 58 ++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/plugins/memory_hash/memory_hash.cpp b/plugins/memory_hash/memory_hash.cpp index 8b54ea2..14fe658 100644 --- a/plugins/memory_hash/memory_hash.cpp +++ b/plugins/memory_hash/memory_hash.cpp @@ -9,9 +9,9 @@ #include #include #include +#include #include #include -#include #include #include @@ -35,9 +35,6 @@ // These need to be extern "C" so that the ABI is compatible with // QEMU/PANDA, which is written in C extern "C" { - void phys_mem_after_write(CPUState *env, target_ulong pc, target_ulong addr, target_ulong size, void *buf); - void phys_mem_after_read(CPUState *env, target_ulong pc, target_ulong addr, target_ulong size, void *buf); - bool init_plugin(void*); void uninit_plugin(void*); } @@ -49,9 +46,15 @@ extern "C" { // [X] TODO: Filter on a PID list // [X] TODO: Optimize filtering // [X] TODO: Polynomial Rolling Hash -// [ ] TODO: Decide how/what to store -// [ ] TODO: -// [ ] TODO: +// [X] TODO: Decide how/what to store +// [ ] TODO: get ASID, PID for each page + +// for readability +using physical_t = uint64_t; +using virtual_t = uint64_t; +using asid_t = uint64_t; +using pr61hash_t = uint64_t; + // ### Globals bool s_memhash_initialized = false; @@ -60,13 +63,17 @@ bool s_memhash_initialized = false; // ### Memory Hash Plugin Variables static FILE *output_fp; static uint64_t phys_write_count = 0; -static std::map> phys_pages_written; -static std::map hash_freq; +static std::map> phys_pages_written; +static std::map> physical2virtual; +static std::map hash_freq; static std::shared_ptr os_manager; static auto tracefilter = std::shared_ptr(); static bool allowed = false; +// before_write triggers after_write to reduce filter checks +static bool before_after = false; + void write_json() { rapidjson::Document document; @@ -95,7 +102,7 @@ void write_json() { pages_writtenj.AddMember(key, value, allocator); } document.AddMember("pages", pages_writtenj, allocator); - + // Output Json char writeBuffer[65536]; // Buffer for writing rapidjson::FileWriteStream os(output_fp, writeBuffer, sizeof(writeBuffer)); @@ -110,13 +117,32 @@ bool mh_check_allowlist(CPUState *env) { return tracefilter->quickCheck(current_process.pid, current_process.asid); } -void mh_phys_mem_write(CPUState *env, target_ptr_t pc, target_ptr_t addr, size_t size, uint8_t *buf) +void mh_virt_mem_after_write(CPUState *env, target_ptr_t pc, target_ptr_t vaddr, size_t size, uint8_t *buf) +{ + if (!before_after) return; + before_after = false; + + asid_t asid = panda_current_asid(env); + + physical_t paddr = panda_virt_to_phys(env, vaddr); + physical_t ppage_id = (paddr & ~(0xFFF)) >> 12; + physical_t vpage_id = (vaddr & ~(0xFFF)) >> 12; + //physical_t page_offset = (paddr & (0xFFF)); + + if (physical2virtual.find(ppage_id) != physical2virtual.end()) { // found + assert(std::get<1>(physical2virtual[ppage_id]) == asid); + return; + } + physical2virtual[ppage_id] = std::make_tuple(vpage_id, asid); +} + +void mh_phys_mem_before_write(CPUState *env, target_ptr_t pc, target_ptr_t addr, size_t size, uint8_t *buf) { if (!allowed || panda_in_kernel(env)) return; if (!mh_check_allowlist(env)) return; - target_ptr_t page_id = (addr & ~(0xFFF)) >> 12; - target_ptr_t page_offset = (addr & (0xFFF)); + physical_t page_id = (addr & ~(0xFFF)) >> 12; + physical_t page_offset = (addr & (0xFFF)); uint8_t buffer[PAGE_SIZE]; phys_write_count++; @@ -149,6 +175,7 @@ void mh_phys_mem_write(CPUState *env, target_ptr_t pc, target_ptr_t addr, size_t uint64_t delta = apply_delta(hash, buffer, buf, page_offset, size); phys_pages_written[page_id].push_back(delta); hash_freq[hash]++; + before_after = true; } bool mh_process_change(CPUState* env, target_ulong oldval, target_ulong newval) @@ -200,7 +227,10 @@ bool init_plugin(void* self) pcb.after_loadvm = (reinterpret_cast(init_memhash)); panda_register_callback(self, PANDA_CB_AFTER_LOADVM, pcb); - pcb.phys_mem_before_write = mh_phys_mem_write; + pcb.virt_mem_after_write = mh_virt_mem_after_write; + panda_register_callback(self, PANDA_CB_VIRT_MEM_AFTER_WRITE, pcb); + + pcb.phys_mem_before_write = mh_phys_mem_before_write; panda_register_callback(self, PANDA_CB_PHYS_MEM_BEFORE_WRITE, pcb); // Track process changes to optimize checks for target threads From 42e4e71e5281ad77814ba3bed23f1cedce89f6e0 Mon Sep 17 00:00:00 2001 From: dr32644 Date: Fri, 27 Jun 2025 12:58:09 -0400 Subject: [PATCH 4/6] fix: correctly pointing at asid,vaddr, dropping physical addresses --- plugins/memory_hash/memory_hash.cpp | 96 ++++++++++++++++++----------- 1 file changed, 60 insertions(+), 36 deletions(-) diff --git a/plugins/memory_hash/memory_hash.cpp b/plugins/memory_hash/memory_hash.cpp index 14fe658..aca659d 100644 --- a/plugins/memory_hash/memory_hash.cpp +++ b/plugins/memory_hash/memory_hash.cpp @@ -54,6 +54,9 @@ using physical_t = uint64_t; using virtual_t = uint64_t; using asid_t = uint64_t; using pr61hash_t = uint64_t; +using page_key_t = std::tuple; +#define ASID 0 +#define PAGE 1 // ### Globals @@ -63,48 +66,66 @@ bool s_memhash_initialized = false; // ### Memory Hash Plugin Variables static FILE *output_fp; static uint64_t phys_write_count = 0; -static std::map> phys_pages_written; -static std::map> physical2virtual; +static std::map> pages_written; static std::map hash_freq; +// filter variables static std::shared_ptr os_manager; static auto tracefilter = std::shared_ptr(); static bool allowed = false; // before_write triggers after_write to reduce filter checks -static bool before_after = false; +static bool before_virt_phys = false; +static page_key_t current_page_key; void write_json() { rapidjson::Document document; document.SetObject(); rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); + std::stringstream ss; // Handle Pages - rapidjson::Value pages_writtenj(rapidjson::kObjectType); - for (const auto& kv : phys_pages_written) { - std::stringstream ss; - ss << std::hex << std::uppercase << std::setw(10) << std::setfill('0') << kv.first; + rapidjson::Value pages_writtenj(rapidjson::kArrayType); + for (const auto& kv : pages_written) { + // asid + ss << std::dec << std::get(kv.first); + std::string asid = ss.str(); + ss.str(""); + ss.clear(); + + // page_id + ss << std::hex << std::uppercase << std::setw(10) << std::setfill('0') << std::get(kv.first); std::string page_id = ss.str(); - + ss.str(""); + ss.clear(); + + // hashes rapidjson::Value deltasj(rapidjson::kArrayType); for (const auto& delta : kv.second) { - std::stringstream ss; ss << std::hex << std::uppercase << std::setw(16) << std::setfill('0') << delta; std::string hash = ss.str(); + ss.str(""); + ss.clear(); rapidjson::Value value(hash.c_str(), allocator); deltasj.PushBack(value, allocator); } - rapidjson::Value key(page_id.c_str(), allocator); - rapidjson::Value value(deltasj, allocator); - pages_writtenj.AddMember(key, value, allocator); + // element + rapidjson::Value ele(rapidjson::kObjectType); + rapidjson::Value asidj(asid.c_str(), allocator); + rapidjson::Value page_idj(page_id.c_str(), allocator); + ele.AddMember("asid", asidj, allocator); + ele.AddMember("page_id", page_idj, allocator); + ele.AddMember("hashes", deltasj, allocator); + + pages_writtenj.PushBack(ele, allocator); } document.AddMember("pages", pages_writtenj, allocator); // Output Json - char writeBuffer[65536]; // Buffer for writing + char writeBuffer[65536]; // Buffer for writing, recommend size by docs rapidjson::FileWriteStream os(output_fp, writeBuffer, sizeof(writeBuffer)); rapidjson::Writer writer(os); document.Accept(writer); @@ -117,41 +138,39 @@ bool mh_check_allowlist(CPUState *env) { return tracefilter->quickCheck(current_process.pid, current_process.asid); } -void mh_virt_mem_after_write(CPUState *env, target_ptr_t pc, target_ptr_t vaddr, size_t size, uint8_t *buf) +void mh_virt_mem_before_write(CPUState *env, target_ptr_t pc, target_ptr_t vaddr, size_t size, uint8_t *buf) { - if (!before_after) return; - before_after = false; + if (!allowed || panda_in_kernel(env)) return; + if (!mh_check_allowlist(env)) return; asid_t asid = panda_current_asid(env); - physical_t paddr = panda_virt_to_phys(env, vaddr); - physical_t ppage_id = (paddr & ~(0xFFF)) >> 12; + //physical_t paddr = panda_virt_to_phys(env, vaddr); // could fail physical_t vpage_id = (vaddr & ~(0xFFF)) >> 12; - //physical_t page_offset = (paddr & (0xFFF)); - if (physical2virtual.find(ppage_id) != physical2virtual.end()) { // found - assert(std::get<1>(physical2virtual[ppage_id]) == asid); - return; - } - physical2virtual[ppage_id] = std::make_tuple(vpage_id, asid); + current_page_key = std::make_tuple(asid, vpage_id); + before_virt_phys = true; } void mh_phys_mem_before_write(CPUState *env, target_ptr_t pc, target_ptr_t addr, size_t size, uint8_t *buf) { - if (!allowed || panda_in_kernel(env)) return; - if (!mh_check_allowlist(env)) return; - + if (!before_virt_phys) return; + before_virt_phys = false; + + asid_t asid = panda_current_asid(env); physical_t page_id = (addr & ~(0xFFF)) >> 12; physical_t page_offset = (addr & (0xFFF)); uint8_t buffer[PAGE_SIZE]; phys_write_count++; + // check that asid didnt just randomly change + assert(std::get(current_page_key) == asid); + assert(page_offset + size <= PAGE_SIZE); // read/writes should per page // Check if first time seeing page - if (phys_pages_written.find(page_id) == phys_pages_written.end()) { - + if (pages_written.find(current_page_key) == pages_written.end()) { // Get page to hash if (panda_physical_memory_rw(page_id, buffer, PAGE_SIZE, false) != MEMTX_OK) { std::cout << "ERROR: failed to read page: " << page_id << std::endl; @@ -160,7 +179,7 @@ void mh_phys_mem_before_write(CPUState *env, target_ptr_t pc, target_ptr_t addr, // Calculate the full hash and update structures uint64_t hash = full_poly_hash(buffer); - phys_pages_written[page_id] = { hash }; + pages_written[current_page_key] = { hash }; hash_freq[hash]++; } @@ -171,11 +190,10 @@ void mh_phys_mem_before_write(CPUState *env, target_ptr_t pc, target_ptr_t addr, } // Calculate delta and update structures - uint64_t hash = phys_pages_written[page_id].back(); + uint64_t hash = pages_written[current_page_key].back(); uint64_t delta = apply_delta(hash, buffer, buf, page_offset, size); - phys_pages_written[page_id].push_back(delta); + pages_written[current_page_key].push_back(delta); hash_freq[hash]++; - before_after = true; } bool mh_process_change(CPUState* env, target_ulong oldval, target_ulong newval) @@ -227,8 +245,8 @@ bool init_plugin(void* self) pcb.after_loadvm = (reinterpret_cast(init_memhash)); panda_register_callback(self, PANDA_CB_AFTER_LOADVM, pcb); - pcb.virt_mem_after_write = mh_virt_mem_after_write; - panda_register_callback(self, PANDA_CB_VIRT_MEM_AFTER_WRITE, pcb); + pcb.virt_mem_before_write = mh_virt_mem_before_write; + panda_register_callback(self, PANDA_CB_VIRT_MEM_BEFORE_WRITE, pcb); pcb.phys_mem_before_write = mh_phys_mem_before_write; panda_register_callback(self, PANDA_CB_PHYS_MEM_BEFORE_WRITE, pcb); @@ -245,8 +263,14 @@ void uninit_plugin(void* self) { std::cout << PREFIX "unloading..." << std::endl; std::cout << PREFIX "individual page writes: " << phys_write_count << std::endl; - std::cout << PREFIX "unique pages pritten to: " << phys_pages_written.size() << std::endl; + std::cout << PREFIX "unique pages pritten to: " << pages_written.size() << std::endl; std::cout << PREFIX "unique hashes: " << hash_freq.size() << std::endl; + + for (const auto& kv : hash_freq) { + if (kv.second > 2) { + std::cout << kv.first << " - " << kv.second << std::endl; + } + } std::cout << PREFIX "writing json output..." << std::endl; write_json(); From bfd591174ea62f63fda2af63f121a61464e228e1 Mon Sep 17 00:00:00 2001 From: Drake Marin Petersen Date: Fri, 30 Jan 2026 13:32:53 -0500 Subject: [PATCH 5/6] feat: testing and debugging --- plugins/memory_hash/CMakeLists.txt | 2 +- plugins/memory_hash/memory_hash.cpp | 169 ++++++++++++++++---------- plugins/memory_hash/pr61hash.cpp | 38 +++--- plugins/memory_hash/pr61hash.h | 2 +- plugins/memory_hash/pr61hash_test.cpp | 74 +++++++---- plugins/memory_hash/pr61hash_test.h | 5 + 6 files changed, 182 insertions(+), 108 deletions(-) create mode 100644 plugins/memory_hash/pr61hash_test.h diff --git a/plugins/memory_hash/CMakeLists.txt b/plugins/memory_hash/CMakeLists.txt index 4ec2952..4f32ce1 100644 --- a/plugins/memory_hash/CMakeLists.txt +++ b/plugins/memory_hash/CMakeLists.txt @@ -5,7 +5,7 @@ set(PLUGIN_TARGET "panda_${PANDA_PLUGIN_NAME}") Include_directories(${CMAKE_CURRENT_BINARY_DIR}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=c++11") -set(SRC_FILES ${PANDA_PLUGIN_NAME}.cpp pr61hash.cpp) +set(SRC_FILES ${PANDA_PLUGIN_NAME}.cpp pr61hash.cpp pr61hash_test.cpp) set(LINK_LIBS_I386 ${LINK_LIBS}) set(LINK_LIBS_X86_64 ${LINK_LIBS}) diff --git a/plugins/memory_hash/memory_hash.cpp b/plugins/memory_hash/memory_hash.cpp index aca659d..8a8ed76 100644 --- a/plugins/memory_hash/memory_hash.cpp +++ b/plugins/memory_hash/memory_hash.cpp @@ -29,6 +29,7 @@ #include "apicall_tracer/trace_filter.h" #include "pr61hash.h" +//#include "pr61hash_test.h" #define PREFIX "[memory_hash] " @@ -39,17 +40,8 @@ extern "C" { void uninit_plugin(void*); } -// Todo list -// [X] TODO: Basic plugin setup (build and print messages) -// [X] TODO: Count number of Writes / Reads per insn & bb -- decide what is too much / too slow -// [X] TODO: Get memory pages from physical (maybe virtual is fine) -// [X] TODO: Filter on a PID list -// [X] TODO: Optimize filtering -// [X] TODO: Polynomial Rolling Hash -// [X] TODO: Decide how/what to store -// [ ] TODO: get ASID, PID for each page - -// for readability + +// Types for using physical_t = uint64_t; using virtual_t = uint64_t; using asid_t = uint64_t; @@ -62,24 +54,21 @@ using page_key_t = std::tuple; // ### Globals bool s_memhash_initialized = false; - // ### Memory Hash Plugin Variables static FILE *output_fp; static uint64_t phys_write_count = 0; static std::map> pages_written; -static std::map hash_freq; +static std::map hash_uncommitited; +static std::map hash_freq; // frequency of each hash that appears // filter variables static std::shared_ptr os_manager; static auto tracefilter = std::shared_ptr(); static bool allowed = false; -// before_write triggers after_write to reduce filter checks -static bool before_virt_phys = false; -static page_key_t current_page_key; - -void write_json() { +void write_json() +{ rapidjson::Document document; document.SetObject(); rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); @@ -138,70 +127,114 @@ bool mh_check_allowlist(CPUState *env) { return tracefilter->quickCheck(current_process.pid, current_process.asid); } + void mh_virt_mem_before_write(CPUState *env, target_ptr_t pc, target_ptr_t vaddr, size_t size, uint8_t *buf) { if (!allowed || panda_in_kernel(env)) return; - if (!mh_check_allowlist(env)) return; - - asid_t asid = panda_current_asid(env); - - //physical_t paddr = panda_virt_to_phys(env, vaddr); // could fail - physical_t vpage_id = (vaddr & ~(0xFFF)) >> 12; - - current_page_key = std::make_tuple(asid, vpage_id); - before_virt_phys = true; -} - -void mh_phys_mem_before_write(CPUState *env, target_ptr_t pc, target_ptr_t addr, size_t size, uint8_t *buf) -{ - if (!before_virt_phys) return; - before_virt_phys = false; - asid_t asid = panda_current_asid(env); - physical_t page_id = (addr & ~(0xFFF)) >> 12; - physical_t page_offset = (addr & (0xFFF)); + physical_t vpage_id = (vaddr & ~(0xFFF)); + physical_t vpage_offset = (vaddr & (0xFFF)); uint8_t buffer[PAGE_SIZE]; - - phys_write_count++; - - // check that asid didnt just randomly change - assert(std::get(current_page_key) == asid); + pr61hash_t hash_new = (pr61hash_t)-1; + page_key_t current_page_key; - assert(page_offset + size <= PAGE_SIZE); // read/writes should per page + // "before" hook is called up to 3 times before successful page read, + // first fault for TLB, second fault for process PT. + if (panda_virtual_memory_read(env, vpage_id, buffer, PAGE_SIZE) == -1) return; + // successful page read by this point. - // Check if first time seeing page + asid_t asid = panda_current_asid(env); + current_page_key = std::make_tuple(asid, vpage_id); + + // if first encounter with page if (pages_written.find(current_page_key) == pages_written.end()) { - // Get page to hash - if (panda_physical_memory_rw(page_id, buffer, PAGE_SIZE, false) != MEMTX_OK) { - std::cout << "ERROR: failed to read page: " << page_id << std::endl; - return; - } - // Calculate the full hash and update structures - uint64_t hash = full_poly_hash(buffer); - pages_written[current_page_key] = { hash }; - hash_freq[hash]++; + hash_new = full_poly_hash(buffer); + pages_written[current_page_key] = { hash_new }; + } else { + // Theres a chance something changes in the page between committed hashes. + // Check by calculating the full hash and comparing to last hash for the page. + pr61hash_t hash_curr = full_poly_hash(buffer); + pr61hash_t hash_prev = pages_written[current_page_key].back(); + //std::cout << PREFIX "before PAGE unchanged check: " << (hash_curr != hash_prev) + // << std::hex << std::uppercase << std::setw(16) << std::setfill('0') + // << " HASH_NEW: " << hash_curr + // << " HASH_OLD: " << hash_prev + // << std::dec << std::setw(0) << std::endl; + if (hash_curr != hash_prev) { + std::cerr << PREFIX "WARNING: page update went undetected, syncing..." << std::endl; + pages_written[current_page_key].push_back(hash_curr); + hash_freq[hash_curr]++; + } } - - // Get memory about to be changed - if (panda_physical_memory_rw(page_id, buffer, size, false) != MEMTX_OK) { - std::cout << "ERROR: failed to read page: " << page_id << std::endl; - return; + + // adjust page size if needed + if (size+vpage_offset > PAGE_SIZE) { + size = PAGE_SIZE - vpage_offset; } + + //prepare buffers to align + uint8_t buffer_new[PAGE_SIZE]; + memcpy(&buffer_new[vpage_offset], buf, size); + + pr61hash_t hash_old = pages_written[current_page_key].back(); + hash_new = apply_delta(hash_old, buffer, buffer_new, size, vpage_offset); + hash_uncommitited[current_page_key] = hash_new; + hash_freq[hash_new]++; + + //std::cout << PREFIX "DEBUG: before" + // << std::hex << std::uppercase << std::setw(16) << std::setfill('0') + // << " ASID: 0x" << asid + // << " PAGE: 0x" << vpage_id + // << " OFFSET: 0x" << vpage_offset + // << " SIZE: 0x" << size + // << " MULTIPAGE: " << (vpage_offset + size > PAGE_SIZE) + // << " HASH: " << hash_new + // << std::dec << std::setw(0) << std::endl; +} + +void mh_virt_mem_after_write(CPUState *env, target_ptr_t pc, target_ptr_t vaddr, size_t size, uint8_t *buf) +{ + if (!allowed || panda_in_kernel(env)) return; - // Calculate delta and update structures - uint64_t hash = pages_written[current_page_key].back(); - uint64_t delta = apply_delta(hash, buffer, buf, page_offset, size); - pages_written[current_page_key].push_back(delta); - hash_freq[hash]++; + asid_t asid = panda_current_asid(env); + physical_t vpage_id = (vaddr & ~(0xFFF)); + physical_t vpage_offset = (vaddr & (0xFFF)); + uint8_t buffer[PAGE_SIZE]; + + if (panda_virtual_memory_read(env, vpage_id, buffer, PAGE_SIZE) == -1) return; + + page_key_t current_page_key = std::make_tuple(asid, vpage_id); + //uint64_t hash_a = full_poly_hash(buffer); + pr61hash_t hash_new = hash_uncommitited[current_page_key]; + //std::cout << PREFIX "after PAGE unchanged check: " << (hash_a != hash_b) + // << std::hex << std::uppercase << std::setw(16) << std::setfill('0') + // << " HASH_NEW: " << hash_a + // << " HASH_OLD: " << hash_b + // << std::dec << std::setw(0) << std::endl; + + //commit hash + pages_written[current_page_key].push_back(hash_new); + + //std::cout << PREFIX << "DEBUG: after" + // << std::hex << std::uppercase << std::setw(16) << std::setfill('0') + // << " ASID: 0x" << asid + // << " PAGE: 0x" << vpage_id + // << " OFFSET: 0x" << vpage_offset + // << " SIZE: 0x" << size + // << " MULTIPAGE: " << (vpage_offset + size > PAGE_SIZE) + // << " COMMITTED: " << hash_b + // << std::dec << std::setw(0) << std::endl; } + bool mh_process_change(CPUState* env, target_ulong oldval, target_ulong newval) { allowed = mh_check_allowlist(env); return false; } + void init_memhash(CPUState* env) { std::cout << "initializing memhash" << std::endl; @@ -216,6 +249,7 @@ void init_memhash(CPUState* env) s_memhash_initialized = true; } + bool init_plugin(void* self) { panda_cb pcb; @@ -238,6 +272,9 @@ bool init_plugin(void* self) output_fp = fopen(output_file, "w"); panda_free_args(memhash_args); + // Test Hash lib + //test_pr61(); + // enable memory callbacks, turned off by defualt panda_enable_memcb(); @@ -248,8 +285,11 @@ bool init_plugin(void* self) pcb.virt_mem_before_write = mh_virt_mem_before_write; panda_register_callback(self, PANDA_CB_VIRT_MEM_BEFORE_WRITE, pcb); - pcb.phys_mem_before_write = mh_phys_mem_before_write; - panda_register_callback(self, PANDA_CB_PHYS_MEM_BEFORE_WRITE, pcb); + pcb.virt_mem_after_write = mh_virt_mem_after_write; + panda_register_callback(self, PANDA_CB_VIRT_MEM_AFTER_WRITE, pcb); + + //pcb.phys_mem_before_write = mh_phys_mem_before_write; + //panda_register_callback(self, PANDA_CB_PHYS_MEM_BEFORE_WRITE, pcb); // Track process changes to optimize checks for target threads pcb.asid_changed = mh_process_change; @@ -259,6 +299,7 @@ bool init_plugin(void* self) return true; } + void uninit_plugin(void* self) { std::cout << PREFIX "unloading..." << std::endl; diff --git a/plugins/memory_hash/pr61hash.cpp b/plugins/memory_hash/pr61hash.cpp index bc38c50..4ff08ec 100644 --- a/plugins/memory_hash/pr61hash.cpp +++ b/plugins/memory_hash/pr61hash.cpp @@ -5,9 +5,12 @@ #include "pr61hash.h" +#define PREFIX "[pr61_hash]" + // ### Polynomial Rolling 61-bit Hash -- unofficially calling pr61 constexpr std::array poly_hash_powers() +// Pre-compute powers { std::array _powers = {1,}; for (size_t i = 1; i < PAGE_SIZE; i++) { @@ -19,41 +22,42 @@ constexpr std::array poly_hash_powers() static std::array powers = poly_hash_powers(); inline uint64_t mulmod(uint64_t a, uint64_t b) +// Multiplication under Modulus { return (uint64_t)(((__uint128_t)a*b) % MOD); } uint64_t full_poly_hash(uint8_t* page) +// Full page hash compute { - uint64_t h = 0; + uint64_t hash = 0; for (size_t i = 0; i < PAGE_SIZE; i++) { //std::cout << powers[i] << std::endl; - h = (h + mulmod(page[i], powers[i])) % MOD; + hash = (hash + mulmod(page[i], powers[i])) % MOD; } - return h; + return hash; } -uint64_t delta_poly_hash(uint64_t h, size_t i, uint8_t oldv, uint8_t newv) +uint64_t delta_poly_hash(uint64_t hash, size_t idx, uint8_t oldv, uint8_t newv) +// Compute a single value change in page to apply to old hash { - if (i >= PAGE_SIZE) { - std::cerr << "ERROR index issue" << std::endl; - return h; + if (idx >= PAGE_SIZE) { + std::cerr << PREFIX "ERROR index issue" << std::endl; + return hash; } - uint64_t new_ = mulmod(newv, powers[i]); - uint64_t old_ = mulmod(oldv, powers[i]); - return (h + MOD + new_ - old_) % MOD; + uint64_t newd = mulmod(newv, powers[idx]); + uint64_t oldd = mulmod(oldv, powers[idx]); + return (hash + newd - oldd + MOD) % MOD; } -uint64_t apply_delta(uint64_t h, uint8_t *old_buffer, uint8_t *new_buffer, size_t size, size_t offset) +uint64_t apply_delta(uint64_t hash, uint8_t *old_buffer, uint8_t *new_buffer, size_t size, size_t offset) +// Compute all changes and apply to current hash. NOTE: Buffers expected to be PAGE_SIZE { - uint64_t curr_h = h; - for (size_t i = 0; i < size; i++) { - curr_h = delta_poly_hash(curr_h, offset+i, old_buffer[i], new_buffer[i]); + uint64_t curr_h = hash; + for (size_t i = offset; i < offset+size; i++) { + curr_h = delta_poly_hash(curr_h, i, old_buffer[i], new_buffer[i]); } return curr_h; } - - - diff --git a/plugins/memory_hash/pr61hash.h b/plugins/memory_hash/pr61hash.h index 04df579..1eccfa9 100644 --- a/plugins/memory_hash/pr61hash.h +++ b/plugins/memory_hash/pr61hash.h @@ -1,7 +1,7 @@ #ifndef POLY_HASH_H #define POLY_HASH_H -constexpr size_t PAGE_SIZE = 4096; +constexpr size_t PAGE_SIZE = 4096; // x86_64 - 4KiB is default constexpr uint64_t MOD = (1ULL << 61) - 1; constexpr uint64_t P = 257; diff --git a/plugins/memory_hash/pr61hash_test.cpp b/plugins/memory_hash/pr61hash_test.cpp index e5e4c3a..7d012f2 100644 --- a/plugins/memory_hash/pr61hash_test.cpp +++ b/plugins/memory_hash/pr61hash_test.cpp @@ -1,11 +1,14 @@ #include #include +#include +#include -#include "pr61hash.cpp" +#include "pr61hash.h" const uint64_t PAGE_HASH = 0x1b1d8d0b72aebff6; -void test1() + +void test_fw_bw_sweep() { uint8_t page1[PAGE_SIZE] = {}; uint8_t page2[PAGE_SIZE] = {}; @@ -17,39 +20,60 @@ void test1() for (size_t i = 0; i < PAGE_SIZE; i++) { page1[i] = i % 256; page1_hash = delta_poly_hash(page1_hash, i, 0, i%256); - std::cout << "page1 - " << i << " - 0x"<< std::setw(16) << std::setfill('0') << page1_hash << std::endl; + //std::cout << "page1 - " << i << " - 0x"<< std::setw(16) << std::setfill('0') << page1_hash << std::endl; } - std::cout << "full - xxx - 0x" << std::setw(16) << std::setfill('0') << full_poly_hash(page1) << std::endl; + std::cout << "full - ___ - 0x" << std::setw(16) << std::setfill('0') << full_poly_hash(page1) << std::endl; assert(page1_hash == PAGE_HASH); for (int i = PAGE_SIZE-1; i >= 0; i--) { page2[i] = i % 256; page2_hash = delta_poly_hash(page2_hash, i, 0, i%256); - std::cout << "page2 - " << i << " - 0x"<< std::setw(16) << std::setfill('0') << page2_hash << std::endl; + //std::cout << "page2 - " << i << " - 0x"<< std::setw(16) << std::setfill('0') << page2_hash << std::endl; } - std::cout << "full - xxx - 0x" << std::setw(16) << std::setfill('0') << full_poly_hash(page1) << std::endl; + std::cout << "full - ___ - 0x" << std::setw(16) << std::setfill('0') << full_poly_hash(page1) << std::endl; assert(full_poly_hash(page2) == full_poly_hash(page1)); assert(page2_hash == PAGE_HASH); std::cout << std::dec; + std::cout << "test_fw_bw_sweep Success!" << std::endl; } -void test2() -{ - uint8_t page1[PAGE_SIZE] = {}; - uint64_t page1_hash = full_poly_hash(page1); - uint64_t page1_full = full_poly_hash(page1); - - std::cout << std::hex; - for (size_t i = 0; i < PAGE_SIZE; i++) { - page1[i] = i % 256; - page1_hash = delta_poly_hash(page1_hash, i, 0, i%256); - page1_full = full_poly_hash(page1); - if (i < 10 || i > 4086) { - std::cout << "page1 - " << i - << " - 0x" << std::setw(16) << std::setfill('0') << page1_hash - << " - 0x" << std::setw(16) << std::setfill('0') << page1_full - << std::endl; - } - } + +void test_apply_delta() { + uint8_t page[PAGE_SIZE] = {}; + uint64_t hash = 0; // Starting hash for empty page + uint64_t hash_new; + + const size_t sized = 8; + uint8_t delta[sized] = {'A', 'B', 'C', 'D', 0x32, 0xFF, 0x20, 0x90}; + uint8_t buffer1[PAGE_SIZE] = {}; + uint8_t buffer2[PAGE_SIZE] = {}; - std::cout << "full - xxx - 0x" + //Test 1 + uint16_t offset = 1024; + memcpy(&buffer1[offset], delta, sized); + hash_new = apply_delta(hash, page, buffer1, sized, offset); + memcpy(&page[offset], delta, sized); + hash = full_poly_hash(page); + std::cout << "delta: " << std::hex << std::setw(16) << std::setfill('0') << hash_new<< std::endl; + std::cout << "full: " << std::hex << std::setw(16) << std::setfill('0') << hash << std::endl; + assert(hash == hash_new); + + //Test 2 + offset = 2048; + memcpy(&buffer2[offset], delta, sized); + hash_new = apply_delta(hash, page, buffer2, sized, offset); + memcpy(&page[offset], delta, sized); + hash = full_poly_hash(page); + std::cout << "delta: " << std::hex << std::setw(16) << std::setfill('0') << hash_new<< std::endl; + std::cout << "full: " << std::hex << std::setw(16) << std::setfill('0') << hash << std::endl; + assert(hash == hash_new); + + std::cout << "test_apply_delta Success!" << std::endl; +} + +// Test Suite +void test_pr61() +{ + test_apply_delta(); + test_fw_bw_sweep(); +} diff --git a/plugins/memory_hash/pr61hash_test.h b/plugins/memory_hash/pr61hash_test.h new file mode 100644 index 0000000..b6a5e42 --- /dev/null +++ b/plugins/memory_hash/pr61hash_test.h @@ -0,0 +1,5 @@ +#ifndef POLY_HASH_TEST_H +#define POLY_HASH_TEST_H + +void test_pr61(); +#endif From ca38e83158228e93a1fd5fca32cd363a0b5fe1a8 Mon Sep 17 00:00:00 2001 From: Drake Marin Petersen Date: Tue, 3 Feb 2026 09:33:13 -0500 Subject: [PATCH 6/6] fix: cleaned up version --- plugins/memory_hash/memory_hash.cpp | 31 +++++++++++++++-------------- plugins/memory_hash/pr61hash.cpp | 4 ++-- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/plugins/memory_hash/memory_hash.cpp b/plugins/memory_hash/memory_hash.cpp index 8a8ed76..7ceb86b 100644 --- a/plugins/memory_hash/memory_hash.cpp +++ b/plugins/memory_hash/memory_hash.cpp @@ -162,7 +162,7 @@ void mh_virt_mem_before_write(CPUState *env, target_ptr_t pc, target_ptr_t vaddr // << " HASH_OLD: " << hash_prev // << std::dec << std::setw(0) << std::endl; if (hash_curr != hash_prev) { - std::cerr << PREFIX "WARNING: page update went undetected, syncing..." << std::endl; + //std::cerr << PREFIX "WARNING: page update went undetected, syncing..." << std::endl; pages_written[current_page_key].push_back(hash_curr); hash_freq[hash_curr]++; } @@ -173,12 +173,8 @@ void mh_virt_mem_before_write(CPUState *env, target_ptr_t pc, target_ptr_t vaddr size = PAGE_SIZE - vpage_offset; } - //prepare buffers to align - uint8_t buffer_new[PAGE_SIZE]; - memcpy(&buffer_new[vpage_offset], buf, size); - pr61hash_t hash_old = pages_written[current_page_key].back(); - hash_new = apply_delta(hash_old, buffer, buffer_new, size, vpage_offset); + hash_new = apply_delta(hash_old, &buffer[vpage_offset], buf, size, vpage_offset); hash_uncommitited[current_page_key] = hash_new; hash_freq[hash_new]++; @@ -199,22 +195,28 @@ void mh_virt_mem_after_write(CPUState *env, target_ptr_t pc, target_ptr_t vaddr, asid_t asid = panda_current_asid(env); physical_t vpage_id = (vaddr & ~(0xFFF)); - physical_t vpage_offset = (vaddr & (0xFFF)); + //physical_t vpage_offset = (vaddr & (0xFFF)); uint8_t buffer[PAGE_SIZE]; if (panda_virtual_memory_read(env, vpage_id, buffer, PAGE_SIZE) == -1) return; page_key_t current_page_key = std::make_tuple(asid, vpage_id); - //uint64_t hash_a = full_poly_hash(buffer); pr61hash_t hash_new = hash_uncommitited[current_page_key]; - //std::cout << PREFIX "after PAGE unchanged check: " << (hash_a != hash_b) + + pr61hash_t hash_prev = pages_written[current_page_key].back(); + if (hash_prev == hash_new) return; + + //pr61hash_t hash_a = full_poly_hash(buffer); + //std::cout << PREFIX "after PAGE unchanged check: " << (hash_a != hash_new) // << std::hex << std::uppercase << std::setw(16) << std::setfill('0') // << " HASH_NEW: " << hash_a - // << " HASH_OLD: " << hash_b + // << " HASH_OLD: " << hash_new // << std::dec << std::setw(0) << std::endl; + //assert(hash_a == hash_new); //commit hash pages_written[current_page_key].push_back(hash_new); + phys_write_count++; //std::cout << PREFIX << "DEBUG: after" // << std::hex << std::uppercase << std::setw(16) << std::setfill('0') @@ -223,7 +225,7 @@ void mh_virt_mem_after_write(CPUState *env, target_ptr_t pc, target_ptr_t vaddr, // << " OFFSET: 0x" << vpage_offset // << " SIZE: 0x" << size // << " MULTIPAGE: " << (vpage_offset + size > PAGE_SIZE) - // << " COMMITTED: " << hash_b + // << " COMMITTED: " << hash_new // << std::dec << std::setw(0) << std::endl; } @@ -288,9 +290,6 @@ bool init_plugin(void* self) pcb.virt_mem_after_write = mh_virt_mem_after_write; panda_register_callback(self, PANDA_CB_VIRT_MEM_AFTER_WRITE, pcb); - //pcb.phys_mem_before_write = mh_phys_mem_before_write; - //panda_register_callback(self, PANDA_CB_PHYS_MEM_BEFORE_WRITE, pcb); - // Track process changes to optimize checks for target threads pcb.asid_changed = mh_process_change; panda_register_callback(self, PANDA_CB_ASID_CHANGED, pcb); @@ -307,11 +306,13 @@ void uninit_plugin(void* self) std::cout << PREFIX "unique pages pritten to: " << pages_written.size() << std::endl; std::cout << PREFIX "unique hashes: " << hash_freq.size() << std::endl; + std::cout << std::hex; for (const auto& kv : hash_freq) { - if (kv.second > 2) { + if (kv.second > 100) { std::cout << kv.first << " - " << kv.second << std::endl; } } + std::cout << std::dec; std::cout << PREFIX "writing json output..." << std::endl; write_json(); diff --git a/plugins/memory_hash/pr61hash.cpp b/plugins/memory_hash/pr61hash.cpp index 4ff08ec..57d87cd 100644 --- a/plugins/memory_hash/pr61hash.cpp +++ b/plugins/memory_hash/pr61hash.cpp @@ -55,8 +55,8 @@ uint64_t apply_delta(uint64_t hash, uint8_t *old_buffer, uint8_t *new_buffer, si // Compute all changes and apply to current hash. NOTE: Buffers expected to be PAGE_SIZE { uint64_t curr_h = hash; - for (size_t i = offset; i < offset+size; i++) { - curr_h = delta_poly_hash(curr_h, i, old_buffer[i], new_buffer[i]); + for (size_t i = 0; i < size; i++) { + curr_h = delta_poly_hash(curr_h, offset+i, old_buffer[i], new_buffer[i]); } return curr_h; }