From bf30146e741c2355333b304566cb9d0d85186a31 Mon Sep 17 00:00:00 2001 From: Theodore Robert Campbell Jr Date: Sun, 19 Apr 2026 17:25:24 -0400 Subject: [PATCH 1/2] working haltec v3 w/ screen --- CHANGELOG.md | 5 ++++ README.md | 2 ++ devtool.toml | 9 ++++++++ include/board_config.h | 31 +++++++++++++++++++++++++ platformio.ini | 44 ++++++++++++++++++++++++++++++++++++ src/display/display_oled.cpp | 23 +++++++++++++++++-- src/main.cpp | 12 +++++++++- 7 files changed, 123 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 435046f..8ac1b0f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## [Unreleased] + +### Added +- **Heltec WiFi LoRa 32 V3 support** - ESP32-S3 with 128x64 OLED, LoRa SX1262 + # Changelog All notable changes to SparkMiner will be documented in this file. diff --git a/README.md b/README.md index ec1e48b..d0d78b2 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,7 @@ Find your board below and download the matching firmware from [Releases](https:/ | **Wemos/Lolin S3 Mini** | `esp32-s3-mini_firmware.bin` | RGB LED status indicator | | **WeAct S3 Mini** | `esp32-s3-mini_firmware.bin` | Compatible with Lolin | | **ESP32-S3 + SSD1306 OLED** | `esp32-s3-oled_firmware.bin` | 128x64 I2C OLED | +| **Heltec WiFi LoRa 32 V3** | `heltec-wifi-lora32-v3_firmware.bin` | ESP32-S3, 128x64 OLED, LoRa SX1262 | ### ESP32-C3 Boards @@ -165,6 +166,7 @@ Find your board below and download the matching firmware from [Releases](https:/ | ESP32-S3/C3 + OLED | ✅ Full | 128x64 SSD1306 I2C | | ESP32-S3/C3 Mini | ✅ Full | RGB LED status | | ESP32 Headless | ✅ Full | GPIO LED status indicator | +| Heltec WiFi LoRa 32 V3 | ✅ Full | ESP32-S3, 128x64 OLED, LoRa SX1262 | | LILYGO T-Display S3 | ❌ None | Not yet supported | | LILYGO T-Display V1 | ❌ None | Not yet supported | | ESP32-S2 boards | ❌ None | Single-core not supported | diff --git a/devtool.toml b/devtool.toml index f9e3955..3e419c8 100644 --- a/devtool.toml +++ b/devtool.toml @@ 
-148,6 +148,15 @@ needs_boot_mode = true port_changes_on_reset = true group = "Headless (No Display)" +[boards.heltec-wifi-lora32-v3] +name = "Heltec WiFi LoRa 32 V3" +env = "heltec-wifi-lora32-v3" +chip = "esp32s3" +description = "Heltec WiFi LoRa 32 V3: ESP32-S3, 128x64 OLED (I2C), LoRa SX1262, no PSRAM (~280 H/s)" +needs_boot_mode = true +port_changes_on_reset = true +group = "OLED Display" + # ============================================================ # Release Settings # ============================================================ diff --git a/include/board_config.h b/include/board_config.h index 6ed8572..1be1f77 100644 --- a/include/board_config.h +++ b/include/board_config.h @@ -353,6 +353,37 @@ // SHA Implementation: Defined in platformio.ini (USE_HARDWARE_SHA=1) +// ============================================================ +// Heltec WiFi LoRa 32 V3 - ESP32-S3, 128x64 OLED (I2C), LoRa SX1262 +// ============================================================ +#elif defined(HELTEC_V3) + #define BOARD_NAME "Heltec WiFi LoRa 32 V3" + + // Use OLED display (not TFT) + #define USE_DISPLAY 0 + #define USE_OLED_DISPLAY 1 + + // OLED configuration (128x64 SSD1306 I2C) + #define OLED_WIDTH 128 + #define OLED_HEIGHT 64 + #define OLED_SDA_PIN 17 + #define OLED_SCL_PIN 18 + #define OLED_I2C_ADDR 0x3C + #define OLED_RST_PIN 21 + + // Display power enable (Vext) + #define VEXT_PIN 36 + + // Onboard LED + #define LED_PIN 35 + + // Buttons + #define BUTTON_PIN 0 // BOOT + #define USER_BUTTON_PIN 14 + #define BUTTON_ACTIVE_LOW 1 + + // SHA Implementation: Defined in platformio.ini (USE_HARDWARE_SHA=1) + // ============================================================ // Default - Generic ESP32 // ============================================================ diff --git a/platformio.ini b/platformio.ini index 4acc529..e4560c5 100644 --- a/platformio.ini +++ b/platformio.ini @@ -681,6 +681,50 @@ lib_ignore = SD_MMC FastLED +; 
============================================================ +; Heltec WiFi LoRa 32 V3 - ESP32-S3, 128x64 OLED (I2C), LoRa SX1262 +; Compatible with SparkMiner (SSD1306, U8g2, no PSRAM) +; ============================================================ +[env:heltec-wifi-lora32-v3] +board = heltec_wifi_lora_32_V3 +board_build.partitions = default_8MB.csv +board_build.mcu = esp32s3 +board_build.f_cpu = 240000000L +upload_speed = 921600 + +build_unflags = -Os + +build_flags = + -D AUTO_VERSION=\"v2.9.5\" + -D HELTEC_V3=1 + -D USE_HARDWARE_SHA=1 + -D USE_DISPLAY=0 + -D USE_OLED_DISPLAY=1 + -D OLED_WIDTH=128 + -D OLED_HEIGHT=64 + -D OLED_SDA_PIN=17 + -D OLED_SCL_PIN=18 + -D OLED_I2C_ADDR=0x3C + -D OLED_RST_PIN=21 + -D VEXT_PIN=36 + -D LED_PIN=35 + -D BUTTON_PIN=0 + -D USER_BUTTON_PIN=14 + -O3 + -funroll-loops + ;-D DEBUG_MINING=1 + +lib_deps = + ${env.lib_deps} + olikraus/U8g2@^2.35.9 + +lib_ignore = + TFT_eSPI + OpenFontRender + SD + SD_MMC + FastLED + ; ============================================================ ; ESP32-S3 Headless - No display, serial only ; Dual-core with monochrome display and LoRa diff --git a/src/display/display_oled.cpp b/src/display/display_oled.cpp index 0285ef7..f98399c 100644 --- a/src/display/display_oled.cpp +++ b/src/display/display_oled.cpp @@ -216,10 +216,25 @@ static void drawStatsScreen(const display_data_t *data) { void oled_display_init(uint8_t rotation, uint8_t brightness) { Serial.printf("[OLED] Initializing %dx%d display\n", OLED_WIDTH, OLED_HEIGHT); - // Initialize I2C with custom pins + +#ifdef HELTEC_V3 + Serial.println("HELTEC: Enabling Vext (GPIO36 LOW)"); + pinMode(VEXT_PIN, OUTPUT); + digitalWrite(VEXT_PIN, LOW); // Vext ON (active low) + delay(50); + + Serial.println("HELTEC: Resetting OLED (GPIO21)"); + pinMode(OLED_RST_PIN, OUTPUT); + digitalWrite(OLED_RST_PIN, LOW); + delay(20); + digitalWrite(OLED_RST_PIN, HIGH); + delay(20); + + Serial.println("HELTEC: Starting I2C (17,18)"); +#endif Wire.begin(OLED_SDA_PIN, 
OLED_SCL_PIN); - // Initialize U8g2 + Serial.println("HELTEC: Initializing SSD1306 @ 0x3C"); s_u8g2.begin(); // Set rotation @@ -237,6 +252,10 @@ void oled_display_init(uint8_t rotation, uint8_t brightness) { // Show boot screen oled_display_show_boot(); + Serial.printf("[HeltecV3] LED on GPIO %d\n", LED_PIN); + pinMode(LED_PIN, OUTPUT); + digitalWrite(LED_PIN, LOW); // Off by default + Serial.println("[OLED] Display initialized"); } diff --git a/src/main.cpp b/src/main.cpp index bac037c..ddea3af 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -48,6 +48,9 @@ volatile bool systemReady = false; // Button handling (OneButton) #if defined(BUTTON_PIN) && (USE_DISPLAY || USE_OLED_DISPLAY || USE_EINK_DISPLAY) OneButton button(BUTTON_PIN, true, true); // active low, enable pullup +#ifdef USER_BUTTON_PIN +OneButton userButton(USER_BUTTON_PIN, true, true); +#endif // Single click: wake screen if off, otherwise cycle screens void onButtonClick() { @@ -59,6 +62,13 @@ void onButtonClick() { display_next_screen(); } +#ifdef HELTEC_V3 +void log_heltec_pins() { + Serial.printf("[HeltecV3] OLED SDA=%d SCL=%d RST=%d VEXT=%d\n", OLED_SDA_PIN, OLED_SCL_PIN, OLED_RST_PIN, VEXT_PIN); + Serial.printf("[HeltecV3] LED=%d BOOT=%d USER=%d\n", LED_PIN, BUTTON_PIN, USER_BUTTON_PIN); +} +#endif + // Double click: cycle screen rotation (0->1->2->3->0) void onButtonDoubleClick() { monitor_reset_activity(); @@ -141,7 +151,7 @@ void onButtonLongPressStart() { #if defined(BUTTON_PIN) && (USE_DISPLAY || USE_OLED_DISPLAY || USE_EINK_DISPLAY) /** - * Dedicated button handling task (with display) + * Dedicated button handling task (wit display) * Runs at higher priority than mining to ensure responsive UI */ void button_task(void *param) { From 165bd169f9a1db0f9327964853c55167bb2ce73e Mon Sep 17 00:00:00 2001 From: Theodore Robert Campbell Jr Date: Tue, 12 May 2026 18:50:00 -0400 Subject: [PATCH 2/2] Improve mining performance and display behavior --- devtool.py | 38 +- devtool.toml | 24 +- 
include/board_config.h | 33 +- platformio.ini | 93 ++- scripts/config_miner.py | 137 ++++ scripts/gen_build_info.py | 92 +++ scripts/post_build_merge.py | 13 + src/config/nvs_config.cpp | 12 + src/display/display.h | 1 + src/display/display_eink.cpp | 8 +- src/display/display_oled.cpp | 25 +- src/logging.cpp | 98 +++ src/logging.h | 15 + src/main.cpp | 228 ++++-- src/mining/baseline_benchmark.cpp | 420 ++++++++++ src/mining/baseline_benchmark.h | 29 + src/mining/miner.cpp | 1058 +++++++++++++++++++++---- src/mining/miner.h | 24 +- src/mining/miner_sha256.cpp | 657 ++++++++------- src/mining/miner_sha256.h | 71 ++ src/mining/sha256_ll.cpp | 12 +- src/mining/sha256_pipelined_s3.cpp | 4 +- src/mining/sha256_pipelined_s3_v2.cpp | 4 +- src/mining/sha256_pipelined_s3_v3.cpp | 17 +- src/mining/sha256_s3.cpp | 675 +++++++++++++++- src/mining/sha256_s3.h | 47 ++ src/stats/live_stats.cpp | 10 +- src/stats/monitor.cpp | 150 +++- src/stratum/stratum.cpp | 80 +- 29 files changed, 3448 insertions(+), 627 deletions(-) create mode 100644 scripts/config_miner.py create mode 100644 scripts/gen_build_info.py create mode 100644 src/logging.cpp create mode 100644 src/logging.h create mode 100644 src/mining/baseline_benchmark.cpp create mode 100644 src/mining/baseline_benchmark.h diff --git a/devtool.py b/devtool.py index fb8befa..f5e4edc 100644 --- a/devtool.py +++ b/devtool.py @@ -581,6 +581,16 @@ def build(self, board: BoardConfig = None, verbose: bool = True) -> bool: def get_firmware_path(self, board: BoardConfig) -> Optional[Path]: """Find firmware file for a board""" + is_xiao_s3 = board.key.startswith("seeed-xiao-esp32s3") or board.env.startswith("seeed-xiao-esp32s3") + + # Guard: never pick *_factory.bin for XIAO S3 unless merge layout is explicitly verified. + # XIAO should use standard PlatformIO upload path (bootloader/partitions/app offsets). 
+ if is_xiao_s3: + build_fw = self.script_dir / ".pio" / "build" / board.env / "firmware.bin" + if build_fw.exists(): + return build_fw + return None + # Check release firmware first (try friendly key name, then env name) version = self.get_version() fw_dir = self.get_firmware_dir() / version @@ -623,6 +633,8 @@ def flash(self, board: BoardConfig = None, port: str = None, print(f"{c('[ERROR]', Colors.RED)} No board selected!") return False + is_xiao_s3 = board.key.startswith("seeed-xiao-esp32s3") or board.env.startswith("seeed-xiao-esp32s3") + firmware = self.get_firmware_path(board) if not firmware: print(f"{c('[ERROR]', Colors.RED)} No firmware found! Build first.") @@ -647,6 +659,25 @@ def flash(self, board: BoardConfig = None, port: str = None, self.print_bootloader_instructions() input(f"\n{c('[?]', Colors.CYAN)} Press ENTER when in download mode...") + if is_xiao_s3: + print(f"{c('[INFO]', Colors.YELLOW)} XIAO S3 guard active: skipping factory-bin flashing path") + cmd = self.get_pio_cmd() + [ + "run", + "-e", board.env, + "-t", "upload", + "--upload-port", port, + ] + + result = self.run_cmd(cmd) + + if result and result.returncode == 0: + print(f"\n{c('[SUCCESS]', Colors.GREEN)} Upload completed via PlatformIO!") + self.current_port = port + return True + else: + print(f"\n{c('[ERROR]', Colors.RED)} Upload failed!") + return False + flash_addr = "0x0" if "factory" in firmware.name else "0x10000" cmd = [ @@ -701,7 +732,12 @@ def monitor(self, board: BoardConfig = None, port: str = None, print(f" Exit: Ctrl+C") print(f"{c('=' * 60, Colors.CYAN)}\n") - cmd = self.get_pio_cmd() + ["device", "monitor", "-b", str(board.monitor_baud), "-p", port] + cmd = self.get_pio_cmd() + [ + "device", "monitor", + "-e", board.env, + "-b", str(board.monitor_baud), + "-p", port + ] # Add filters for f in self.config.monitor_filters: diff --git a/devtool.toml b/devtool.toml index 3e419c8..c1e1699 100644 --- a/devtool.toml +++ b/devtool.toml @@ -157,6 +157,28 @@ needs_boot_mode = 
true port_changes_on_reset = true group = "OLED Display" +[boards.seeed-xiao-esp32s3] +name = "Seeed XIAO ESP32-S3" +env = "seeed-xiao-esp32s3" +chip = "esp32s3" +description = "Compact ESP32-S3 headless miner (22x26.5mm, USB-C, 16MB flash, no PSRAM)" +needs_boot_mode = true +port_changes_on_reset = true +flash_mode = "dio" +flash_freq = "80m" +group = "Headless (No Display)" + +[boards.seeed-xiao-esp32s3-safe] +name = "Seeed XIAO ESP32-S3 (Safe Upload)" +env = "seeed-xiao-esp32s3-safe" +chip = "esp32s3" +description = "Safe recovery path: always upload via PlatformIO offset layout, no factory-bin flash" +needs_boot_mode = true +port_changes_on_reset = true +flash_mode = "dio" +flash_freq = "40m" +group = "Headless (No Display)" + # ============================================================ # Release Settings # ============================================================ @@ -178,7 +200,7 @@ firmware_suffix = "_firmware.bin" [monitor] # Filters for platformio device monitor -filters = ["colorize", "esp32_exception_decoder", "time"] +filters = ["esp32_exception_decoder", "time"] # Auto-reconnect on disconnect auto_reconnect = true # Log to file (empty = disabled) diff --git a/include/board_config.h b/include/board_config.h index 1be1f77..a14208b 100644 --- a/include/board_config.h +++ b/include/board_config.h @@ -360,8 +360,12 @@ #define BOARD_NAME "Heltec WiFi LoRa 32 V3" // Use OLED display (not TFT) - #define USE_DISPLAY 0 - #define USE_OLED_DISPLAY 1 + #ifndef USE_DISPLAY + #define USE_DISPLAY 0 + #endif + #ifndef USE_OLED_DISPLAY + #define USE_OLED_DISPLAY 1 + #endif // OLED configuration (128x64 SSD1306 I2C) #define OLED_WIDTH 128 @@ -384,6 +388,31 @@ // SHA Implementation: Defined in platformio.ini (USE_HARDWARE_SHA=1) +// ============================================================ +// Seeed XIAO ESP32-S3 - Compact dual-core headless miner +// Ultra-compact form factor (22x26.5mm), USB-C, built with an 8MB flash layout (see platformio.ini) +// 
============================================================ +#elif defined(SEEED_XIAO_ESP32S3) + #define BOARD_NAME "Seeed XIAO ESP32-S3" + + #ifndef USE_DISPLAY + #define USE_DISPLAY 0 + #endif + #ifndef USE_OLED_DISPLAY + #define USE_OLED_DISPLAY 0 + #endif + #ifndef USE_EINK_DISPLAY + #define USE_EINK_DISPLAY 0 + #endif + + // No built-in LED (XIAO boards are minimal) + #define USE_LED_STATUS 0 + + // NO BUTTON on XIAO - GPIO0 is BOOT button, cannot be used for app purposes + #define BUTTON_PIN -1 + + // SHA Implementation: Defined in platformio.ini (USE_HARDWARE_SHA=1) + // ============================================================ // Default - Generic ESP32 // ============================================================ diff --git a/platformio.ini b/platformio.ini index e4560c5..9ae6136 100644 --- a/platformio.ini +++ b/platformio.ini @@ -1,12 +1,12 @@ ; SparkMiner - Best of BitsyMiner + NerdMiner ; ESP32 Bitcoin Solo Miner with optimized performance ; -; Build: pio run -e esp32-2432s028 -; Upload: pio run -e esp32-2432s028 -t upload +; Build: pio run -e heltec-wifi-lora32-v3 +; Upload: pio run -e heltec-wifi-lora32-v3 -t upload ; Monitor: pio device monitor [platformio] -default_envs = esp32-2432s028 +default_envs = heltec-wifi-lora32-v3 [env] platform = espressif32@6.6.0 @@ -20,6 +20,7 @@ monitor_filters = log2file extra_scripts = + pre:scripts/gen_build_info.py post:scripts/post_build_merge.py lib_deps = @@ -691,6 +692,10 @@ board_build.partitions = default_8MB.csv board_build.mcu = esp32s3 board_build.f_cpu = 240000000L upload_speed = 921600 +monitor_filters = + esp32_exception_decoder + time + log2file build_unflags = -Os @@ -712,7 +717,20 @@ build_flags = -D USER_BUTTON_PIN=14 -O3 -funroll-loops - ;-D DEBUG_MINING=1 + ; -D DEBUG_MINING=1 + -D DEBUG_SHARE_VALIDATION=1 + ; -D DEBUG_HASH_TIMING=1 + ; Experimental: alternative compressor in miner_sha256_complete_from_midstate* (OFF by default) + ; -D MINER_EXPERIMENTAL_COMPRESSOR=1 + ; -D 
S3_DEBUG_NONCE_WINDOW_ENABLE=1 + ; -D S3_DEBUG_NONCE_WINDOW_START=0x00000000 + ; -D S3_DEBUG_NONCE_WINDOW_END=0x0000FFFF + ; Optional S3 hardware-path experiments (boot-time, deterministic vectors + microbench) + ; -D S3_HW_EXPERIMENTS_ENABLE=1 + ; -D S3_HW_EXPERIMENTS_NONCES=4096 + ; -D S3_HW_EXPERIMENTS_ENABLE=1 + ; Baseline benchmark (run once at boot to measure cycle-level performance) + -D ENABLE_BASELINE_BENCHMARK=1 lib_deps = ${env.lib_deps} @@ -725,6 +743,73 @@ lib_ignore = SD_MMC FastLED + + + +; ============================================================ +; Seeed XIAO ESP32-S3 - Compact dual-core headless miner (MINIMAL) +; Conservative settings for stability, USB-C, no PSRAM +; ============================================================ +[env:seeed-xiao-esp32s3] +board = seeed_xiao_esp32s3 +board_build.partitions = default_8MB.csv +board_build.mcu = esp32s3 +board_build.f_cpu = 240000000L +upload_speed = 921600 +; Conservative flash settings for XIAO compatibility +board_build.flash_mode = dio +board_build.flash_size = 8MB +; Framework 2.0.14 for ESP32-S3 does not ship bootloader_dio_40m.elf, +; so force DIO mode but keep supported 80m bootloader variant. 
+board_build.flash_freq = 80m + +build_unflags = -Os + +build_flags = + -D AUTO_VERSION=\"v2.9.5\" + -D SEEED_XIAO_ESP32S3=1 + -D USE_HARDWARE_SHA=1 + -D CORE_0_YIELD_COUNT=1024 + ; No PSRAM on standard XIAO S3 - disable completely + ; ARDUINO_USB_MODE=1 selects the hardware USB-Serial/JTAG CDC, =0 selects USB-OTG (TinyUSB) CDC + -D ARDUINO_USB_MODE=0 + -D ARDUINO_USB_CDC_ON_BOOT=1 + -D USE_DISPLAY=0 + -D USE_OLED_DISPLAY=0 + -D USE_EINK_DISPLAY=0 + ; Disable non-essential features for boot stability + -D BUTTON_PIN=-1 + ; Enable boot diagnostics + -D BOOT_DEBUG=1 + ; Optimization: same -O3 / -funroll-loops flags as the Heltec V3 env + -O3 + -funroll-loops + ; -D DEBUG_MINING=1 + ; -D DEBUG_HASH_TIMING=1 + ; Enable comprehensive hash evaluation logging for v2.9.5 regression diagnosis + ; -D DEBUG_HASH_EVAL=1 + ; -D DEBUG_HASH_EVAL_SAMPLE_RATE=10000 + ; Optional S3 hardware-path experiments (boot-time, deterministic vectors + microbench) + ; -D S3_HW_EXPERIMENTS_ENABLE=1 + ; -D S3_HW_EXPERIMENTS_NONCES=4096 + +lib_deps = + ${env.lib_deps} + +lib_ignore = + TFT_eSPI + OpenFontRender + SD + SD_MMC + FastLED + +; ============================================================ +; Seeed XIAO ESP32-S3 SAFE alias +; Use this env for explicit recovery uploads/debug sessions +; ============================================================ +[env:seeed-xiao-esp32s3-safe] +extends = env:seeed-xiao-esp32s3 + ; ============================================================ ; ESP32-S3 Headless - No display, serial only ; Dual-core with monochrome display and LoRa diff --git a/scripts/config_miner.py b/scripts/config_miner.py new file mode 100644 index 0000000..bd8aa46 --- /dev/null +++ b/scripts/config_miner.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 + +# SPDX-License-Identifier: GPL-3.0-only +# Copyright Theodore Robert Campbell Jr + +import argparse +import sys +from typing import Any + +import requests + + +def str_to_bool_flag(value: str) -> int: + v = value.strip().lower() + if v in {"1", "true", "yes", "on"}: + 
return 1 + if v in {"0", "false", "no", "off"}: + return 0 + raise argparse.ArgumentTypeError(f"invalid boolean flag: {value}") + + +def build_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser( + description="Configure a SparkMiner device via its AP portal" + ) + + p.add_argument("--host", default="192.168.4.1", help="SparkMiner AP host or IP") + p.add_argument("--ssid", required=True, help="Wi-Fi SSID") + p.add_argument("--wifi-password", required=True, help="Wi-Fi password") + + p.add_argument("--wallet", required=True, help="Primary wallet address") + p.add_argument("--worker", default="SparkMiner", help="Worker name") + + p.add_argument("--pool-url", default="public-pool.io", help="Primary pool host") + p.add_argument("--pool-port", type=int, default=21496, help="Primary pool port") + p.add_argument("--pool-pass", default="x", help="Primary pool password") + + p.add_argument("--backup-pool-url", default="pool.nerdminers.org", help="Backup pool host") + p.add_argument("--backup-pool-port", type=int, default=3333, help="Backup pool port") + p.add_argument("--backup-wallet", default="", help="Backup wallet address") + p.add_argument("--backup-pool-pass", default="x", help="Backup pool password") + + p.add_argument("--brightness", type=int, default=100, help="Brightness 0-100") + p.add_argument("--screen-timeout", type=int, default=0, help="Screen timeout in seconds") + p.add_argument("--diff", type=float, default=0.0014, help="Target difficulty") + p.add_argument("--rotation", type=int, default=0, choices=[0, 1, 2, 3], help="Screen rotation") + p.add_argument("--tz", type=int, default=0, help="Timezone offset or portal tz value") + + p.add_argument("--invert", type=str_to_bool_flag, default=1, help="Invert colors: 0/1") + p.add_argument("--stats-en", type=str_to_bool_flag, default=1, help="Enable stats: 0/1") + p.add_argument("--https-stats", type=str_to_bool_flag, default=0, help="Enable HTTPS stats: 0/1") + + p.add_argument("--stats-api", 
default="", help="Custom stats API URL") + p.add_argument("--stats-proxy", default="", help="Stats proxy URL") + + p.add_argument("--timeout", type=float, default=10.0, help="HTTP timeout in seconds") + p.add_argument( + "--insecure", + action="store_true", + help="Skip TLS verification if using HTTPS host", + ) + + return p + + +def build_form(args: argparse.Namespace) -> dict[str, Any]: + return { + "s": args.ssid, + "p": args.wifi_password, + "wallet": args.wallet, + "worker": args.worker, + "pool_url": args.pool_url, + "pool_port": str(args.pool_port), + "pool_pass": args.pool_pass, + "bk_pool_url": args.backup_pool_url, + "bk_pool_port": str(args.backup_pool_port), + "bk_wallet": args.backup_wallet, + "bk_pool_pass": args.backup_pool_pass, + "bright": str(args.brightness), + "scrn_to": str(args.screen_timeout), + "diff": f"{args.diff:.6f}", + "rotation": str(args.rotation), + "tz": str(args.tz), + "invert": str(args.invert), + "stats_en": str(args.stats_en), + "stats_api": args.stats_api, + "stats_proxy": args.stats_proxy, + "https_stats": str(args.https_stats), + } + + +def main() -> int: + parser = build_parser() + args = parser.parse_args() + + scheme = "https" if str(args.host).startswith("https://") else "http" + if args.host.startswith("http://") or args.host.startswith("https://"): + url = f"{args.host.rstrip('/')}/wifisave" + origin = args.host.rstrip("/") + referer = f"{origin}/wifi?" + else: + url = f"{scheme}://{args.host}/wifisave" + origin = f"{scheme}://{args.host}" + referer = f"{origin}/wifi?" 
+ + form = build_form(args) + + headers = { + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Content-Type": "application/x-www-form-urlencoded", + "Origin": origin, + "Referer": referer, + "User-Agent": "sparkminer-config/1.0", + } + + try: + response = requests.post( + url, + headers=headers, + data=form, + timeout=args.timeout, + verify=not args.insecure, + ) + except requests.RequestException as exc: + print(f"request failed: {exc}", file=sys.stderr) + return 1 + + print(f"status: {response.status_code}") + # print(response.text) + + if response.ok: + return 0 + return 2 + + +if __name__ == "__main__": + raise SystemExit(main()) \ No newline at end of file diff --git a/scripts/gen_build_info.py b/scripts/gen_build_info.py new file mode 100644 index 0000000..2379d72 --- /dev/null +++ b/scripts/gen_build_info.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-3.0-only +# Copyright Theodore Robert Campbell Jr + +""" +Generate build metadata header for SparkMiner. + +Emits include/build_info_auto.h with: +- git hash / describe / dirty state +- PlatformIO env + board + MCU +- build timestamp (UTC) +""" + +from datetime import datetime, timezone +from pathlib import Path +import subprocess + +Import("env") + + +def _run(cmd, cwd): + try: + result = subprocess.run( + cmd, + cwd=cwd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=False, + ) + if result.returncode == 0: + return result.stdout.strip() + except Exception: + pass + return "" + + +def _c_string(value): + # Minimal escaping for generated C string literals. 
+ return value.replace("\\", "\\\\").replace('"', '\\"') + + +def generate_build_info(_source, _target, _env): + project_dir = Path(env.subst("$PROJECT_DIR")) + include_dir = project_dir / "include" + include_dir.mkdir(parents=True, exist_ok=True) + out_file = include_dir / "build_info_auto.h" + + pio_env = str(env.get("PIOENV", "unknown")) + pio_board = str(env.get("BOARD", "unknown")) + + board_cfg = env.BoardConfig() + pio_mcu = str(board_cfg.get("build.mcu", "unknown")) + pio_f_cpu = str(board_cfg.get("build.f_cpu", "unknown")) + + git_hash = _run(["git", "rev-parse", "--short", "HEAD"], project_dir) + git_hash = git_hash if git_hash else "nogit" + + git_describe = _run(["git", "describe", "--tags", "--always", "--dirty"], project_dir) + git_describe = git_describe if git_describe else "nogit" + + # Dirty state: true if any tracked/untracked changes are present. + status = _run(["git", "status", "--porcelain"], project_dir) + git_dirty = "1" if status else "0" + + # Build timestamp in UTC for cross-machine comparability. + build_utc = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + content = f"""// Auto-generated by scripts/gen_build_info.py. Do not edit. +#ifndef BUILD_INFO_AUTO_H +#define BUILD_INFO_AUTO_H + +#define BUILD_GIT_HASH \"{_c_string(git_hash)}\" +#define BUILD_GIT_DESCRIBE \"{_c_string(git_describe)}\" +#define BUILD_GIT_DIRTY {git_dirty} + +#define BUILD_PIO_ENV \"{_c_string(pio_env)}\" +#define BUILD_PIO_BOARD \"{_c_string(pio_board)}\" +#define BUILD_PIO_MCU \"{_c_string(pio_mcu)}\" +#define BUILD_PIO_F_CPU \"{_c_string(pio_f_cpu)}\" + +#define BUILD_UTC_TIMESTAMP \"{_c_string(build_utc)}\" + +#endif // BUILD_INFO_AUTO_H +""" + + out_file.write_text(content, encoding="utf-8") + print(f"[BUILD-INFO] Generated: {out_file}") + + +# Run before compile so header is always available. 
+generate_build_info(None, None, env) diff --git a/scripts/post_build_merge.py b/scripts/post_build_merge.py index 90deff9..c3e17b0 100644 --- a/scripts/post_build_merge.py +++ b/scripts/post_build_merge.py @@ -88,6 +88,8 @@ def get_friendly_name(env_name): 'lilygo-t-display-s3': 'lilygo-t-display-s3', 'lilygo-t-display-v1': 'lilygo-t-display-v1', 'esp32-headless-led': 'esp32-headless-led', + 'seeed-xiao-esp32s3': 'seeed-xiao-esp32s3', + 'seeed-xiao-esp32s3-safe': 'seeed-xiao-esp32s3-safe', } return friendly_names.get(env_name, env_name) @@ -133,6 +135,17 @@ def create_merged_firmware(source, target, env): print(f"Error creating firmware file: {e}") return + # Guard: XIAO S3 must use PlatformIO upload flow with explicit image offsets. + # Do not generate/refresh factory-merged images for this board until merge layout is verified. + if env_name in ("seeed-xiao-esp32s3", "seeed-xiao-esp32s3-safe"): + if factory_file.exists(): + try: + factory_file.unlink() + except Exception: + pass + print("Factory: SKIPPED for XIAO S3 (use pio run -t upload, not merged factory image)") + return + # Create factory file (merged) try: merged_size = 0x400000 # 4MB diff --git a/src/config/nvs_config.cpp b/src/config/nvs_config.cpp index b7eea07..fc1192d 100644 --- a/src/config/nvs_config.cpp +++ b/src/config/nvs_config.cpp @@ -475,6 +475,12 @@ bool nvs_config_load(miner_config_t *config) { return false; } + if (!s_prefs.isKey(NVS_KEY_CONFIG)) { + Serial.println("[NVS] No saved config found (first boot or erased)"); + s_prefs.end(); + return false; + } + size_t len = s_prefs.getBytesLength(NVS_KEY_CONFIG); if (len == 0) { Serial.println("[NVS] No saved config found (first boot or erased)"); @@ -629,6 +635,12 @@ bool nvs_stats_load(mining_persistence_t *stats) { return false; } + if (!s_prefs.isKey(NVS_KEY_STATS)) { + // Expected on first boot before any stats persistence. 
+ s_prefs.end(); + return false; + } + size_t len = s_prefs.getBytesLength(NVS_KEY_STATS); if (len != sizeof(mining_persistence_t)) { Serial.printf("[NVS-STATS] Stats size mismatch: %d vs %d\n", len, sizeof(mining_persistence_t)); diff --git a/src/display/display.h b/src/display/display.h index 06c9652..b9d6392 100644 --- a/src/display/display.h +++ b/src/display/display.h @@ -27,6 +27,7 @@ struct display_data_s { // Mining stats uint64_t totalHashes; double hashRate; + double hashRateAvg; double bestDifficulty; uint32_t sharesAccepted; uint32_t sharesRejected; diff --git a/src/display/display_eink.cpp b/src/display/display_eink.cpp index 86f082f..5cdfe8e 100644 --- a/src/display/display_eink.cpp +++ b/src/display/display_eink.cpp @@ -106,13 +106,13 @@ static bool s_inverted = false; static String formatHashrateCompact(double hashrate) { if (hashrate >= 1e9) { - return String(hashrate / 1e9, 1) + "G"; + return String(hashrate / 1e9, 1) + " G"; } else if (hashrate >= 1e6) { - return String(hashrate / 1e6, 1) + "M"; + return String(hashrate / 1e6, 1) + " M"; } else if (hashrate >= 1e3) { - return String(hashrate / 1e3, 1) + "K"; + return String(hashrate / 1e3, 1) + " K"; } else { - return String((int)hashrate); + return String((int)hashrate) + " "; } } diff --git a/src/display/display_oled.cpp b/src/display/display_oled.cpp index f98399c..ce26739 100644 --- a/src/display/display_oled.cpp +++ b/src/display/display_oled.cpp @@ -77,11 +77,11 @@ static bool s_inverted = false; static String formatHashrateCompact(double hashrate) { if (hashrate >= 1e9) { - return String(hashrate / 1e9, 1) + "G"; + return String(hashrate / 1e9, 1) + " G"; } else if (hashrate >= 1e6) { - return String(hashrate / 1e6, 1) + "M"; + return String(hashrate / 1e6, 1) + " M"; } else if (hashrate >= 1e3) { - return String(hashrate / 1e3, 1) + "K"; + return String(hashrate / 1e3, 1) + " K"; } else { return String((int)hashrate); } @@ -111,7 +111,16 @@ static String formatDiffCompact(double diff) { } 
else if (diff >= 1e3) { return String(diff / 1e3, 1) + "K"; } else { - return String((int)diff); + if (diff >= 1.0) { + return String(diff, 2); + } else if (diff >= 0.1) { + return String(diff, 3); + } else if (diff >= 0.01) { + return String(diff, 4); + } else if (diff >= 0.001) { + return String(diff, 5); + } + return String(diff, 6); } } @@ -147,16 +156,10 @@ static void drawMainScreen(const display_data_t *data) { // Separator line s_u8g2.drawHLine(0, 10, OLED_WIDTH); - // Large hashrate display - s_u8g2.setFont(u8g2_font_logisoso16_tn); // Large numeric font - String hashrate = formatHashrateCompact(data->hashRate); + String hashrate = formatHashrateCompact(data->hashRate) + "H/s"; int hrWidth = s_u8g2.getStrWidth(hashrate.c_str()); s_u8g2.drawStr((OLED_WIDTH - hrWidth) / 2, 32, hashrate.c_str()); - // "H/s" label below - s_u8g2.setFont(u8g2_font_6x10_tf); - s_u8g2.drawStr((OLED_WIDTH - 18) / 2, 42, "H/s"); - // Bottom stats row #if (OLED_HEIGHT == 64) s_u8g2.drawHLine(0, 48, OLED_WIDTH); diff --git a/src/logging.cpp b/src/logging.cpp new file mode 100644 index 0000000..6bda361 --- /dev/null +++ b/src/logging.cpp @@ -0,0 +1,98 @@ +/* + * SPDX-License-Identifier: GPL-3.0-only + * Copyright Theodore Robert Campbell Jr + */ + +#include "logging.h" + +#include +#include +#include +#include +#include + +static SemaphoreHandle_t s_logMutex = NULL; +static volatile bool s_startupBarrierOpen = false; + +void log_init() { + if (s_logMutex == NULL) { + s_logMutex = xSemaphoreCreateMutex(); + } +} + +void log_set_startup_barrier(bool open) { + s_startupBarrierOpen = open; +} + +bool log_is_startup_barrier_open() { + return s_startupBarrierOpen; +} + +void log_wait_startup_barrier(uint32_t timeoutMs) { + uint32_t start = millis(); + while (!s_startupBarrierOpen) { + if (timeoutMs > 0 && (millis() - start) >= timeoutMs) { + return; + } + vTaskDelay(pdMS_TO_TICKS(1)); + } +} + +static void log_emit_atomic(const char *text) { + if (text == NULL) { + return; + } + + if 
(s_logMutex == NULL) { + Serial.print(text); + return; + } + + if (xSemaphoreTake(s_logMutex, pdMS_TO_TICKS(50)) == pdTRUE) { + Serial.print(text); + xSemaphoreGive(s_logMutex); + } else { + // Fallback to avoid dropping logs if mutex is contended. + Serial.print(text); + } +} + +void log_line(const char *message) { + if (message == NULL) { + return; + } + + char line[256]; + size_t len = strnlen(message, sizeof(line) - 2); + memcpy(line, message, len); + if (len == 0 || message[len - 1] != '\n') { + line[len++] = '\n'; + } + line[len] = '\0'; + + log_emit_atomic(line); +} + +void log_linef(const char *fmt, ...) { + if (fmt == NULL) { + return; + } + + char line[320]; + va_list args; + va_start(args, fmt); + int written = vsnprintf(line, sizeof(line), fmt, args); + va_end(args); + + if (written < 0) { + return; + } + + size_t len = strnlen(line, sizeof(line) - 2); + if (len == 0 || line[len - 1] != '\n') { + line[len++] = '\n'; + line[len] = '\0'; + } + + log_emit_atomic(line); +} diff --git a/src/logging.h b/src/logging.h new file mode 100644 index 0000000..b222fe9 --- /dev/null +++ b/src/logging.h @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: GPL-3.0-only + * Copyright Theodore Robert Campbell Jr + */ + +#pragma once + +#include + +void log_init(); +void log_set_startup_barrier(bool open); +bool log_is_startup_barrier_open(); +void log_wait_startup_barrier(uint32_t timeoutMs = 5000); +void log_line(const char *message); +void log_linef(const char *fmt, ...); diff --git a/src/main.cpp b/src/main.cpp index ddea3af..43e3836 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -28,12 +28,15 @@ extern "C" { #include #include "mining/miner.h" +#include "mining/baseline_benchmark.h" #include "stratum/stratum_types.h" #include "stratum/stratum.h" #include "config/nvs_config.h" #include "config/wifi_manager.h" #include "stats/monitor.h" #include "display/display.h" +#include "logging.h" +#include // Task handles TaskHandle_t miner0Task = NULL; @@ -46,7 +49,7 @@ 
TaskHandle_t buttonTask = NULL; volatile bool systemReady = false; // Button handling (OneButton) -#if defined(BUTTON_PIN) && (USE_DISPLAY || USE_OLED_DISPLAY || USE_EINK_DISPLAY) +#if defined(BUTTON_PIN) && BUTTON_PIN >= 0 && (USE_DISPLAY || USE_OLED_DISPLAY || USE_EINK_DISPLAY) OneButton button(BUTTON_PIN, true, true); // active low, enable pullup #ifdef USER_BUTTON_PIN OneButton userButton(USER_BUTTON_PIN, true, true); @@ -149,27 +152,29 @@ void onButtonLongPressStart() { } #endif -#if defined(BUTTON_PIN) && (USE_DISPLAY || USE_OLED_DISPLAY || USE_EINK_DISPLAY) +#if defined(BUTTON_PIN) && BUTTON_PIN >= 0 && (USE_DISPLAY || USE_OLED_DISPLAY || USE_EINK_DISPLAY) /** * Dedicated button handling task (wit display) * Runs at higher priority than mining to ensure responsive UI */ void button_task(void *param) { - Serial.println("[BUTTON] Task started on core 0"); + log_wait_startup_barrier(); + log_line("[BUTTON] Task started on core 0"); for (;;) { button.tick(); vTaskDelay(pdMS_TO_TICKS(10)); // 10ms polling = responsive buttons } } -#elif defined(BUTTON_PIN) +#elif defined(BUTTON_PIN) && BUTTON_PIN >= 0 /** * Headless button handling task * Simple long-press detection for factory reset (no OneButton/display dependencies) * Hold button for 5 seconds to trigger factory reset */ void button_task(void *param) { - Serial.println("[BUTTON] Headless button task started"); + log_wait_startup_barrier(); + log_line("[BUTTON] Headless button task started"); pinMode(BUTTON_PIN, INPUT_PULLUP); unsigned long pressStart = 0; @@ -183,32 +188,32 @@ void button_task(void *param) { // Button just pressed pressStart = millis(); wasPressed = true; - Serial.println("[BUTTON] Press detected - hold 5s for factory reset"); + log_line("[BUTTON] Press detected - hold 5s for factory reset"); } else if (pressed && wasPressed) { // Button held - check duration unsigned long held = millis() - pressStart; if (held >= RESET_HOLD_MS) { - Serial.println("[RESET] *** FACTORY RESET TRIGGERED ***"); + 
log_line("[RESET] *** FACTORY RESET TRIGGERED ***"); // Clear NVS Preferences prefs; if (prefs.begin("sparkminer", false)) { prefs.clear(); prefs.end(); - Serial.println("[RESET] NVS cleared"); + log_line("[RESET] NVS cleared"); } // Clear WiFi settings WiFi.disconnect(true, true); - Serial.println("[RESET] WiFi settings cleared"); + log_line("[RESET] WiFi settings cleared"); delay(500); - Serial.println("[RESET] Restarting..."); + log_line("[RESET] Restarting..."); ESP.restart(); } } else if (!pressed && wasPressed) { // Button released before 5 seconds - Serial.println("[BUTTON] Released - normal operation continues"); + log_line("[BUTTON] Released - normal operation continues"); wasPressed = false; } @@ -221,6 +226,8 @@ void button_task(void *param) { void setupPowerManagement(); void setupTasks(); void printBanner(); +void logBuildInfo(); +void logBackendSummary(); void checkFactoryReset(); uint32_t tryOverclock(); @@ -287,7 +294,7 @@ uint32_t tryOverclock() { * Hold BOOT button for 5+ seconds to wipe NVS and restart */ void checkFactoryReset() { - #ifdef BUTTON_PIN + #if defined(BUTTON_PIN) && BUTTON_PIN >= 0 pinMode(BUTTON_PIN, INPUT_PULLUP); // Check if button is pressed at boot @@ -343,7 +350,16 @@ void checkFactoryReset() { * Arduino setup - runs once at boot */ void setup() { + // BOOT DIAGNOSTICS - before Serial + #ifdef BOOT_DEBUG + // Toggle GPIO2 (generic LED) to signal we entered setup (if available) + pinMode(2, OUTPUT); + digitalWrite(2, HIGH); + #endif + Serial.begin(115200); + log_init(); + log_set_startup_barrier(false); // Wait for USB CDC to be ready (with timeout for headless operation) // On ESP32-S3, Serial only becomes true when USB host enumerates CDC @@ -353,20 +369,39 @@ void setup() { delay(10); } Serial.flush(); - - // Debug output - Serial.println(); - Serial.println("[BOOT] Starting..."); + + // BOOT DIAGNOSTICS - Serial ready + #ifdef BOOT_DEBUG + Serial.println("\n[BOOT] Serial initialized"); + #endif + + // Debug output + 
log_line(""); + log_line("[BOOT] Starting..."); // Check for factory reset (hold BOOT button for 5 seconds) + #ifdef BOOT_DEBUG + Serial.println("[BOOT] Checking factory reset button..."); + #endif checkFactoryReset(); + #ifdef BOOT_DEBUG + Serial.println("[BOOT] Factory reset check complete"); + #endif printBanner(); + logBuildInfo(); + logBackendSummary(); // Configure watchdog with longer timeout for mining // Mining loops will yield periodically via vTaskDelay(1) - Serial.println("[INIT] Configuring watchdog timer (30s timeout)..."); + log_line("[INIT] Configuring watchdog timer (30s timeout)..."); + #ifdef BOOT_DEBUG + Serial.println("[BOOT] About to init WDT"); + #endif esp_task_wdt_init(30, true); // 30 second timeout, panic on trigger + #ifdef BOOT_DEBUG + Serial.println("[BOOT] WDT initialized"); + #endif // Disable power management (no CPU throttling/sleep) setupPowerManagement(); @@ -374,16 +409,39 @@ void setup() { // NOTE: ESP32 overclocking via PLL manipulation causes boot loops // This ESP32-D0WD-V3 chip cannot exceed 240MHz // NMMiner's 1000 KH/s must come from SHA optimization, not overclocking - Serial.printf("[INIT] Running at %u MHz\n", getCpuFrequencyMhz()); + log_linef("[INIT] Running at %u MHz", getCpuFrequencyMhz()); // Initialize NVS configuration + #ifdef BOOT_DEBUG + Serial.println("[BOOT] About to init NVS"); + #endif nvs_config_init(); + #ifdef BOOT_DEBUG + Serial.println("[BOOT] NVS initialized"); + #endif // Initialize mining subsystem + #ifdef BOOT_DEBUG + Serial.println("[BOOT] About to init mining"); + #endif miner_init(); + #ifdef BOOT_DEBUG + Serial.println("[BOOT] Mining subsystem initialized"); + #endif + + // Run baseline benchmark (cycle-level performance measurement) + #ifdef ENABLE_BASELINE_BENCHMARK + baseline_benchmark_run(); + #endif // Initialize stratum subsystem + #ifdef BOOT_DEBUG + Serial.println("[BOOT] About to init stratum"); + #endif stratum_init(); + #ifdef BOOT_DEBUG + Serial.println("[BOOT] Stratum subsystem 
initialized"); + #endif // Load pool configuration from NVS miner_config_t *config = nvs_config_get(); @@ -393,12 +451,18 @@ void setup() { // Initialize display early (needed for WiFi setup screen) #if (USE_DISPLAY || USE_OLED_DISPLAY || USE_EINK_DISPLAY) + #ifdef BOOT_DEBUG + Serial.println("[BOOT] About to init display"); + #endif display_init(config->rotation, config->brightness); display_set_inverted(config->invertColors); + #ifdef BOOT_DEBUG + Serial.println("[BOOT] Display initialized"); + #endif #endif // Setup button handlers (OneButton) - #if defined(BUTTON_PIN) && (USE_DISPLAY || USE_OLED_DISPLAY || USE_EINK_DISPLAY) + #if defined(BUTTON_PIN) && BUTTON_PIN >= 0 && (USE_DISPLAY || USE_OLED_DISPLAY || USE_EINK_DISPLAY) button.setClickMs(400); // Time window for single click (ms) button.setPressMs(1500); // Time for long press to start (1.5s) button.setDebounceMs(50); // Debounce time (ms) @@ -406,57 +470,119 @@ void setup() { button.attachDoubleClick(onButtonDoubleClick); button.attachMultiClick(onButtonMultiClick); // Triple-click for inversion button.attachLongPressStart(onButtonLongPressStart); // Factory reset handler - Serial.println("[INIT] Button handlers registered (click/double/triple/long-press)"); + log_line("[INIT] Button handlers registered (click/double/triple/long-press)"); #endif // Initialize WiFiManager and connect + #ifdef BOOT_DEBUG + Serial.println("[BOOT] About to init WiFi"); + #endif wifi_manager_init(); - Serial.println("[INIT] Starting WiFi..."); + log_line("[INIT] Starting WiFi..."); wifi_manager_start(); + #ifdef BOOT_DEBUG + Serial.println("[BOOT] WiFi started"); + #endif // Register WiFi event handlers for diagnostics WiFi.onEvent([](WiFiEvent_t event, WiFiEventInfo_t info) { - Serial.printf("[WIFI] Disconnected, reason: %d\n", info.wifi_sta_disconnected.reason); + log_linef("[WIFI] Disconnected, reason: %d", info.wifi_sta_disconnected.reason); }, WiFiEvent_t::ARDUINO_EVENT_WIFI_STA_DISCONNECTED); WiFi.onEvent([](WiFiEvent_t 
event, WiFiEventInfo_t info) { - Serial.printf("[WIFI] Connected, channel: %d\n", WiFi.channel()); + log_linef("[WIFI] Connected, channel: %d", WiFi.channel()); }, WiFiEvent_t::ARDUINO_EVENT_WIFI_STA_CONNECTED); // Initialize monitor (live stats - display already initialized) monitor_init(); - Serial.println("[INIT] Setup complete"); + log_line("[INIT] Setup complete"); // Check if configuration is valid if (!nvs_config_is_valid()) { - Serial.println("[WARN] No wallet configured! Please set up via captive portal."); + log_line("[WARN] No wallet configured! Please set up via captive portal."); } - // Start FreeRTOS tasks - setupTasks(); - // Print configuration summary - Serial.println(); - Serial.println("=== SparkMiner v" AUTO_VERSION " ==="); - Serial.println("SHA-256 Implementation: " - #if defined(USE_HARDWARE_SHA) - "Hardware (ESP32-S3/C3)" - #else - "Software (Optimized)" - #endif - ); - Serial.println("Board: " BOARD_NAME); + const miner_backend_info_t *backend = miner_get_backend_info(); + log_line(""); + log_line("=== SparkMiner v" AUTO_VERSION " ==="); + log_linef("Mining backend: %s", backend->miningBackend); + log_linef("HW SHA: %s", backend->hwShaAvailable ? "available/self-tested only" : "unavailable"); + log_linef("HW SHA hot loop: %s", backend->hwShaHotLoop ? "enabled" : "disabled"); + log_linef("DMA hot path: %s", backend->dmaHotPath ? "active" : "inactive"); + log_line("Board: " BOARD_NAME); #if (USE_DISPLAY || USE_OLED_DISPLAY || USE_EINK_DISPLAY) - Serial.println("Display: Enabled"); + log_line("Display: Enabled"); #else - Serial.println("Display: Disabled"); + log_line("Display: Disabled"); #endif - Serial.println(); + log_line(""); + + // Start FreeRTOS tasks after final startup summary to avoid interleaving. 
+ log_set_startup_barrier(true); + setupTasks(); systemReady = true; } +void logBuildInfo() { + log_line("[BUILD] ===== Build Metadata ====="); + log_linef("[BUILD] Version: %s", AUTO_VERSION); + log_linef("[BUILD] Git: %s (%s)", BUILD_GIT_DESCRIBE, BUILD_GIT_HASH); + log_linef("[BUILD] Git tree: %s", BUILD_GIT_DIRTY ? "DIRTY" : "CLEAN"); + log_linef("[BUILD] Built (UTC): %s", BUILD_UTC_TIMESTAMP); + + log_linef("[BUILD] PIO env: %s", BUILD_PIO_ENV); + log_linef("[BUILD] PIO board: %s", BUILD_PIO_BOARD); + log_linef("[BUILD] MCU: %s @ %s", BUILD_PIO_MCU, BUILD_PIO_F_CPU); + log_linef("[BUILD] Board name: %s", BOARD_NAME); + + log_linef("[BUILD] Runtime CPU: %u MHz | Cores: %d", getCpuFrequencyMhz(), SOC_CPU_CORES_NUM); + + const miner_backend_info_t *backend = miner_get_backend_info(); + + log_linef("[BUILD] Features: HW_SHA=%s, HW_SHA_HOTLOOP=%s, SOFTWARE_MIDSTATE=%s, DMA_HOTPATH=%s", + backend->hwShaAvailable ? "available" : "unavailable", + backend->hwShaHotLoop ? "on" : "off", + backend->softwareMidstate ? "on" : "off", + backend->dmaHotPath ? "on" : "off" + ); + + log_linef("[BUILD] UI: Display=%s, OLED=%s, EINK=%s", + #if USE_DISPLAY + "on" + #else + "off" + #endif + , + #if USE_OLED_DISPLAY + "on" + #else + "off" + #endif + , + #if USE_EINK_DISPLAY + "on" + #else + "off" + #endif + ); + + log_line("[BUILD] =========================="); +} + +void logBackendSummary() { + const miner_backend_info_t *backend = miner_get_backend_info(); + log_linef("[BACKEND] Chip: %s", backend->chip); + log_linef("[BACKEND] Mining backend: %s", backend->miningBackend); + log_linef("[BACKEND] HW SHA available: %s", backend->hwShaAvailable ? "yes" : "no"); + log_linef("[BACKEND] HW SHA hot loop: %s", backend->hwShaHotLoop ? "yes" : "no"); + log_linef("[BACKEND] Midstate restore: %s", backend->midstateRestoreSupported ? "supported" : "unsupported"); + log_linef("[BACKEND] DMA path: %s", backend->dmaHotPath ? 
"active" : "inactive"); + log_linef("[BACKEND] Nonce split: %s", backend->nonceSplitCore0LowCore1High ? "core0 low / core1 high" : "n/a"); +} + /** * Arduino loop - runs continuously * Minimal work here - most work done in FreeRTOS tasks @@ -477,12 +603,12 @@ void setupPowerManagement() { esp_err_t err = esp_pm_lock_create(ESP_PM_NO_LIGHT_SLEEP, 0, "miner", &pmLock); if (err == ESP_OK) { esp_pm_lock_acquire(pmLock); - Serial.println("[INIT] Power management disabled (no sleep)"); + log_line("[INIT] Power management disabled (no sleep)"); } else { - Serial.println("[WARN] Could not disable power management"); + log_line("[WARN] Could not disable power management"); } #else - Serial.println("[INIT] Power management not enabled in config"); + log_line("[INIT] Power management not enabled in config"); #endif } @@ -491,7 +617,7 @@ void setupPowerManagement() { * Miner tasks are only created if wallet is configured */ void setupTasks() { - Serial.println("[INIT] Creating FreeRTOS tasks..."); + log_line("[INIT] Creating FreeRTOS tasks..."); bool hasValidConfig = nvs_config_is_valid(); @@ -521,7 +647,7 @@ void setupTasks() { // Button task (responsive UI during mining) // Needs 4KB+ stack for NVS writes (rotation save) and display updates - #if defined(BUTTON_PIN) && (USE_DISPLAY || USE_OLED_DISPLAY || USE_EINK_DISPLAY) + #if defined(BUTTON_PIN) && BUTTON_PIN >= 0 && (USE_DISPLAY || USE_OLED_DISPLAY || USE_EINK_DISPLAY) xTaskCreatePinnedToCore( button_task, "Button", @@ -531,7 +657,7 @@ void setupTasks() { &buttonTask, 0 // Core 0 with other UI tasks ); - #elif defined(BUTTON_PIN) + #elif defined(BUTTON_PIN) && BUTTON_PIN >= 0 // Headless button task for factory reset (Issue #15 fix) xTaskCreatePinnedToCore( button_task, @@ -570,7 +696,7 @@ void setupTasks() { MINER_0_CORE ); - Serial.println("[INIT] All tasks created (dual-core mining)"); + log_line("[INIT] All tasks created (dual-core mining)"); #else // Single-core (C3, S2): Run only one miner task, not pinned // Must 
yield frequently to let WiFi/Stratum work @@ -583,11 +709,11 @@ void setupTasks() { &miner0Task ); - Serial.println("[INIT] All tasks created (single-core mining)"); + log_line("[INIT] All tasks created (single-core mining)"); #endif } else { - Serial.println("[INIT] Monitor task created (mining disabled - no wallet)"); - Serial.println("[INIT] Configure via captive portal or SD card config.json"); + log_line("[INIT] Monitor task created (mining disabled - no wallet)"); + log_line("[INIT] Configure via captive portal or SD card config.json"); } } diff --git a/src/mining/baseline_benchmark.cpp b/src/mining/baseline_benchmark.cpp new file mode 100644 index 0000000..9d63cea --- /dev/null +++ b/src/mining/baseline_benchmark.cpp @@ -0,0 +1,420 @@ +/* + * SPDX-License-Identifier: GPL-3.0-only + * Copyright Theodore Robert Campbell Jr + * + * SparkMiner - Baseline Performance Benchmark + * + * Measures cycle-level performance of current SHA-256 mining backend. + * Purpose: Establish baseline (560 cycles/hash target) before optimizations. 
+ * + * Enables: -DENABLE_BASELINE_BENCHMARK in platformio.ini + */ + +#include +#include +#include "miner_sha256.h" +#include "sha256_hw.h" +#include "miner.h" +#include "../logging.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// ============================================================ +// Cycle Counter Utilities (Xtensa LX7) +// ============================================================ + +/* + * Read Xtensa CPU cycle counter (CCOUNT register) + * Returns 32-bit cycle count that increments every CPU clock + * Note: Overflows every 2^32 cycles @ 240MHz = 17.9 seconds + */ +static inline uint32_t get_cpu_cycles() { + uint32_t cycles; + asm volatile("rsr %0, ccount" : "=r"(cycles)); + return cycles; +} + +/* + * Compute cycle delta, handling wraparound + */ +static inline uint32_t delta_cycles(uint32_t start, uint32_t end) { + return end - start; // Handles wrap naturally (unsigned arithmetic) +} + +// ============================================================ +// Test Vector: Synthetic header used for cross-validation +// Pattern matches existing MINER-TEST vectors +// ============================================================ + +// Synthetic 80-byte header (version=2, bytes fill 0x00-0xFF pattern) +static const uint8_t TEST_HEADER_BYTES[80] = { + // Version (4 bytes, LE) + 0x00, 0x00, 0x00, 0x20, + // Previous block hash (32 bytes) + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + // Merkle root (32 bytes) + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + // Timestamp (4 bytes, LE) + 0x8b, 0x26, 0x02, 0x6a, + // Difficulty bits (4 bytes, LE) + 0xf0, 0x1f, 0x02, 0x17, + // Nonce (4 bytes, LE) + 0x33, 0x5c, 0x42, 0x2e, +}; + +// 
============================================================ +// Baseline Benchmark Implementation +// ============================================================ + +/* + * Single-pass hash timing test + * Measures exact cycles for one double-SHA-256 computation + */ +struct hash_timing_t { + uint32_t cycles_total; + uint32_t cycles_midstate; + uint32_t cycles_complete; + bool correctness_ok; +}; + +static void benchmark_single_hash(hash_timing_t *result) { + block_header_t hb = {0}; + sha256_hash_t midstate = {0}; + sha256_hash_t firstsha = {0}; + sha256_hash_t secondsha = {0}; + + // Copy test header + memcpy((uint8_t *)&hb, TEST_HEADER_BYTES, 80); + + // Warmup: Let CPU cache settle + for (int i = 0; i < 10; i++) { + miner_sha256_midstate(&midstate, &hb); + } + + // Timer: Midstate computation + uint32_t t_start = get_cpu_cycles(); + miner_sha256_midstate(&midstate, &hb); + uint32_t t_mid = get_cpu_cycles(); + result->cycles_midstate = delta_cycles(t_start, t_mid); + + // Timer: Complete double SHA computation + t_start = get_cpu_cycles(); + miner_sha256_complete_from_midstate(&midstate, &hb, &firstsha, &secondsha); + uint32_t t_end = get_cpu_cycles(); + result->cycles_complete = delta_cycles(t_start, t_end); + result->cycles_total = result->cycles_midstate + result->cycles_complete; + + // Cross-validate: direct double-SHA (same pattern as MINER-TEST) + // sha256() is single-SHA; two calls = double-SHA + sha256_hash_t ref_first = {0}; + sha256_hash_t ref_second = {0}; + sha256(&ref_first, (uint8_t *)&hb, 80); + sha256(&ref_second, ref_first.bytes, 32); + + result->correctness_ok = (memcmp(secondsha.bytes, ref_second.bytes, 32) == 0); +} + +/* + * Batch timing: measure many hashes with varied nonces + * Reports average cycles and throughput + */ +struct batch_timing_t { + uint32_t num_hashes; + uint64_t total_cycles; + uint32_t avg_cycles_per_hash; + float avg_us_per_hash; + float throughput_khs; + uint32_t cycles_min; + uint32_t cycles_max; + uint32_t 
correctness_failures; +}; + +static void benchmark_batch_legacy(uint32_t count, batch_timing_t *result) { + block_header_t hb = {0}; + sha256_hash_t midstate = {0}; + sha256_hash_t firstsha = {0}; + sha256_hash_t secondsha = {0}; + uint32_t nonce = 0x00000000; + + memcpy((uint8_t *)&hb, TEST_HEADER_BYTES, 80); + + // Warmup + for (int i = 0; i < 100; i++) { + hb.nonce = i; + miner_sha256_midstate(&midstate, &hb); + miner_sha256_complete_from_midstate(&midstate, &hb, &firstsha, &secondsha); + } + + result->num_hashes = count; + result->total_cycles = 0; + result->cycles_min = UINT32_MAX; + result->cycles_max = 0; + result->correctness_failures = 0; + + // Batch measurement: measure each hash individually for detailed stats + for (uint32_t i = 0; i < count; i++) { + hb.nonce = nonce++; + miner_sha256_midstate(&midstate, &hb); + + uint32_t t_start = get_cpu_cycles(); + miner_sha256_complete_from_midstate(&midstate, &hb, &firstsha, &secondsha); + uint32_t t_end = get_cpu_cycles(); + + uint32_t delta = delta_cycles(t_start, t_end); + result->total_cycles += delta; + + if (delta < result->cycles_min) result->cycles_min = delta; + if (delta > result->cycles_max) result->cycles_max = delta; + } + + // Correctness sampling (outside timed loop): compare against direct double-SHA. 
+ result->correctness_failures = 0; + for (uint32_t i = 0; i < 64; i++) { + block_header_t hv = hb; + hv.nonce = i; + + sha256_hash_t ms = {0}; + sha256_hash_t first = {0}; + sha256_hash_t second = {0}; + sha256_hash_t ref_first = {0}; + sha256_hash_t ref_second = {0}; + + miner_sha256_midstate(&ms, &hv); + miner_sha256_complete_from_midstate(&ms, &hv, &first, &second); + + sha256(&ref_first, (uint8_t *)&hv, 80); + sha256(&ref_second, ref_first.bytes, 32); + + if (memcmp(second.bytes, ref_second.bytes, 32) != 0) { + result->correctness_failures++; + } + } + + result->avg_cycles_per_hash = result->total_cycles / count; + result->avg_us_per_hash = (float)result->avg_cycles_per_hash / 240.0f; // 240 MHz + result->throughput_khs = 1000.0f / result->avg_us_per_hash; +} + +static void benchmark_batch_prepared(uint32_t count, batch_timing_t *result) { + block_header_t hb = {0}; + sha256_hash_t midstate = {0}; + sha256_hash_t firstsha = {0}; + sha256_hash_t secondsha = {0}; + sha256_tail_schedule_cache_t tail_cache = {0}; + uint32_t nonce = 0x00000000; + + memcpy((uint8_t *)&hb, TEST_HEADER_BYTES, 80); + + miner_sha256_midstate(&midstate, &hb); + miner_sha256_prepare_tail_schedule(&tail_cache, &hb); + + for (int i = 0; i < 100; i++) { + miner_sha256_complete_from_midstate_prepared(&midstate, &tail_cache, hb.nonce + i, &firstsha, &secondsha); + } + + result->num_hashes = count; + result->total_cycles = 0; + result->cycles_min = UINT32_MAX; + result->cycles_max = 0; + result->correctness_failures = 0; + + for (uint32_t i = 0; i < count; i++) { + uint32_t t_start = get_cpu_cycles(); + miner_sha256_complete_from_midstate_prepared(&midstate, &tail_cache, nonce++, &firstsha, &secondsha); + uint32_t t_end = get_cpu_cycles(); + + uint32_t delta = delta_cycles(t_start, t_end); + result->total_cycles += delta; + + if (delta < result->cycles_min) result->cycles_min = delta; + if (delta > result->cycles_max) result->cycles_max = delta; + } + + // Correctness sampling (outside timed 
loop): compare prepared path to direct double-SHA. + result->correctness_failures = 0; + for (uint32_t i = 0; i < 64; i++) { + block_header_t hv = hb; + hv.nonce = i; + + sha256_hash_t ms = {0}; + sha256_hash_t first = {0}; + sha256_hash_t second = {0}; + sha256_hash_t ref_first = {0}; + sha256_hash_t ref_second = {0}; + sha256_tail_schedule_cache_t vc = {0}; + + miner_sha256_midstate(&ms, &hv); + miner_sha256_prepare_tail_schedule(&vc, &hv); + miner_sha256_complete_from_midstate_prepared(&ms, &vc, hv.nonce, &first, &second); + + sha256(&ref_first, (uint8_t *)&hv, 80); + sha256(&ref_second, ref_first.bytes, 32); + + if (memcmp(second.bytes, ref_second.bytes, 32) != 0) { + result->correctness_failures++; + } + } + + result->avg_cycles_per_hash = result->total_cycles / count; + result->avg_us_per_hash = (float)result->avg_cycles_per_hash / 240.0f; + result->throughput_khs = 1000.0f / result->avg_us_per_hash; +} + +/* + * Dual-core simulation: measure per-core performance + * Simulates mining loop with yield intervals + */ +struct dualcore_timing_t { + uint32_t hashes_per_core; + uint64_t cycles_core0; + uint64_t cycles_core1; + uint32_t avg_cycles_core0; + uint32_t avg_cycles_core1; + float khs_core0; + float khs_core1; + float khs_total; +}; + +static void benchmark_dualcore(uint32_t hashes_per_core, dualcore_timing_t *result) { + block_header_t hb = {0}; + sha256_hash_t midstate = {0}; + sha256_hash_t firstsha = {0}; + sha256_hash_t secondsha = {0}; + + memcpy((uint8_t *)&hb, TEST_HEADER_BYTES, 80); + miner_sha256_midstate(&midstate, &hb); + + // Warmup + for (int i = 0; i < 100; i++) { + hb.nonce = (uint32_t)i; + miner_sha256_complete_from_midstate(&midstate, &hb, &firstsha, &secondsha); + } + + result->hashes_per_core = hashes_per_core; + result->cycles_core0 = 0; + result->cycles_core1 = 0; + + // Core 0 simulation: nonces 0x00000000 - 0x7FFFFFFF + uint32_t nonce = 0x00000000; + for (uint32_t i = 0; i < hashes_per_core; i++) { + hb.nonce = nonce++; + uint32_t 
t_start = get_cpu_cycles(); + miner_sha256_complete_from_midstate(&midstate, &hb, &firstsha, &secondsha); + uint32_t t_end = get_cpu_cycles(); + + result->cycles_core0 += delta_cycles(t_start, t_end); + } + + // Core 1 simulation: nonces 0x80000000 - 0xFFFFFFFF + nonce = 0x80000000; + for (uint32_t i = 0; i < hashes_per_core; i++) { + hb.nonce = nonce++; + uint32_t t_start = get_cpu_cycles(); + miner_sha256_complete_from_midstate(&midstate, &hb, &firstsha, &secondsha); + uint32_t t_end = get_cpu_cycles(); + + result->cycles_core1 += delta_cycles(t_start, t_end); + } + + result->avg_cycles_core0 = result->cycles_core0 / hashes_per_core; + result->avg_cycles_core1 = result->cycles_core1 / hashes_per_core; + result->khs_core0 = (240.0f * 1000.0f) / (float)result->avg_cycles_core0; + result->khs_core1 = (240.0f * 1000.0f) / (float)result->avg_cycles_core1; + result->khs_total = result->khs_core0 + result->khs_core1; +} + +// ============================================================ +// Public API: Run all benchmarks +// ============================================================ + +void baseline_benchmark_run() { + Serial.println("\n========================================"); + Serial.println("SparkMiner Baseline Performance Benchmark"); + Serial.println("========================================\n"); + + // Test 1: Single hash deterministic vector + Serial.println("[TEST 1] Single Hash Timing"); + Serial.println("-------------------------------"); + hash_timing_t single_timing = {0}; + benchmark_single_hash(&single_timing); + + Serial.printf(" Midstate: %u cycles\n", single_timing.cycles_midstate); + Serial.printf(" Complete SHA: %u cycles (%.1f μs)\n", + single_timing.cycles_complete, + (float)single_timing.cycles_complete / 240.0f); + Serial.printf(" Total: %u cycles (%.1f μs)\n", + single_timing.cycles_total, + (float)single_timing.cycles_total / 240.0f); + Serial.printf(" Correctness: %s (test vector validation)\n", + single_timing.correctness_ok ? 
"PASS" : "FAIL"); + Serial.println(); + + // Test 2: Batch timing (10k hashes) + Serial.println("[TEST 2] Batch Timing (10,000 hashes)"); + Serial.println("-------------------------------"); + batch_timing_t batch_legacy = {0}; + benchmark_batch_legacy(10000, &batch_legacy); + + Serial.printf(" Legacy min/max/avg: %u / %u / %u cycles/hash (%.1f KH/s)\n", + batch_legacy.cycles_min, + batch_legacy.cycles_max, + batch_legacy.avg_cycles_per_hash, + batch_legacy.throughput_khs); + Serial.printf(" Legacy correctness: %s (failures=%u/64)\n", + batch_legacy.correctness_failures == 0 ? "PASS" : "FAIL", + batch_legacy.correctness_failures); + Serial.println(); + + // Test 3: Dual-core simulation + Serial.println("[TEST 3] Dual-Core Simulation (5,000 hashes/core)"); + Serial.println("-------------------------------"); + dualcore_timing_t dualcore_timing = {0}; + benchmark_dualcore(5000, &dualcore_timing); + + Serial.printf(" Core 0:\n"); + Serial.printf(" Avg cycles: %u cycles/hash (%.1f μs/hash)\n", + dualcore_timing.avg_cycles_core0, + (float)dualcore_timing.avg_cycles_core0 / 240.0f); + Serial.printf(" Throughput: %.1f KH/s\n", dualcore_timing.khs_core0); + Serial.printf("\n Core 1:\n"); + Serial.printf(" Avg cycles: %u cycles/hash (%.1f μs/hash)\n", + dualcore_timing.avg_cycles_core1, + (float)dualcore_timing.avg_cycles_core1 / 240.0f); + Serial.printf(" Throughput: %.1f KH/s\n", dualcore_timing.khs_core1); + Serial.printf("\n Total (simulated dual-core):\n"); + Serial.printf(" Combined: %.1f KH/s\n", dualcore_timing.khs_total); + Serial.println(); + + // Summary & targets + Serial.println("========================================"); + Serial.println("BASELINE SUMMARY"); + Serial.println("========================================"); + Serial.printf("Legacy per-hash: %u cycles (%.1f μs)\n", + batch_legacy.avg_cycles_per_hash, + batch_legacy.avg_us_per_hash); + Serial.printf("Legacy throughput: %.1f KH/s per core (%.1f total)\n", + batch_legacy.throughput_khs, + 
batch_legacy.throughput_khs * 2.0f); + Serial.println(); + Serial.println("TARGET AFTER OPTIMIZATIONS:"); + Serial.printf("OPT #1 (+15%%): %.1f KH/s total (estimated)\n", + batch_legacy.throughput_khs * 2.0f * 1.15f); + Serial.printf("OPT #1+#2 (+30%%): %.1f KH/s total (estimated)\n", + batch_legacy.throughput_khs * 2.0f * 1.30f); + Serial.printf("OPT #1+#2+#3 (+45%%): %.1f KH/s total (estimated)\n", + batch_legacy.throughput_khs * 2.0f * 1.45f); + Serial.println(); + Serial.println("Benchmark Complete!\n"); +} + +#ifdef __cplusplus +} +#endif diff --git a/src/mining/baseline_benchmark.h b/src/mining/baseline_benchmark.h new file mode 100644 index 0000000..fde03b2 --- /dev/null +++ b/src/mining/baseline_benchmark.h @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: GPL-3.0-only + * Copyright Theodore Robert Campbell Jr + * + * SparkMiner - Baseline Performance Benchmark Header + * + * Public API for cycle-level performance measurement + */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Run complete baseline benchmark suite + * Outputs to Serial console with detailed cycle and throughput metrics + * + * Tests performed: + * 1. Single hash deterministic vector (correctness validation) + * 2. Batch timing over 10,000 hashes (average performance) + * 3. 
Dual-core simulation (per-core and combined throughput) + */ +void baseline_benchmark_run(); + +#ifdef __cplusplus +} +#endif diff --git a/src/mining/miner.cpp b/src/mining/miner.cpp index 7b216b0..d7f08ab 100644 --- a/src/mining/miner.cpp +++ b/src/mining/miner.cpp @@ -17,6 +17,10 @@ #include #endif +#if defined(CONFIG_IDF_TARGET_ESP32S3) +#include +#endif + #include "miner.h" #include "sha256_types.h" #include "sha256_hw.h" // Hardware SHA-256 wrapper @@ -27,6 +31,7 @@ #include "sha256_pipelined_s3.h" // Pipelined assembly mining (Core 1) - ESP32-S3 #include "miner_sha256.h" // BitsyMiner software SHA-256 (verification + Core 0) #include "../stratum/stratum.h" +#include "../logging.h" #include "board_config.h" // ============================================================ @@ -34,6 +39,69 @@ // ============================================================ #define MAX_DIFFICULTY 0x1d00ffff +// Compile-time deterministic nonce window for S3 debug sessions. +// Enable only when investigating candidate mismatches. +#ifndef S3_DEBUG_NONCE_WINDOW_ENABLE +#define S3_DEBUG_NONCE_WINDOW_ENABLE 0 +#endif + +// Debug nonce windows for S3 MUST be defined in swapped nonce space because +// the assembly loop increments the swapped nonce directly. +#ifndef S3_DEBUG_NONCE_WINDOW_START_SWAPPED +#ifdef S3_DEBUG_NONCE_WINDOW_START +#define S3_DEBUG_NONCE_WINDOW_START_SWAPPED S3_DEBUG_NONCE_WINDOW_START +#else +#define S3_DEBUG_NONCE_WINDOW_START_SWAPPED 0x00000000u +#endif +#endif + +#ifndef S3_DEBUG_NONCE_WINDOW_END_SWAPPED +#ifdef S3_DEBUG_NONCE_WINDOW_END +#define S3_DEBUG_NONCE_WINDOW_END_SWAPPED S3_DEBUG_NONCE_WINDOW_END +#else +#define S3_DEBUG_NONCE_WINDOW_END_SWAPPED 0x0000FFFFu +#endif +#endif + +#if S3_DEBUG_NONCE_WINDOW_ENABLE && (S3_DEBUG_NONCE_WINDOW_END_SWAPPED < S3_DEBUG_NONCE_WINDOW_START_SWAPPED) +#error "S3 debug nonce window invalid: END_SWAPPED must be >= START_SWAPPED" +#endif + +// Optional one-shot runtime sanity test. 
+// Runs exactly once after boot using a tiny swapped window, prints PASS/WARN, +// then automatically falls back to the configured debug window. +#ifndef S3_DEBUG_NONCE_WINDOW_TINY_SELFTEST_ENABLE +#define S3_DEBUG_NONCE_WINDOW_TINY_SELFTEST_ENABLE 0 +#endif + +#ifndef S3_DEBUG_NONCE_WINDOW_TINY_START_SWAPPED +#define S3_DEBUG_NONCE_WINDOW_TINY_START_SWAPPED 0x00000000u +#endif + +#ifndef S3_DEBUG_NONCE_WINDOW_TINY_END_SWAPPED +#define S3_DEBUG_NONCE_WINDOW_TINY_END_SWAPPED 0x00000010u +#endif + +// Verbose restore matrix diagnostics are useful only for deep bring-up. +// Keep off by default to avoid noisy normal boots. +#ifndef S3_RESTORE_MATRIX_DEBUG +#define S3_RESTORE_MATRIX_DEBUG 0 +#endif + +// Optional deeper S3 hardware experiment harness. +// Disabled by default to avoid boot-time overhead on normal mining builds. +#ifndef S3_HW_EXPERIMENTS_ENABLE +#define S3_HW_EXPERIMENTS_ENABLE 0 +#endif + +#ifndef S3_HW_EXPERIMENTS_NONCES +#define S3_HW_EXPERIMENTS_NONCES 4096 +#endif + +#if S3_DEBUG_NONCE_WINDOW_TINY_SELFTEST_ENABLE && (S3_DEBUG_NONCE_WINDOW_TINY_END_SWAPPED < S3_DEBUG_NONCE_WINDOW_TINY_START_SWAPPED) +#error "S3 tiny self-test window invalid: TINY_END_SWAPPED must be >= TINY_START_SWAPPED" +#endif + // ============================================================ // Globals // ============================================================ @@ -71,9 +139,41 @@ static mining_stats_t s_stats = {0}; volatile uint64_t s_core0Hashes = 0; volatile uint64_t s_core1Hashes = 0; +// Job invalidation counter: increments each time a new job replaces an active one. +// Useful for diagnosing how many hashes are "wasted" per job change. 
+volatile uint32_t s_jobChanges = 0; + // Nonce ranges for dual-core static unsigned long s_startNonce[2] = {0, 0x80000000}; +#if defined(CONFIG_IDF_TARGET_ESP32S3) +static const miner_backend_info_t s_backendInfo = { + "ESP32-S3", + "software-midstate dual-core", + true, // hwShaAvailable + false, // hwShaHotLoop + true, // softwareMidstate + false, // dmaHotPath + false, // midstateRestoreSupported + true // nonceSplitCore0LowCore1High +}; +#else +static const miner_backend_info_t s_backendInfo = { + "ESP32", + "hardware SHA mixed path", + true, + true, + false, + false, + true, + true +}; +#endif + +const miner_backend_info_t *miner_get_backend_info() { + return &s_backendInfo; +} + // ============================================================ // Utility Functions // ============================================================ @@ -137,24 +237,24 @@ static void bits_to_target(uint32_t nBits, uint8_t *target) { static void divide_256bit_by_double(uint64_t *target, double divisor) { uint64_t result[4] = {0}; double remainder = 0.0; - + // Iterate from MSB (target[3]) to LSB (target[0]) for (int i = 3; i >= 0; i--) { // Add carried remainder from upper word (scaled by 2^64) double val = (double)target[i] + remainder * 18446744073709551616.0; - + double res = val / divisor; - + // Clamp to prevent overflow (shouldn't happen with diff >= 1) if (res >= 18446744073709551615.0) { result[i] = 0xFFFFFFFFFFFFFFFFULL; } else { result[i] = (uint64_t)res; } - + remainder = val - ((double)result[i] * divisor); } - + memcpy(target, result, sizeof(result)); } @@ -198,6 +298,86 @@ static int check_target(const uint8_t *hash, const uint8_t *target) { return 1; // Equal is valid } +#ifdef DEBUG_SHARE_VALIDATION +static void dump_hex_compact(const char *label, const uint8_t *buf, size_t len) { + Serial.print(label); + Serial.print("="); + for (size_t i = 0; i < len; i++) { + Serial.printf("%02x", buf[i]); + } + Serial.println(); +} + +static void dump_s3_candidate_trace_once( + 
const block_header_t *hdr_verify, + const uint32_t *header_swapped, + const uint32_t *hw_midstate, + uint32_t nonce_native, + uint32_t nonce_swapped, + const sha256_hash_t *sw_hash, + bool sw_verified +) { + static bool dumped = false; + if (dumped || !hdr_verify || !header_swapped || !hw_midstate || !sw_hash) { + return; + } + dumped = true; + + block_header_t hdr_sw = *hdr_verify; + hdr_sw.nonce = nonce_native; + + uint8_t s3_header[80]; + for (int i = 0; i < 20; i++) { + uint32_t w = header_swapped[i]; + if (i == 19) { + w = nonce_swapped; + } + ((uint32_t *)s3_header)[i] = __builtin_bswap32(w); + } + + sha256_hash_t sw_first; + sha256_hash_t sw_second; + sha256(&sw_first, (uint8_t *)&hdr_sw, 80); + sha256(&sw_second, sw_first.bytes, 32); + + sha256_hash_t s3_second; + memset(&s3_second, 0, sizeof(s3_second)); + bool s3_verify_ok = sha256_s3_verify( + hw_midstate, + (const uint8_t *)&header_swapped[16], + nonce_swapped, + s3_second.bytes + ); + + uint32_t logical_h0 = ((const uint32_t *)sw_hash->bytes)[7]; + uint32_t raw_h0 = __builtin_bswap32(logical_h0); + + Serial.println("[S3-DBG] -------- Candidate trace --------"); + Serial.printf("[S3-DBG] nonce_native=%08x nonce_swapped=%08x\n", nonce_native, nonce_swapped); + Serial.printf("[S3-DBG] header[76..79]=%02x %02x %02x %02x\n", + ((uint8_t *)&hdr_sw)[76], ((uint8_t *)&hdr_sw)[77], + ((uint8_t *)&hdr_sw)[78], ((uint8_t *)&hdr_sw)[79]); + dump_hex_compact("[S3-DBG] hdr_sw", (const uint8_t *)&hdr_sw, 80); + dump_hex_compact("[S3-DBG] hdr_s3", s3_header, 80); + dump_hex_compact("[S3-DBG] midstate_input64", (const uint8_t *)&hdr_sw, 64); + dump_hex_compact("[S3-DBG] sw_first", sw_first.bytes, 32); + dump_hex_compact("[S3-DBG] sw_second", sw_second.bytes, 32); + if (s3_verify_ok) { + dump_hex_compact("[S3-DBG] s3_second", s3_second.bytes, 32); + } else { + Serial.println("[S3-DBG] s3_second="); + } + dump_hex_compact("[S3-DBG] pool_target", s_poolTarget, 32); + Serial.printf("[S3-DBG] prefilter_word_raw_h0=%08x 
upper16=%04x lower16=%04x\n", + raw_h0, (uint16_t)(raw_h0 >> 16), (uint16_t)(raw_h0 & 0xFFFF)); + Serial.printf("[S3-DBG] sw_prefilter=%s full_target=%s sw_verify=%s\n", + (sw_hash->bytes[31] == 0 && sw_hash->bytes[30] == 0) ? "PASS" : "FAIL", + check_target(sw_hash->bytes, s_poolTarget) ? "PASS" : "FAIL", + sw_verified ? "PASS" : "FAIL"); + Serial.println("[S3-DBG] ---------------------------------"); +} +#endif + // ============================================================ // Merkle Root Calculation // ============================================================ @@ -304,16 +484,16 @@ static void hashCheck(const char *jobId, sha256_hash_t *ctx, uint32_t timestamp, // Check against block target (lottery win!) if (check_target(ctx->bytes, s_blockTarget)) { - Serial.println("[MINER] *** BLOCK SOLUTION FOUND! ***"); + log_line("[MINER] *** BLOCK SOLUTION FOUND! ***"); flags |= SUBMIT_FLAG_BLOCK; s_stats.blocks++; } double shareDiff = getDifficulty(ctx); - Serial.printf("[MINER] Share found! Diff: %.4f (pool: %.4f) Nonce: %08x\n", shareDiff, s_poolDifficulty, nonce); + log_linef("[MINER] Share found! 
Diff: %.4f (pool: %.4f) Nonce: %08x", shareDiff, s_poolDifficulty, nonce); // Debug logging for share validation (Issue #5 investigation) - #if defined(CONFIG_IDF_TARGET_ESP32S3) || defined(DEBUG_SHARE_VALIDATION) + #if defined(DEBUG_SHARE_VALIDATION) Serial.printf("[SHARE] job=%s time=%08x nonce=%08x\n", jobId, timestamp, nonce); Serial.printf("[SHARE] hash[28-31]=%02x%02x%02x%02x (should have leading zeros)\n", ctx->bytes[28], ctx->bytes[29], ctx->bytes[30], ctx->bytes[31]); @@ -367,7 +547,7 @@ void run_sha_benchmark() { sha256_pipelined_mine_v3(sha_base, header, &nonce, &hashes, &active); } uint32_t t1 = micros(); - Serial.printf("[BENCHMARK] v3: %u us for %llu hashes (%.2f kH/s)\n", + Serial.printf("[BENCHMARK] v3: %u us for %llu hashes (%.2f kH/s)\n", t1-t0, hashes, (double)hashes*1000.0/(t1-t0)); Serial.println("[BENCHMARK] Running v4 (100k hashes)..."); @@ -378,11 +558,459 @@ void run_sha_benchmark() { sha256_pipelined_mine_v4(sha_base, midstate, tail, &nonce, &hashes, &active); } t1 = micros(); - Serial.printf("[BENCHMARK] v4: %u us for %llu hashes (%.2f kH/s)\n", + Serial.printf("[BENCHMARK] v4: %u us for %llu hashes (%.2f kH/s)\n", t1-t0, hashes, (double)hashes*1000.0/(t1-t0)); } #endif +#ifdef DEBUG_SHARE_VALIDATION +static void dump_u32_words(const char *label, const uint32_t *words, size_t count) { + Serial.print(label); + Serial.print("="); + for (size_t i = 0; i < count; i++) { + Serial.printf("%08x", words[i]); + if (i + 1 < count) Serial.print(" "); + } + Serial.println(); +} + +static void build_first_tail_block(uint8_t out[64], const block_header_t *hdr) { + memset(out, 0, 64); + memcpy(out, ((const uint8_t *)hdr) + 64, 16); + out[16] = 0x80; + out[62] = 0x02; + out[63] = 0x80; +} + +static void build_second_block(uint8_t out[64], const uint8_t firstDigest[32]) { + memset(out, 0, 64); + memcpy(out, firstDigest, 32); + out[32] = 0x80; + out[62] = 0x01; + out[63] = 0x00; +} + +static void write_be32(uint8_t *dst, uint32_t v) { + dst[0] = 
(uint8_t)(v >> 24); + dst[1] = (uint8_t)(v >> 16); + dst[2] = (uint8_t)(v >> 8); + dst[3] = (uint8_t)(v); +} + +static void build_first_tail_block_prepared(uint8_t out[64], const sha256_tail_schedule_cache_t *cache, uint32_t nonce) { + memset(out, 0, 64); + write_be32(out + 0, cache->w0); + write_be32(out + 4, cache->w1); + write_be32(out + 8, cache->w2); + write_be32(out + 12, __builtin_bswap32(nonce)); + write_be32(out + 16, 0x80000000u); + write_be32(out + 60, 0x00000280u); +} + +static bool run_prepared_equivalence_nonce_window( + const char *name, + const block_header_t *hdr, + uint32_t startNonce, + uint32_t count +) { + if (!name || !hdr || count == 0) return false; + + block_header_t base = *hdr; + base.nonce = startNonce; + + sha256_hash_t swMidstate; + miner_sha256_midstate(&swMidstate, &base); + + sha256_tail_schedule_cache_t cache = {0}; + miner_sha256_prepare_tail_schedule(&cache, &base); + + for (uint32_t i = 0; i < count; i++) { + block_header_t hb = base; + hb.nonce = startNonce + i; + + sha256_hash_t legacyFirst = {0}; + sha256_hash_t legacySecond = {0}; + sha256_hash_t preparedFirst = {0}; + sha256_hash_t preparedSecond = {0}; + + miner_sha256_complete_from_midstate(&swMidstate, &hb, &legacyFirst, &legacySecond); + miner_sha256_complete_from_midstate_prepared(&swMidstate, &cache, hb.nonce, &preparedFirst, &preparedSecond); + + bool firstEq = (memcmp(legacyFirst.bytes, preparedFirst.bytes, 32) == 0); + bool secondEq = (memcmp(legacySecond.bytes, preparedSecond.bytes, 32) == 0); + if (!firstEq || !secondEq) { + uint8_t legacyTail[64]; + uint8_t preparedTail[64]; + build_first_tail_block(legacyTail, &hb); + build_first_tail_block_prepared(preparedTail, &cache, hb.nonce); + + Serial.printf("[PREPARED-TEST] %s nonce=%08lx (swapped=%08lx) mismatch first=%s second=%s\n", + name, + (unsigned long)hb.nonce, + (unsigned long)__builtin_bswap32(hb.nonce), + firstEq ? "NO" : "YES", + secondEq ? 
"NO" : "YES"); + dump_hex_compact("[PREPARED-TEST] legacy_tail", legacyTail, 64); + dump_hex_compact("[PREPARED-TEST] prepared_tail", preparedTail, 64); + dump_hex_compact("[PREPARED-TEST] legacy_first", legacyFirst.bytes, 32); + dump_hex_compact("[PREPARED-TEST] prepared_first", preparedFirst.bytes, 32); + dump_hex_compact("[PREPARED-TEST] legacy_second", legacySecond.bytes, 32); + dump_hex_compact("[PREPARED-TEST] prepared_second", preparedSecond.bytes, 32); + Serial.printf("[PREPARED-TEST] %s first divergence point: %s\n", + name, + firstEq ? "second SHA digest" : "first SHA digest"); + return false; + } + } + + return true; +} + +static bool parse_header_hex_80(const char *hex, block_header_t *out) { + if (!hex || !out) return false; + if (strlen(hex) != 160) return false; + memset(out, 0, sizeof(*out)); + hexToBytes((uint8_t *)out, hex, 160); + return true; +} + +static bool run_midstate_vector_test( + const char *name, + const block_header_t *hdr, + const char *expectedFirstHex, + const char *expectedSecondHex +) { + if (!name || !hdr) return false; + + sha256_hash_t directFirst; + sha256_hash_t directSecond; + sha256_hash_t swMidstate; + sha256_hash_t swFirstFromMid; + sha256_hash_t swSecondFromMid; + + sha256(&directFirst, (uint8_t *)hdr, 80); + sha256(&directSecond, directFirst.bytes, 32); + + miner_sha256_midstate(&swMidstate, (block_header_t *)hdr); + miner_sha256_complete_from_midstate(&swMidstate, hdr, &swFirstFromMid, &swSecondFromMid); + + uint8_t tailBlock[64]; + uint8_t secondBlock[64]; + build_first_tail_block(tailBlock, hdr); + build_second_block(secondBlock, swFirstFromMid.bytes); + + dump_hex_compact("[MINER-TEST] header80", (const uint8_t *)hdr, 80); + dump_hex_compact("[MINER-TEST] direct_first", directFirst.bytes, 32); + dump_hex_compact("[MINER-TEST] direct_second", directSecond.bytes, 32); + dump_hex_compact("[MINER-TEST] midstate_raw", swMidstate.bytes, 32); + dump_u32_words("[MINER-TEST] midstate_words", swMidstate.hash, 8); + 
dump_hex_compact("[MINER-TEST] first_tail_block", tailBlock, 64); + dump_hex_compact("[MINER-TEST] midstate_first", swFirstFromMid.bytes, 32); + dump_hex_compact("[MINER-TEST] second_input_block", secondBlock, 64); + dump_hex_compact("[MINER-TEST] midstate_second", swSecondFromMid.bytes, 32); + + bool direct80Ok = true; + bool swMidCompletionOk = (memcmp(swSecondFromMid.bytes, directSecond.bytes, 32) == 0); + + if (expectedFirstHex && strlen(expectedFirstHex) == 64) { + uint8_t expectedFirst[32]; + hexToBytes(expectedFirst, expectedFirstHex, 64); + direct80Ok = (memcmp(directFirst.bytes, expectedFirst, 32) == 0); + } + + if (expectedSecondHex && strlen(expectedSecondHex) == 64) { + uint8_t expectedSecond[32]; + hexToBytes(expectedSecond, expectedSecondHex, 64); + direct80Ok = direct80Ok && (memcmp(directSecond.bytes, expectedSecond, 32) == 0); + } + + bool hwMidCompletionOk = true; + bool s3VerifyCompletionOk = true; + +#if defined(CONFIG_IDF_TARGET_ESP32S3) + uint32_t header_swapped[20]; + const uint32_t *header_words = (const uint32_t *)hdr; + for (int i = 0; i < 20; i++) { + header_swapped[i] = __builtin_bswap32(header_words[i]); + } + + uint32_t hwMidstate[8] = {0}; + + esp_sha_acquire_hardware(); + sha256_s3_compute_midstate(header_swapped, hwMidstate); + esp_sha_release_hardware(); + + // Midstate-only compare (no SHA_H restore/continue path). 
+ bool hwMidRawEq = (memcmp(hwMidstate, swMidstate.hash, sizeof(hwMidstate)) == 0); + uint32_t hwMidSwapped[8]; + for (int i = 0; i < 8; i++) hwMidSwapped[i] = __builtin_bswap32(hwMidstate[i]); + bool hwMidSwapEq = (memcmp(hwMidSwapped, swMidstate.hash, sizeof(hwMidSwapped)) == 0); + hwMidCompletionOk = hwMidRawEq || hwMidSwapEq; + + Serial.println("[S3-TEST] SHA_H restore+continue path marked unsupported; skipped in normal boot tests"); + +#if S3_RESTORE_MATRIX_DEBUG + if (expectedFirstHex && strlen(expectedFirstHex) == 64) { + uint8_t expectedFirst[32]; + hexToBytes(expectedFirst, expectedFirstHex, 64); + + static const uint32_t knownLiveMidstateWords[8] = { + 0x587002b3, 0xb4886f0d, 0xb54d02f5, 0x065289ec, + 0x0f36da87, 0xff81c170, 0x71bd69d6, 0xfdf0168b + }; + bool swMidWordsMatchKnown = (memcmp(swMidstate.hash, knownLiveMidstateWords, sizeof(knownLiveMidstateWords)) == 0); + Serial.printf("[S3-TEST] software_midstate_words_match_known=%s\n", swMidWordsMatchKnown ? "PASS" : "FAIL"); + + esp_sha_acquire_hardware(); + bool restoreMapPass = sha256_s3_test_restore_mapping( + swMidstate.hash, + ((const uint8_t *)hdr) + 64, + &header_swapped[16], + expectedFirst + ); + esp_sha_release_hardware(); + Serial.printf("[S3-TEST] restore_mapping_any_mode=%s\n", restoreMapPass ? "PASS" : "FAIL"); + } +#endif +#else + hwMidCompletionOk = true; + s3VerifyCompletionOk = true; +#endif + + Serial.printf("[MINER-TEST] direct 80-byte double SHA %s\n", direct80Ok ? "OK" : "FAIL"); + Serial.printf("[MINER-TEST] software midstate completion %s\n", swMidCompletionOk ? "OK" : "FAIL"); + +#if defined(CONFIG_IDF_TARGET_ESP32S3) + // On ESP32-S3: optional S3 midstate restore is unsupported, do not fail software-midstate validation. + Serial.printf("[MINER-TEST] S3 midstate restore optimization %s\n", hwMidCompletionOk ? "OK" : "UNSUPPORTED"); + bool softwareMidstateOk = direct80Ok && swMidCompletionOk; + Serial.printf("[MINER-TEST] %s software-midstate %s\n", name, softwareMidstateOk ? 
"PASS" : "FAIL"); + return softwareMidstateOk; +#else + // On other platforms: all tests matter + Serial.printf("[MINER-TEST] hardware midstate (compute-only) %s\n", hwMidCompletionOk ? "OK" : "FAIL"); + Serial.printf("[MINER-TEST] hardware restore+continue status %s\n", s3VerifyCompletionOk ? "SKIPPED/UNSUPPORTED" : "UNSUPPORTED"); + bool overall = direct80Ok && swMidCompletionOk && hwMidCompletionOk && s3VerifyCompletionOk; + Serial.printf("[MINER-TEST] %s overall %s\n", name, overall ? "OK" : "FAIL"); + return overall; +#endif +} + +#if defined(CONFIG_IDF_TARGET_ESP32S3) +static void run_s3_hw_experiments(void) { + Serial.printf("[S3-EXP] Enabled (nonces=%u)\n", (unsigned)S3_HW_EXPERIMENTS_NONCES); + + // Experiments use direct SHA register access paths. Keep SHA hardware + // acquired for the whole harness so results are meaningful and repeatable. + esp_sha_acquire_hardware(); + + block_header_t headers[2]; + memset(&headers, 0, sizeof(headers)); + + // Synthetic vector + headers[0].version = 0x20000000; + for (int i = 0; i < 32; i++) { + headers[0].prev_hash[i] = (uint8_t)i; + headers[0].merkle_root[i] = (uint8_t)(0xA0 + i); + } + headers[0].timestamp = 0x6a02268b; + headers[0].difficulty = 0x17021ff0; + headers[0].nonce = 0x2e425c33; + + // Captured live header + parse_header_hex_80( + "000000204ba6eb671af350c5b183bd2467497eca61b4f61eb88901000000000000000000acbb08e6dbac14cb5b23e07499d2f4686972836e871ce6015843d870138514b5cf33026af01f021700000e08", + &headers[1] + ); + + for (int vec = 0; vec < 2; vec++) { + block_header_t hb = headers[vec]; + sha256_hash_t swMid; + sha256_hash_t swFirst; + sha256_hash_t swSecond; + sha256_hash_t swSecondMid; + sha256_s3_verify_trace_t traceA; + sha256_s3_verify_trace_t traceC; + memset(&traceA, 0, sizeof(traceA)); + memset(&traceC, 0, sizeof(traceC)); + + uint32_t header_swapped[20]; + const uint32_t *header_words = (const uint32_t *)&hb; + for (int i = 0; i < 20; i++) { + header_swapped[i] = 
__builtin_bswap32(header_words[i]); + } + uint32_t nonce_swapped = header_swapped[19]; + + // Baseline software references. + sha256(&swFirst, (uint8_t *)&hb, 80); + sha256(&swSecond, swFirst.bytes, 32); + miner_sha256_midstate(&swMid, &hb); + miner_sha256_complete_from_midstate(&swMid, &hb, &swFirst, &swSecondMid); + + bool swEq = (memcmp(swSecond.bytes, swSecondMid.bytes, 32) == 0); + + // Candidate A: software midstate -> HW restore + continue + HW second SHA. + uint8_t a_out[32] = {0}; + bool a_ok = sha256_s3_verify_trace( + swMid.hash, + (const uint8_t *)&header_swapped[16], + nonce_swapped, + a_out, + &traceA + ); + bool a_match = a_ok && (memcmp(traceA.finalDigestBeBytes, swSecond.bytes, 32) == 0); + + // Candidate C: HW midstate -> HW continue + HW second SHA. + uint32_t hwMid[8] = {0}; + sha256_s3_compute_midstate(header_swapped, hwMid); + + uint8_t c_out[32] = {0}; + bool c_ok = sha256_s3_verify_trace( + hwMid, + (const uint8_t *)&header_swapped[16], + nonce_swapped, + c_out, + &traceC + ); + bool c_match = c_ok && (memcmp(traceC.finalDigestBeBytes, swSecond.bytes, 32) == 0); + + // Candidate B: software first hash + HW second SHA only. + uint8_t b_second[32] = {0}; + bool b_ok = sha256_s3_second_sha_from_first_be(swFirst.bytes, b_second); + bool b_match = b_ok && (memcmp(b_second, swSecond.bytes, 32) == 0); + + Serial.printf("[S3-EXP] vec=%d sw_mid_eq_direct=%s A(swMid->HW)=%s C(hwMid->HW)=%s B(HW second only)=%s\n", + vec, + swEq ? "PASS" : "FAIL", + a_match ? "PASS" : "FAIL", + c_match ? "PASS" : "FAIL", + b_match ? "PASS" : "FAIL"); + + // Microbench (no serial inside loop). + const uint32_t loops = (uint32_t)S3_HW_EXPERIMENTS_NONCES; + uint32_t t0, t1; + uint64_t dummy = 0; + + // SW midstate completion baseline. 
+ block_header_t hbSw = hb; + t0 = micros(); + for (uint32_t i = 0; i < loops; i++) { + hbSw.nonce++; + miner_sha256_complete_from_midstate(&swMid, &hbSw, &swFirst, &swSecondMid); + dummy += swSecondMid.bytes[0]; + } + t1 = micros(); + float sw_hs = (loops * 1000000.0f) / (float)(t1 - t0); + + // Candidate A benchmark. + uint32_t nonceA = nonce_swapped; + t0 = micros(); + for (uint32_t i = 0; i < loops; i++) { + sha256_s3_verify(swMid.hash, (const uint8_t *)&header_swapped[16], nonceA, a_out); + nonceA++; + dummy += a_out[0]; + } + t1 = micros(); + float a_hs = (loops * 1000000.0f) / (float)(t1 - t0); + + // Candidate C benchmark. + uint32_t nonceC = nonce_swapped; + t0 = micros(); + for (uint32_t i = 0; i < loops; i++) { + sha256_s3_verify(hwMid, (const uint8_t *)&header_swapped[16], nonceC, c_out); + nonceC++; + dummy += c_out[0]; + } + t1 = micros(); + float c_hs = (loops * 1000000.0f) / (float)(t1 - t0); + + // Candidate B benchmark (full cost: SW first + HW second). + block_header_t hbB = hb; + t0 = micros(); + for (uint32_t i = 0; i < loops; i++) { + hbB.nonce++; + sha256(&swFirst, (uint8_t *)&hbB, 80); + sha256_s3_second_sha_from_first_be(swFirst.bytes, b_second); + dummy += b_second[0]; + } + t1 = micros(); + float b_hs = (loops * 1000000.0f) / (float)(t1 - t0); + + Serial.printf("[S3-EXP] vec=%d bench SW=%.1fH/s A=%.1fH/s C=%.1fH/s B=%.1fH/s dummy=%llu\n", + vec, sw_hs, a_hs, c_hs, b_hs, dummy); + } + + esp_sha_release_hardware(); +} +#endif + +static bool run_debug_regression_checks(void) { + bool overallOk = true; + +#if defined(CONFIG_IDF_TARGET_ESP32S3) + esp_sha_acquire_hardware(); + bool oneBlockIvOk = sha256_s3_test_one_block_from_iv(); + bool secondShaPathsOk = sha256_s3_test_second_sha_paths(); + esp_sha_release_hardware(); +#else + bool oneBlockIvOk = true; + bool secondShaPathsOk = true; +#endif + + block_header_t testHeader; + memset(&testHeader, 0, sizeof(testHeader)); + testHeader.version = 0x20000000; + for (int i = 0; i < 32; i++) { + 
testHeader.prev_hash[i] = (uint8_t)i; + testHeader.merkle_root[i] = (uint8_t)(0xA0 + i); + } + testHeader.timestamp = 0x6a02268b; + testHeader.difficulty = 0x17021ff0; + testHeader.nonce = 0x2e425c33; + + const uint8_t *raw = (const uint8_t *)&testHeader; + bool nonceEndianOk = (raw[76] == 0x33 && raw[77] == 0x5c && raw[78] == 0x42 && raw[79] == 0x2e); + Serial.printf("[MINER-TEST] nonce endian %s (bytes=%02x %02x %02x %02x)\n", + nonceEndianOk ? "OK" : "FAIL", raw[76], raw[77], raw[78], raw[79]); + + bool syntheticOk = run_midstate_vector_test( + "synthetic", + &testHeader, + NULL, + "a5a91b4098f5228d9fd3aa5f9681b4d44e342fb53d4d87d605ba2af9d76903a1" + ); + bool preparedSyntheticOk = run_prepared_equivalence_nonce_window("synthetic-prepared", &testHeader, 0x00000000u, 10); + + block_header_t liveHeader; + bool liveParsed = parse_header_hex_80( + "000000204ba6eb671af350c5b183bd2467497eca61b4f61eb88901000000000000000000acbb08e6dbac14cb5b23e07499d2f4686972836e871ce6015843d870138514b5cf33026af01f021700000e08", + &liveHeader + ); + bool liveOk = false; + if (liveParsed) { + liveOk = run_midstate_vector_test( + "live-captured", + &liveHeader, + "94808b8a7f5b7e8382ef9590abc201ac078963f1ceeb7ee9efa9ce9277349bed", + "8f155083f7a147506d9c7726eb8058ed423f583ebab45e28daefc7ab25ac8715" + ); + } else { + Serial.println("[MINER-TEST] live-captured parse FAIL"); + } + bool preparedLiveOk = liveParsed && run_prepared_equivalence_nonce_window("live-prepared", &liveHeader, 0x00000000u, 10); + + // oneBlockIvOk includes restore+continue diagnostics, which are intentionally + // unsupported for the stable backend on S3. Keep this visible but non-gating. + Serial.printf("[MINER-TEST] S3 one-block IV diagnostics %s (non-gating)\n", oneBlockIvOk ? "PASS" : "FAIL"); + Serial.printf("[MINER-TEST] S3 second-SHA path diagnostics %s\n", secondShaPathsOk ? 
"PASS" : "FAIL"); + + overallOk = secondShaPathsOk && nonceEndianOk && syntheticOk && preparedSyntheticOk && liveParsed && liveOk && preparedLiveOk; + Serial.printf("[MINER-TEST] software-midstate self-test %s\n", overallOk ? "PASS" : "FAIL"); + + return overallOk; +} +#endif + void miner_init() { s_jobMutex = xSemaphoreCreateMutex(); s_shaMutex = xSemaphoreCreateMutex(); // For dual-core hardware SHA sharing @@ -394,8 +1022,78 @@ void miner_init() { // Run DMA-based SHA test at startup sha256_s3_dma_test(); - Serial.println("[MINER] Initialized (Hardware SHA-256 via direct register access)"); - Serial.println("[MINER] Dual-core hardware SHA sharing enabled"); + log_line("[MINER] Initialized"); +#if defined(CONFIG_IDF_TARGET_ESP32S3) + log_line("[MINER] Backend: S3 software-midstate dual-core"); + log_line("[MINER] Active hash loop: software double-SHA256 on both cores"); + log_line("[MINER] HW SHA restore/injection: unsupported for correct mining path on ESP32-S3"); + log_line("[MINER] DMA hot path: inactive"); + log_linef("[MINER] Core0 nonce range: 0x%08lx+ | Core1 nonce range: 0x%08lx+", + s_startNonce[0], s_startNonce[1]); +#else + log_line("[MINER] Backend: dual-core hardware SHA sharing"); +#endif + +#ifdef DEBUG_SHARE_VALIDATION + Serial.println("[BUILD] DEBUG_SHARE_VALIDATION enabled"); +#else + Serial.println("[BUILD] DEBUG_SHARE_VALIDATION disabled"); +#endif + +#if S3_DEBUG_NONCE_WINDOW_ENABLE + const uint32_t dbgStartSwapped = (uint32_t)S3_DEBUG_NONCE_WINDOW_START_SWAPPED; + const uint32_t dbgEndSwapped = (uint32_t)S3_DEBUG_NONCE_WINDOW_END_SWAPPED; + const uint32_t dbgStartNative = __builtin_bswap32(dbgStartSwapped); + const uint32_t dbgEndNative = __builtin_bswap32(dbgEndSwapped); + const uint64_t dbgExpected = (uint64_t)dbgEndSwapped - (uint64_t)dbgStartSwapped + 1ULL; + const bool dbgRoundTrip = (__builtin_bswap32(__builtin_bswap32(dbgStartSwapped)) == dbgStartSwapped); + const bool dbgNativeMonotonic = (__builtin_bswap32(dbgStartSwapped + 1U) == 
(dbgStartNative + 1U)); + + Serial.println("[BUILD] S3_DEBUG_NONCE_WINDOW_ENABLE enabled"); + Serial.printf("[BUILD] S3 window swapped_start=%08x swapped_end=%08x expected_hashes=%llu\n", + dbgStartSwapped, dbgEndSwapped, dbgExpected); + Serial.printf("[BUILD] S3 window native_start=%08x native_end=%08x\n", + dbgStartNative, dbgEndNative); + Serial.printf("[BUILD] bswap roundtrip=%s, native_monotonic_assumption=%s\n", + dbgRoundTrip ? "PASS" : "FAIL", + dbgNativeMonotonic ? "PASS" : "FAIL"); + if (!dbgNativeMonotonic) { + Serial.println("[BUILD] NOTE: Native nonce ordering is not monotonic under swapped-space increments (expected). Use swapped bounds for deterministic windows."); + } + if (dbgExpected <= 0x11ULL) { + Serial.printf("[BUILD] Tiny S3 window sanity: expected hashes=%llu\n", dbgExpected); + } + +#if S3_DEBUG_NONCE_WINDOW_TINY_SELFTEST_ENABLE + const uint32_t tinyStartSwapped = (uint32_t)S3_DEBUG_NONCE_WINDOW_TINY_START_SWAPPED; + const uint32_t tinyEndSwapped = (uint32_t)S3_DEBUG_NONCE_WINDOW_TINY_END_SWAPPED; + const uint64_t tinyExpected = (uint64_t)tinyEndSwapped - (uint64_t)tinyStartSwapped + 1ULL; + Serial.printf("[BUILD] Tiny one-shot self-test enabled: swapped_start=%08x swapped_end=%08x expected_hashes=%llu\n", + tinyStartSwapped, tinyEndSwapped, tinyExpected); +#else + Serial.println("[BUILD] Tiny one-shot self-test disabled"); +#endif +#else + Serial.println("[BUILD] S3_DEBUG_NONCE_WINDOW_ENABLE disabled"); +#endif + +#ifdef DEBUG_SHARE_VALIDATION + bool midstateChecksOk = run_debug_regression_checks(); +#if defined(CONFIG_IDF_TARGET_ESP32S3) + if (midstateChecksOk) { + Serial.println("[MINER] software-midstate self-test: PASS"); + } else { + Serial.println("[MINER] software-midstate self-test: FAIL (check debug logs)"); + } + Serial.println("[MINER] HW midstate restore: unsupported; continuing on validated software-midstate backend"); +#endif +#endif + +#if defined(CONFIG_IDF_TARGET_ESP32S3) && S3_HW_EXPERIMENTS_ENABLE && 
defined(DEBUG_SHARE_VALIDATION) + // S3 experiment harness currently depends on debug-validation helpers. + // Enable DEBUG_SHARE_VALIDATION in the board env to run this on every boot. + run_s3_hw_experiments(); +#endif #ifdef BENCHMARK_SHA_VERSIONS run_sha_benchmark(); @@ -406,7 +1104,11 @@ void miner_start_job(const stratum_job_t *job) { if (!job) return; // Wait for any active mining to stop + bool wasActive = s_miningActive; s_miningActive = false; + if (wasActive) { + s_jobChanges++; // Track job invalidations (hashes in flight are discarded) + } while (s_core0Mining || s_core1Mining) { vTaskDelay(10 / portTICK_PERIOD_MS); } @@ -448,14 +1150,23 @@ void miner_start_job(const stratum_job_t *job) { bits_to_target(s_pendingBlock.difficulty, s_blockTarget); setPoolTarget(); - // Random nonce start points for each core - s_startNonce[0] = esp_random(); - s_startNonce[1] = s_startNonce[0] + 0x80000000; + // Nonce split: Core0 starts in the lower half [0..7FFFFFFF], Core1 starts in the upper half [80000000..FFFFFFFF]. + // Both starts derive from one random value so successive jobs do not keep re-testing the same nonce region. + // The two starts are always at least 2^30 nonces apart, so the ranges cannot overlap within a realistic job lifetime, + // but a core that starts near the top of its half can step past the halfway boundary. Each half contains ~2.1 billion unique nonces.
+ uint32_t r = esp_random(); + s_startNonce[0] = r & 0x7FFFFFFFu; // Core0: lower half, random start + s_startNonce[1] = 0x80000000u | (r >> 1); // Core1: upper half, random start s_stats.templates++; xSemaphoreGive(s_jobMutex); + Serial.printf("[MINER] Job #%lu %s | Core0=0x%08lx (lo) Core1=0x%08lx (hi) | total_jobs=%lu\n", + (unsigned long)s_stats.templates, + s_currentJobId, + s_startNonce[0], s_startNonce[1], + (unsigned long)s_stats.templates); s_miningActive = true; } @@ -489,28 +1200,36 @@ void miner_set_extranonce(const char *extraNonce1, int extraNonce2Size) { } // ============================================================ -// Mining Task - Core 0 (Hybrid: Hardware SHA when available, Software fallback) +// Mining Task - Core 0 // ============================================================ void miner_task_core0(void *param) { block_header_t hb; sha256_hash_t ctx; sha256_hash_t sw_midstate; // Software midstate for fallback - uint32_t hw_midstate[8]; // Hardware midstate for opportunistic HW SHA char jobId[MAX_JOB_ID_LEN]; uint32_t minerId = 0; uint32_t yieldCounter = 0; - uint32_t hwHashes = 0; // Track hardware SHA usage - uint32_t swHashes = 0; // Track software SHA usage - Serial.printf("[MINER0] Started on core %d (HYBRID HW/SW SHA, priority %d)\n", - xPortGetCoreID(), uxTaskPriorityGet(NULL)); + #if defined(CONFIG_IDF_TARGET_ESP32S3) + log_wait_startup_barrier(); + log_linef("[MINER0] Started on core %d (S3 SOFTWARE-MIDSTATE, nonce-lo, priority %d)", + xPortGetCoreID(), uxTaskPriorityGet(NULL)); + #else + log_wait_startup_barrier(); + log_linef("[MINER0] Started on core %d (HW/SW mixed SHA path, priority %d)", + xPortGetCoreID(), uxTaskPriorityGet(NULL)); + #endif // Wait for first job while (!s_miningActive) { vTaskDelay(100 / portTICK_PERIOD_MS); } - Serial.println("[MINER0] Got first job, starting hybrid mining (HW when Core 1 yields)"); + #if defined(CONFIG_IDF_TARGET_ESP32S3) + log_line("[MINER0] Got first job, starting software-midstate 
mining"); + #else + log_line("[MINER0] Got first job, starting HW/SW mixed mining"); + #endif while (true) { if (!s_miningActive) { @@ -521,47 +1240,114 @@ void miner_task_core0(void *param) { s_core0Mining = true; - // Copy job data under mutex + // ======================================================== + // JOB INITIALIZATION (once per stratum.notify) + // ======================================================== + + // Copy job data under mutex - both Core0 and Core1 do this + // independently (with deterministic nonce split) xSemaphoreTake(s_jobMutex, portMAX_DELAY); memcpy(&hb, &s_pendingBlock, sizeof(block_header_t)); strncpy(jobId, s_currentJobId, MAX_JOB_ID_LEN); hb.nonce = s_startNonce[minerId]; xSemaphoreGive(s_jobMutex); - // Always compute SOFTWARE midstate (for fallback and verification) + // ======================================================== + // MIDSTATE COMPUTATION (once per job, reused for all nonces) + // ======================================================== + // + // Compute SHA-256 midstate from first 64 bytes of block header + // (version, prev_hash, first 28 bytes of merkle_root). This saves one of the three + // SHA-256 compressions per nonce since we only process the tail (16 bytes) in the loop.
+ // + // Work: ~1-2 microseconds (one SHA-256 compression block) + // Reuse: Same midstate used for 2^32 nonces (infinite per job) + // + // Why software (not hardware): + // - Hardware SHA requires mutex (contends with Core1) + // - Software path is deterministic and has no synchronization + // - Macro-unrolled 64 rounds are fast and cache-friendly + // - See SHA256_BACKEND_INVESTIGATION.md for detailed analysis + // miner_sha256_midstate(&sw_midstate, &hb); - // Prepare byte-swapped header for hardware SHA - uint32_t header_swapped[20]; - uint32_t *header_words = (uint32_t *)&hb; - for (int i = 0; i < 20; i++) { - header_swapped[i] = __builtin_bswap32(header_words[i]); - } - - // Try to compute hardware midstate if we can grab the mutex - bool hasHwMidstate = false; - if (!s_core1HasSha && xSemaphoreTake(s_shaMutex, 0) == pdTRUE) { - sha256_ll_acquire(); - sha256_ll_midstate(hw_midstate, (const uint8_t *)header_swapped); - sha256_ll_release(); - xSemaphoreGive(s_shaMutex); - hasHwMidstate = true; - } + // ======================================================== + // MINING HOT LOOP (per nonce - millions of iterations) + // ======================================================== + // + // This is the tight loop that runs ~1 million times per second + // on dual core. Every microsecond counts here. + // + // Nonce range (deterministic, no overlap with Core1): + // Core0: 0x00000000 - 0x7FFFFFFF (lower half) + // Core1: 0x80000000 - 0xFFFFFFFF (upper half) + // + // Algorithm per nonce: + // 1. Complete double SHA-256 using midstate (skip first 64 bytes) + // 2. Check 16-bit prefilter (bytes[31] and [30] must be 0) + // 3. If prefilter passes, check full target and submit if valid + // 4. Increment nonce + // 5. Repeat + // + // Performance: ~4-5 microseconds per nonce (two full SHA compressions) + // No locks, no synchronization, deterministic latency. 
+ // while (s_miningActive) { - // Pure software SHA - no hardware contention with Core 1 + // ===== THE MINING HOT PATH ===== + // + // miner_sha256_header() does: + // 1. Load tail block (last 16 bytes + nonce + padding) + // 2. Expand message schedule (compute w[16..63]) + // 3. Process 64 compression rounds from midstate + // 4. Finalize first SHA-256 (H0-H7) + // 5. Feed result through second SHA-256 (64 rounds) + // 6. Return final hash with 16-bit prefilter check + // + // Returns true if hash passes 16-bit test (rare, ~1 in 65536) + // Only then do we spend time on full target check. + // if (miner_sha256_header(&sw_midstate, &ctx, &hb)) { + // Potential valid share found (passed 16-bit prefilter) + // Now check full target and submit if valid hashCheck(jobId, &ctx, hb.timestamp, hb.nonce); } - hb.nonce++; - s_stats.hashes++; - s_core0Hashes++; // DEBUG: Track Core 0 contribution - yieldCounter++; - // Yield every 256 hashes to let monitor/WiFi tasks run + hb.nonce++; // Increment nonce for next iteration + s_stats.hashes++; // Update global statistics + s_core0Hashes++; // Core0-specific counter + yieldCounter++; // Track yields to prevent watchdog starvation + + // Nonce range boundary check: Core0 starts in the lower half [0..7FFFFFFF]. + // Stepping into the upper half means Core0 is now hashing nonces from Core1's half. + // Core0 starts at a random nonce inside its half, so this can happen at any point in a job; log when it occurs.
+ if (hb.nonce == 0x80000000u) { + Serial.printf("[MINER0] WARN: Core0 nonce wrapped into Core1 range (nonce=%08lx)\n", hb.nonce); + } + + // Yield every CORE_0_YIELD_COUNT hashes to let monitor/WiFi tasks run if (yieldCounter >= CORE_0_YIELD_COUNT) { yieldCounter = 0; - vTaskDelay(1); // Must use vTaskDelay(1), not taskYIELD() +#ifdef DEBUG_HASH_TIMING + { + static uint32_t s_c0_t0 = 0; + static uint64_t s_c0_h0 = 0; + uint32_t t = (uint32_t)micros(); + uint64_t h = s_core0Hashes; + if (s_c0_t0 != 0) { + uint32_t dt = t - s_c0_t0; + uint64_t dh = h - s_c0_h0; + if (dh > 0 && dt > 0) { + Serial.printf("[MINER0-T] %llu h/%u us=%.1fus/h=%.0fH/s\n", + dh, dt, (float)dt / (float)dh, + (float)dh * 1000000.0f / (float)dt); + } + } + s_c0_t0 = t; + s_c0_h0 = h; + } +#endif + vTaskDelay(1); } } @@ -620,8 +1406,9 @@ void miner_task_core1(void *param) { char jobId[MAX_JOB_ID_LEN]; uint32_t minerId = 1; - Serial.printf("[MINER1] Started on core %d (PIPELINED ASM v3, priority %d)\n", - xPortGetCoreID(), uxTaskPriorityGet(NULL)); + log_wait_startup_barrier(); + log_linef("[MINER1] Started on core %d (PIPELINED ASM v3, priority %d)", + xPortGetCoreID(), uxTaskPriorityGet(NULL)); // Enable SHA peripheral clock and clear reset DPORT_REG_SET_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_SHA); @@ -631,7 +1418,7 @@ void miner_task_core1(void *param) { while (!s_miningActive) { vTaskDelay(100 / portTICK_PERIOD_MS); } - Serial.println("[MINER1] Got first job, starting pipelined mining v3"); + log_line("[MINER1] Got first job, starting pipelined mining v3"); // SHA peripheral base address volatile uint32_t *sha_base = (volatile uint32_t *)0x3FF03000; // SHA_TEXT_BASE @@ -740,151 +1527,97 @@ void miner_task_core1(void *param) { } #elif defined(CONFIG_IDF_TARGET_ESP32S3) -#include // For esp_sha_acquire/release_hardware -// ESP32-S3: Optimized pipelined assembly mining with MIDSTATE CACHING (v2) -// Key optimizations: -// 1. Hardware midstate computed ONCE per job (not per nonce!) -// 2. 
Block 2 template prepared once, only nonce changes -// 3. Double-hash padding leverages zeros from block 2 +// ============================================================ +// Mining Task - Core 1 (ESP32-S3 software-midstate) +// ============================================================ +// Architecture: Both cores use validated software SHA-256 +// - Nonce split: Core0 = 0x00000000 to 0x7fffffff, Core1 = 0x80000000 to 0xffffffff +// - Synchronization: Job mutex protects header/job data +// - SHA: Both cores compute independently (no hardware sharing contention) +// - Performance instrumentation: Per-core hash counters + timing void miner_task_core1(void *param) { block_header_t hb; - block_header_t hbVerify; // BitsyMiner pattern: keep UNSWAPPED copy for verification sha256_hash_t ctx; - sha256_hash_t sw_midstate; // SOFTWARE midstate for verification - uint32_t hw_midstate[8]; // HARDWARE midstate for mining (NEW!) + sha256_hash_t sw_midstate; // SOFTWARE midstate (same as Core0, validated path) char jobId[MAX_JOB_ID_LEN]; uint32_t minerId = 1; + uint32_t yieldCounter = 0; - Serial.printf("[MINER1] Started on core %d (S3 Optimized ASM v2 + Midstate Cache, priority %d)\n", - xPortGetCoreID(), uxTaskPriorityGet(NULL)); - - // Initialize S3 pipelined SHA hardware - sha256_pipelined_s3_init(); + log_wait_startup_barrier(); + log_linef("[MINER1] Started on core %d (S3 SOFTWARE-MIDSTATE, nonce-hi, priority %d)", + xPortGetCoreID(), uxTaskPriorityGet(NULL)); // Wait for first job while (!s_miningActive) { vTaskDelay(100 / portTICK_PERIOD_MS); } - Serial.println("[MINER1] Got first job, starting S3 optimized assembly mining (v2 with midstate)"); + log_linef("[MINER1] Got first job, starting software-midstate mining (nonce range 0x%08lx+)", + s_startNonce[minerId]); while (true) { if (!s_miningActive) { + s_core1Mining = false; vTaskDelay(100 / portTICK_PERIOD_MS); continue; } s_core1Mining = true; - // Copy job data + // Copy job data under mutex (same as Core0) 
xSemaphoreTake(s_jobMutex, portMAX_DELAY); memcpy(&hb, &s_pendingBlock, sizeof(block_header_t)); - memcpy(&hbVerify, &s_pendingBlock, sizeof(block_header_t)); // Keep UNSWAPPED for verification! strncpy(jobId, s_currentJobId, MAX_JOB_ID_LEN); + hb.nonce = s_startNonce[minerId]; xSemaphoreGive(s_jobMutex); - // BitsyMiner pattern: Compute SOFTWARE midstate on UNSWAPPED header (for verification) - miner_sha256_midstate(&sw_midstate, &hbVerify); - - // ======================================== - // BYTESWAP32 all 20 words of header for hardware SHA - // ======================================== - uint32_t header_swapped[20]; - uint32_t *header_words = (uint32_t *)&hb; - for (int i = 0; i < 20; i++) { - header_swapped[i] = __builtin_bswap32(header_words[i]); - } - - // ======================================== - // OPTIMIZATION v3: Compute hardware midstate ONCE per job! - // Also initialize persistent zeros in SHA_TEXT - // ======================================== - esp_sha_acquire_hardware(); - sha256_s3_compute_midstate(header_swapped, hw_midstate); - sha256_s3_init_zeros(); // Set persistent zeros for block 2 padding - - // Prepare block 2 template (words 16-18: last 4 bytes merkle, timestamp, nbits) - // Word 19 (nonce) will be set per iteration - uint32_t block2_template[3]; - block2_template[0] = header_swapped[16]; // merkle_root tail (swapped) - block2_template[1] = header_swapped[17]; // timestamp (swapped) - block2_template[2] = header_swapped[18]; // nbits (swapped) - - // Nonce in big-endian format for hardware SHA - uint32_t nonce_swapped = __builtin_bswap32(s_startNonce[minerId]); - - #ifdef DEBUG_MINING - Serial.printf("[S3-V3] Midstate cached, zeros persistent, starting batched-copy loop\n"); - static uint32_t s3_call_count = 0; - uint64_t hashes_before = s_stats.hashes; - #endif + // Compute SOFTWARE midstate (validated, working, same as Core0) + miner_sha256_midstate(&sw_midstate, &hb); + // Mining loop: pure software SHA, no contention with Core0 while 
(s_miningActive) { - // Run ULTRA-OPTIMIZED pipelined assembly mining loop (v3) - // - Midstate restore (same as v2) - // - Batched register loads for SHA_H copy (pipeline memory) - // - Persistent zeros (skip writing 10 zeros per iteration) - #ifdef DEBUG_MINING - s3_call_count++; - #endif - - bool candidate = sha256_pipelined_mine_s3_v3( - hw_midstate, - block2_template, - &nonce_swapped, - &s_stats.hashes, - &s_miningActive - ); - - #ifdef DEBUG_MINING - if ((s3_call_count & 0x7FFFF) == 0) { // Every ~512K calls - uint64_t hashes_now = s_stats.hashes; - Serial.printf("[S3-V3] calls=%u, hashes=%llu\n", s3_call_count, hashes_now); + // Perform double-SHA256 with software midstate + if (miner_sha256_header(&sw_midstate, &ctx, &hb)) { + hashCheck(jobId, &ctx, hb.timestamp, hb.nonce); } - #endif - - if (!s_miningActive) break; - - if (candidate) { - // BitsyMiner pattern: The assembly incremented nonce BEFORE exiting - uint32_t candidate_nonce_swapped = nonce_swapped - 1; - uint32_t candidate_nonce_native = __builtin_bswap32(candidate_nonce_swapped); - - // Debug logging for S3 share validation investigation (Issue #5) - #if defined(CONFIG_IDF_TARGET_ESP32S3) || defined(DEBUG_SHARE_VALIDATION) - Serial.printf("[S3-DBG] Candidate found! nonce_swapped=%08x native=%08x\n", - candidate_nonce_swapped, candidate_nonce_native); - #endif - - // BitsyMiner CRITICAL: Verify with SOFTWARE SHA on UNSWAPPED header - hbVerify.nonce = candidate_nonce_native; - bool swVerified = miner_sha256_header(&sw_midstate, &ctx, &hbVerify); - - // Debug logging for S3 share validation investigation (Issue #5) - #if defined(CONFIG_IDF_TARGET_ESP32S3) || defined(DEBUG_SHARE_VALIDATION) - Serial.printf("[S3-DBG] SW verify=%s hash[28-31]=%02x%02x%02x%02x\n", - swVerified ? 
"PASS" : "FAIL", - ctx.bytes[28], ctx.bytes[29], ctx.bytes[30], ctx.bytes[31]); - #endif + hb.nonce++; + s_stats.hashes++; + s_core1Hashes++; + yieldCounter++; - if (swVerified) { - hashCheck(jobId, &ctx, hbVerify.timestamp, candidate_nonce_native); - } + // Nonce range boundary check: Core1 owns upper half [80000000..FFFFFFFF]. + // Wrapping through 0 would duplicate Core0's nonce range. + // At current hashrates this takes ~25 days; log if it somehow occurs. + if (hb.nonce == 0x00000000u) { + Serial.printf("[MINER1] WARN: Core1 nonce wrapped through 0 into Core0 range\n"); } - // Yield periodically to prevent WDT - // The ASM function returns every ~65k hashes (on partial match), - // so we yield every 16 iterations (approx 1M hashes) - static uint32_t loop_iter = 0; - if (++loop_iter >= 16) { - loop_iter = 0; - esp_sha_release_hardware(); + // Yield every CORE_0_YIELD_COUNT hashes to let monitor/WiFi tasks run + if (yieldCounter >= CORE_0_YIELD_COUNT) { + yieldCounter = 0; +#ifdef DEBUG_HASH_TIMING + { + static uint32_t s_c1_t0 = 0; + static uint64_t s_c1_h0 = 0; + uint32_t t = (uint32_t)micros(); + uint64_t h = s_core1Hashes; + if (s_c1_t0 != 0) { + uint32_t dt = t - s_c1_t0; + uint64_t dh = h - s_c1_h0; + if (dh > 0 && dt > 0) { + Serial.printf("[MINER1-T] %llu h/%u us=%.1fus/h=%.0fH/s\n", + dh, dt, (float)dt / (float)dh, + (float)dh * 1000000.0f / (float)dt); + } + } + s_c1_t0 = t; + s_c1_h0 = h; + } +#endif vTaskDelay(1); - esp_sha_acquire_hardware(); } } - esp_sha_release_hardware(); s_core1Mining = false; vTaskDelay(20 / portTICK_PERIOD_MS); } @@ -899,14 +1632,15 @@ void miner_task_core1(void *param) { char jobId[MAX_JOB_ID_LEN]; uint32_t minerId = 1; - Serial.printf("[MINER1] Started on core %d (Hardware SHA Midstate, priority %d)\n", - xPortGetCoreID(), uxTaskPriorityGet(NULL)); + log_wait_startup_barrier(); + log_linef("[MINER1] Started on core %d (Hardware SHA Midstate, priority %d)", + xPortGetCoreID(), uxTaskPriorityGet(NULL)); // Wait for first job 
while (!s_miningActive) { vTaskDelay(100 / portTICK_PERIOD_MS); } - Serial.println("[MINER1] Got first job, starting mining loop"); + log_line("[MINER1] Got first job, starting mining loop"); while (true) { if (!s_miningActive) { diff --git a/src/mining/miner.h b/src/mining/miner.h index 84c885d..3b51cf4 100644 --- a/src/mining/miner.h +++ b/src/mining/miner.h @@ -18,6 +18,17 @@ #include "sha256_hw.h" #include "../stratum/stratum_types.h" +typedef struct { + const char *chip; + const char *miningBackend; + bool hwShaAvailable; + bool hwShaHotLoop; + bool softwareMidstate; + bool dmaHotPath; + bool midstateRestoreSupported; + bool nonceSplitCore0LowCore1High; +} miner_backend_info_t; + /** * Initialize mining subsystem * - Disables watchdog timer @@ -51,14 +62,17 @@ bool miner_is_running(); mining_stats_t* miner_get_stats(); /** - * Mining task for Core 0 (software SHA, lower priority) + * Mining task for Core 0 + * - ESP32-S3: software midstate-complete path (nonce low half) + * - ESP32: hybrid/software helper path * Yields periodically to allow WiFi/Stratum/Display tasks */ void miner_task_core0(void *param); /** * Mining task for Core 1 (dedicated, high priority) - * Uses pipelined SHA for maximum throughput + * - ESP32-S3: software midstate-complete path (nonce high half) + * - ESP32: pipelined SHA hardware path */ void miner_task_core1(void *param); @@ -77,4 +91,10 @@ double miner_get_difficulty(); */ void miner_set_extranonce(const char *extraNonce1, int extraNonce2Size); +/** + * Backend capabilities and active-mode summary. + * Single source of truth for startup/status reporting. 
+ */ +const miner_backend_info_t *miner_get_backend_info(); + #endif // MINER_H diff --git a/src/mining/miner_sha256.cpp b/src/mining/miner_sha256.cpp index e8bbfc9..77a1275 100644 --- a/src/mining/miner_sha256.cpp +++ b/src/mining/miner_sha256.cpp @@ -14,6 +14,8 @@ #define BYTESWAP32(z) ((uint32_t)((z&0xFF)<<24|((z>>8)&0xFF)<<16|((z>>16)&0xFF)<<8|((z>>24)&0xFF))) #define RROT(v, s) ((v)>>(s) | (v)<<(32-(s))) +#define SSIG0(v) (RROT((v),7) ^ RROT((v),18) ^ ((v) >> 3)) +#define SSIG1(v) (RROT((v),17) ^ RROT((v),19) ^ ((v) >> 10)) #define R1_a(i) (w[i] = w[i-16] + (RROT(w[i-15],7) ^ (RROT(w[i-15],18) ^ (w[i-15] >> 3))) + ((RROT(w[i-2],17) ^ RROT(w[i-2],19) ^ (w[i-2] >> 10)))) #define R1(i) (w[i] = w[i-16] + (RROT(w[i-15],7) ^ (RROT(w[i-15],18) ^ (w[i-15] >> 3))) + w[i-7] + ((RROT(w[i-2],17) ^ RROT(w[i-2],19) ^ (w[i-2] >> 10)))) @@ -60,22 +62,62 @@ static uint32_t k[] = { 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 }; +static const WORD kTailPadWord = 0x80000000u; +static const WORD kTailLenWord = 0x00000280u; +static const WORD kSigma0PadWord = 0x11002000u; // sigma0(0x80000000) +static const WORD kSigma0LenWord = 0x00a00055u; // sigma0(0x00000280) +static const WORD kSigma1LenWord = 0x01100000u; // sigma1(0x00000280) + +#ifndef MINER_EXPERIMENTAL_COMPRESSOR +#define MINER_EXPERIMENTAL_COMPRESSOR 0 +#endif + +#if MINER_EXPERIMENTAL_COMPRESSOR +#define BSIG0(v) (RROT((v), 2) ^ RROT((v), 13) ^ RROT((v), 22)) +#define BSIG1(v) (RROT((v), 6) ^ RROT((v), 11) ^ RROT((v), 25)) +#define CHX(e,f,g) (((e) & (f)) ^ (~(e) & (g))) +#define MAJX(a,b,c) (((a) & (b)) ^ ((a) & (c)) ^ ((b) & (c))) + +static IRAM_ATTR void sha256_compress_64_experimental( + WORD *a, + WORD *b, + WORD *c, + WORD *d, + WORD *e, + WORD *f, + WORD *g, + WORD *h, + const WORD *w +) { + WORD ra = *a, rb = *b, rc = *c, rd = *d; + WORD re = *e, rf = *f, rg = *g, rh = *h; + + for (int i = 0; i < 64; i++) { + const WORD t1 = rh + BSIG1(re) + CHX(re, rf, rg) + 
k[i] + w[i]; + const WORD t2 = BSIG0(ra) + MAJX(ra, rb, rc); + rh = rg; + rg = rf; + rf = re; + re = rd + t1; + rd = rc; + rc = rb; + rb = ra; + ra = t1 + t2; + } + + *a = ra; *b = rb; *c = rc; *d = rd; + *e = re; *f = rf; *g = rg; *h = rh; +} +#endif -static void sha256_transform(sha256_hash_t *ctx, uint8_t *msg) { + +static void IRAM_ATTR sha256_transform(sha256_hash_t *ctx, uint8_t *msg) { WORD w[64]; WORD temp1, temp2; WORD i, j; - WORD WA[8] = { - ctx->hash[0], - ctx->hash[1], - ctx->hash[2], - ctx->hash[3], - ctx->hash[4], - ctx->hash[5], - ctx->hash[6], - ctx->hash[7] - }; + WORD a = ctx->hash[0], b = ctx->hash[1], c = ctx->hash[2], d = ctx->hash[3]; + WORD e = ctx->hash[4], f = ctx->hash[5], g = ctx->hash[6], h = ctx->hash[7]; // Copy chunk into first 16 words w[0..15] of the message schedule array for (i = 0, j = 0; i < 16; ++i, j += 4) { @@ -88,90 +130,26 @@ static void sha256_transform(sha256_hash_t *ctx, uint8_t *msg) { R1(46); R1(47); R1(48); R1(49); R1(50); R1(51); R1(52); R1(53); R1(54); R1(55); R1(56); R1(57); R1(58); R1(59); R1(60); R1(61); R1(62); R1(63); - CM(0, 1, 2, 3, 4, 5, 6, 7, 0); - CM(7, 0, 1, 2, 3, 4, 5, 6, 1); - CM(6, 7, 0, 1, 2, 3, 4, 5, 2); - CM(5, 6, 7, 0, 1, 2, 3, 4, 3); - CM(4, 5, 6, 7, 0, 1, 2, 3, 4); - CM(3, 4, 5, 6, 7, 0, 1, 2, 5); - CM(2, 3, 4, 5, 6, 7, 0, 1, 6); - CM(1, 2, 3, 4, 5, 6, 7, 0, 7); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 8); - CM(7, 0, 1, 2, 3, 4, 5, 6, 9); - CM(6, 7, 0, 1, 2, 3, 4, 5, 10); - CM(5, 6, 7, 0, 1, 2, 3, 4, 11); - CM(4, 5, 6, 7, 0, 1, 2, 3, 12); - CM(3, 4, 5, 6, 7, 0, 1, 2, 13); - CM(2, 3, 4, 5, 6, 7, 0, 1, 14); - CM(1, 2, 3, 4, 5, 6, 7, 0, 15); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 16); - CM(7, 0, 1, 2, 3, 4, 5, 6, 17); - CM(6, 7, 0, 1, 2, 3, 4, 5, 18); - CM(5, 6, 7, 0, 1, 2, 3, 4, 19); - CM(4, 5, 6, 7, 0, 1, 2, 3, 20); - CM(3, 4, 5, 6, 7, 0, 1, 2, 21); - CM(2, 3, 4, 5, 6, 7, 0, 1, 22); - CM(1, 2, 3, 4, 5, 6, 7, 0, 23); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 24); - CM(7, 0, 1, 2, 3, 4, 5, 6, 25); - CM(6, 7, 0, 1, 2, 3, 4, 5, 
26); - CM(5, 6, 7, 0, 1, 2, 3, 4, 27); - CM(4, 5, 6, 7, 0, 1, 2, 3, 28); - CM(3, 4, 5, 6, 7, 0, 1, 2, 29); - CM(2, 3, 4, 5, 6, 7, 0, 1, 30); - CM(1, 2, 3, 4, 5, 6, 7, 0, 31); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 32); - CM(7, 0, 1, 2, 3, 4, 5, 6, 33); - CM(6, 7, 0, 1, 2, 3, 4, 5, 34); - CM(5, 6, 7, 0, 1, 2, 3, 4, 35); - CM(4, 5, 6, 7, 0, 1, 2, 3, 36); - CM(3, 4, 5, 6, 7, 0, 1, 2, 37); - CM(2, 3, 4, 5, 6, 7, 0, 1, 38); - CM(1, 2, 3, 4, 5, 6, 7, 0, 39); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 40); - CM(7, 0, 1, 2, 3, 4, 5, 6, 41); - CM(6, 7, 0, 1, 2, 3, 4, 5, 42); - CM(5, 6, 7, 0, 1, 2, 3, 4, 43); - CM(4, 5, 6, 7, 0, 1, 2, 3, 44); - CM(3, 4, 5, 6, 7, 0, 1, 2, 45); - CM(2, 3, 4, 5, 6, 7, 0, 1, 46); - CM(1, 2, 3, 4, 5, 6, 7, 0, 47); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 48); - CM(7, 0, 1, 2, 3, 4, 5, 6, 49); - CM(6, 7, 0, 1, 2, 3, 4, 5, 50); - CM(5, 6, 7, 0, 1, 2, 3, 4, 51); - CM(4, 5, 6, 7, 0, 1, 2, 3, 52); - CM(3, 4, 5, 6, 7, 0, 1, 2, 53); - CM(2, 3, 4, 5, 6, 7, 0, 1, 54); - CM(1, 2, 3, 4, 5, 6, 7, 0, 55); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 56); - CM(7, 0, 1, 2, 3, 4, 5, 6, 57); - CM(6, 7, 0, 1, 2, 3, 4, 5, 58); - CM(5, 6, 7, 0, 1, 2, 3, 4, 59); - CM(4, 5, 6, 7, 0, 1, 2, 3, 60); - CM(3, 4, 5, 6, 7, 0, 1, 2, 61); - CM(2, 3, 4, 5, 6, 7, 0, 1, 62); - CM(1, 2, 3, 4, 5, 6, 7, 0, 63); - - ctx->hash[0] += WA[0]; - ctx->hash[1] += WA[1]; - ctx->hash[2] += WA[2]; - ctx->hash[3] += WA[3]; - ctx->hash[4] += WA[4]; - ctx->hash[5] += WA[5]; - ctx->hash[6] += WA[6]; - ctx->hash[7] += WA[7]; + C1(0);C1(1);C1(2);C1(3);C1(4);C1(5);C1(6);C1(7);C1(8);C1(9); + C1(10);C1(11);C1(12);C1(13);C1(14);C1(15);C1(16);C1(17);C1(18);C1(19); + C1(20);C1(21);C1(22);C1(23);C1(24);C1(25);C1(26);C1(27);C1(28);C1(29); + C1(30);C1(31);C1(32);C1(33);C1(34);C1(35);C1(36);C1(37);C1(38);C1(39); + C1(40);C1(41);C1(42);C1(43);C1(44);C1(45);C1(46);C1(47);C1(48);C1(49); + C1(50);C1(51);C1(52);C1(53);C1(54);C1(55);C1(56);C1(57);C1(58);C1(59); + C1(60);C1(61);C1(62);C1(63); + + ctx->hash[0] += a; + ctx->hash[1] += b; + ctx->hash[2] += c; + 
ctx->hash[3] += d; + ctx->hash[4] += e; + ctx->hash[5] += f; + ctx->hash[6] += g; + ctx->hash[7] += h; } -void miner_sha256(sha256_hash_t *ctx, uint8_t *msg, size_t len) { +void IRAM_ATTR miner_sha256(sha256_hash_t *ctx, uint8_t *msg, size_t len) { ctx->hash[0] = h0; ctx->hash[1] = h1; ctx->hash[2] = h2; @@ -223,7 +201,7 @@ void miner_sha256(sha256_hash_t *ctx, uint8_t *msg, size_t len) { } -void miner_sha256_midstate(sha256_hash_t *ctx, block_header_t *hb) { +void IRAM_ATTR miner_sha256_midstate(sha256_hash_t *ctx, block_header_t *hb) { WORD w[64]; WORD temp1, temp2; WORD a, b, c, d, e, f, g, h; @@ -268,22 +246,209 @@ void miner_sha256_midstate(sha256_hash_t *ctx, block_header_t *hb) { ctx->hash[7] += h; } -bool miner_sha256_header(sha256_hash_t *midpoint, sha256_hash_t *ctx, block_header_t *hb) { - sha256_hash_t tmp; +bool IRAM_ATTR miner_sha256_header(sha256_hash_t *midpoint, sha256_hash_t *ctx, block_header_t *hb) { + miner_sha256_complete_from_midstate(midpoint, hb, NULL, ctx); + + // Keep existing fast prefilter semantics for callers that use the bool return. + // Early reject optimization: Check if upper 16 bits of H0 are zero. + // This filters out ~65,536× more hashes before full target comparison. + // Since pool difficulty is typically around 4 billion, this rejects ~99.99% of hashes + // with just a 2-byte comparison instead of a full 32-byte target check. + return (ctx->bytes[31] == 0 && ctx->bytes[30] == 0); +} + +void IRAM_ATTR miner_sha256_prepare_tail_schedule( + sha256_tail_schedule_cache_t *cache, + const block_header_t *hb +) { + const uint8_t *data = (const uint8_t *)hb; + + cache->w0 = GET_DATA(data, 64); + cache->w1 = GET_DATA(data, 68); + cache->w2 = GET_DATA(data, 72); + + cache->w16 = cache->w0 + SSIG0(cache->w1); + cache->w17 = cache->w1 + SSIG0(cache->w2) + kSigma1LenWord; + cache->s1_w16 = SSIG1(cache->w16); + cache->s1_w17 = SSIG1(cache->w17); +} + +/** + * Complete Bitcoin double SHA-256 from pre-computed first-64-byte midstate. 
+ * + * This is the core mining hash function called once per nonce. + * Time: ~4-5 microseconds + * Work: Two full SHA-256 compression blocks (one for tail, one for doubling) + * + * Algorithm: + * 1. Load tail block (last 16 bytes of header + nonce + padding) + * 2. Expand message schedule (w[16..63] from w[0..15]) + * 3. 64 macro-unrolled compression rounds using midstate as initial state + * 4. Finalize first hash (H0-H7 + midstate) + * 5. Byte-swap result to big-endian for second hash input + * 6. Feed through miner_sha256() for second compression + * 7. Return final 32-byte hash in little-endian (for target comparison) + * + * Why software (not hardware): + * - Hardware SHA requires mutex (dual-core contention) + * - Software is single-core per nonce (no synchronization) + * - Macro-unrolled rounds avoid loop overhead + * - No register I/O latency + * - Proven stable and fast in production (~43-45 kH/s) + * + * Alternative hardware path exists but was disabled because: + * - Lock overhead > hardware acceleration benefit + * - Full 80-byte hardware SHA recomputes cached midstate + * - Midstate restore/continue adds register write overhead + * - Detailed analysis: See SHA256_BACKEND_INVESTIGATION.md + * + * @param midpoint Pre-computed midstate from miner_sha256_midstate() + * @param hb Block header with current nonce + * @param firstOut Optional: First SHA-256 result (only used in testing) + * @param secondOut Final double-SHA-256 result + */ +/* + * Inline-only helper: second SHA-256 of a 32-byte first-hash result. + * + * Existing second-SHA optimization (landed before first-tail schedule cache work). + * Replaces miner_sha256(secondOut, tmp.bytes, 32) in the hot path. + * + * Optimizations vs the generic call: + * 1. No byte-array roundtrip – takes raw first-hash words directly as w[0..7] + * 2. Hardcoded padding: w[8..15] = {0x80000000,0,0,0,0,0,0,0x00000100} + * 3. 
Schedule simplified where padding words are 0: + * sigma0(0)=0, sigma1(0)=0 eliminate additions for w[16..29] + * sigma1(0x100), sigma0(0x80000000), sigma0(0x100) folded to constants + * 4. IV baked in as register initializers (no ctx struct load/store) + * 5. always_inline lets the compiler optimize registers across both SHAs + * + * Input invariant: fh[i] = a_i + midpoint->hash[i] (pre-BYTESWAP32 value) + * This equals GET_DATA(byteswapped_bytes, i*4), so it is the correct w[i] + * input for standard big-endian SHA-256 message loading. + * + * Pre-computed padding constants (verified): + * sigma1(0) = 0 + * sigma0(0) = 0 + * sigma1(0x00000100) = RROT(0x100,17)^RROT(0x100,19)^(0x100>>10) = 0x00A00000 + * sigma0(0x80000000) = RROT(0x80000000,7)^RROT(0x80000000,18)^(0x80000000>>3) = 0x11002000 + * sigma0(0x00000100) = RROT(0x100,7)^RROT(0x100,18)^(0x100>>3) = 0x00400022 + */ +static IRAM_ATTR void sha256_second_from_first_hash( + const WORD *fh, // fh[0..7] = first hash words (a+mid, not byteswapped) + sha256_hash_t *secondOut +) { + WORD w[64]; WORD temp1, temp2; - uint8_t *data = (uint8_t *)hb; - int i, j; - WORD WA[8] = { - midpoint->hash[0], - midpoint->hash[1], - midpoint->hash[2], - midpoint->hash[3], - midpoint->hash[4], - midpoint->hash[5], - midpoint->hash[6], - midpoint->hash[7] - }; + // Load first hash as w[0..7] (no byte parsing needed) + w[0]=fh[0]; w[1]=fh[1]; w[2]=fh[2]; w[3]=fh[3]; + w[4]=fh[4]; w[5]=fh[5]; w[6]=fh[6]; w[7]=fh[7]; + // Hardcoded 32-byte padding: w[8]=0x80000000, w[9..14]=0, w[15]=0x100 + w[8]=0x80000000u; + w[9]=0; w[10]=0; w[11]=0; w[12]=0; w[13]=0; w[14]=0; + w[15]=0x00000100u; + + // w[16] = w[0] + sigma0(w[1]) + w[9] + sigma1(w[14]) + // = w[0] + sigma0(w[1]) [w[9]=0, sigma1(0)=0, w[14]=0] + w[16] = w[0] + (RROT(w[1],7)^RROT(w[1],18)^(w[1]>>3)); + + // w[17] = w[1] + sigma0(w[2]) + w[10] + sigma1(w[15]) + // = w[1] + sigma0(w[2]) + 0xA00000 [w[10]=0, sigma1(0x100)=0xA00000] + w[17] = w[1] + (RROT(w[2],7)^RROT(w[2],18)^(w[2]>>3)) 
+ 0x00A00000u; + + // w[18] = w[2] + sigma0(w[3]) + w[11] + sigma1(w[16]) [w[11]=0] + w[18] = w[2] + (RROT(w[3],7)^RROT(w[3],18)^(w[3]>>3)) + (RROT(w[16],17)^RROT(w[16],19)^(w[16]>>10)); + + // w[19] = w[3] + sigma0(w[4]) + w[12] + sigma1(w[17]) [w[12]=0] + w[19] = w[3] + (RROT(w[4],7)^RROT(w[4],18)^(w[4]>>3)) + (RROT(w[17],17)^RROT(w[17],19)^(w[17]>>10)); + + // w[20] = w[4] + sigma0(w[5]) + w[13] + sigma1(w[18]) + // = w[4] + sigma0(w[5]) + sigma1(w[18]) [w[13]=0] + w[20] = w[4] + (RROT(w[5],7)^RROT(w[5],18)^(w[5]>>3)) + (RROT(w[18],17)^RROT(w[18],19)^(w[18]>>10)); + + // w[21] = w[5] + sigma0(w[6]) + w[14] + sigma1(w[19]) + // = w[5] + sigma0(w[6]) + sigma1(w[19]) [w[14]=0] + w[21] = w[5] + (RROT(w[6],7)^RROT(w[6],18)^(w[6]>>3)) + (RROT(w[19],17)^RROT(w[19],19)^(w[19]>>10)); + + // w[22] = w[6] + sigma0(w[7]) + w[15] + sigma1(w[20]) + // = w[6] + sigma0(w[7]) + 0x100 + sigma1(w[20]) [w[15]=0x100] + w[22] = w[6] + (RROT(w[7],7)^RROT(w[7],18)^(w[7]>>3)) + 0x00000100u + (RROT(w[20],17)^RROT(w[20],19)^(w[20]>>10)); + + // w[23] = w[7] + sigma0(w[8]) + w[16] + sigma1(w[21]) + // = w[7] + 0x11002000 + w[16] + sigma1(w[21]) [sigma0(0x80000000)=0x11002000] + w[23] = w[7] + 0x11002000u + w[16] + (RROT(w[21],17)^RROT(w[21],19)^(w[21]>>10)); + + // w[24] = w[8] + sigma0(w[9]) + w[17] + sigma1(w[22]) + // = 0x80000000 + w[17] + sigma1(w[22]) [sigma0(0)=0, w[9]=0] + w[24] = 0x80000000u + w[17] + (RROT(w[22],17)^RROT(w[22],19)^(w[22]>>10)); + + // w[25] = w[9] + sigma0(w[10]) + w[18] + sigma1(w[23]) + // = w[18] + sigma1(w[23]) [w[9]=0, sigma0(0)=0, w[10]=0] + w[25] = w[18] + (RROT(w[23],17)^RROT(w[23],19)^(w[23]>>10)); + + // w[26] = w[10] + sigma0(w[11]) + w[19] + sigma1(w[24]) + // = w[19] + sigma1(w[24]) [w[10]=0, sigma0(0)=0, w[11]=0] + w[26] = w[19] + (RROT(w[24],17)^RROT(w[24],19)^(w[24]>>10)); + + // w[27] = w[11] + sigma0(w[12]) + w[20] + sigma1(w[25]) + // = w[20] + sigma1(w[25]) [w[11]=0, sigma0(0)=0, w[12]=0] + w[27] = w[20] + 
(RROT(w[25],17)^RROT(w[25],19)^(w[25]>>10)); + + // w[28] = w[12] + sigma0(w[13]) + w[21] + sigma1(w[26]) + // = w[21] + sigma1(w[26]) [w[12]=0, sigma0(0)=0, w[13]=0] + w[28] = w[21] + (RROT(w[26],17)^RROT(w[26],19)^(w[26]>>10)); + + // w[29] = w[13] + sigma0(w[14]) + w[22] + sigma1(w[27]) + // = w[22] + sigma1(w[27]) [w[13]=0, sigma0(0)=0, w[14]=0] + w[29] = w[22] + (RROT(w[27],17)^RROT(w[27],19)^(w[27]>>10)); + + // w[30] = w[14] + sigma0(w[15]) + w[23] + sigma1(w[28]) + // = 0x400022 + w[23] + sigma1(w[28]) [w[14]=0, sigma0(0x100)=0x400022] + w[30] = 0x00400022u + w[23] + (RROT(w[28],17)^RROT(w[28],19)^(w[28]>>10)); + + // w[31] = w[15] + sigma0(w[16]) + w[24] + sigma1(w[29]) [w[15]=0x100 constant] + w[31] = 0x00000100u + (RROT(w[16],7)^RROT(w[16],18)^(w[16]>>3)) + w[24] + (RROT(w[29],17)^RROT(w[29],19)^(w[29]>>10)); + + // w[32..63]: regular expansion, all inputs data-dependent from here + R1(32); R1(33); R1(34); R1(35); R1(36); R1(37); R1(38); R1(39); R1(40); R1(41); + R1(42); R1(43); R1(44); R1(45); R1(46); R1(47); R1(48); R1(49); R1(50); R1(51); + R1(52); R1(53); R1(54); R1(55); R1(56); R1(57); R1(58); R1(59); R1(60); R1(61); + R1(62); R1(63); + + // Compression: start from SHA-256 IV (baked-in constants, no struct load) + WORD a=h0, b=h1, c=h2, d=h3, e=h4, f=h5, g=h6, h=h7; + C1(0);C1(1);C1(2);C1(3);C1(4);C1(5);C1(6);C1(7);C1(8);C1(9); + C1(10);C1(11);C1(12);C1(13);C1(14);C1(15);C1(16);C1(17);C1(18);C1(19); + C1(20);C1(21);C1(22);C1(23);C1(24);C1(25);C1(26);C1(27);C1(28);C1(29); + C1(30);C1(31);C1(32);C1(33);C1(34);C1(35);C1(36);C1(37);C1(38);C1(39); + C1(40);C1(41);C1(42);C1(43);C1(44);C1(45);C1(46);C1(47);C1(48);C1(49); + C1(50);C1(51);C1(52);C1(53);C1(54);C1(55);C1(56);C1(57);C1(58);C1(59); + C1(60);C1(61);C1(62);C1(63); + + // Davies-Meyer addback with byteswap (same as miner_sha256 output format) + secondOut->hash[0] = BYTESWAP32(a + h0); + secondOut->hash[1] = BYTESWAP32(b + h1); + secondOut->hash[2] = BYTESWAP32(c + h2); + secondOut->hash[3] = 
BYTESWAP32(d + h3); + secondOut->hash[4] = BYTESWAP32(e + h4); + secondOut->hash[5] = BYTESWAP32(f + h5); + secondOut->hash[6] = BYTESWAP32(g + h6); + secondOut->hash[7] = BYTESWAP32(h + h7); +} + +void IRAM_ATTR miner_sha256_complete_from_midstate( + const sha256_hash_t *midpoint, + const block_header_t *hb, + sha256_hash_t *firstOut, + sha256_hash_t *secondOut +) { +#if !MINER_EXPERIMENTAL_COMPRESSOR + WORD temp1, temp2; +#endif + const uint8_t *data = (const uint8_t *)hb; + + WORD a = midpoint->hash[0], b = midpoint->hash[1], c = midpoint->hash[2], d = midpoint->hash[3]; + WORD e = midpoint->hash[4], f = midpoint->hash[5], g = midpoint->hash[6], h = midpoint->hash[7]; // Second half of block (last 16 bytes of 80-byte header + padding) // w[0..3] = bytes 64-79 (merkle tail, timestamp, nbits, nonce) @@ -303,209 +468,85 @@ bool miner_sha256_header(sha256_hash_t *midpoint, sha256_hash_t *ctx, block_head R1(46); R1(47); R1(48); R1(49); R1(50); R1(51); R1(52); R1(53); R1(54); R1(55); R1(56); R1(57); R1(58); R1(59); R1(60); R1(61); R1(62); R1(63); - CM(0, 1, 2, 3, 4, 5, 6, 7, 0); - CM(7, 0, 1, 2, 3, 4, 5, 6, 1); - CM(6, 7, 0, 1, 2, 3, 4, 5, 2); - CM(5, 6, 7, 0, 1, 2, 3, 4, 3); - CM(4, 5, 6, 7, 0, 1, 2, 3, 4); - CM(3, 4, 5, 6, 7, 0, 1, 2, 5); - CM(2, 3, 4, 5, 6, 7, 0, 1, 6); - CM(1, 2, 3, 4, 5, 6, 7, 0, 7); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 8); - CM(7, 0, 1, 2, 3, 4, 5, 6, 9); - CM(6, 7, 0, 1, 2, 3, 4, 5, 10); - CM(5, 6, 7, 0, 1, 2, 3, 4, 11); - CM(4, 5, 6, 7, 0, 1, 2, 3, 12); - CM(3, 4, 5, 6, 7, 0, 1, 2, 13); - CM(2, 3, 4, 5, 6, 7, 0, 1, 14); - CM(1, 2, 3, 4, 5, 6, 7, 0, 15); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 16); - CM(7, 0, 1, 2, 3, 4, 5, 6, 17); - CM(6, 7, 0, 1, 2, 3, 4, 5, 18); - CM(5, 6, 7, 0, 1, 2, 3, 4, 19); - CM(4, 5, 6, 7, 0, 1, 2, 3, 20); - CM(3, 4, 5, 6, 7, 0, 1, 2, 21); - CM(2, 3, 4, 5, 6, 7, 0, 1, 22); - CM(1, 2, 3, 4, 5, 6, 7, 0, 23); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 24); - CM(7, 0, 1, 2, 3, 4, 5, 6, 25); - CM(6, 7, 0, 1, 2, 3, 4, 5, 26); - CM(5, 6, 7, 0, 1, 2, 3, 
4, 27); - CM(4, 5, 6, 7, 0, 1, 2, 3, 28); - CM(3, 4, 5, 6, 7, 0, 1, 2, 29); - CM(2, 3, 4, 5, 6, 7, 0, 1, 30); - CM(1, 2, 3, 4, 5, 6, 7, 0, 31); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 32); - CM(7, 0, 1, 2, 3, 4, 5, 6, 33); - CM(6, 7, 0, 1, 2, 3, 4, 5, 34); - CM(5, 6, 7, 0, 1, 2, 3, 4, 35); - CM(4, 5, 6, 7, 0, 1, 2, 3, 36); - CM(3, 4, 5, 6, 7, 0, 1, 2, 37); - CM(2, 3, 4, 5, 6, 7, 0, 1, 38); - CM(1, 2, 3, 4, 5, 6, 7, 0, 39); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 40); - CM(7, 0, 1, 2, 3, 4, 5, 6, 41); - CM(6, 7, 0, 1, 2, 3, 4, 5, 42); - CM(5, 6, 7, 0, 1, 2, 3, 4, 43); - CM(4, 5, 6, 7, 0, 1, 2, 3, 44); - CM(3, 4, 5, 6, 7, 0, 1, 2, 45); - CM(2, 3, 4, 5, 6, 7, 0, 1, 46); - CM(1, 2, 3, 4, 5, 6, 7, 0, 47); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 48); - CM(7, 0, 1, 2, 3, 4, 5, 6, 49); - CM(6, 7, 0, 1, 2, 3, 4, 5, 50); - CM(5, 6, 7, 0, 1, 2, 3, 4, 51); - CM(4, 5, 6, 7, 0, 1, 2, 3, 52); - CM(3, 4, 5, 6, 7, 0, 1, 2, 53); - CM(2, 3, 4, 5, 6, 7, 0, 1, 54); - CM(1, 2, 3, 4, 5, 6, 7, 0, 55); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 56); - CM(7, 0, 1, 2, 3, 4, 5, 6, 57); - CM(6, 7, 0, 1, 2, 3, 4, 5, 58); - CM(5, 6, 7, 0, 1, 2, 3, 4, 59); - CM(4, 5, 6, 7, 0, 1, 2, 3, 60); - CM(3, 4, 5, 6, 7, 0, 1, 2, 61); - CM(2, 3, 4, 5, 6, 7, 0, 1, 62); - CM(1, 2, 3, 4, 5, 6, 7, 0, 63); +#if MINER_EXPERIMENTAL_COMPRESSOR + sha256_compress_64_experimental(&a, &b, &c, &d, &e, &f, &g, &h, w); +#else + C1(0);C1(1);C1(2);C1(3);C1(4);C1(5);C1(6);C1(7);C1(8);C1(9); + C1(10);C1(11);C1(12);C1(13);C1(14);C1(15);C1(16);C1(17);C1(18);C1(19); + C1(20);C1(21);C1(22);C1(23);C1(24);C1(25);C1(26);C1(27);C1(28);C1(29); + C1(30);C1(31);C1(32);C1(33);C1(34);C1(35);C1(36);C1(37);C1(38);C1(39); + C1(40);C1(41);C1(42);C1(43);C1(44);C1(45);C1(46);C1(47);C1(48);C1(49); + C1(50);C1(51);C1(52);C1(53);C1(54);C1(55);C1(56);C1(57);C1(58);C1(59); + C1(60);C1(61);C1(62);C1(63); +#endif // First hash complete - byte-swap for second hash input - tmp.hash[0] = BYTESWAP32(WA[0] + midpoint->hash[0]); - tmp.hash[1] = BYTESWAP32(WA[1] + midpoint->hash[1]); - tmp.hash[2] 
= BYTESWAP32(WA[2] + midpoint->hash[2]); - tmp.hash[3] = BYTESWAP32(WA[3] + midpoint->hash[3]); - tmp.hash[4] = BYTESWAP32(WA[4] + midpoint->hash[4]); - tmp.hash[5] = BYTESWAP32(WA[5] + midpoint->hash[5]); - tmp.hash[6] = BYTESWAP32(WA[6] + midpoint->hash[6]); - tmp.hash[7] = BYTESWAP32(WA[7] + midpoint->hash[7]); - - // Copy first hash into working area for double hash - data = (uint8_t *)tmp.hash; - - w[0] = GET_DATA(data, 0); - w[1] = GET_DATA(data, 4); - w[2] = GET_DATA(data, 8); - w[3] = GET_DATA(data, 12); - w[4] = GET_DATA(data, 16); - w[5] = GET_DATA(data, 20); - w[6] = GET_DATA(data, 24); - w[7] = GET_DATA(data, 28); - - w[8] = 0x80000000; - w[9] = w[10] = w[11] = w[12] = w[13] = w[14] = 0; - w[15] = 0x00000100; // 256 bits - - WA[0] = h0; - WA[1] = h1; - WA[2] = h2; - WA[3] = h3; - WA[4] = h4; - WA[5] = h5; - WA[6] = h6; - WA[7] = h7; - - // Abbreviated macros where there is no data - R1_a(16); R1_a(17); R1_a(18); R1_a(19); R1_a(20); R1_a(21); - R1(22); R1(23); - R1_b(24); - R1_c(25); R1_c(26); R1_c(27); R1_c(28); R1_c(29); - R1_d(30); - R1(31); R1(32); R1(33); R1(34); R1(35); - R1(36); R1(37); R1(38); R1(39); R1(40); R1(41); R1(42); R1(43); R1(44); R1(45); - R1(46); R1(47); R1(48); R1(49); R1(50); R1(51); R1(52); R1(53); R1(54); R1(55); - R1(56); R1(57); R1(58); R1(59); R1(60); R1(61); R1(62); R1(63); + // Store as raw (pre-BYTESWAP) words — sha256_second_from_first_hash uses them + // as w[0..7] directly (equivalent to what GET_DATA reads from byteswapped bytes). 
+ WORD fh[8] = { + a + midpoint->hash[0], b + midpoint->hash[1], + c + midpoint->hash[2], d + midpoint->hash[3], + e + midpoint->hash[4], f + midpoint->hash[5], + g + midpoint->hash[6], h + midpoint->hash[7] + }; - CM(0, 1, 2, 3, 4, 5, 6, 7, 0); - CM(7, 0, 1, 2, 3, 4, 5, 6, 1); - CM(6, 7, 0, 1, 2, 3, 4, 5, 2); - CM(5, 6, 7, 0, 1, 2, 3, 4, 3); - CM(4, 5, 6, 7, 0, 1, 2, 3, 4); - CM(3, 4, 5, 6, 7, 0, 1, 2, 5); - CM(2, 3, 4, 5, 6, 7, 0, 1, 6); - CM(1, 2, 3, 4, 5, 6, 7, 0, 7); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 8); - CM(7, 0, 1, 2, 3, 4, 5, 6, 9); - CM(6, 7, 0, 1, 2, 3, 4, 5, 10); - CM(5, 6, 7, 0, 1, 2, 3, 4, 11); - CM(4, 5, 6, 7, 0, 1, 2, 3, 12); - CM(3, 4, 5, 6, 7, 0, 1, 2, 13); - CM(2, 3, 4, 5, 6, 7, 0, 1, 14); - CM(1, 2, 3, 4, 5, 6, 7, 0, 15); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 16); - CM(7, 0, 1, 2, 3, 4, 5, 6, 17); - CM(6, 7, 0, 1, 2, 3, 4, 5, 18); - CM(5, 6, 7, 0, 1, 2, 3, 4, 19); - CM(4, 5, 6, 7, 0, 1, 2, 3, 20); - CM(3, 4, 5, 6, 7, 0, 1, 2, 21); - CM(2, 3, 4, 5, 6, 7, 0, 1, 22); - CM(1, 2, 3, 4, 5, 6, 7, 0, 23); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 24); - CM(7, 0, 1, 2, 3, 4, 5, 6, 25); - CM(6, 7, 0, 1, 2, 3, 4, 5, 26); - CM(5, 6, 7, 0, 1, 2, 3, 4, 27); - CM(4, 5, 6, 7, 0, 1, 2, 3, 28); - CM(3, 4, 5, 6, 7, 0, 1, 2, 29); - CM(2, 3, 4, 5, 6, 7, 0, 1, 30); - CM(1, 2, 3, 4, 5, 6, 7, 0, 31); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 32); - CM(7, 0, 1, 2, 3, 4, 5, 6, 33); - CM(6, 7, 0, 1, 2, 3, 4, 5, 34); - CM(5, 6, 7, 0, 1, 2, 3, 4, 35); - CM(4, 5, 6, 7, 0, 1, 2, 3, 36); - CM(3, 4, 5, 6, 7, 0, 1, 2, 37); - CM(2, 3, 4, 5, 6, 7, 0, 1, 38); - CM(1, 2, 3, 4, 5, 6, 7, 0, 39); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 40); - CM(7, 0, 1, 2, 3, 4, 5, 6, 41); - CM(6, 7, 0, 1, 2, 3, 4, 5, 42); - CM(5, 6, 7, 0, 1, 2, 3, 4, 43); - CM(4, 5, 6, 7, 0, 1, 2, 3, 44); - CM(3, 4, 5, 6, 7, 0, 1, 2, 45); - CM(2, 3, 4, 5, 6, 7, 0, 1, 46); - CM(1, 2, 3, 4, 5, 6, 7, 0, 47); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 48); - CM(7, 0, 1, 2, 3, 4, 5, 6, 49); - CM(6, 7, 0, 1, 2, 3, 4, 5, 50); - CM(5, 6, 7, 0, 1, 2, 3, 4, 51); - CM(4, 5, 6, 7, 
0, 1, 2, 3, 52); - CM(3, 4, 5, 6, 7, 0, 1, 2, 53); - CM(2, 3, 4, 5, 6, 7, 0, 1, 54); - CM(1, 2, 3, 4, 5, 6, 7, 0, 55); - - CM(0, 1, 2, 3, 4, 5, 6, 7, 56); - CM(7, 0, 1, 2, 3, 4, 5, 6, 57); - CM(6, 7, 0, 1, 2, 3, 4, 5, 58); - CM(5, 6, 7, 0, 1, 2, 3, 4, 59); - CM(4, 5, 6, 7, 0, 1, 2, 3, 60); - CM(3, 4, 5, 6, 7, 0, 1, 2, 61); - CM(2, 3, 4, 5, 6, 7, 0, 1, 62); - CM(1, 2, 3, 4, 5, 6, 7, 0, 63); - - // Early 16-bit reject - no need to continue if we don't have a good hash - ctx->hash[7] = WA[7] + h7; - if (ctx->hash[7] & 0xffff) return false; - - // Complete the output hash with byte-swap - ctx->hash[0] = BYTESWAP32(WA[0] + h0); - ctx->hash[1] = BYTESWAP32(WA[1] + h1); - ctx->hash[2] = BYTESWAP32(WA[2] + h2); - ctx->hash[3] = BYTESWAP32(WA[3] + h3); - ctx->hash[4] = BYTESWAP32(WA[4] + h4); - ctx->hash[5] = BYTESWAP32(WA[5] + h5); - ctx->hash[6] = BYTESWAP32(WA[6] + h6); - ctx->hash[7] = BYTESWAP32(ctx->hash[7]); + if (firstOut) { + firstOut->hash[0] = BYTESWAP32(fh[0]); + firstOut->hash[1] = BYTESWAP32(fh[1]); + firstOut->hash[2] = BYTESWAP32(fh[2]); + firstOut->hash[3] = BYTESWAP32(fh[3]); + firstOut->hash[4] = BYTESWAP32(fh[4]); + firstOut->hash[5] = BYTESWAP32(fh[5]); + firstOut->hash[6] = BYTESWAP32(fh[6]); + firstOut->hash[7] = BYTESWAP32(fh[7]); + } + + if (secondOut) { + sha256_second_from_first_hash(fh, secondOut); + } +} + +void IRAM_ATTR miner_sha256_complete_from_midstate_prepared( + const sha256_hash_t *midpoint, + const sha256_tail_schedule_cache_t *cache, + uint32_t nonce, + sha256_hash_t *firstOut, + sha256_hash_t *secondOut +) { + // Correctness-first implementation: reconstruct the exact tail bytes from cache+nonce + // and route through the validated legacy completion path. 
+ block_header_t hb = {0}; + uint8_t *data = (uint8_t *)&hb; + + data[64] = (uint8_t)(cache->w0 >> 24); + data[65] = (uint8_t)(cache->w0 >> 16); + data[66] = (uint8_t)(cache->w0 >> 8); + data[67] = (uint8_t)(cache->w0); + + data[68] = (uint8_t)(cache->w1 >> 24); + data[69] = (uint8_t)(cache->w1 >> 16); + data[70] = (uint8_t)(cache->w1 >> 8); + data[71] = (uint8_t)(cache->w1); + + data[72] = (uint8_t)(cache->w2 >> 24); + data[73] = (uint8_t)(cache->w2 >> 16); + data[74] = (uint8_t)(cache->w2 >> 8); + data[75] = (uint8_t)(cache->w2); + + data[76] = (uint8_t)(nonce); + data[77] = (uint8_t)(nonce >> 8); + data[78] = (uint8_t)(nonce >> 16); + data[79] = (uint8_t)(nonce >> 24); + + miner_sha256_complete_from_midstate(midpoint, &hb, firstOut, secondOut); +} - return true; +bool IRAM_ATTR miner_sha256_header_prepared( + const sha256_hash_t *midpoint, + const sha256_tail_schedule_cache_t *cache, + uint32_t nonce, + sha256_hash_t *ctx +) { + miner_sha256_complete_from_midstate_prepared(midpoint, cache, nonce, NULL, ctx); + return (ctx->bytes[31] == 0 && ctx->bytes[30] == 0); } diff --git a/src/mining/miner_sha256.h b/src/mining/miner_sha256.h index 7337eb2..d8866b9 100644 --- a/src/mining/miner_sha256.h +++ b/src/mining/miner_sha256.h @@ -18,6 +18,18 @@ extern "C" { #endif +// Precomputed first-tail schedule data for nonce hot loop. +// Built once per job/template and reused across nonce iterations. +typedef struct { + uint32_t w0; + uint32_t w1; + uint32_t w2; + uint32_t w16; + uint32_t w17; + uint32_t s1_w16; + uint32_t s1_w17; +} sha256_tail_schedule_cache_t; + /** * Standard SHA-256 hash * Output is byte-swapped for little-endian comparison @@ -37,6 +49,23 @@ void miner_sha256(sha256_hash_t *ctx, uint8_t *msg, size_t len); */ void miner_sha256_midstate(sha256_hash_t *ctx, block_header_t *hb); +/** + * Complete Bitcoin double SHA-256 from a precomputed midstate. 
+ * This always computes full outputs (no early reject) and is intended for + * deterministic validation of midstate correctness. + * + * @param midpoint Midstate from miner_sha256_midstate() + * @param hb Full 80-byte header + * @param firstOut Optional first SHA-256 digest (32 bytes); may be NULL to skip + * @param secondOut Optional double SHA-256 digest (32 bytes); may be NULL to skip + */ +void miner_sha256_complete_from_midstate( + const sha256_hash_t *midpoint, + const block_header_t *hb, + sha256_hash_t *firstOut, + sha256_hash_t *secondOut +); + /** * Complete double SHA-256 using pre-computed midstate * Hashes tail (last 16 bytes + nonce) and performs double hash @@ -49,6 +78,48 @@ void miner_sha256_midstate(sha256_hash_t *ctx, block_header_t *hb); */ bool miner_sha256_header(sha256_hash_t *midpoint, sha256_hash_t *ctx, block_header_t *hb); +/** + * Build per-job cache for the first SHA tail schedule. + * Call once after loading a new block header template. + */ +void miner_sha256_prepare_tail_schedule( + sha256_tail_schedule_cache_t *cache, + const block_header_t *hb +); + +/** + * Complete Bitcoin double SHA-256 using a precomputed tail schedule cache. + * + * @param midpoint Midstate from miner_sha256_midstate() + * @param cache Per-job precomputed tail schedule cache + * @param nonce Native-endian nonce value to hash + * @param firstOut Optional first SHA-256 digest output + * @param secondOut Optional second SHA-256 digest output + */ +void miner_sha256_complete_from_midstate_prepared( + const sha256_hash_t *midpoint, + const sha256_tail_schedule_cache_t *cache, + uint32_t nonce, + sha256_hash_t *firstOut, + sha256_hash_t *secondOut +); + +/** + * Hot-loop helper using precomputed tail schedule cache.
+ * + * @param midpoint Midstate from miner_sha256_midstate() + * @param cache Per-job precomputed tail schedule cache + * @param nonce Native-endian nonce value to hash + * @param ctx Output final hash result + * @return true if hash passes early 16-bit prefilter + */ +bool miner_sha256_header_prepared( + const sha256_hash_t *midpoint, + const sha256_tail_schedule_cache_t *cache, + uint32_t nonce, + sha256_hash_t *ctx +); + #ifdef __cplusplus } #endif diff --git a/src/mining/sha256_ll.cpp b/src/mining/sha256_ll.cpp index 2ad59dc..6d5fd14 100644 --- a/src/mining/sha256_ll.cpp +++ b/src/mining/sha256_ll.cpp @@ -19,6 +19,8 @@ #include #endif +static volatile uint32_t s_wait_idle_timeouts = 0; + // ============================================================================= // Platform-specific register definitions // ============================================================================= @@ -69,11 +71,17 @@ void IRAM_ATTR sha256_ll_wait_idle(void) { uint32_t timeout = 20000; #if defined(CONFIG_IDF_TARGET_ESP32) while (DPORT_REG_READ(SHA_256_BUSY_REG)) { - if (--timeout == 0) break; + if (--timeout == 0) { + s_wait_idle_timeouts++; + break; + } } #else while (REG_READ(SHA_BUSY_REG)) { - if (--timeout == 0) break; + if (--timeout == 0) { + s_wait_idle_timeouts++; + break; + } } #endif } diff --git a/src/mining/sha256_pipelined_s3.cpp b/src/mining/sha256_pipelined_s3.cpp index 01b6638..a3a0a60 100644 --- a/src/mining/sha256_pipelined_s3.cpp +++ b/src/mining/sha256_pipelined_s3.cpp @@ -217,10 +217,8 @@ bool IRAM_ATTR sha256_pipelined_mine_s3( "beqz.n a3, proc_end_s3 \n" // ===== EARLY REJECT: Check H0 upper 16 bits ===== - // For logical H0=0x0000XXXX (share), raw register = 0xXXXX0000 - // So we check if LOWER 16 bits of raw are zero (= upper 16 bits of logical) "l32i.n a3, %[sha_h], 0 \n" // Load full H0 word (raw LE value) - "extui a3, a3, 0, 16 \n" // Extract lower 16 bits (= upper 16 of logical) + "extui a3, a3, 16, 16 \n" // Extract upper 16 bits "beqz.n a3, 
proc_end_s3 \n" // Exit if potential share "j proc_start_s3 \n" diff --git a/src/mining/sha256_pipelined_s3_v2.cpp b/src/mining/sha256_pipelined_s3_v2.cpp index ba0b679..f00dfa0 100644 --- a/src/mining/sha256_pipelined_s3_v2.cpp +++ b/src/mining/sha256_pipelined_s3_v2.cpp @@ -236,9 +236,9 @@ bool IRAM_ATTR sha256_pipelined_mine_s3_v2( "l8ui a3, %[flag], 0 \n" "beqz.n a3, exit_v2 \n" - // ===== PHASE 11: Early reject - check SHA_H[0] lower 16 bits ===== + // ===== PHASE 11: Early reject - check SHA_H[0] upper 16 bits ===== "l32i a3, a7, 0x40 \n" // Load SHA_H[0] - "extui a3, a3, 0, 16 \n" // Extract lower 16 bits + "extui a3, a3, 16, 16 \n" // Extract upper 16 bits "beqz.n a3, exit_v2 \n" // Exit if potential share! // Continue mining diff --git a/src/mining/sha256_pipelined_s3_v3.cpp b/src/mining/sha256_pipelined_s3_v3.cpp index 3bfb831..a01a277 100644 --- a/src/mining/sha256_pipelined_s3_v3.cpp +++ b/src/mining/sha256_pipelined_s3_v3.cpp @@ -55,7 +55,7 @@ void IRAM_ATTR sha256_s3_init_zeros(void) { */ bool IRAM_ATTR sha256_pipelined_mine_s3_v3( const uint32_t *midstate, // Pre-computed midstate (8 words) - const uint32_t *block2_words, // Block 2 words 0-2 (merkle_tail, timestamp, nbits) - swapped + const uint32_t *block2_words, // Block 2 words 0-2 + word3(stop_plus1_swapped) uint32_t *nonce_ptr, // Current nonce (big-endian/swapped) volatile uint64_t *hash_count_ptr, volatile bool *mining_flag @@ -204,11 +204,22 @@ bool IRAM_ATTR sha256_pipelined_mine_s3_v3( "l8ui a3, %[flag], 0 \n" "beqz.n a3, exit_v3 \n" + // Stop exactly at swapped-space end boundary (end+1 stored in block2_words[3]). + "l32i.n a3, a6, 12 \n" + "beq a2, a3, exit_v3 \n" + // ===== PHASE 11: Early reject ===== + // SHA_H registers are byte-swapped relative to logical SHA-256 words on S3, + // so leading-zero check for logical H0[31:16] maps to RAW lower/upper halves + // depending on read path. 
Here the inline-ASM path must match software verify, + // which is equivalent to checking RAW upper 16 bits on this code path. "l32i a3, a7, 0x40 \n" // SHA_H[0] - "extui a3, a3, 0, 16 \n" // Lower 16 bits + "extui a3, a3, 16, 16 \n" // Upper 16 bits "beqz.n a3, exit_v3 \n" // Exit if potential share + // Stop exactly at swapped-space end boundary. + "beq a2, a9, exit_v3 \n" + // Continue "j loop_start_v3 \n" @@ -222,7 +233,7 @@ bool IRAM_ATTR sha256_pipelined_mine_s3_v3( [ih] "r"(hash_count_ptr), [nonce] "r"(nonce_ptr), [flag] "r"(mining_flag) - : "a2", "a3", "a4", "a5", "a6", "a7", "a8", "memory" + : "a2", "a3", "a4", "a5", "a6", "a7", "a8", "memory" ); return *mining_flag; diff --git a/src/mining/sha256_s3.cpp b/src/mining/sha256_s3.cpp index b3e93e9..c1a2d27 100644 --- a/src/mining/sha256_s3.cpp +++ b/src/mining/sha256_s3.cpp @@ -35,6 +35,376 @@ static inline bool IRAM_ATTR wait_idle() { return true; } +static void s3_log_words(const char *tag, const char *label, const uint32_t *words, size_t count) { + Serial.printf("%s %s=", tag, label); + for (size_t i = 0; i < count; i++) { + Serial.printf("%08x", words[i]); + if (i + 1 < count) Serial.print(" "); + } + Serial.println(); +} + +static void s3_log_bytes(const char *tag, const char *label, const uint8_t *bytes, size_t count) { + Serial.printf("%s %s=", tag, label); + for (size_t i = 0; i < count; i++) { + Serial.printf("%02x", bytes[i]); + } + Serial.println(); +} + +static void s3_words_to_be_bytes(const uint32_t words[8], uint8_t out[32]) { + for (int i = 0; i < 8; i++) { + out[i * 4 + 0] = (uint8_t)(words[i] >> 24); + out[i * 4 + 1] = (uint8_t)(words[i] >> 16); + out[i * 4 + 2] = (uint8_t)(words[i] >> 8); + out[i * 4 + 3] = (uint8_t)(words[i]); + } +} + +static void s3_words_to_le_bytes(const uint32_t words[8], uint8_t out[32]) { + for (int i = 0; i < 8; i++) { + out[i * 4 + 0] = (uint8_t)(words[i]); + out[i * 4 + 1] = (uint8_t)(words[i] >> 8); + out[i * 4 + 2] = (uint8_t)(words[i] >> 16); + out[i * 4 + 3] 
= (uint8_t)(words[i] >> 24); + } +} + +static void s3_build_second_sha_block_from_first_digest(const uint8_t first_digest_be[32], uint8_t block64[64]) { + memset(block64, 0, 64); + memcpy(block64, first_digest_be, 32); + block64[32] = 0x80; + block64[62] = 0x01; + block64[63] = 0x00; +} + +static void sha256_s3_transform_midstate_words(uint32_t out[8], const uint32_t in[8], int mode) { + for (int i = 0; i < 8; i++) { + switch (mode) { + case 0: // A + out[i] = in[i]; + break; + case 1: // B + out[i] = __builtin_bswap32(in[i]); + break; + case 2: // C + out[i] = in[7 - i]; + break; + case 3: // D + out[i] = __builtin_bswap32(in[7 - i]); + break; + default: + out[i] = in[i]; + break; + } + } +} + +static const char *sha256_s3_mode_name(int mode) { + switch (mode) { + case 0: return "A"; + case 1: return "B"; + case 2: return "C"; + case 3: return "D"; + default: return "?"; + } +} + +static const char *sha256_s3_tail_mode_name(int mode) { + switch (mode) { + case 0: return "T0"; + case 1: return "T1"; + case 2: return "T2"; + default: return "?"; + } +} + +static inline uint32_t load_le32(const uint8_t *p) { + return ((uint32_t)p[0]) | + ((uint32_t)p[1] << 8) | + ((uint32_t)p[2] << 16) | + ((uint32_t)p[3] << 24); +} + +static inline uint32_t load_be32(const uint8_t *p) { + return ((uint32_t)p[0] << 24) | + ((uint32_t)p[1] << 16) | + ((uint32_t)p[2] << 8) | + ((uint32_t)p[3]); +} + +static inline uint32_t rotr32(uint32_t x, uint32_t n) { + return (x >> n) | (x << (32 - n)); +} + +static void sha256_sw_compress_one_block_from_iv(const uint8_t block[64], uint8_t out_digest[32]) { + static const uint32_t k[64] = { + 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5, + 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174, + 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da, + 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967, 
+ 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85, + 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070, + 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3, + 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + }; + + uint32_t w[64]; + for (int i = 0; i < 16; i++) { + w[i] = load_be32(block + (i * 4)); + } + for (int i = 16; i < 64; i++) { + uint32_t s0 = rotr32(w[i - 15], 7) ^ rotr32(w[i - 15], 18) ^ (w[i - 15] >> 3); + uint32_t s1 = rotr32(w[i - 2], 17) ^ rotr32(w[i - 2], 19) ^ (w[i - 2] >> 10); + w[i] = w[i - 16] + s0 + w[i - 7] + s1; + } + + uint32_t a = 0x6a09e667; + uint32_t b = 0xbb67ae85; + uint32_t c = 0x3c6ef372; + uint32_t d = 0xa54ff53a; + uint32_t e = 0x510e527f; + uint32_t f = 0x9b05688c; + uint32_t g = 0x1f83d9ab; + uint32_t h = 0x5be0cd19; + + for (int i = 0; i < 64; i++) { + uint32_t S1 = rotr32(e, 6) ^ rotr32(e, 11) ^ rotr32(e, 25); + uint32_t ch = (e & f) ^ ((~e) & g); + uint32_t temp1 = h + S1 + ch + k[i] + w[i]; + uint32_t S0 = rotr32(a, 2) ^ rotr32(a, 13) ^ rotr32(a, 22); + uint32_t maj = (a & b) ^ (a & c) ^ (b & c); + uint32_t temp2 = S0 + maj; + + h = g; + g = f; + f = e; + e = d + temp1; + d = c; + c = b; + b = a; + a = temp1 + temp2; + } + + uint32_t H[8]; + H[0] = 0x6a09e667 + a; + H[1] = 0xbb67ae85 + b; + H[2] = 0x3c6ef372 + c; + H[3] = 0xa54ff53a + d; + H[4] = 0x510e527f + e; + H[5] = 0x9b05688c + f; + H[6] = 0x1f83d9ab + g; + H[7] = 0x5be0cd19 + h; + + for (int i = 0; i < 8; i++) { + out_digest[i * 4 + 0] = (uint8_t)(H[i] >> 24); + out_digest[i * 4 + 1] = (uint8_t)(H[i] >> 16); + out_digest[i * 4 + 2] = (uint8_t)(H[i] >> 8); + out_digest[i * 4 + 3] = (uint8_t)(H[i]); + } +} + +static bool sha256_s3_hw_run_one_block_from_iv(const uint8_t block[64], uint8_t out_digest[32]) { + uint32_t text_words[16]; + for (int i = 0; i < 16; i++) { + text_words[i] = load_le32(block + (i * 4)); + } + + 
s3_log_bytes("[S3-IV64]", "logical_block_bytes", block, 64); + s3_log_words("[S3-IV64]", "actual_register_writes_words", text_words, 16); + + for (int i = 0; i < 16; i++) { + write_reg(SHA_TEXT_BASE + (uint32_t)(i * 4), text_words[i]); + } + + write_reg(SHA_MODE_REG, SHA2_256); + write_reg(SHA_START_REG, 1); + if (!wait_idle()) return false; + + uint32_t raw[8]; + for (int i = 0; i < 8; i++) raw[i] = read_reg(SHA_H_BASE + (i * 4)); + for (int i = 0; i < 8; i++) { + uint32_t be = __builtin_bswap32(raw[i]); + out_digest[i * 4 + 0] = (uint8_t)(be >> 24); + out_digest[i * 4 + 1] = (uint8_t)(be >> 16); + out_digest[i * 4 + 2] = (uint8_t)(be >> 8); + out_digest[i * 4 + 3] = (uint8_t)(be); + } + return true; +} + +static bool sha256_s3_hw_restore_iv_then_continue_one_block(const uint8_t block[64], uint8_t out_digest[32]) { + static const uint32_t iv[8] = { + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, + 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 + }; + + uint32_t text_words[16]; + for (int i = 0; i < 16; i++) { + text_words[i] = load_le32(block + (i * 4)); + } + + s3_log_words("[S3-IV64]", "restore_iv_words", iv, 8); + s3_log_words("[S3-IV64]", "restore_text_words", text_words, 16); + s3_log_bytes("[S3-IV64]", "restore_block_bytes", block, 64); + + for (int i = 0; i < 8; i++) { + write_reg(SHA_H_BASE + (uint32_t)(i * 4), iv[i]); + } + uint32_t h0_after_restore = read_reg(SHA_H_BASE + 0x00); + Serial.printf("[S3-IV64] restore_step=after_iv_write h0=%08x\n", h0_after_restore); + for (int i = 0; i < 16; i++) { + write_reg(SHA_TEXT_BASE + (uint32_t)(i * 4), text_words[i]); + } + + write_reg(SHA_MODE_REG, SHA2_256); + uint32_t h0_after_mode = read_reg(SHA_H_BASE + 0x00); + Serial.printf("[S3-IV64] restore_step=after_set_mode h0=%08x\n", h0_after_mode); + write_reg(SHA_CONTINUE_REG, 1); + if (!wait_idle()) return false; + + uint32_t raw[8]; + for (int i = 0; i < 8; i++) raw[i] = read_reg(SHA_H_BASE + (i * 4)); + s3_log_words("[S3-IV64]", "restore_digest_raw_words", raw, 
8); + for (int i = 0; i < 8; i++) { + uint32_t be = __builtin_bswap32(raw[i]); + out_digest[i * 4 + 0] = (uint8_t)(be >> 24); + out_digest[i * 4 + 1] = (uint8_t)(be >> 16); + out_digest[i * 4 + 2] = (uint8_t)(be >> 8); + out_digest[i * 4 + 3] = (uint8_t)(be); + } + return true; +} + +static void sha256_s3_build_tail_block_t0(uint8_t out[64], const uint8_t *canonical_header_tail16) { + memset(out, 0, 64); + memcpy(out, canonical_header_tail16, 16); + out[16] = 0x80; + out[62] = 0x02; + out[63] = 0x80; +} + +static void sha256_s3_build_tail_block_t1_from_t0(uint8_t out[64], const uint8_t t0[64]) { + memcpy(out, t0, 64); + // Swap only the first 4 message words (16 bytes), keep padding/length canonical. + for (int w = 0; w < 4; w++) { + uint8_t b0 = out[w * 4 + 0]; + uint8_t b1 = out[w * 4 + 1]; + uint8_t b2 = out[w * 4 + 2]; + uint8_t b3 = out[w * 4 + 3]; + out[w * 4 + 0] = b3; + out[w * 4 + 1] = b2; + out[w * 4 + 2] = b1; + out[w * 4 + 3] = b0; + } +} + +static void sha256_s3_build_tail_block_t2( + uint8_t out[64], + const uint8_t *canonical_header_tail16, + const uint32_t *header_tail_words_swapped +) { + memset(out, 0, 64); + if (header_tail_words_swapped) { + // Interpret these as register-source words and serialize as little-endian bytes. + for (int i = 0; i < 4; i++) { + uint32_t w = header_tail_words_swapped[i]; + out[i * 4 + 0] = (uint8_t)(w & 0xFF); + out[i * 4 + 1] = (uint8_t)((w >> 8) & 0xFF); + out[i * 4 + 2] = (uint8_t)((w >> 16) & 0xFF); + out[i * 4 + 3] = (uint8_t)((w >> 24) & 0xFF); + } + } else { + // Fallback keeps T2 deterministic even if swapped words are unavailable. 
+ memcpy(out, canonical_header_tail16, 16); + } + out[16] = 0x80; + out[62] = 0x02; + out[63] = 0x80; +} + +static void sha256_s3_build_text_words_from_tail_block(const uint8_t tail_block[64], uint32_t text_words[16]) { + for (int i = 0; i < 16; i++) { + text_words[i] = load_le32(tail_block + (i * 4)); + } +} + +static void sha256_s3_restore_midstate( + const uint32_t *midstate_words, + int mode, + uint32_t transformed_words[8], + uint32_t readback_words[8] +) { + if (!midstate_words || !transformed_words || !readback_words) return; + + Serial.printf("[S3-RESTORE] input_ptr=%p\n", (const void *)midstate_words); + s3_log_words("[S3-RESTORE]", "input_words", midstate_words, 8); + s3_log_bytes("[S3-RESTORE]", "input_bytes", (const uint8_t *)midstate_words, 32); + + uint32_t before_words[8]; + memcpy(before_words, midstate_words, sizeof(before_words)); + s3_log_words("[S3-RESTORE]", "before_words", before_words, 8); + + sha256_s3_transform_midstate_words(transformed_words, midstate_words, mode); + Serial.printf("[S3-RESTORE] step_name=MODE_%s\n", sha256_s3_mode_name(mode)); + s3_log_words("[S3-RESTORE]", "after_words", transformed_words, 8); + + for (int i = 0; i < 8; i++) { + uint32_t offset = (uint32_t)(i * 4); + write_reg(SHA_H_BASE + offset, transformed_words[i]); + Serial.printf("[S3-RESTORE] write SHA_H[%d] offset=0x%02x value=%08x\n", i, offset, transformed_words[i]); + readback_words[i] = read_reg(SHA_H_BASE + offset); + Serial.printf("[S3-RESTORE] read SHA_H[%d] value=%08x\n", i, readback_words[i]); + } +} + +static bool sha256_s3_first_from_midstate_mode( + const uint32_t *midstate_words, + const uint8_t tail_block[64], + const char *tail_mode_name, + int mode, + uint8_t first_digest_out[32] +) { + if (!midstate_words || !tail_block || !tail_mode_name || !first_digest_out) return false; + + uint32_t transformed[8] = {0}; + uint32_t readback[8] = {0}; + uint32_t text_words[16] = {0}; + + sha256_s3_restore_midstate(midstate_words, mode, transformed, readback); 
+ + s3_log_bytes("[S3-RESTORE]", "logical_tail_block_bytes", tail_block, 64); + + sha256_s3_build_text_words_from_tail_block(tail_block, text_words); + s3_log_words("[S3-RESTORE]", "actual_register_writes_words", text_words, 16); + Serial.printf("[S3-RESTORE] tail_mode=%s\n", tail_mode_name); + + for (int i = 0; i < 16; i++) { + write_reg(SHA_TEXT_BASE + (uint32_t)(i * 4), text_words[i]); + } + + // Check whether setting mode appears to clobber restored SHA_H state. + uint32_t before_mode_h0 = read_reg(SHA_H_BASE + 0x00); + write_reg(SHA_MODE_REG, SHA2_256); + uint32_t after_mode_h0 = read_reg(SHA_H_BASE + 0x00); + Serial.printf("[S3-RESTORE] step_name=SET_MODE before_h0=%08x after_h0=%08x\n", before_mode_h0, after_mode_h0); + + write_reg(SHA_CONTINUE_REG, 1); + if (!wait_idle()) return false; + + uint32_t raw_first[8]; + for (int i = 0; i < 8; i++) { + raw_first[i] = read_reg(SHA_H_BASE + (i * 4)); + } + s3_log_words("[S3-RESTORE]", "s3_first_raw_words", raw_first, 8); + + uint32_t *out_words = (uint32_t *)first_digest_out; + for (int i = 0; i < 8; i++) { + out_words[i] = __builtin_bswap32(raw_first[i]); + } + return true; +} + void sha256_s3_init(void) { Serial.println("[SHA-S3] Optimized S3 mining initialized (Direct Regs)"); @@ -245,18 +615,30 @@ bool sha256_s3_verify( const uint8_t *header_tail, uint32_t nonce, uint8_t *hash_out +) { + return sha256_s3_verify_trace(midstate, header_tail, nonce, hash_out, NULL); +} + +bool sha256_s3_verify_trace( + const uint32_t *midstate, + const uint8_t *header_tail, + uint32_t nonce, + uint8_t *hash_out, + sha256_s3_verify_trace_t *trace ) { const uint32_t *tail_words = (const uint32_t*)header_tail; // 1. 
Restore Midstate - write_reg(SHA_H_BASE + 0x00, midstate[0]); - write_reg(SHA_H_BASE + 0x04, midstate[1]); - write_reg(SHA_H_BASE + 0x08, midstate[2]); - write_reg(SHA_H_BASE + 0x0C, midstate[3]); - write_reg(SHA_H_BASE + 0x10, midstate[4]); - write_reg(SHA_H_BASE + 0x14, midstate[5]); - write_reg(SHA_H_BASE + 0x18, midstate[6]); - write_reg(SHA_H_BASE + 0x1C, midstate[7]); + for (int i = 0; i < 8; i++) { + if (trace) trace->restoredMidstateWords[i] = midstate[i]; + write_reg(SHA_H_BASE + (i * 4), midstate[i]); + } + + if (trace) { + for (int i = 0; i < 8; i++) { + trace->shaHAfterRestoreWords[i] = read_reg(SHA_H_BASE + (i * 4)); + } + } // 2. Write Text + nonce write_reg(SHA_TEXT_BASE + 0x00, tail_words[0]); @@ -278,8 +660,24 @@ bool sha256_s3_verify( uint32_t h[8]; for (int i=0; i<8; i++) h[i] = read_reg(SHA_H_BASE + i*4); + if (trace) { + uint32_t *firstWords = (uint32_t *)trace->firstDigestBytes; + for (int i = 0; i < 8; i++) { + trace->firstDigestRawWords[i] = h[i]; + firstWords[i] = __builtin_bswap32(h[i]); + } + } + for (int i=0; i<8; i++) write_reg(SHA_TEXT_BASE + i*4, h[i]); + if (trace) { + memset(trace->secondInputBlockBytes, 0, sizeof(trace->secondInputBlockBytes)); + memcpy(trace->secondInputBlockBytes, trace->firstDigestBytes, 32); + trace->secondInputBlockBytes[32] = 0x80; + trace->secondInputBlockBytes[62] = 0x01; + trace->secondInputBlockBytes[63] = 0x00; + } + write_reg(SHA_TEXT_BASE + 0x20, 0x00000080); // Corrected padding for (int i=9; i<15; i++) write_reg(SHA_TEXT_BASE + i*4, 0); write_reg(SHA_TEXT_BASE + 0x3C, 0x00010000); // Corrected length (256 bits) @@ -289,19 +687,264 @@ bool sha256_s3_verify( if (!wait_idle()) return false; + uint32_t rawFinal[8]; + for (int i = 0; i < 8; i++) { + rawFinal[i] = read_reg(SHA_H_BASE + (i * 4)); + if (trace) trace->secondDigestRawWords[i] = rawFinal[i]; + } + + if (trace) { + for (int i = 0; i < 8; i++) { + uint32_t be = __builtin_bswap32(rawFinal[i]); + trace->finalDigestBeBytes[i * 4 + 0] = (uint8_t)(be 
>> 24); + trace->finalDigestBeBytes[i * 4 + 1] = (uint8_t)(be >> 16); + trace->finalDigestBeBytes[i * 4 + 2] = (uint8_t)(be >> 8); + trace->finalDigestBeBytes[i * 4 + 3] = (uint8_t)(be); + } + } + // 6. Read Output - reverse word order and byte-swap to match check_target expectations // check_target compares from bytes[31] down, so H0 (MSB) must be at bytes[28-31] uint32_t *out = (uint32_t *)hash_out; - out[7] = __builtin_bswap32(read_reg(SHA_H_BASE + 0x00)); // H0 -> bytes[28-31] - out[6] = __builtin_bswap32(read_reg(SHA_H_BASE + 0x04)); // H1 -> bytes[24-27] - out[5] = __builtin_bswap32(read_reg(SHA_H_BASE + 0x08)); // H2 -> bytes[20-23] - out[4] = __builtin_bswap32(read_reg(SHA_H_BASE + 0x0C)); // H3 -> bytes[16-19] - out[3] = __builtin_bswap32(read_reg(SHA_H_BASE + 0x10)); // H4 -> bytes[12-15] - out[2] = __builtin_bswap32(read_reg(SHA_H_BASE + 0x14)); // H5 -> bytes[8-11] - out[1] = __builtin_bswap32(read_reg(SHA_H_BASE + 0x18)); // H6 -> bytes[4-7] - out[0] = __builtin_bswap32(read_reg(SHA_H_BASE + 0x1C)); // H7 -> bytes[0-3] + out[7] = __builtin_bswap32(rawFinal[0]); // H0 -> bytes[28-31] + out[6] = __builtin_bswap32(rawFinal[1]); // H1 -> bytes[24-27] + out[5] = __builtin_bswap32(rawFinal[2]); // H2 -> bytes[20-23] + out[4] = __builtin_bswap32(rawFinal[3]); // H3 -> bytes[16-19] + out[3] = __builtin_bswap32(rawFinal[4]); // H4 -> bytes[12-15] + out[2] = __builtin_bswap32(rawFinal[5]); // H5 -> bytes[8-11] + out[1] = __builtin_bswap32(rawFinal[6]); // H6 -> bytes[4-7] + out[0] = __builtin_bswap32(rawFinal[7]); // H7 -> bytes[0-3] + + if (trace) { + memcpy(trace->finalDigestBytes, hash_out, 32); + } return true; } +bool sha256_s3_test_restore_mapping( + const uint32_t *midstate_words, + const uint8_t *canonical_header_tail16, + const uint32_t *header_tail_words_swapped, + const uint8_t *expected_first_digest +) { + if (!midstate_words || !canonical_header_tail16 || !expected_first_digest) return false; + + uint8_t tailT0[64] = {0}; + uint8_t tailT1[64] = {0}; 
+ uint8_t tailT2[64] = {0}; + sha256_s3_build_tail_block_t0(tailT0, canonical_header_tail16); + sha256_s3_build_tail_block_t1_from_t0(tailT1, tailT0); + sha256_s3_build_tail_block_t2(tailT2, canonical_header_tail16, header_tail_words_swapped); + + s3_log_bytes("[S3-MATRIX]", "tail_T0", tailT0, 64); + s3_log_bytes("[S3-MATRIX]", "tail_T1", tailT1, 64); + s3_log_bytes("[S3-MATRIX]", "tail_T2", tailT2, 64); + + bool t1eqt2 = (memcmp(tailT1, tailT2, 64) == 0); + Serial.printf("[S3-MATRIX] tail_T1_equals_T2=%s\n", t1eqt2 ? "YES" : "NO"); + + const uint8_t *tailBlocks[3] = {tailT0, tailT1, tailT2}; + + bool anyPass = false; + for (int restoreMode = 0; restoreMode < 4; restoreMode++) { + for (int tailMode = 0; tailMode < 3; tailMode++) { + uint8_t first_digest[32] = {0}; + bool ok = sha256_s3_first_from_midstate_mode( + midstate_words, + tailBlocks[tailMode], + sha256_s3_tail_mode_name(tailMode), + restoreMode, + first_digest + ); + bool match = ok && (memcmp(first_digest, expected_first_digest, 32) == 0); + + Serial.printf("[S3-MATRIX] restore=%s tail=%s first=", + sha256_s3_mode_name(restoreMode), + sha256_s3_tail_mode_name(tailMode)); + for (int i = 0; i < 32; i++) Serial.printf("%02x", first_digest[i]); + Serial.printf(" match=%s\n", match ? "PASS" : "FAIL"); + + anyPass = anyPass || match; + } + } + + Serial.printf("[S3-MATRIX] any_match=%s\n", anyPass ? "PASS" : "FAIL"); + + return anyPass; +} + +bool sha256_s3_test_one_block_from_iv(void) { + // Canonical one-block SHA-256 message block for "abc" (already padded to 64 bytes). 
+ static const uint8_t block64[64] = { + 0x61,0x62,0x63,0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18 + }; + static const uint8_t expected_digest[32] = { + 0xba,0x78,0x16,0xbf,0x8f,0x01,0xcf,0xea,0x41,0x41,0x40,0xde,0x5d,0xae,0x22,0x23, + 0xb0,0x03,0x61,0xa3,0x96,0x17,0x7a,0x9c,0xb4,0x10,0xff,0x61,0xf2,0x00,0x15,0xad + }; + + uint8_t hw_from_iv[32] = {0}; + uint8_t sw_from_iv[32] = {0}; + + s3_log_bytes("[S3-IV64]", "block64", block64, 64); + s3_log_bytes("[S3-IV64]", "expected_digest", expected_digest, 32); + + bool hwStartOk = sha256_s3_hw_run_one_block_from_iv(block64, hw_from_iv); + sha256_sw_compress_one_block_from_iv(block64, sw_from_iv); + + bool hwStartMatch = hwStartOk && (memcmp(hw_from_iv, expected_digest, 32) == 0); + bool swMatch = (memcmp(sw_from_iv, expected_digest, 32) == 0); + + s3_log_bytes("[S3-IV64]", "hw_from_iv", hw_from_iv, 32); + Serial.printf("[S3-IV64] test=1 hw_full_64_from_iv match=%s\n", hwStartMatch ? "PASS" : "FAIL"); + + s3_log_bytes("[S3-IV64]", "sw_compress_from_iv", sw_from_iv, 32); + Serial.printf("[S3-IV64] test=2 sw_compress_from_iv match=%s\n", swMatch ? "PASS" : "FAIL"); + + uint8_t hw_restore_continue[32] = {0}; + bool hwRestoreOk = sha256_s3_hw_restore_iv_then_continue_one_block(block64, hw_restore_continue); + bool hwRestoreMatch = hwRestoreOk && (memcmp(hw_restore_continue, expected_digest, 32) == 0); + s3_log_bytes("[S3-IV64]", "hw_restore_iv_plus_continue", hw_restore_continue, 32); + Serial.printf("[S3-IV64] test=3 hw_restore_iv_plus_continue match=%s\n", hwRestoreMatch ? "PASS" : "FAIL"); + + // Official API path for resume testing (esp_sha_write_digest_state + continue block). 
+ static const uint32_t iv_words[8] = { + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, + 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 + }; + uint32_t api_raw[8] = {0}; + uint8_t api_raw_as_be[32] = {0}; + uint8_t api_raw_as_le[32] = {0}; + uint32_t iv_copy[8]; + memcpy(iv_copy, iv_words, sizeof(iv_copy)); + + esp_sha_write_digest_state((esp_sha_type)SHA2_256, iv_copy); + int api_ret = esp_sha_dma((esp_sha_type)SHA2_256, block64, 64, NULL, 0, false); + esp_sha_read_digest_state((esp_sha_type)SHA2_256, api_raw); + s3_words_to_be_bytes(api_raw, api_raw_as_be); + s3_words_to_le_bytes(api_raw, api_raw_as_le); + + bool api_match_be = (memcmp(api_raw_as_be, expected_digest, 32) == 0); + bool api_match_le = (memcmp(api_raw_as_le, expected_digest, 32) == 0); + + Serial.printf("[S3-IV64] test=4 api_resume ret=%d be_match=%s le_match=%s\n", + api_ret, + api_match_be ? "PASS" : "FAIL", + api_match_le ? "PASS" : "FAIL"); + s3_log_words("[S3-IV64]", "api_resume_raw_words", api_raw, 8); + s3_log_bytes("[S3-IV64]", "api_resume_raw_as_be", api_raw_as_be, 32); + s3_log_bytes("[S3-IV64]", "api_resume_raw_as_le", api_raw_as_le, 32); + + bool overall = hwStartMatch && swMatch && hwRestoreMatch; + Serial.printf("[S3-IV64] overall=%s\n", overall ? 
"PASS" : "FAIL"); + return overall; +} + +static bool sha256_s3_second_sha_from_first_be_internal( + const uint8_t first_digest_be[32], + uint8_t out_digest_be[32], + bool verbose +) { + if (!first_digest_be || !out_digest_be) return false; + + uint32_t text_words[8]; + for (int i = 0; i < 8; i++) { + text_words[i] = load_le32(first_digest_be + (i * 4)); + } + + if (verbose) { + s3_log_bytes("[S3-SHA2]", "first_digest_be", first_digest_be, 32); + s3_log_words("[S3-SHA2]", "second_input_text_words", text_words, 8); + } + + for (int i = 0; i < 8; i++) { + write_reg(SHA_TEXT_BASE + (uint32_t)(i * 4), text_words[i]); + } + + write_reg(SHA_TEXT_BASE + 0x20, 0x00000080); + for (int i = 9; i < 15; i++) { + write_reg(SHA_TEXT_BASE + (uint32_t)(i * 4), 0x00000000); + } + write_reg(SHA_TEXT_BASE + 0x3C, 0x00010000); + + write_reg(SHA_MODE_REG, SHA2_256); + write_reg(SHA_START_REG, 1); + if (!wait_idle()) return false; + + uint32_t raw[8]; + for (int i = 0; i < 8; i++) { + raw[i] = read_reg(SHA_H_BASE + (uint32_t)(i * 4)); + uint32_t be = __builtin_bswap32(raw[i]); + out_digest_be[i * 4 + 0] = (uint8_t)(be >> 24); + out_digest_be[i * 4 + 1] = (uint8_t)(be >> 16); + out_digest_be[i * 4 + 2] = (uint8_t)(be >> 8); + out_digest_be[i * 4 + 3] = (uint8_t)(be); + } + + if (verbose) { + uint8_t raw_as_be[32] = {0}; + uint8_t raw_as_le[32] = {0}; + s3_words_to_be_bytes(raw, raw_as_be); + s3_words_to_le_bytes(raw, raw_as_le); + s3_log_words("[S3-SHA2]", "second_digest_raw_words", raw, 8); + s3_log_bytes("[S3-SHA2]", "second_digest_raw_as_be", raw_as_be, 32); + s3_log_bytes("[S3-SHA2]", "second_digest_raw_as_le", raw_as_le, 32); + s3_log_bytes("[S3-SHA2]", "second_digest_out_be", out_digest_be, 32); + } + + return true; +} + +bool sha256_s3_second_sha_from_first_be(const uint8_t first_digest_be[32], uint8_t out_digest_be[32]) { + return sha256_s3_second_sha_from_first_be_internal(first_digest_be, out_digest_be, false); +} + +bool sha256_s3_test_second_sha_paths(void) { + // 
first_digest_be is SHA256("abc") + static const uint8_t first_digest_be[32] = { + 0xba,0x78,0x16,0xbf,0x8f,0x01,0xcf,0xea,0x41,0x41,0x40,0xde,0x5d,0xae,0x22,0x23, + 0xb0,0x03,0x61,0xa3,0x96,0x17,0x7a,0x9c,0xb4,0x10,0xff,0x61,0xf2,0x00,0x15,0xad + }; + + // expected_second_be is SHA256(SHA256("abc")) + static const uint8_t expected_second_be[32] = { + 0x4f,0x8b,0x42,0xc2,0x2d,0xd3,0x72,0x9b,0x51,0x9b,0xa6,0xf6,0x8d,0x2d,0xa7,0xcc, + 0x5b,0x2d,0x60,0x6d,0x05,0xda,0xed,0x5a,0xd5,0x12,0x8c,0xc0,0x3e,0x6c,0x63,0x58 + }; + + uint8_t second_block[64] = {0}; + uint8_t sw_second_from_block[32] = {0}; + uint8_t hw_second_from_block[32] = {0}; + uint8_t hw_second_fast[32] = {0}; + + s3_build_second_sha_block_from_first_digest(first_digest_be, second_block); + sha256_sw_compress_one_block_from_iv(second_block, sw_second_from_block); + bool hw_block_ok = sha256_s3_hw_run_one_block_from_iv(second_block, hw_second_from_block); + bool hw_fast_ok = sha256_s3_second_sha_from_first_be_internal(first_digest_be, hw_second_fast, true); + + bool sw_match = (memcmp(sw_second_from_block, expected_second_be, 32) == 0); + bool hw_block_match = hw_block_ok && (memcmp(hw_second_from_block, expected_second_be, 32) == 0); + bool hw_fast_match = hw_fast_ok && (memcmp(hw_second_fast, expected_second_be, 32) == 0); + bool hw_block_vs_fast = hw_block_ok && hw_fast_ok && (memcmp(hw_second_from_block, hw_second_fast, 32) == 0); + + s3_log_bytes("[S3-SHA2]", "first_digest_be", first_digest_be, 32); + s3_log_bytes("[S3-SHA2]", "second_block_bytes", second_block, 64); + s3_log_bytes("[S3-SHA2]", "expected_second_be", expected_second_be, 32); + s3_log_bytes("[S3-SHA2]", "sw_second_from_block", sw_second_from_block, 32); + s3_log_bytes("[S3-SHA2]", "hw_second_from_block", hw_second_from_block, 32); + s3_log_bytes("[S3-SHA2]", "hw_second_fast", hw_second_fast, 32); + + Serial.printf("[S3-SHA2] sw_ref_match=%s hw_block_match=%s hw_fast_match=%s hw_block_eq_fast=%s\n", + sw_match ? 
"PASS" : "FAIL", + hw_block_match ? "PASS" : "FAIL", + hw_fast_match ? "PASS" : "FAIL", + hw_block_vs_fast ? "PASS" : "FAIL"); + + return sw_match && hw_block_match && hw_fast_match && hw_block_vs_fast; +} + #endif diff --git a/src/mining/sha256_s3.h b/src/mining/sha256_s3.h index c00c852..1d5ef0e 100644 --- a/src/mining/sha256_s3.h +++ b/src/mining/sha256_s3.h @@ -4,6 +4,17 @@ #ifdef CONFIG_IDF_TARGET_ESP32S3 +typedef struct { + uint32_t restoredMidstateWords[8]; + uint32_t shaHAfterRestoreWords[8]; + uint32_t firstDigestRawWords[8]; + uint8_t firstDigestBytes[32]; + uint8_t finalDigestBeBytes[32]; + uint8_t secondInputBlockBytes[64]; + uint32_t secondDigestRawWords[8]; + uint8_t finalDigestBytes[32]; +} sha256_s3_verify_trace_t; + // Initialize S3 SHA hardware void sha256_s3_init(void); @@ -32,4 +43,40 @@ bool sha256_s3_verify( uint8_t *hash_out // 32-byte output ); +// Isolated deterministic single-shot verify helper with detailed internal trace. +// This function does not use the mining loop and is intended for boot-time testing. +bool sha256_s3_verify_trace( + const uint32_t *midstate, + const uint8_t *header_tail, + uint32_t nonce, + uint8_t *hash_out, + sha256_s3_verify_trace_t *trace +); + +// Deterministic restore/tail diagnostic matrix: +// runs one first-pass compression (midstate + tail block) for restore modes A/B/C/D +// and tail modes T0/T1/T2, then compares against expected first digest. +bool sha256_s3_test_restore_mapping( + const uint32_t *midstate_words, + const uint8_t *canonical_header_tail16, + const uint32_t *header_tail_words_swapped, + const uint8_t *expected_first_digest +); + +// One-block IV diagnostic for SHA-256 compression correctness. +// Uses a canonical pre-padded 64-byte block ("abc" block) and reports: +// 1) HW from IV (START) +// 2) SW one-block compression from IV +// 3) HW with explicit IV restore + CONTINUE +bool sha256_s3_test_one_block_from_iv(void); + +// Deeper diagnostics for second-SHA-only acceleration path. 
+// Runs software + hardware comparisons for a known first-digest vector and +// reports where representation or state-resume mismatches occur. +bool sha256_s3_test_second_sha_paths(void); + +// Hardware second-SHA only path. +// Input and output are canonical SHA-256 digest bytes (big-endian words H0..H7). +bool sha256_s3_second_sha_from_first_be(const uint8_t first_digest_be[32], uint8_t out_digest_be[32]); + #endif diff --git a/src/stats/live_stats.cpp b/src/stats/live_stats.cpp index 6f8ecdf..225a047 100644 --- a/src/stats/live_stats.cpp +++ b/src/stats/live_stats.cpp @@ -17,6 +17,7 @@ #include "live_stats.h" #include "board_config.h" #include "../config/nvs_config.h" +#include "../logging.h" // ============================================================ // Globals @@ -622,6 +623,12 @@ static bool fetchFromCustomApi() { if (s_jsonDoc.containsKey("workers")) { s_stats.poolWorkersCount = s_jsonDoc["workers"]; s_stats.poolValid = true; + } else if (s_jsonDoc.containsKey("workersCount")) { + s_stats.poolWorkersCount = s_jsonDoc["workersCount"]; + s_stats.poolValid = true; + } else if (s_jsonDoc.containsKey("pool_workers_count")) { + s_stats.poolWorkersCount = s_jsonDoc["pool_workers_count"]; + s_stats.poolValid = true; } if (s_jsonDoc.containsKey("failovers")) { @@ -983,7 +990,8 @@ void live_stats_task(void *param) { s_lastNetworkUpdate = bootTime - UPDATE_NETWORK_MS - 5000; s_lastCustomApiUpdate = bootTime - UPDATE_PRICE_MS - 1000; - Serial.println("[STATS] Task started"); + log_wait_startup_barrier(); + log_line("[STATS] Task started"); while (true) { if (WiFi.status() == WL_CONNECTED) { diff --git a/src/stats/monitor.cpp b/src/stats/monitor.cpp index 7c30ece..b9c3427 100644 --- a/src/stats/monitor.cpp +++ b/src/stats/monitor.cpp @@ -14,6 +14,7 @@ #include "../stratum/stratum.h" #include "../config/nvs_config.h" #include "../config/wifi_manager.h" +#include "../logging.h" // Update intervals #define DISPLAY_UPDATE_MS 1000 // 1 second @@ -33,6 +34,37 @@ static 
bool s_earlySaveDone = false; // Track if we've done the early save static uint32_t s_lastAcceptedCount = 0; // Track shares for first-share save static uint32_t s_lastLedShareCount = 0; // Track shares for LED flash +extern TaskHandle_t miner0Task; +extern TaskHandle_t miner1Task; +extern TaskHandle_t stratumTask; +extern TaskHandle_t monitorTask; +extern TaskHandle_t buttonTask; + +static char taskStateChar(TaskHandle_t handle) { + if (handle == NULL) { + return '-'; + } + +#if (INCLUDE_eTaskGetState == 1) + switch (eTaskGetState(handle)) { + case eRunning: + return 'R'; + case eReady: + return 'Y'; + case eBlocked: + return 'B'; + case eSuspended: + return 'S'; + case eDeleted: + return 'D'; + default: + return '?'; + } +#else + return 'A'; +#endif +} + // Track session start values to calculate deltas for persistence static uint64_t s_sessionStartHashes = 0; static uint32_t s_sessionStartShares = 0; @@ -67,7 +99,8 @@ static void updateDisplayData(display_data_t *data) { data->uptimeSeconds = (millis() - s_startTime) / 1000; data->avgLatency = mstats->avgLatency; - // Calculate hashrate with EMA smoothing + // Calculate display hashrate from the same live window family as the console. + // Expose both the raw live window and a smoothed value for the UI. 
static uint64_t lastHashes = 0; static uint32_t lastHashTime = 0; static double smoothedHashRate = 0.0; @@ -80,19 +113,17 @@ static void updateDisplayData(display_data_t *data) { uint64_t deltaHashes = mstats->hashes - lastHashes; double instantRate = (double)deltaHashes * 1000.0 / elapsed; - // Exponential moving average (alpha=0.15 for smooth but responsive updates) - // Lower alpha = smoother but slower to respond - // Higher alpha = more responsive but jumpier - const double alpha = 0.15; + data->hashRate = instantRate; + const double alpha = 0.15; if (firstSample) { smoothedHashRate = instantRate; firstSample = false; } else { smoothedHashRate = alpha * instantRate + (1.0 - alpha) * smoothedHashRate; } + data->hashRateAvg = smoothedHashRate; - data->hashRate = smoothedHashRate; lastHashes = mstats->hashes; lastHashTime = now; } @@ -208,7 +239,8 @@ void monitor_reset_activity() { } void monitor_task(void *param) { - Serial.printf("[MONITOR] Task started on core %d\n", xPortGetCoreID()); + log_wait_startup_barrier(); + log_linef("[MONITOR] Task started on core %d", xPortGetCoreID()); if (!s_initialized) { monitor_init(); @@ -253,21 +285,64 @@ void monitor_task(void *param) { } #endif - // Also print to serial for headless/debug + // Serial stats: printed every STATS_UPDATE_MS (10s) + // All rates (total + per-core) are computed from the same delta window + // so: total H/s == Core0 H/s + Core1 H/s, and percentages sum to 100%. + // displayData.hashRate (raw window) and displayData.hashRateAvg (EMA-smoothed) are used only for the display, not here.
static uint32_t lastSerialPrint = 0; if (now - lastSerialPrint >= 10000) { - Serial.printf("[STATS] Hashrate: %.2f H/s | Shares: %u/%u | Ping: %u ms | Best: %.4f\n", - displayData.hashRate, + extern volatile uint64_t s_core0Hashes; + extern volatile uint64_t s_core1Hashes; + static uint64_t lastCore0 = 0, lastCore1 = 0; + static uint32_t lastStatsTime = 0; + static uint32_t lastHealthPrint = 0; + static uint32_t lastHealthAccepted = 0; + static uint32_t lastHealthRejected = 0; + + // Snapshot counters atomically (best-effort; volatile reads) + uint64_t snap0 = s_core0Hashes; + uint64_t snap1 = s_core1Hashes; + uint32_t statsElapsed = (lastStatsTime > 0) ? (now - lastStatsTime) : 10000; + + double c0hs = 0.0, c1hs = 0.0, totalHs = 0.0; + double c0pct = 0.0, c1pct = 0.0; + if (lastStatsTime > 0 && statsElapsed > 0) { + uint64_t d0 = snap0 - lastCore0; + uint64_t d1 = snap1 - lastCore1; + c0hs = (double)d0 * 1000.0 / statsElapsed; + c1hs = (double)d1 * 1000.0 / statsElapsed; + totalHs = c0hs + c1hs; + if (totalHs > 0.0) { + c0pct = c0hs / totalHs * 100.0; + c1pct = c1hs / totalHs * 100.0; + } + } + + // Total line: rate is derived from per-core sum (not EMA) + extern volatile uint32_t s_jobChanges; + Serial.printf("[STATS] Total: %.1f H/s (window %lus) | Shares: %u/%u | Ping: %u ms | Best: %.4f | Jobs: %lu (chg: %lu)\n", + totalHs, + (unsigned long)(statsElapsed / 1000), displayData.sharesAccepted, displayData.sharesAccepted + displayData.sharesRejected, displayData.avgLatency, - displayData.bestDifficulty); - + displayData.bestDifficulty, + (unsigned long)miner_get_stats()->templates, + (unsigned long)s_jobChanges); + Serial.printf("[STATS] Core0: %.1f H/s (%.0f%%, total %llu) | Core1: %.1f H/s (%.0f%%, total %llu)\n", + c0hs, c0pct, snap0, c1hs, c1pct, snap1); + if (displayData.poolName) { - Serial.printf("[STATS] Pool: %s (%d workers) %s\n", - displayData.poolName, - displayData.poolWorkersTotal, - (displayData.poolFailovers > 0) ? 
"[FAILOVER]" : ""); + if (displayData.poolWorkersTotal > 0) { + Serial.printf("[STATS] Pool: %s (%d workers) %s\n", + displayData.poolName, + displayData.poolWorkersTotal, + (displayData.poolFailovers > 0) ? "[FAILOVER]" : ""); + } else { + Serial.printf("[STATS] Pool: %s (workers: n/a) %s\n", + displayData.poolName, + (displayData.poolFailovers > 0) ? "[FAILOVER]" : ""); + } } if (displayData.btcPrice > 0) { @@ -277,27 +352,52 @@ void monitor_task(void *param) { displayData.halfHourFee); } - // DEBUG: Per-core hash contribution - extern volatile uint64_t s_core0Hashes; - extern volatile uint64_t s_core1Hashes; - Serial.printf("[STATS] Core0: %llu hashes, Core1: %llu hashes\n", s_core0Hashes, s_core1Hashes); + lastCore0 = snap0; + lastCore1 = snap1; + lastStatsTime = now; - // Heap monitoring - track memory usage over time + // Heap monitoring uint32_t freeHeap = ESP.getFreeHeap(); uint32_t minFreeHeap = ESP.getMinFreeHeap(); uint32_t maxAllocHeap = ESP.getMaxAllocHeap(); - - // Always log heap stats for debugging memory leaks Serial.printf("[HEAP] Free: %lu | Min: %lu | MaxAlloc: %lu\n", freeHeap, minFreeHeap, maxAllocHeap); - - // Warn if critically low (under 30KB) if (freeHeap < 30000) { Serial.println("[HEAP] CRITICAL: Memory very low - may crash soon!"); } else if (freeHeap < 50000) { Serial.println("[HEAP] WARNING: Memory getting low"); } + if (lastHealthPrint == 0) { + lastHealthPrint = now; + lastHealthAccepted = displayData.sharesAccepted; + lastHealthRejected = displayData.sharesRejected; + } else if (now - lastHealthPrint >= 60000) { + uint32_t acceptedDelta = displayData.sharesAccepted - lastHealthAccepted; + uint32_t rejectedDelta = displayData.sharesRejected - lastHealthRejected; + const char *wifiState = displayData.wifiConnected ? "up" : "down"; + const char *poolState = displayData.poolConnected ? 
"up" : "down"; + + Serial.printf( + "[HEALTH] hs=%.1f minHeap=%lu A/R=+%lu/+%lu wifi=%s pool=%s tasks{m0:%c,m1:%c,str:%c,mon:%c,btn:%c}\n", + totalHs, + minFreeHeap, + (unsigned long)acceptedDelta, + (unsigned long)rejectedDelta, + wifiState, + poolState, + taskStateChar(miner0Task), + taskStateChar(miner1Task), + taskStateChar(stratumTask), + taskStateChar(monitorTask), + taskStateChar(buttonTask) + ); + + lastHealthAccepted = displayData.sharesAccepted; + lastHealthRejected = displayData.sharesRejected; + lastHealthPrint = now; + } + lastSerialPrint = now; } diff --git a/src/stratum/stratum.cpp b/src/stratum/stratum.cpp index 7ef216b..684758d 100644 --- a/src/stratum/stratum.cpp +++ b/src/stratum/stratum.cpp @@ -12,18 +12,19 @@ #include #include "stratum.h" #include "../mining/miner.h" +#include "../logging.h" -// ============================================================ +// ============================================================ // Constants -// ============================================================ +// ============================================================ #define STRATUM_MSG_BUFFER 512 #define RESPONSE_TIMEOUT_MS 3000 #define KEEPALIVE_MS 120000 #define INACTIVITY_MS 700000 -// ============================================================ +// ============================================================ // Global State -// ============================================================ +// ============================================================ static QueueHandle_t s_submitQueue = NULL; static submit_entry_t s_pendingResponses[MAX_PENDING_SUBMISSIONS]; static uint16_t s_pendingIndex = 0; @@ -54,9 +55,9 @@ static int s_extraNonce2Size = 4; // JSON document for parsing static StaticJsonDocument<4096> s_doc; -// ============================================================ +// ============================================================ // Utility Functions -// ============================================================ +// 
============================================================ static uint32_t getNextId() { if (s_messageId == UINT32_MAX) { @@ -86,7 +87,7 @@ static String readBoundedLine(WiFiClient& client, size_t maxLen = 4096) { String line; line.reserve(256); // Initial allocation unsigned long start = millis(); - + while (client.connected() && (millis() - start < 5000)) { if (client.available()) { char c = client.read(); @@ -111,9 +112,9 @@ static String readBoundedLine(WiFiClient& client, size_t maxLen = 4096) { return line; } -// ============================================================ +// ============================================================ // Protocol Functions -// ============================================================ +// ============================================================ static bool sendMessage(WiFiClient &client, const char *msg) { if (!client.connected()) return false; @@ -326,7 +327,12 @@ static void handleServerMessage(WiFiClient &client) { // Helper: Read lines until we get a response with matching ID (or timeout) // Handles method calls (set_difficulty, notify) that arrive before the response +// IMPORTANT: This function properly handles out-of-order JSON-RPC responses, +// which is normal in async Stratum v1 - the server may send mining.notify or +// mining.set_difficulty before responding to your mining.authorize request. 
static bool waitForResponseById(WiFiClient &client, uint32_t expectedId, String &outResponse, int maxAttempts = 10) { + int outOfOrderCount = 0; + for (int attempt = 0; attempt < maxAttempts; attempt++) { String line = readBoundedLine(client); // Use bounded read to prevent OOM line.trim(); @@ -348,12 +354,16 @@ static bool waitForResponseById(WiFiClient &client, uint32_t expectedId, String if (s_doc.containsKey("method")) { const char *method = s_doc["method"]; - // Handle set_difficulty immediately since it's important + // Handle set_difficulty and notify immediately - both may arrive before + // the auth response, and discarding notify means no jobs until the next block. if (strcmp(method, "mining.set_difficulty") == 0) { double diff = s_doc["params"][0] | 1.0; if (!isnan(diff) && diff > 0) { miner_set_difficulty(diff); + dbg("[STRATUM] Set difficulty via async method: %.6f\n", diff); } + } else if (strcmp(method, "mining.notify") == 0) { + parseMiningNotify(line); } // Continue reading for our actual response continue; @@ -366,11 +376,22 @@ static bool waitForResponseById(WiFiClient &client, uint32_t expectedId, String outResponse = line; return true; } - Serial.printf("[STRATUM] Got response for different id: %lu (expected %lu)\n", respId, expectedId); + // Log out-of-order responses but continue searching + outOfOrderCount++; + Serial.printf("[STRATUM] Received response id=%lu (expected %lu, attempt %d/%d)\n", + respId, expectedId, attempt + 1, maxAttempts); + + // If we get more than 2 out-of-order responses, assume the server + // is misbehaving or our request was not received. Bail after max attempts. 
+ if (outOfOrderCount > 2) { + Serial.printf("[STRATUM] Multiple out-of-order responses detected, giving up after %d attempts\n", + attempt + 1); + return false; + } } } - Serial.println("[STRATUM] Max attempts reached waiting for response"); + Serial.printf("[STRATUM] Max attempts reached waiting for response id=%lu\n", expectedId); return false; } @@ -424,7 +445,7 @@ static bool subscribe(WiFiClient &client, const char *wallet, const char *passwo } else { safeStrCpy(fullUsername, wallet, sizeof(fullUsername)); } - + // Store authorized worker name for submissions safeStrCpy(s_authorizedWorkerName, fullUsername, sizeof(s_authorizedWorkerName)); @@ -496,9 +517,9 @@ static void submitShare(WiFiClient &client, const submit_entry_t *entry) { } } -// ============================================================ +// ============================================================ // Public API -// ============================================================ +// ============================================================ void stratum_init() { // Create submission queue @@ -521,7 +542,8 @@ void stratum_task(void *param) { uint32_t lastConnectAttempt = 0; uint32_t backupConnectTime = 0; - Serial.printf("[STRATUM] Task started on core %d\n", xPortGetCoreID()); + log_wait_startup_barrier(); + log_linef("[STRATUM] Task started on core %d", xPortGetCoreID()); while (true) { // Wait for WiFi with auto-reconnect (Issue #4 fix) @@ -530,7 +552,7 @@ void stratum_task(void *param) { miner_stop(); client.stop(); s_isConnected = false; - Serial.println("[WIFI] Connection lost, attempting reconnect..."); + log_line("[WIFI] Connection lost, attempting reconnect..."); } // Calculate exponential backoff: 1s, 2s, 4s, 8s, 15s max @@ -540,8 +562,8 @@ void stratum_task(void *param) { if (millis() - s_lastWifiReconnectAttempt >= backoffMs) { s_wifiReconnectAttempts++; s_lastWifiReconnectAttempt = millis(); - Serial.printf("[WIFI] Reconnect attempt %lu (backoff: %lums)\n", - 
s_wifiReconnectAttempts, backoffMs); + log_linef("[WIFI] Reconnect attempt %lu (backoff: %lums)", + s_wifiReconnectAttempts, backoffMs); WiFi.reconnect(); } @@ -551,7 +573,7 @@ void stratum_task(void *param) { // WiFi connected - reset reconnect counter if (s_wifiReconnectAttempts > 0) { - Serial.printf("[WIFI] Reconnected after %lu attempts\n", s_wifiReconnectAttempts); + log_linef("[WIFI] Reconnected after %lu attempts", s_wifiReconnectAttempts); s_wifiReconnectAttempts = 0; } @@ -587,8 +609,8 @@ void stratum_task(void *param) { usingBackup = false; - Serial.printf("[STRATUM] Connecting to %s:%d...\n", - s_primaryPool.url, s_primaryPool.port); + log_linef("[STRATUM] Connecting to %s:%d...", + s_primaryPool.url, s_primaryPool.port); // STABILITY FIX: Use connect timeout (10s) to prevent long blocks if (client.connect(s_primaryPool.url, s_primaryPool.port, 10000)) { @@ -596,17 +618,17 @@ void stratum_task(void *param) { s_isConnected = true; s_lastActivity = millis(); safeStrCpy(s_currentPoolUrl, s_primaryPool.url, MAX_POOL_URL_LEN); - Serial.println("[STRATUM] Connected to primary pool"); + log_line("[STRATUM] Connected to primary pool"); } else { client.stop(); } } else { - Serial.println("[STRATUM] Connection failed"); + log_line("[STRATUM] Connection failed"); // Try backup pool after 30s of failures if (s_hasBackupPool && (millis() - lastConnectAttempt > POOL_FAILOVER_MS)) { - Serial.printf("[STRATUM] Trying backup: %s:%d\n", - s_backupPool.url, s_backupPool.port); + log_linef("[STRATUM] Trying backup: %s:%d", + s_backupPool.url, s_backupPool.port); // STABILITY FIX: Use connect timeout (10s) if (client.connect(s_backupPool.url, s_backupPool.port, 10000)) { @@ -616,7 +638,7 @@ void stratum_task(void *param) { backupConnectTime = millis(); s_lastActivity = millis(); safeStrCpy(s_currentPoolUrl, s_backupPool.url, MAX_POOL_URL_LEN); - Serial.println("[STRATUM] Connected to backup pool"); + log_line("[STRATUM] Connected to backup pool"); } else { client.stop(); } 
@@ -647,7 +669,7 @@ void stratum_task(void *param) { testClient.stop(); // Clean up the old (now empty) client usingBackup = false; safeStrCpy(s_currentPoolUrl, s_primaryPool.url, MAX_POOL_URL_LEN); - Serial.println("[STRATUM] Switched back to primary pool"); + log_line("[STRATUM] Switched back to primary pool"); continue; } else { testClient.stop(); @@ -680,7 +702,7 @@ void stratum_task(void *param) { // Check for inactivity if (millis() - s_lastActivity > INACTIVITY_MS) { - Serial.println("[STRATUM] Pool inactive, disconnecting"); + log_line("[STRATUM] Pool inactive, disconnecting"); miner_stop(); client.stop(); s_isConnected = false;