diff --git a/firmware/controller/src/commands/stage_commands.cpp b/firmware/controller/src/commands/stage_commands.cpp index 812b46157..2363234e2 100644 --- a/firmware/controller/src/commands/stage_commands.cpp +++ b/firmware/controller/src/commands/stage_commands.cpp @@ -1,15 +1,30 @@ #include "stage_commands.h" +// Mark a move callback as failed: clear in_progress so the host stops +// blocking, but flag CMD_EXECUTION_ERROR so the host doesn't treat the +// non-move as a successful completion. Without this, send_position_update +// would broadcast COMPLETED_WITHOUT_ERRORS and the host's tracked position +// would silently desync from the hardware. +static inline void mark_move_failed() +{ + mcu_cmd_execution_in_progress = false; + mcu_cmd_execution_status = CMD_EXECUTION_ERROR; +} + void callback_move_x() { long relative_position = int32_t(uint32_t(buffer_rx[2]) << 24 | uint32_t(buffer_rx[3]) << 16 | uint32_t(buffer_rx[4]) << 8 | uint32_t(buffer_rx[5])); long current_position = tmc4361A_currentPosition(&tmc4361[x]); X_direction = sgn(relative_position); X_commanded_target_position = ( relative_position > 0 ? min(current_position + relative_position, X_POS_LIMIT) : max(current_position + relative_position, X_NEG_LIMIT) ); + mcu_cmd_execution_in_progress = true; if ( tmc4361A_moveTo(&tmc4361[x], X_commanded_target_position) == 0) { X_commanded_movement_in_progress = true; - mcu_cmd_execution_in_progress = true; + } + else + { + mark_move_failed(); } } @@ -19,10 +34,14 @@ void callback_move_y() long current_position = tmc4361A_currentPosition(&tmc4361[y]); Y_direction = sgn(relative_position); Y_commanded_target_position = ( relative_position > 0 ? min(current_position + relative_position, Y_POS_LIMIT) : max(current_position + relative_position, Y_NEG_LIMIT) ); + mcu_cmd_execution_in_progress = true; if ( tmc4361A_moveTo(&tmc4361[y], Y_commanded_target_position) == 0) { Y_commanded_movement_in_progress = true; - mcu_cmd_execution_in_progress = true; + } + else + { + mark_move_failed(); } } @@ -33,26 +52,40 @@ void callback_move_z() Z_direction = sgn(relative_position); Z_commanded_target_position = ( relative_position > 0 ? min(current_position + relative_position, Z_POS_LIMIT) : max(current_position + relative_position, Z_NEG_LIMIT) ); focusPosition = Z_commanded_target_position; + mcu_cmd_execution_in_progress = true; if ( tmc4361A_moveTo(&tmc4361[z], Z_commanded_target_position) == 0) { Z_commanded_movement_in_progress = true; - mcu_cmd_execution_in_progress = true; + } + else + { + mark_move_failed(); } } // Helper function for filter wheel movement (shared by W and W2) static void move_filterwheel(uint8_t axis, bool enabled, int* direction, long* target_position, bool* movement_in_progress) { - if (!enabled) return; + if (!enabled) + { + // Filter wheel not initialized: surface as an execution error so the + // host re-homes instead of trusting that the move happened. + mark_move_failed(); + return; + } long relative_position = int32_t(uint32_t(buffer_rx[2]) << 24 | uint32_t(buffer_rx[3]) << 16 | uint32_t(buffer_rx[4]) << 8 | uint32_t(buffer_rx[5])); long current_position = tmc4361A_currentPosition(&tmc4361[axis]); *direction = sgn(relative_position); *target_position = current_position + relative_position; + mcu_cmd_execution_in_progress = true; if (tmc4361A_moveTo(&tmc4361[axis], *target_position) == 0) { *movement_in_progress = true; - mcu_cmd_execution_in_progress = true; + } + else + { + mark_move_failed(); } } @@ -71,10 +104,14 @@ void callback_move_to_x() long absolute_position = int32_t(uint32_t(buffer_rx[2]) << 24 | uint32_t(buffer_rx[3]) << 16 | uint32_t(buffer_rx[4]) << 8 | uint32_t(buffer_rx[5])); X_direction = sgn(absolute_position - tmc4361A_currentPosition(&tmc4361[x])); X_commanded_target_position = absolute_position; + mcu_cmd_execution_in_progress = true; if (tmc4361A_moveTo(&tmc4361[x], X_commanded_target_position) == 0) { X_commanded_movement_in_progress = true; - mcu_cmd_execution_in_progress = true; + } + else + { + mark_move_failed(); } } @@ -83,10 +120,14 @@ void callback_move_to_y() long absolute_position = int32_t(uint32_t(buffer_rx[2]) << 24 | uint32_t(buffer_rx[3]) << 16 | uint32_t(buffer_rx[4]) << 8 | uint32_t(buffer_rx[5])); Y_direction = sgn(absolute_position - tmc4361A_currentPosition(&tmc4361[y])); Y_commanded_target_position = absolute_position; + mcu_cmd_execution_in_progress = true; if (tmc4361A_moveTo(&tmc4361[y], Y_commanded_target_position) == 0) { Y_commanded_movement_in_progress = true; - mcu_cmd_execution_in_progress = true; + } + else + { + mark_move_failed(); } } @@ -95,25 +136,36 @@ void callback_move_to_z() long absolute_position = int32_t(uint32_t(buffer_rx[2]) << 24 | uint32_t(buffer_rx[3]) << 16 | uint32_t(buffer_rx[4]) << 8 | uint32_t(buffer_rx[5])); Z_direction = sgn(absolute_position - tmc4361A_currentPosition(&tmc4361[z])); Z_commanded_target_position = absolute_position; + mcu_cmd_execution_in_progress = true; if (tmc4361A_moveTo(&tmc4361[z], Z_commanded_target_position) == 0) { focusPosition = absolute_position; Z_commanded_movement_in_progress = true; - mcu_cmd_execution_in_progress = true; + } + else + { + mark_move_failed(); } } void callback_move_to_w() { - if (enable_filterwheel == true) { - long absolute_position = int32_t(uint32_t(buffer_rx[2]) << 24 | uint32_t(buffer_rx[3]) << 16 | uint32_t(buffer_rx[4]) << 8 | uint32_t(buffer_rx[5])); - W_direction = sgn(absolute_position - tmc4361A_currentPosition(&tmc4361[w])); - W_commanded_target_position = absolute_position; - if (tmc4361A_moveTo(&tmc4361[w], W_commanded_target_position) == 0) - { + if (enable_filterwheel != true) + { + mark_move_failed(); + return; + } + long absolute_position = int32_t(uint32_t(buffer_rx[2]) << 24 | uint32_t(buffer_rx[3]) << 16 | uint32_t(buffer_rx[4]) << 8 | uint32_t(buffer_rx[5])); + W_direction = sgn(absolute_position - tmc4361A_currentPosition(&tmc4361[w])); + W_commanded_target_position = absolute_position; + mcu_cmd_execution_in_progress = true; + if (tmc4361A_moveTo(&tmc4361[w], W_commanded_target_position) == 0) + { W_commanded_movement_in_progress = true; - mcu_cmd_execution_in_progress = true; - } + } + else + { + mark_move_failed(); } } diff --git a/firmware/controller/src/constants.h b/firmware/controller/src/constants.h index ef7b1acb7..1e5058008 100644 --- a/firmware/controller/src/constants.h +++ b/firmware/controller/src/constants.h @@ -9,8 +9,10 @@ // Version is sent in response byte 22 as nibble-encoded: high nibble = major, low nibble = minor // Version 1.0 = first version with multi-port illumination support // Version 1.1 = serial watchdog for illumination auto-shutoff +// Version 1.2 = move callbacks report CMD_EXECUTION_ERROR on tmc4361A_moveTo +// failure; W axis microstep position broadcast in bytes 19-20 #define FIRMWARE_VERSION_MAJOR 1 -#define FIRMWARE_VERSION_MINOR 1 +#define FIRMWARE_VERSION_MINOR 2 #include "def/def_v1.h" diff --git a/firmware/controller/src/globals.cpp b/firmware/controller/src/globals.cpp index f15a4f965..873fa90d9 100644 --- a/firmware/controller/src/globals.cpp +++ b/firmware/controller/src/globals.cpp @@ -33,6 +33,11 @@ uint16_t home_safety_margin[TOTAL_AXES] = {4, 4, 4, 4, 4}; volatile int buffer_rx_ptr = 0; byte cmd_id = 0; bool mcu_cmd_execution_in_progress = false; +// Status reported in the next position-update packet whenever +// mcu_cmd_execution_in_progress is false. Reset to COMPLETED_WITHOUT_ERRORS +// at the start of every received command and overwritten with +// CMD_EXECUTION_ERROR by a callback that detects a failure. +byte mcu_cmd_execution_status = COMPLETED_WITHOUT_ERRORS; bool checksum_error = false; // limit switch diff --git a/firmware/controller/src/globals.h b/firmware/controller/src/globals.h index 096dd9967..b3c4321e0 100644 --- a/firmware/controller/src/globals.h +++ b/firmware/controller/src/globals.h @@ -37,6 +37,7 @@ extern uint16_t home_safety_margin[TOTAL_AXES]; extern volatile int buffer_rx_ptr; extern byte cmd_id; extern bool mcu_cmd_execution_in_progress; +extern byte mcu_cmd_execution_status; extern bool checksum_error; // limit switch diff --git a/firmware/controller/src/serial_communication.cpp b/firmware/controller/src/serial_communication.cpp index 5af2d8acf..f6134edbd 100644 --- a/firmware/controller/src/serial_communication.cpp +++ b/firmware/controller/src/serial_communication.cpp @@ -26,6 +26,12 @@ void process_serial_message() // Reset watchdog timer on every valid serial message last_serial_message_time = millis(); + // Reset execution status for this new command; callbacks set it to + // CMD_EXECUTION_ERROR if they detect a failure (e.g. tmc4361A_moveTo + // returning non-zero) so the host can distinguish a real success + // from a silently dropped move. + mcu_cmd_execution_status = COMPLETED_WITHOUT_ERRORS; + CommandCallback p_callback = cmd_map[buffer_rx[1]]; if (!p_callback) { callback_default(); @@ -46,8 +52,10 @@ void send_position_update() buffer_tx[0] = cmd_id; if (checksum_error) buffer_tx[1] = CMD_CHECKSUM_ERROR; // cmd_execution_status + else if (mcu_cmd_execution_in_progress) + buffer_tx[1] = IN_PROGRESS; else - buffer_tx[1] = mcu_cmd_execution_in_progress ? IN_PROGRESS : COMPLETED_WITHOUT_ERRORS; // cmd_execution_status + buffer_tx[1] = mcu_cmd_execution_status; // COMPLETED_WITHOUT_ERRORS or CMD_EXECUTION_ERROR uint32_t X_pos_int32t = uint32_t( X_use_encoder ? X_pos : int32_t(tmc4361A_currentPosition(&tmc4361[x])) ); buffer_tx[2] = byte(X_pos_int32t >> 24); @@ -74,9 +82,34 @@ void send_position_update() buffer_tx[18] &= ~ (1 << BIT_POS_JOYSTICK_BUTTON); // clear the joystick button bit buffer_tx[18] = buffer_tx[18] | joystick_button_pressed << BIT_POS_JOYSTICK_BUTTON; - // Clear reserved bytes to avoid stale data affecting the checksum - buffer_tx[19] = 0; - buffer_tx[20] = 0; + // Bytes 19-20: W axis current microstep position as signed int16, + // big-endian. Used by the host to detect silent filter-wheel move + // failures (W has no encoder feedback). Gated on `enable_filterwheel` + // because tmc4361[w] is uninitialized until INITFILTERWHEEL arrives + // (init.cpp:123 only inits indices 0..STAGE_AXES-1); reading from an + // uninitialized struct would dereference a NULL config pointer. + // + // Saturates rather than wraps: with ~1600 usteps/slot, an 8-slot wheel + // stays well within ±32k between homes during normal use, but a long + // monotonic sequence (or a misconfigured larger wheel) could overflow. + // The host sees the saturated value and the verification will mismatch, + // triggering a re-home — safer than a silent wrap. + // Byte 21 stays reserved (future use for W2 position). + if (enable_filterwheel) + { + long w_pos = tmc4361A_currentPosition(&tmc4361[w]); + int16_t W_pos_int16; + if (w_pos > INT16_MAX) W_pos_int16 = INT16_MAX; + else if (w_pos < INT16_MIN) W_pos_int16 = INT16_MIN; + else W_pos_int16 = (int16_t) w_pos; + buffer_tx[19] = byte((W_pos_int16 >> 8) & 0xFF); + buffer_tx[20] = byte(W_pos_int16 & 0xFF); + } + else + { + buffer_tx[19] = 0; + buffer_tx[20] = 0; + } buffer_tx[21] = 0; // Firmware version in byte 22: high nibble = major, low nibble = minor diff --git a/software/control/_def.py b/software/control/_def.py index 841c2cfdf..3b2cb40e1 100644 --- a/software/control/_def.py +++ b/software/control/_def.py @@ -361,6 +361,14 @@ def port_index_to_source_code(port_index: int) -> int: # Response byte positions for MCU protocol (24-byte response) RESPONSE_BYTE_FIRMWARE_VERSION = 22 # Nibble-encoded: high=major, low=minor +# Bytes 19-20: W axis current microstep position (signed int16, big-endian). +# Populated by firmware >= v1.2; zero on older firmware. Used by the filter +# wheel controller to verify that a MOVE_W command actually moved the motor. +RESPONSE_BYTE_W_POS_HI = 19 +RESPONSE_BYTE_W_POS_LO = 20 +# Firmware version at which W position broadcast became available. +MIN_FW_VERSION_W_POS_BROADCAST = (1, 2) + # Serial watchdog (illumination auto-shutoff safety) # Must match firmware constants in constants.h DEFAULT_WATCHDOG_TIMEOUT_MS = 5000 # 5 seconds (matches firmware) @@ -665,6 +673,13 @@ def is_piezo_only(self) -> bool: HAS_ENCODER_Z = False HAS_ENCODER_W = False +# Maximum mismatch (in microsteps) tolerated when verifying a filter-wheel +# move against the broadcast W position. Firmware completion guarantees +# exact match for stepper-only setups (operations.cpp:515); the small budget +# only absorbs PID/encoder settling on closed-loop W setups. ±10 ≈ 0.6% of +# one slot (1600 microsteps). +W_POS_TOLERANCE_USTEPS = 10 + # enable PID control ENABLE_PID_X = False ENABLE_PID_Y = False diff --git a/software/control/microcontroller.py b/software/control/microcontroller.py index 635a3b41c..e8748ac5c 100644 --- a/software/control/microcontroller.py +++ b/software/control/microcontroller.py @@ -185,12 +185,22 @@ class SimSerial(AbstractCephlaMicroSerial): # Simulated firmware version # v1.0: multi-port illumination support # v1.1: serial watchdog for illumination auto-shutoff + # v1.2: CMD_EXECUTION_ERROR for failed moves + W position in bytes 19-20 FIRMWARE_VERSION_MAJOR = 1 - FIRMWARE_VERSION_MINOR = 1 + FIRMWARE_VERSION_MINOR = 2 @staticmethod def response_bytes_for( - command_id, execution_status, x, y, z, theta, joystick_button, switch, firmware_version=(1, 1) + command_id, + execution_status, + x, + y, + z, + theta, + joystick_button, + switch, + firmware_version=(1, 2), + w=0, ) -> bytes: """ - byte 0: command ID (1 byte) @@ -200,19 +210,30 @@ def response_bytes_for( - bytes 10-13: Z pos (4 bytes) - bytes 14-17: Theta (4 bytes) - byte 18: buttons and switches (1 byte) - - bytes 19-21: reserved (3 bytes) + - bytes 19-20: W pos as signed int16 big-endian (v1.2+; zero otherwise) + - byte 21: reserved - byte 22: firmware version, nibble-encoded (1 byte) - byte 23: CRC (1 byte) """ crc_calculator = CrcCalculator(Crc8.CCITT, table_based=True) button_state = joystick_button << BIT_POS_JOYSTICK_BUTTON | switch << BIT_POS_SWITCH - # Firmware version is nibble-encoded in byte 22 (last byte of reserved section) - # High nibble = major version, low nibble = minor version version_byte = (firmware_version[0] << 4) | (firmware_version[1] & 0x0F) - reserved_state = version_byte # Byte 22 = version_byte when packed as big-endian int + w_int16 = max(-0x8000, min(0x7FFF, int(w))) if firmware_version >= MIN_FW_VERSION_W_POS_BROADCAST else 0 response = bytearray( - struct.pack(">BBiiiiBi", command_id, execution_status, x, y, z, theta, button_state, reserved_state) + struct.pack( + ">BBiiiiBhBB", + command_id, + execution_status, + x, + y, + z, + theta, + button_state, + w_int16, + 0, # byte 21 reserved + version_byte, + ) ) response.append(crc_calculator.calculate_checksum(response)) return bytes(response) @@ -370,6 +391,7 @@ def _respond_to(self, write_bytes): self.joystick_button, self.switch, firmware_version=(SimSerial.FIRMWARE_VERSION_MAJOR, SimSerial.FIRMWARE_VERSION_MINOR), + w=self.w, ) ) @@ -1460,10 +1482,22 @@ def send_command(self, command): self._warn_if_reads_stale() - def abort_current_command(self, reason): + def abort_current_command(self, reason, recoverable: bool = False): + """Mark the current MCU command as aborted. + + Args: + reason: Human-readable reason; surfaced in the CommandAborted exception. + recoverable: If True, log at WARNING — caller will retry or handle + the failure. If False (default), log at ERROR (operator attention + warranted). + """ cmd_type = self.last_command[1] if self.last_command is not None else -1 cmd_name = _CMD_NAMES.get(cmd_type, f"UNKNOWN({cmd_type})") - self.log.error(f"[MCU] !!! Command {self._cmd_id} ({cmd_name}) ABORTED: {reason}") + msg = f"[MCU] Command {self._cmd_id} ({cmd_name}) aborted: {reason}" + if recoverable: + self.log.warning(msg) + else: + self.log.error(msg) self.last_command_aborted_error = CommandAborted(reason=reason, command_id=self._cmd_id) self.mcu_cmd_execution_in_progress = False @@ -1594,6 +1628,22 @@ def get_msg_with_good_checksum(): else: self.log.error("[MCU] !!! checksum error, resending command") self.resend_last_command() + elif ( + self.mcu_cmd_execution_in_progress + and self._cmd_id_mcu == self._cmd_id + and self._cmd_execution_status == CMD_EXECUTION_STATUS.CMD_EXECUTION_ERROR + ): + # Firmware reported it couldn't execute this command (e.g. + # tmc4361A_moveTo returned non-zero, or a filter-wheel move + # arrived before INITFILTERWHEEL). Fail fast so callers + # don't pay the full 5 s `wait_till_operation_is_completed` + # timeout waiting for a completion that will never arrive. + # Marked recoverable=True so the abort logs at WARNING: + # callers like SquidFilterWheel handle this via re-home + retry. + self.abort_current_command( + reason="firmware reported CMD_EXECUTION_ERROR", + recoverable=True, + ) elif ( self.mcu_cmd_execution_in_progress and self._cmd_id_mcu != self._cmd_id @@ -1617,6 +1667,12 @@ def get_msg_with_good_checksum(): msg[14:18], MicrocontrollerDef.N_BYTES_POS ) # unit: microstep or encoder resolution + # W position broadcast added in firmware v1.2; older firmware + # leaves bytes 19-20 as zero. Callers gate on + # supports_w_pos_broadcast() before trusting w_pos. + if self.firmware_version >= MIN_FW_VERSION_W_POS_BROADCAST: + self.w_pos = self._payload_to_int(msg[RESPONSE_BYTE_W_POS_HI : RESPONSE_BYTE_W_POS_LO + 1], 2) + self.button_and_switch_state = msg[18] # joystick button tmp = self.button_and_switch_state & (1 << BIT_POS_JOYSTICK_BUTTON) @@ -1675,6 +1731,15 @@ def supports_multi_port(self) -> bool: """ return self.firmware_version >= (1, 0) + def supports_w_pos_broadcast(self) -> bool: + """Check if firmware broadcasts the W axis position in status packets. + + Added in firmware v1.2 (bytes 19-20). Callers should use this to gate + any verification that compares ``self.w_pos`` against an expected + value; older firmware leaves those bytes zero. + """ + return self.firmware_version >= MIN_FW_VERSION_W_POS_BROADCAST + def get_button_and_switch_state(self): return self.button_and_switch_state diff --git a/software/squid/filter_wheel_controller/cephla.py b/software/squid/filter_wheel_controller/cephla.py index 480a7ed63..690b17e4e 100644 --- a/software/squid/filter_wheel_controller/cephla.py +++ b/software/squid/filter_wheel_controller/cephla.py @@ -3,7 +3,7 @@ import squid.logging from control._def import * -from control.microcontroller import Microcontroller +from control.microcontroller import CommandAborted, Microcontroller from squid.abc import AbstractFilterWheelController, FilterWheelInfo from squid.config import SquidFilterWheelConfig @@ -70,6 +70,9 @@ def __init__( # The protocol_axis_to_internal() function in firmware handles this conversion. _MOTOR_SLOT_TO_AXIS = {3: AXIS.W, 4: AXIS.W2} + # Errors raised by `_move_and_verify` that the re-home + retry path can recover from. + _RECOVERABLE_MOVE_ERRORS = (TimeoutError, CommandAborted) + def _configure_wheel(self, wheel_id: int, config: SquidFilterWheelConfig): """Configure a single filter wheel motor.""" motor_slot = config.motor_slot_index @@ -90,6 +93,11 @@ def _configure_wheel(self, wheel_id: int, config: SquidFilterWheelConfig): self.microcontroller.configure_stage_pid(axis, config.transitions_per_revolution, ENCODER_FLIP_DIR_W) self.microcontroller.turn_on_stage_pid(axis, ENABLE_PID_W) + @staticmethod + def _delta_to_usteps(delta: float) -> int: + """Microsteps the firmware will be commanded to step for `delta` mm.""" + return int(STAGE_MOVEMENT_SIGN_W * delta / (SCREW_PITCH_W_MM / (MICROSTEPPING_DEFAULT_W * FULLSTEPS_PER_REV_W))) + def _move_wheel(self, wheel_id: int, delta: float): """Move a specific wheel by delta distance. @@ -99,9 +107,7 @@ def _move_wheel(self, wheel_id: int, delta: float): """ config = self._configs[wheel_id] motor_slot = config.motor_slot_index - usteps = int( - STAGE_MOVEMENT_SIGN_W * delta / (SCREW_PITCH_W_MM / (MICROSTEPPING_DEFAULT_W * FULLSTEPS_PER_REV_W)) - ) + usteps = self._delta_to_usteps(delta) if motor_slot == 3: self.microcontroller.move_w_usteps(usteps) @@ -110,21 +116,59 @@ def _move_wheel(self, wheel_id: int, delta: float): else: raise ValueError(f"Unsupported motor_slot_index: {motor_slot}") + def _verify_w_move(self, wheel_id: int, w_pos_before: int, expected_usteps_delta: int) -> None: + """Compare actual broadcast W position against the commanded delta. + + Allows ±W_POS_TOLERANCE_USTEPS of jitter; anything larger means the + move was dropped or partial. Raises TimeoutError on mismatch so + callers fall into the re-home + retry path. + """ + actual_delta = self.microcontroller.w_pos - w_pos_before + if abs(actual_delta - expected_usteps_delta) > W_POS_TOLERANCE_USTEPS: + _log.warning( + f"Filter wheel {wheel_id} W position mismatch after move " + f"(expected delta {expected_usteps_delta} usteps, observed {actual_delta}); " + "treating as silent failure." + ) + raise TimeoutError(f"Filter wheel {wheel_id} did not move as commanded") + + def _move_and_verify(self, wheel_id: int, delta: float, target_pos: int) -> None: + """Single move attempt: command the move, wait for completion, verify + the motor actually moved (when firmware supports W broadcast for the + wheel's axis), update tracked position. Raises TimeoutError / + CommandAborted on failure so callers can re-home and retry. + """ + config = self._configs[wheel_id] + can_verify = config.motor_slot_index == 3 and self.microcontroller.supports_w_pos_broadcast() + if can_verify: + w_pos_before = self.microcontroller.w_pos + expected_usteps_delta = self._delta_to_usteps(delta) + self._move_wheel(wheel_id, delta) + self.microcontroller.wait_till_operation_is_completed() + self._verify_w_move(wheel_id, w_pos_before, expected_usteps_delta) + else: + self._move_wheel(wheel_id, delta) + self.microcontroller.wait_till_operation_is_completed() + self._positions[wheel_id] = target_pos + def _move_to_position(self, wheel_id: int, target_pos: int): - """Move wheel to target position with automatic re-home on failure. + """Move wheel to target position with progressive recovery on failure. - If the movement times out (e.g., motor stall), this method will: - 1. Log a warning - 2. Re-home the wheel to re-synchronize position tracking - 3. Retry the movement to the target position - 4. Raise an exception if retry also fails + Recovery is conditioned on failure type, not unconditional, because a + blind resend can compound partial-motor-stall errors into a silent + desync (motor moves 800/1600, resend moves another 1600, verification + sees delta=1600 ✓ but wheel is one slot past target). - Args: - wheel_id: The ID of the wheel to move. - target_pos: The target position index. + - CommandAborted from CMD_EXECUTION_ERROR: firmware rejected the move + before the motor moved (tmc4361A_moveTo returned non-zero). The + wheel is still at the tracked position, so a plain resend is safe + and avoids the ~4 s re-home cost. + - TimeoutError (ack timeout, or verification mismatch): the motor + state is uncertain — it may have moved partially. Skip the resend + and re-home to re-sync ground truth. Raises: - TimeoutError: If both the initial move and retry after re-home fail. + TimeoutError or CommandAborted: If all attempts fail. """ config = self._configs[wheel_id] current_pos = self._positions[wheel_id] @@ -136,27 +180,29 @@ def _move_to_position(self, wheel_id: int, target_pos: int): delta = (target_pos - current_pos) * step_size try: - self._move_wheel(wheel_id, delta) - self.microcontroller.wait_till_operation_is_completed() - self._positions[wheel_id] = target_pos - except TimeoutError: - _log.warning(f"Filter wheel {wheel_id} movement timed out. " f"Re-homing to re-sync position tracking...") - # Re-home to re-synchronize position tracking - self._home_wheel(wheel_id) - - # Retry the movement (position is now at min_index after homing) - current_pos = self._positions[wheel_id] - delta = (target_pos - current_pos) * step_size + self._move_and_verify(wheel_id, delta, target_pos) + return + except CommandAborted as e: + _log.warning(f"Filter wheel {wheel_id} command aborted ({e}); resending in software...") try: - self._move_wheel(wheel_id, delta) - self.microcontroller.wait_till_operation_is_completed() - self._positions[wheel_id] = target_pos - _log.info(f"Filter wheel {wheel_id} recovery successful, now at position {target_pos}") - except TimeoutError: - _log.error( - f"Filter wheel {wheel_id} movement failed even after re-home. " f"Hardware may need attention." - ) - raise + self._move_and_verify(wheel_id, delta, target_pos) + _log.info(f"Filter wheel {wheel_id} software resend succeeded, now at position {target_pos}") + return + except self._RECOVERABLE_MOVE_ERRORS as e2: + _log.warning(f"Filter wheel {wheel_id} resend also failed ({e2}); re-homing to re-sync...") + except TimeoutError as e: + _log.warning(f"Filter wheel {wheel_id} move uncertain ({e}); re-homing to re-sync...") + + self._home_wheel(wheel_id) + # Position is now at min_index after homing; recompute delta. + current_pos = self._positions[wheel_id] + delta = (target_pos - current_pos) * step_size + try: + self._move_and_verify(wheel_id, delta, target_pos) + _log.info(f"Filter wheel {wheel_id} recovery via re-home succeeded, now at position {target_pos}") + except self._RECOVERABLE_MOVE_ERRORS: + _log.error(f"Filter wheel {wheel_id} movement failed even after re-home. Hardware may need attention.") + raise def _home_wheel(self, wheel_id: int): """Home a specific wheel. diff --git a/software/tests/squid/test_filter_wheel.py b/software/tests/squid/test_filter_wheel.py index 617961f53..c949445e2 100644 --- a/software/tests/squid/test_filter_wheel.py +++ b/software/tests/squid/test_filter_wheel.py @@ -118,3 +118,140 @@ def test_normal_init_configures_encoder_pid(self, mock_microcontroller, squid_co mock_microcontroller.set_pid_arguments.assert_called_once() mock_microcontroller.configure_stage_pid.assert_called_once() mock_microcontroller.turn_on_stage_pid.assert_called_once() + + +class TestSquidFilterWheelWPosVerification: + """Tests for the post-move W position verification path (firmware >= v1.2).""" + + @pytest.fixture + def squid_config(self): + return SquidFilterWheelConfig( + max_index=8, + min_index=1, + offset=0.008, + motor_slot_index=3, + transitions_per_revolution=4000, + ) + + def _build_wheel(self, supports_broadcast: bool, w_pos_after_move: int): + """Construct a SquidFilterWheel whose mocked microcontroller advances + `w_pos` to `w_pos_after_move` whenever `move_w_usteps` is called. + """ + mc = MagicMock() + mc.supports_w_pos_broadcast.return_value = supports_broadcast + mc.w_pos = 0 + + def fake_move_w_usteps(_usteps): + mc.w_pos = w_pos_after_move + + mc.move_w_usteps.side_effect = fake_move_w_usteps + return mc + + def test_verify_passes_when_motor_moves_as_commanded(self, squid_config): + """Move 1 -> 2 (delta = +1600 usteps) and have the broadcast match.""" + mc = self._build_wheel(supports_broadcast=True, w_pos_after_move=1600) + wheel = SquidFilterWheel(mc, squid_config, skip_init=True) + wheel.initialize([1]) + + wheel.set_filter_wheel_position({1: 2}) + + assert wheel.get_filter_wheel_position()[1] == 2 + mc.home_w.assert_not_called() + + def test_verify_triggers_rehome_when_motor_did_not_move(self, squid_config): + """If broadcast W position doesn't change, we should re-home + retry.""" + mc = self._build_wheel(supports_broadcast=True, w_pos_after_move=0) + wheel = SquidFilterWheel(mc, squid_config, skip_init=True) + wheel.initialize([1]) + + with pytest.raises(TimeoutError): + wheel.set_filter_wheel_position({1: 2}) + + # First attempt fails verification -> re-home -> retry still fails. + assert mc.home_w.call_count == 1 + assert mc.move_w_usteps.call_count >= 2 + + def test_verification_skipped_when_firmware_does_not_broadcast(self, squid_config): + """Old firmware: w_pos is unreliable, verification must be skipped + (otherwise the move would falsely look like a silent failure).""" + mc = self._build_wheel(supports_broadcast=False, w_pos_after_move=0) + wheel = SquidFilterWheel(mc, squid_config, skip_init=True) + wheel.initialize([1]) + + wheel.set_filter_wheel_position({1: 2}) + + assert wheel.get_filter_wheel_position()[1] == 2 + mc.home_w.assert_not_called() + + def test_verification_skipped_for_w2_axis(self): + """W2 (motor_slot 4) isn't broadcast yet; verification must be skipped.""" + w2_config = SquidFilterWheelConfig( + max_index=8, + min_index=1, + offset=0.008, + motor_slot_index=4, + transitions_per_revolution=4000, + ) + mc = MagicMock() + mc.supports_w_pos_broadcast.return_value = True + mc.w_pos = 0 # Never advances; would trip verification if it ran. + + wheel = SquidFilterWheel(mc, w2_config, skip_init=True) + wheel.initialize([1]) + wheel.set_filter_wheel_position({1: 2}) + + assert wheel.get_filter_wheel_position()[1] == 2 + mc.home_w2.assert_not_called() + + def test_command_aborted_triggers_software_resend_not_rehome(self, squid_config): + """If the firmware raises CommandAborted (CMD_EXECUTION_ERROR) the motor + never moved, so we should resend in software instead of paying the + ~4 s re-home cost. + """ + from control.microcontroller import CommandAborted + + mc = MagicMock() + mc.supports_w_pos_broadcast.return_value = True + mc.w_pos = 0 + attempts = {"count": 0} + + def fake_move_w_usteps(usteps): + attempts["count"] += 1 + # First attempt fails fast (firmware abort), second attempt succeeds. + if attempts["count"] == 1: + mc.wait_till_operation_is_completed.side_effect = CommandAborted(reason="test", command_id=1) + else: + mc.wait_till_operation_is_completed.side_effect = None + mc.w_pos = usteps + + mc.move_w_usteps.side_effect = fake_move_w_usteps + wheel = SquidFilterWheel(mc, squid_config, skip_init=True) + wheel.initialize([1]) + + wheel.set_filter_wheel_position({1: 2}) + + assert wheel.get_filter_wheel_position()[1] == 2 + mc.home_w.assert_not_called() # Must NOT re-home; software resend is enough. + assert attempts["count"] == 2 # Initial + software resend. + + def test_timeout_skips_resend_and_goes_straight_to_rehome(self, squid_config): + """Verification mismatch (TimeoutError) means motor state is uncertain + (e.g., partial stall) — must skip the bare resend to avoid compounding + the error into a silent desync. Re-home is the only safe recovery. + """ + # w_pos_after_move=0 simulates a "motor didn't reach target" failure; + # verification raises TimeoutError, so we should re-home immediately, + # NOT attempt a bare resend that could push the wheel one slot past target. + mc = self._build_wheel(supports_broadcast=True, w_pos_after_move=0) + wheel = SquidFilterWheel(mc, squid_config, skip_init=True) + wheel.initialize([1]) + initial_move_count = mc.move_w_usteps.call_count + + with pytest.raises(TimeoutError): + wheel.set_filter_wheel_position({1: 2}) + + # Exactly one re-home — not the two attempts a blind-retry ladder would produce. + assert mc.home_w.call_count == 1 + # Move count: 1 (initial attempt) + 1 (offset after home) + 1 (post-home attempt) = 3. + # If the buggy "blind resend" ladder had fired, we'd see >= 4 (initial + resend + offset + post-home). + assert mc.move_w_usteps.call_count - initial_move_count == 3 diff --git a/software/tests/test_multiport_illumination_bugs.py b/software/tests/test_multiport_illumination_bugs.py index 36e3b9a8a..6f51a3db0 100644 --- a/software/tests/test_multiport_illumination_bugs.py +++ b/software/tests/test_multiport_illumination_bugs.py @@ -374,13 +374,13 @@ def test_version_0_0_indicates_legacy(self): mcu = Microcontroller(sim, reset_and_initialize=False) # Before any command, version might be (0, 0) - # After command, SimSerial returns 1.1 + # After command, SimSerial returns its declared version mcu.turn_off_all_ports() mcu.wait_till_operation_is_completed() # Should now have version from SimSerial - assert mcu.firmware_version == (1, 1) + assert mcu.firmware_version == (SimSerial.FIRMWARE_VERSION_MAJOR, SimSerial.FIRMWARE_VERSION_MINOR) mcu.close() def test_supports_multi_port_false_for_v0(self): diff --git a/software/tests/test_multiport_illumination_edge_cases.py b/software/tests/test_multiport_illumination_edge_cases.py index 3e6092890..9782443ea 100644 --- a/software/tests/test_multiport_illumination_edge_cases.py +++ b/software/tests/test_multiport_illumination_edge_cases.py @@ -249,12 +249,11 @@ def mcu(self): def test_firmware_version_detected_at_init(self, mcu): """Firmware version should be detected during Microcontroller init.""" # Version is read from response byte 22 (nibble-encoded) - # SimSerial reports version 1.1 (multi-port + watchdog) # Early detection sends TURN_OFF_ALL_PORTS during __init__ assert hasattr(mcu, "firmware_version") assert mcu.firmware_version is not None # Version should already be populated - no need to send a command first - assert mcu.firmware_version == (1, 1) + assert mcu.firmware_version == (SimSerial.FIRMWARE_VERSION_MAJOR, SimSerial.FIRMWARE_VERSION_MINOR) def test_supports_multi_port_accurate_at_init(self, mcu): """supports_multi_port() should return accurate result immediately after init.""" diff --git a/software/tests/test_multiport_illumination_protocol.py b/software/tests/test_multiport_illumination_protocol.py index b9dd39d25..aa3210087 100644 --- a/software/tests/test_multiport_illumination_protocol.py +++ b/software/tests/test_multiport_illumination_protocol.py @@ -484,8 +484,8 @@ def test_version_detected_after_any_command(self, mcu): mcu.turn_off_all_ports() mcu.wait_till_operation_is_completed() - # Now version should be detected (SimSerial reports 1.1) - assert mcu.firmware_version == (1, 1) + # Now version should be detected from SimSerial + assert mcu.firmware_version == (SimSerial.FIRMWARE_VERSION_MAJOR, SimSerial.FIRMWARE_VERSION_MINOR) def test_supports_multi_port_true_for_v1(self, mcu): """supports_multi_port() should return True for v1.0+.""" @@ -507,7 +507,8 @@ def test_version_persists_across_commands(self, mcu): mcu.wait_till_operation_is_completed() v3 = mcu.firmware_version - assert v1 == v2 == v3 == (1, 1) + expected = (SimSerial.FIRMWARE_VERSION_MAJOR, SimSerial.FIRMWARE_VERSION_MINOR) + assert v1 == v2 == v3 == expected class TestMultiPortMaskEdgeCases: diff --git a/software/tests/test_watchdog.py b/software/tests/test_watchdog.py index ce5d35899..da72ca084 100644 --- a/software/tests/test_watchdog.py +++ b/software/tests/test_watchdog.py @@ -125,8 +125,8 @@ def test_double_start_stops_first(self, mcu): class TestFirmwareVersionForWatchdog: - def test_version_detected_as_1_1(self, mcu): - """SimSerial should report firmware version 1.1.""" + def test_version_detected_as_latest(self, mcu): + """SimSerial should report the latest firmware version.""" mcu.turn_off_all_ports() mcu.wait_till_operation_is_completed() - assert mcu.firmware_version == (1, 1) + assert mcu.firmware_version == (SimSerial.FIRMWARE_VERSION_MAJOR, SimSerial.FIRMWARE_VERSION_MINOR)