From 232422dabb00deee8fe54d6fe0edce0d46692e56 Mon Sep 17 00:00:00 2001 From: You Yan Date: Thu, 16 Apr 2026 18:02:12 -0700 Subject: [PATCH 1/5] fix: detect JobRunner subprocess death and stop memory-profiler ESRCH spam A JobRunner subprocess dying mid-acquisition (e.g. segfault in a fork()-unsafe native library) was previously undetected: the parent kept dispatching save jobs into a queue nothing was consuming, and the acquisition appeared to complete normally while hundreds of timepoints were lost. The only visible symptom was a flood of memory-profiler tracebacks about /proc/[pid]/smaps_rollup. - JobRunner: spawn a daemon watchdog thread in start() that blocks on self.sentinel. When the subprocess exits, it distinguishes expected shutdown (via _shutdown_event) from unexpected death and invokes a registered handler with the exitcode. kill() now also sets _shutdown_event so intentional termination is not flagged. - MultiPointWorker: registers a handler that logs the failure and calls request_abort_fn(), so the acquisition loop exits on the next abort check instead of silently rotting for hours. - memory_profiler._get_linux_pss_mb: catch ProcessLookupError in addition to FileNotFoundError/PermissionError/ValueError. Reading /proc/[pid]/smaps_rollup for an exited or zombie process returns ESRCH, which is a sibling of FileNotFoundError under OSError and was not being caught. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- software/control/core/job_processing.py | 55 ++++++++++++++++++++- software/control/core/memory_profiler.py | 4 +- software/control/core/multi_point_worker.py | 10 ++++ 3 files changed, 67 insertions(+), 2 deletions(-) diff --git a/software/control/core/job_processing.py b/software/control/core/job_processing.py index bcf183df0..6288b3c10 100644 --- a/software/control/core/job_processing.py +++ b/software/control/core/job_processing.py @@ -1,12 +1,14 @@ import abc import multiprocessing +import multiprocessing.connection import queue import os +import threading import time import json from datetime import datetime from contextlib import contextmanager -from typing import ClassVar, Dict, Generic, List, Optional, Set, Tuple, TypeVar, Union +from typing import Callable, ClassVar, Dict, Generic, List, Optional, Set, Tuple, TypeVar, Union from uuid import uuid4 from dataclasses import dataclass, field @@ -1064,6 +1066,12 @@ def __init__( self._bp_pending_bytes = bp_pending_bytes self._bp_capacity_event = bp_capacity_event + # Watchdog for detecting unexpected subprocess death (segfault, OOM kill, etc.). + # Without this, a dead JobRunner silently rots the acquisition: the parent keeps + # queuing save jobs that no one consumes. + self._on_unexpected_exit: Optional[Callable[[Optional[int]], None]] = None + self._watchdog: Optional[threading.Thread] = None + # Clean up stale metadata files from previous crashed acquisitions # Only run when explicitly requested (i.e., when OME-TIFF saving is being used) if cleanup_stale_ome_files: @@ -1071,6 +1079,51 @@ def __init__( if removed: self._log.info(f"Cleaned up {len(removed)} stale OME-TIFF metadata files") + def set_unexpected_exit_handler(self, handler: Optional[Callable[[Optional[int]], None]]) -> None: + """Register a callback to invoke if the subprocess dies without a clean shutdown. 
+ + The handler is called from the watchdog thread with the subprocess exitcode + (which may be None, a positive int, or a negative signal number on POSIX). + """ + self._on_unexpected_exit = handler + + def start(self): + super().start() + # Watchdog must start after super().start() so self.pid and self.sentinel are set. + self._watchdog = threading.Thread( + target=self._watch_subprocess, + daemon=True, + name=f"JobRunner-watchdog[{self.pid}]", + ) + self._watchdog.start() + + def kill(self): + # Mark as expected so the watchdog treats the exit as intentional. + if self._shutdown_event is not None: + self._shutdown_event.set() + super().kill() + + def _watch_subprocess(self) -> None: + """Block until the subprocess exits, then distinguish expected vs. unexpected death.""" + # Capture references; shutdown() clears self._shutdown_event after join(). + shutdown_event = self._shutdown_event + pid = self.pid + multiprocessing.connection.wait([self.sentinel]) + exitcode = self.exitcode + if shutdown_event is not None and shutdown_event.is_set(): + self._log.info(f"JobRunner PID={pid} exited cleanly (exitcode={exitcode})") + return + self._log.error( + f"JobRunner PID={pid} died UNEXPECTEDLY (exitcode={exitcode}). " + f"Pending save jobs will not complete." + ) + handler = self._on_unexpected_exit + if handler is not None: + try: + handler(exitcode) + except Exception: + self._log.exception("JobRunner unexpected-exit handler raised") + def dispatch(self, job: Job): # Inject acquisition_info into SaveOMETiffJob instances before serialization. # The job object is pickled when placed in the queue, so injection must happen here. 
diff --git a/software/control/core/memory_profiler.py b/software/control/core/memory_profiler.py index 8d6ab5421..92dc20212 100644 --- a/software/control/core/memory_profiler.py +++ b/software/control/core/memory_profiler.py @@ -259,7 +259,9 @@ def _get_linux_pss_mb(pid: int) -> float: pss_total_kb += int(parts[1]) return pss_total_kb / 1024 - except (FileNotFoundError, PermissionError, ValueError): + except (FileNotFoundError, PermissionError, ValueError, ProcessLookupError): + # ProcessLookupError (errno ESRCH) can occur when the process exited between + # PID enumeration and the smaps_rollup read, or when the PID is a zombie. pass return 0.0 diff --git a/software/control/core/multi_point_worker.py b/software/control/core/multi_point_worker.py index fa5fabb71..30c75faca 100644 --- a/software/control/core/multi_point_worker.py +++ b/software/control/core/multi_point_worker.py @@ -370,9 +370,19 @@ def __init__( # Subprocess starts warming up in background - don't block here self._job_runners.append((job_class, job_runner)) + if job_runner is not None: + job_runner.set_unexpected_exit_handler(self._on_job_runner_died) self._abort_on_failed_job = abort_on_failed_jobs self._first_job_dispatched = False # Track if we've waited for subprocess warmup + def _on_job_runner_died(self, exitcode: Optional[int]) -> None: + """Invoked by JobRunner's watchdog when a subprocess dies unexpectedly.""" + self._log.error( + f"JobRunner subprocess died unexpectedly (exitcode={exitcode}); aborting acquisition." 
+ ) + self._acquisition_error_count += 1 + self.request_abort_fn() + def update_use_piezo(self, value): self.use_piezo = value self._log.info(f"MultiPointWorker: updated use_piezo to {value}") From 3d8058d240290c61c29824667dc9fb2821158742 Mon Sep 17 00:00:00 2001 From: You Yan Date: Thu, 16 Apr 2026 18:14:12 -0700 Subject: [PATCH 2/5] style: apply Black formatting Co-Authored-By: Claude Opus 4.7 (1M context) --- software/control/core/job_processing.py | 3 +-- software/control/core/multi_point_worker.py | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/software/control/core/job_processing.py b/software/control/core/job_processing.py index 6288b3c10..64007f65d 100644 --- a/software/control/core/job_processing.py +++ b/software/control/core/job_processing.py @@ -1114,8 +1114,7 @@ def _watch_subprocess(self) -> None: self._log.info(f"JobRunner PID={pid} exited cleanly (exitcode={exitcode})") return self._log.error( - f"JobRunner PID={pid} died UNEXPECTEDLY (exitcode={exitcode}). " - f"Pending save jobs will not complete." + f"JobRunner PID={pid} died UNEXPECTEDLY (exitcode={exitcode}). " f"Pending save jobs will not complete." ) handler = self._on_unexpected_exit if handler is not None: diff --git a/software/control/core/multi_point_worker.py b/software/control/core/multi_point_worker.py index 30c75faca..3afb47325 100644 --- a/software/control/core/multi_point_worker.py +++ b/software/control/core/multi_point_worker.py @@ -377,9 +377,7 @@ def __init__( def _on_job_runner_died(self, exitcode: Optional[int]) -> None: """Invoked by JobRunner's watchdog when a subprocess dies unexpectedly.""" - self._log.error( - f"JobRunner subprocess died unexpectedly (exitcode={exitcode}); aborting acquisition." 
- ) + self._log.error(f"JobRunner subprocess died unexpectedly (exitcode={exitcode}); aborting acquisition.") self._acquisition_error_count += 1 self.request_abort_fn() From 5267dd4caed3d019479fcfdbc712192c10b50419 Mon Sep 17 00:00:00 2001 From: You Yan Date: Fri, 17 Apr 2026 11:07:27 -0700 Subject: [PATCH 3/5] =?UTF-8?q?fix:=20address=20Copilot=20review=20?= =?UTF-8?q?=E2=80=94=20terminate()=20+=20early=20handler=20registration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Override JobRunner.terminate() to set _shutdown_event so an intentional terminate() (e.g., from MultiPointController.close) is not reported as "died UNEXPECTEDLY" by the watchdog. Matches the kill() override. - Register the unexpected-exit handler on freshly created JobRunners before start(), eliminating the race where the subprocess could die during warmup before the handler was installed. - Register the handler on pre-warmed runners the moment MultiPointWorker adopts them (before set_acquisition_info), narrowing the uncovered window to the pre-handoff phase only. Co-Authored-By: Claude Opus 4.7 (1M context) --- software/control/core/job_processing.py | 6 ++++++ software/control/core/multi_point_worker.py | 10 ++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/software/control/core/job_processing.py b/software/control/core/job_processing.py index 64007f65d..8590f1db0 100644 --- a/software/control/core/job_processing.py +++ b/software/control/core/job_processing.py @@ -1103,6 +1103,12 @@ def kill(self): self._shutdown_event.set() super().kill() + def terminate(self): + # Mark as expected so the watchdog treats the exit as intentional. + if self._shutdown_event is not None: + self._shutdown_event.set() + super().terminate() + def _watch_subprocess(self) -> None: """Block until the subprocess exits, then distinguish expected vs. unexpected death.""" # Capture references; shutdown() clears self._shutdown_event after join(). 
diff --git a/software/control/core/multi_point_worker.py b/software/control/core/multi_point_worker.py index 3afb47325..5ce756778 100644 --- a/software/control/core/multi_point_worker.py +++ b/software/control/core/multi_point_worker.py @@ -335,6 +335,11 @@ def __init__( if prewarmed_job_runner.is_ready(): self._log.info(f"Using pre-warmed job runner for {job_class.__name__} jobs") job_runner = prewarmed_job_runner + # Register abort handler as early as possible on adoption. The + # window between controller-side start() and this point remains + # uncovered for pre-warmed runners, but beyond here any + # unexpected death triggers an abort. + job_runner.set_unexpected_exit_handler(self._on_job_runner_died) # Configure it with current acquisition settings job_runner.set_acquisition_info(self.acquisition_info) if zarr_writer_info: @@ -366,12 +371,13 @@ def __init__( # Pass zarr writer info for ZARR_V3 format zarr_writer_info=zarr_writer_info, ) + # Register abort handler before start() so the watchdog always has + # a handler available, even if the subprocess dies during warmup. + job_runner.set_unexpected_exit_handler(self._on_job_runner_died) job_runner.start() # Subprocess starts warming up in background - don't block here self._job_runners.append((job_class, job_runner)) - if job_runner is not None: - job_runner.set_unexpected_exit_handler(self._on_job_runner_died) self._abort_on_failed_job = abort_on_failed_jobs self._first_job_dispatched = False # Track if we've waited for subprocess warmup From 24751ec9478365503ea3a7a19cbfc479fd804349 Mon Sep 17 00:00:00 2001 From: You Yan Date: Sat, 18 Apr 2026 00:03:53 -0700 Subject: [PATCH 4/5] refactor: simplify watchdog intent tracking and trim comments - Replace the "shutdown_event is set" signal with a dedicated _intentional_exit bool. 
_shutdown_event is a multiprocessing primitive that shutdown() nulls during cleanup, which opened a narrow race where the watchdog could read None and misclassify an intentional shutdown as unexpected death. The new flag survives cleanup and is set by all three explicit-stop paths: kill(), terminate(), and shutdown(). - Drop narrative comments that restate what the code already says, per project style. Keep the WHY comment about _intentional_exit vs _shutdown_event. - Collapse a split f-string log message to a single literal. Co-Authored-By: Claude Opus 4.7 (1M context) --- software/control/core/job_processing.py | 23 ++++++++------------- software/control/core/multi_point_worker.py | 9 +++----- 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/software/control/core/job_processing.py b/software/control/core/job_processing.py index 8590f1db0..8a72615d2 100644 --- a/software/control/core/job_processing.py +++ b/software/control/core/job_processing.py @@ -1066,11 +1066,12 @@ def __init__( self._bp_pending_bytes = bp_pending_bytes self._bp_capacity_event = bp_capacity_event - # Watchdog for detecting unexpected subprocess death (segfault, OOM kill, etc.). - # Without this, a dead JobRunner silently rots the acquisition: the parent keeps - # queuing save jobs that no one consumes. self._on_unexpected_exit: Optional[Callable[[Optional[int]], None]] = None self._watchdog: Optional[threading.Thread] = None + # Set by kill()/terminate()/shutdown() so the watchdog can distinguish + # intentional exit from segfault/OOM. Separate from _shutdown_event, which + # is a multiprocessing primitive that shutdown() nulls during cleanup. 
+ self._intentional_exit = False # Clean up stale metadata files from previous crashed acquisitions # Only run when explicitly requested (i.e., when OME-TIFF saving is being used) @@ -1089,7 +1090,6 @@ def set_unexpected_exit_handler(self, handler: Optional[Callable[[Optional[int]] def start(self): super().start() - # Watchdog must start after super().start() so self.pid and self.sentinel are set. self._watchdog = threading.Thread( target=self._watch_subprocess, daemon=True, @@ -1098,29 +1098,23 @@ def start(self): self._watchdog.start() def kill(self): - # Mark as expected so the watchdog treats the exit as intentional. - if self._shutdown_event is not None: - self._shutdown_event.set() + self._intentional_exit = True super().kill() def terminate(self): - # Mark as expected so the watchdog treats the exit as intentional. - if self._shutdown_event is not None: - self._shutdown_event.set() + self._intentional_exit = True super().terminate() def _watch_subprocess(self) -> None: """Block until the subprocess exits, then distinguish expected vs. unexpected death.""" - # Capture references; shutdown() clears self._shutdown_event after join(). - shutdown_event = self._shutdown_event pid = self.pid multiprocessing.connection.wait([self.sentinel]) exitcode = self.exitcode - if shutdown_event is not None and shutdown_event.is_set(): + if self._intentional_exit: self._log.info(f"JobRunner PID={pid} exited cleanly (exitcode={exitcode})") return self._log.error( - f"JobRunner PID={pid} died UNEXPECTEDLY (exitcode={exitcode}). " f"Pending save jobs will not complete." + f"JobRunner PID={pid} died UNEXPECTEDLY (exitcode={exitcode}). Pending save jobs will not complete." 
) handler = self._on_unexpected_exit if handler is not None: @@ -1232,6 +1226,7 @@ def shutdown(self, timeout_s=1.0): # Guard against double shutdown if self._shutdown_event is None: return + self._intentional_exit = True self._shutdown_event.set() # Send sentinel to wake up worker blocked on queue.get() try: diff --git a/software/control/core/multi_point_worker.py b/software/control/core/multi_point_worker.py index 5ce756778..5786d09ef 100644 --- a/software/control/core/multi_point_worker.py +++ b/software/control/core/multi_point_worker.py @@ -335,10 +335,8 @@ def __init__( if prewarmed_job_runner.is_ready(): self._log.info(f"Using pre-warmed job runner for {job_class.__name__} jobs") job_runner = prewarmed_job_runner - # Register abort handler as early as possible on adoption. The - # window between controller-side start() and this point remains - # uncovered for pre-warmed runners, but beyond here any - # unexpected death triggers an abort. + # Pre-warmed runners were started by the controller without a + # handler; the pre-handoff window stays uncovered by design. job_runner.set_unexpected_exit_handler(self._on_job_runner_died) # Configure it with current acquisition settings job_runner.set_acquisition_info(self.acquisition_info) @@ -371,8 +369,7 @@ def __init__( # Pass zarr writer info for ZARR_V3 format zarr_writer_info=zarr_writer_info, ) - # Register abort handler before start() so the watchdog always has - # a handler available, even if the subprocess dies during warmup. + # Must precede start() so the watchdog covers warmup-time deaths. 
job_runner.set_unexpected_exit_handler(self._on_job_runner_died) job_runner.start() # Subprocess starts warming up in background - don't block here From 9ae07e330334f80005e263c82a94a990df15cb27 Mon Sep 17 00:00:00 2001 From: You Yan Date: Sun, 26 Apr 2026 21:09:12 -0700 Subject: [PATCH 5/5] test: add watchdog regression tests; close pre-warm adoption window - Add tests/control/core/test_job_runner_watchdog.py covering SIGKILL detection, intentional kill/terminate/shutdown suppression, handler exception isolation, and the _intentional_exit-survives-shutdown regression from commit 24751ec9. - multi_point_worker: check is_alive() alongside is_ready() before adopting a pre-warmed runner. is_ready() reads a multiprocessing.Event the subprocess sets early in run(); once the subprocess dies the Event remains set in shared memory, so is_ready() alone can't distinguish a live runner from a corpse. Without is_alive(), a runner that segfaults during pre-warm would be adopted into the acquisition and resume the silent-rot failure mode this PR is meant to fix. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- software/control/core/multi_point_worker.py | 13 +- .../control/core/test_job_runner_watchdog.py | 147 ++++++++++++++++++ 2 files changed, 153 insertions(+), 7 deletions(-) create mode 100644 software/tests/control/core/test_job_runner_watchdog.py diff --git a/software/control/core/multi_point_worker.py b/software/control/core/multi_point_worker.py index 5786d09ef..3df033581 100644 --- a/software/control/core/multi_point_worker.py +++ b/software/control/core/multi_point_worker.py @@ -332,27 +332,26 @@ def __init__( if Acquisition.USE_MULTIPROCESSING: # Try to use pre-warmed runner for the first job class if can_use_prewarmed and not used_prewarmed: - if prewarmed_job_runner.is_ready(): + # is_alive() must be checked alongside is_ready(): the subprocess sets + # _ready_event early in run() and the Event survives in shared memory + # after death, so is_ready() alone can't detect a corpse. + if prewarmed_job_runner.is_alive() and prewarmed_job_runner.is_ready(): self._log.info(f"Using pre-warmed job runner for {job_class.__name__} jobs") job_runner = prewarmed_job_runner - # Pre-warmed runners were started by the controller without a - # handler; the pre-handoff window stays uncovered by design. 
job_runner.set_unexpected_exit_handler(self._on_job_runner_died) - # Configure it with current acquisition settings job_runner.set_acquisition_info(self.acquisition_info) if zarr_writer_info: job_runner.set_zarr_writer_info(zarr_writer_info) used_prewarmed = True else: self._log.warning( - f"Pre-warmed job runner not ready (possibly hung during warmup), " + f"Pre-warmed job runner unavailable (died or hung during warmup); " f"shutting it down and creating new one for {job_class.__name__}" ) - # Shutdown the hung pre-warmed runner to avoid resource leak try: prewarmed_job_runner.shutdown(timeout_s=1.0) except Exception as e: - self._log.error(f"Error shutting down hung pre-warmed runner: {e}") + self._log.error(f"Error shutting down unusable pre-warmed runner: {e}") # Don't try to use pre-warmed runner again for subsequent job classes can_use_prewarmed = False diff --git a/software/tests/control/core/test_job_runner_watchdog.py b/software/tests/control/core/test_job_runner_watchdog.py new file mode 100644 index 000000000..cde9dacb3 --- /dev/null +++ b/software/tests/control/core/test_job_runner_watchdog.py @@ -0,0 +1,147 @@ +"""Tests for JobRunner watchdog (unexpected subprocess death detection). + +These tests cover the watchdog thread that distinguishes intentional shutdown +from unexpected subprocess death (segfault, SIGKILL, OOM kill) and invokes a +registered handler so an acquisition can abort instead of silently rotting. +""" + +import os +import signal +import threading +import time + +import pytest + +from control.core.job_processing import JobRunner + + +@pytest.fixture +def runner(): + """Provide an unstarted JobRunner; ensure cleanup even if the test crashes mid-run.""" + r = JobRunner() + r.daemon = True + yield r + if r.is_alive(): + try: + r.kill() + r.join(timeout=2.0) + except Exception: + pass + + +# Watchdog runs in a daemon thread; allow it to finish after the sentinel fires. 
+_WATCHDOG_GRACE_S = 0.3 + + +class TestWatchdogUnexpectedDeath: + """Verify the watchdog detects unexpected subprocess death and invokes the handler.""" + + def test_sigkill_fires_handler_with_negative_exitcode(self, runner): + handler_fired = threading.Event() + received_exitcode = [] + + def handler(exitcode): + received_exitcode.append(exitcode) + handler_fired.set() + + runner.set_unexpected_exit_handler(handler) + runner.start() + assert runner.wait_ready(timeout_s=5.0) + + os.kill(runner.pid, signal.SIGKILL) + + assert handler_fired.wait(timeout=5.0), "Watchdog handler did not fire after SIGKILL" + assert received_exitcode == [-signal.SIGKILL] + + +class TestWatchdogIntentionalExit: + """Verify intentional stop paths (kill/terminate/shutdown) do NOT fire the handler.""" + + def test_kill_does_not_fire_handler(self, runner): + handler_fired = threading.Event() + runner.set_unexpected_exit_handler(lambda ec: handler_fired.set()) + runner.start() + assert runner.wait_ready(timeout_s=5.0) + + runner.kill() + runner.join(timeout=2.0) + time.sleep(_WATCHDOG_GRACE_S) + + assert not handler_fired.is_set(), "Handler fired despite intentional kill()" + + def test_terminate_does_not_fire_handler(self, runner): + handler_fired = threading.Event() + runner.set_unexpected_exit_handler(lambda ec: handler_fired.set()) + runner.start() + assert runner.wait_ready(timeout_s=5.0) + + runner.terminate() + runner.join(timeout=2.0) + time.sleep(_WATCHDOG_GRACE_S) + + assert not handler_fired.is_set(), "Handler fired despite intentional terminate()" + + def test_shutdown_does_not_fire_handler(self, runner): + handler_fired = threading.Event() + runner.set_unexpected_exit_handler(lambda ec: handler_fired.set()) + runner.start() + assert runner.wait_ready(timeout_s=5.0) + + runner.shutdown(timeout_s=2.0) + time.sleep(_WATCHDOG_GRACE_S) + + assert not handler_fired.is_set(), "Handler fired despite intentional shutdown()" + + +class TestWatchdogResilience: + """Verify the watchdog is 
robust to handler misbehavior and shutdown ordering.""" + + def test_handler_exception_does_not_propagate(self, runner): + # The watchdog daemon thread must catch handler exceptions (it logs them). + # If propagation happened, the test process would not reach the post-join asserts. + runner.set_unexpected_exit_handler(lambda ec: (_ for _ in ()).throw(RuntimeError("boom"))) + runner.start() + assert runner.wait_ready(timeout_s=5.0) + + os.kill(runner.pid, signal.SIGKILL) + runner.join(timeout=5.0) + time.sleep(_WATCHDOG_GRACE_S) + + assert not runner.is_alive() + + def test_intentional_exit_survives_shutdown_cleanup(self, runner): + """Regression: shutdown() nulls _shutdown_event during cleanup. The intent flag + must be a separate attribute that survives that nullification, or the watchdog + could read None and misclassify intentional shutdown as unexpected death. + """ + handler_fired = threading.Event() + runner.set_unexpected_exit_handler(lambda ec: handler_fired.set()) + runner.start() + assert runner.wait_ready(timeout_s=5.0) + + runner.shutdown(timeout_s=2.0) + + assert runner._intentional_exit is True + assert runner._shutdown_event is None + + time.sleep(_WATCHDOG_GRACE_S) + assert not handler_fired.is_set() + + +class TestPreWarmedAdoption: + """Document the load-bearing assumption behind the is_alive() check at adoption.""" + + def test_is_ready_returns_true_for_dead_subprocess(self, runner): + """is_ready() reads a multiprocessing.Event the subprocess sets early in run(). + After SIGKILL the Event remains set in shared memory, so is_ready() alone cannot + distinguish a live runner from a corpse. is_alive() must also be checked before + adopting a pre-warmed runner. 
+ """ + runner.start() + assert runner.wait_ready(timeout_s=5.0) + + os.kill(runner.pid, signal.SIGKILL) + runner.join(timeout=5.0) + + assert runner.is_ready() is True, "is_ready() should still report True even after death" + assert runner.is_alive() is False, "is_alive() should report False after death"