From ee9b3640491eec49fd5fa01c69c34b09d9f6e835 Mon Sep 17 00:00:00 2001 From: Anand Ray Date: Mon, 1 Jun 2026 22:13:07 +0000 Subject: [PATCH 1/2] fix(whisper): bound ctranslate2 version guard + protect explicit CUDA devices Addresses two post-merge nits from kwit75 on #1043: 1. Narrow version guard to (4,7) <= ct2 < (4,8) so the restriction auto-lifts when ctranslate2 4.8+ ships the cuBLAS 12.8.4 fix, rather than trapping 4.8/4.9/5.x indefinitely. 2. Run _check_gpu_compatible() for explicit device='cuda'/'cuda:N' in local mode, not just for auto-detect (device=None). The SIGABRT can occur regardless of how the CUDA device was selected. Co-Authored-By: Claude Sonnet 4.6 --- packages/ai/src/ai/common/models/audio/whisper.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/packages/ai/src/ai/common/models/audio/whisper.py b/packages/ai/src/ai/common/models/audio/whisper.py index 3fc3674a2..40546efa7 100644 --- a/packages/ai/src/ai/common/models/audio/whisper.py +++ b/packages/ai/src/ai/common/models/audio/whisper.py @@ -89,7 +89,8 @@ def _check_gpu_compatible(cls) -> bool: # 1. Version guard: ctranslate2 4.7.x + CUDA 12.8 causes a # tcache_thread_shutdown() SIGABRT during GPU transcription on H200 # (heap corruption in cuBLAS 12.8.4). Exit non-zero to force CPU. - # Remove this guard once ctranslate2 ships a fix. + # Upper bound at 4.8 so the guard lifts automatically once + # ctranslate2 ships a fix (expected in 4.8+). # 2. StorageView sanity: verify a CUDA StorageView can be created via # the documented from_array() API (no direct (shape,dtype,device) # constructor exists in the Python bindings). @@ -102,7 +103,7 @@ def _check_gpu_compatible(cls) -> bool: 'except (ValueError, AttributeError):\n' ' ct2 = (999, 999)\n' 'cuda = torch.version.cuda or ""\n' - 'if ct2 >= (4, 7) and cuda.startswith("12.8"):\n' + 'if (4, 7) <= ct2 < (4, 8) and cuda.startswith("12.8"):\n' ' sys.exit(1)\n' 't = torch.zeros(1, dtype=torch.float32, device="cuda")\n' 'sv = ctranslate2.StorageView.from_array(t)\n' @@ -213,6 +214,15 @@ def load( device = 'cuda' else: device = 'cpu' + elif device != 'cpu' and not WhisperLoader._check_gpu_compatible(): + # Explicit cuda / cuda:N requested but probe failed — fall back to CPU + # so the same SIGABRT protection applies regardless of how the caller + # specified the device. + logger.warning( + 'ctranslate2 CUDA probe failed for explicit device=%r — Whisper will use CPU instead.', + device, + ) + device = 'cpu' if device == 'cpu': gpu_index = -1 From 1c5a953dbf66f7dd33392c318ad022cd510c7e7e Mon Sep 17 00:00:00 2001 From: Anand Ray Date: Mon, 1 Jun 2026 22:34:53 +0000 Subject: [PATCH 2/2] ci: retrigger CI (Windows HuggingFace network flake)