From ce4479d704e54585fe4583a4cf5699f099f83458 Mon Sep 17 00:00:00 2001
From: arpbansal
Date: Wed, 18 Mar 2026 17:36:34 +0000
Subject: [PATCH 1/2] Add logging for gpu-container issue

---
 src/gen_worker/pipeline_loader.py | 27 +++++++++++++++++++++++++++
 src/gen_worker/worker.py          | 16 ++++++++++++++++
 2 files changed, 43 insertions(+)

diff --git a/src/gen_worker/pipeline_loader.py b/src/gen_worker/pipeline_loader.py
index ea20b2f..69c8ca3 100644
--- a/src/gen_worker/pipeline_loader.py
+++ b/src/gen_worker/pipeline_loader.py
@@ -1525,12 +1525,39 @@ async def load(
         # Move to device with OOM handling
         try:
             if config.device == "cuda" and torch.cuda.is_available():
+                logger.info(
+                    "Moving pipeline to CUDA for %s (%.1f GB) ...",
+                    model_id,
+                    model_size_gb,
+                )
                 pipeline = pipeline.to("cuda")
+                logger.info("Pipeline moved to CUDA successfully for %s", model_id)
+            else:
+                logger.warning(
+                    "CUDA not available (device=%s, cuda.is_available=%s), "
+                    "pipeline will remain on CPU for %s",
+                    config.device,
+                    torch.cuda.is_available(),
+                    model_id,
+                )
         except torch.cuda.OutOfMemoryError as e:
             flush_memory()
             raise CudaOutOfMemoryError(
                 model_id, model_size_gb, get_available_vram_gb()
             ) from e
+        except RuntimeError as e:
+            logger.error(
+                "CUDA RuntimeError moving %s to GPU: %s — falling back to CPU",
+                model_id,
+                e,
+            )
+        except Exception as e:
+            logger.error(
+                "Unexpected error moving %s to GPU (%s: %s) — falling back to CPU",
+                model_id,
+                type(e).__name__,
+                e,
+            )
 
         # Apply VAE optimizations (always enabled)
         if config.enable_vae_tiling or config.enable_vae_slicing:
diff --git a/src/gen_worker/worker.py b/src/gen_worker/worker.py
index 351c359..565ce6e 100644
--- a/src/gen_worker/worker.py
+++ b/src/gen_worker/worker.py
@@ -4666,7 +4666,15 @@ def _resolve_injected_value(self, ctx: RequestContext, requested_type: Any, mode
                 model_source = str(model_id)
             preload_kwargs = {}
 
+            logger.info(
+                "Loading from_pretrained: source=%s type=%s kwargs=%s",
+                model_source, type_qualname(requested_type), list(preload_kwargs.keys()),
+            )
             obj = from_pretrained(model_source, **preload_kwargs)
+            logger.info(
+                "from_pretrained complete: source=%s (%.1fs)",
+                model_source, time.monotonic() - t_pi0,
+            )
             if rm is not None:
                 rm.add_pipeline_init_time(int((time.monotonic() - t_pi0) * 1000))
             if isinstance(requested_type, type) and not isinstance(obj, requested_type):
@@ -4704,6 +4712,10 @@ def _resolve_injected_value(self, ctx: RequestContext, requested_type: Any, mode
                 torch_dtype = kwargs.get("torch_dtype") if isinstance(kwargs, dict) else None
             except Exception:
                 torch_dtype = None
+            logger.info(
+                "Moving model to device=%s dtype=%s model=%s ...",
+                str(ctx.device), torch_dtype, model_id,
+            )
             try:
                 if torch_dtype is not None:
                     obj = obj.to(str(ctx.device), dtype=torch_dtype)
@@ -4712,6 +4724,10 @@ def _resolve_injected_value(self, ctx: RequestContext, requested_type: Any, mode
             except TypeError:
                 # Some objects implement .to(device) but not dtype kwarg.
                 obj = obj.to(str(ctx.device))
+            logger.info(
+                "Model moved to device=%s successfully model=%s",
+                str(ctx.device), model_id,
+            )
             if rm is not None:
                 rm.add_gpu_load_time(int((time.monotonic() - t_to0) * 1000))
 
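Note on the pipeline_loader.py hunk above: the two new except handlers log "falling back to CPU" but only log. The exception is swallowed, the pipeline is left as-is (possibly half-moved to the GPU), and no CUDA memory is released, since flush_memory() runs only on the OutOfMemoryError path. If a real fallback is intended, the move back has to be explicit. A minimal sketch, assuming the torch semantics used above; move_with_cpu_fallback is a hypothetical helper, not part of the patch:

    import logging

    import torch

    logger = logging.getLogger(__name__)


    def move_with_cpu_fallback(pipeline, model_id: str):
        # Sketch only: unlike the handlers in the patch, this performs the
        # fallback it logs. It moves the (possibly half-transferred) pipeline
        # back to CPU and releases the cached CUDA allocations.
        if not torch.cuda.is_available():
            logger.warning("CUDA unavailable; keeping %s on CPU", model_id)
            return pipeline
        try:
            return pipeline.to("cuda")
        except torch.cuda.OutOfMemoryError:
            # Re-raise so the caller's existing OOM path (CudaOutOfMemoryError
            # in the patch) still handles out-of-memory specifically.
            raise
        except RuntimeError as e:
            logger.error(
                "CUDA RuntimeError moving %s to GPU: %s; falling back to CPU",
                model_id, e,
            )
            pipeline = pipeline.to("cpu")
            torch.cuda.empty_cache()
            return pipeline
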
From 7b32aaaf75a8409679790c52184363be68e8a562 Mon Sep 17 00:00:00 2001
From: arpbansal
Date: Thu, 19 Mar 2026 18:01:47 +0000
Subject: [PATCH 2/2] Log silent failure in model loading

---
 src/gen_worker/worker.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gen_worker/worker.py b/src/gen_worker/worker.py
index 565ce6e..3dfab72 100644
--- a/src/gen_worker/worker.py
+++ b/src/gen_worker/worker.py
@@ -4120,6 +4120,7 @@ async def consume_async() -> None:
 
             logger.info("Task %s completed successfully.", request_id)
         except Exception as e:
+            logger.exception("Task %s failed: %s", request_id, e)
             error_type, retryable, safe_message, error_message = self._map_exception(e)
             if inference_watchdog is not None:
                 inference_watchdog.cancel()
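The one-line change in PATCH 2/2 uses logging.Logger.exception(), which logs at ERROR level and automatically appends the active traceback, so the failure that _map_exception() previously classified without a trace now appears in full. A minimal, self-contained demonstration of that behavior; the logger name, demo() helper, and request id are illustrative, not taken from the worker:

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("gen_worker")


    def demo(request_id: str) -> None:
        try:
            raise ValueError("boom")
        except Exception as e:
            # .exception() must be called from inside an except block; it logs
            # the message at ERROR level plus the full traceback of `e`.
            logger.exception("Task %s failed: %s", request_id, e)


    demo("req-123")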