diff --git a/cloud_pipelines_backend/launchers/kubernetes_launchers.py b/cloud_pipelines_backend/launchers/kubernetes_launchers.py index 77267b6..60f6a93 100644 --- a/cloud_pipelines_backend/launchers/kubernetes_launchers.py +++ b/cloud_pipelines_backend/launchers/kubernetes_launchers.py @@ -501,6 +501,11 @@ def launch_container_task( body=pod, _request_timeout=self._request_timeout, ) + except k8s_client_lib.ApiException as ex: + k8s_message = _extract_kubernetes_error_message(ex) + raise interfaces.LauncherError( + k8s_message or f"Failed to create pod: Kubernetes API error ({ex.status} {ex.reason})" + ) from ex except Exception as ex: raise interfaces.LauncherError( f"Failed to create pod: {_kubernetes_serialize(pod)}" @@ -1138,6 +1143,11 @@ def launch_container_task( body=job, _request_timeout=self._request_timeout, ) + except k8s_client_lib.ApiException as ex: + k8s_message = _extract_kubernetes_error_message(ex) + raise interfaces.LauncherError( + k8s_message or f"Failed to create Kubernetes Job: Kubernetes API error ({ex.status} {ex.reason})" + ) from ex except Exception as ex: raise interfaces.LauncherError( f"Failed to create Kubernetes Job: {_kubernetes_serialize(job)}" @@ -1682,6 +1692,20 @@ def _kubernetes_serialize(obj) -> dict[str, Any]: return shallow_client.sanitize_for_serialization(obj) +def _extract_kubernetes_error_message(ex: k8s_client_lib.ApiException) -> str | None: + """Extract the human-readable message from a Kubernetes ApiException body. + + The Kubernetes API returns structured JSON error bodies with a top-level + 'message' field that contains the admission webhook rejection reason — + far more actionable than a raw pod/job spec dump. + """ + try: + body = json.loads(ex.body) + return body.get("message") or None + except Exception: + return None + + def _kubernetes_deserialize(obj_dict: dict[str, Any], cls: typing.Type[_T]) -> _T: shallow_client = k8s_client_lib.ApiClient.__new__(k8s_client_lib.ApiClient) return shallow_client._ApiClient__deserialize(obj_dict, cls) diff --git a/cloud_pipelines_backend/orchestrator_sql.py b/cloud_pipelines_backend/orchestrator_sql.py index 0241c38..dfc4a43 100644 --- a/cloud_pipelines_backend/orchestrator_sql.py +++ b/cloud_pipelines_backend/orchestrator_sql.py @@ -586,6 +586,11 @@ def generate_execution_log_uri( bts.ContainerExecutionStatus.SYSTEM_ERROR ) record_system_error_exception(execution=execution, exception=ex) + if execution.extra_data is None: + execution.extra_data = {} + execution.extra_data[ + bts.EXECUTION_NODE_EXTRA_DATA_ORCHESTRATION_ERROR_MESSAGE_KEY + ] = str(ex) _mark_all_downstream_executions_as_skipped( session=session, execution=execution )