Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions cloud_pipelines_backend/launchers/kubernetes_launchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,11 @@ def launch_container_task(
body=pod,
_request_timeout=self._request_timeout,
)
except k8s_client_lib.ApiException as ex:
k8s_message = _extract_kubernetes_error_message(ex)
raise interfaces.LauncherError(
k8s_message or f"Failed to create pod: Kubernetes API error ({ex.status} {ex.reason})"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we include the Pod that we were trying to create (as the current message does)? I think it's very important for debugging pod creation issues.

) from ex
except Exception as ex:
raise interfaces.LauncherError(
f"Failed to create pod: {_kubernetes_serialize(pod)}"
Expand Down Expand Up @@ -1138,6 +1143,11 @@ def launch_container_task(
body=job,
_request_timeout=self._request_timeout,
)
except k8s_client_lib.ApiException as ex:
k8s_message = _extract_kubernetes_error_message(ex)
raise interfaces.LauncherError(
k8s_message or f"Failed to create Kubernetes Job: Kubernetes API error ({ex.status} {ex.reason})"
) from ex
except Exception as ex:
raise interfaces.LauncherError(
f"Failed to create Kubernetes Job: {_kubernetes_serialize(job)}"
Expand Down Expand Up @@ -1682,6 +1692,20 @@ def _kubernetes_serialize(obj) -> dict[str, Any]:
return shallow_client.sanitize_for_serialization(obj)


def _extract_kubernetes_error_message(ex: k8s_client_lib.ApiException) -> str | None:
"""Extract the human-readable message from a Kubernetes ApiException body.

The Kubernetes API returns structured JSON error bodies with a top-level
'message' field that contains the admission webhook rejection reason —
far more actionable than a raw pod/job spec dump.
"""
try:
body = json.loads(ex.body)
return body.get("message") or None
except Exception:
return None


def _kubernetes_deserialize(obj_dict: dict[str, Any], cls: typing.Type[_T]) -> _T:
shallow_client = k8s_client_lib.ApiClient.__new__(k8s_client_lib.ApiClient)
return shallow_client._ApiClient__deserialize(obj_dict, cls)
Expand Down
5 changes: 5 additions & 0 deletions cloud_pipelines_backend/orchestrator_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,11 @@ def generate_execution_log_uri(
bts.ContainerExecutionStatus.SYSTEM_ERROR
)
record_system_error_exception(execution=execution, exception=ex)
if execution.extra_data is None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm. Why do you want to put this information in the another field? The error message (brief, full) is already put in extra_data.
P.S. There is _record_orchestration_error_message function that does slightly more.

execution.extra_data = {}
execution.extra_data[
bts.EXECUTION_NODE_EXTRA_DATA_ORCHESTRATION_ERROR_MESSAGE_KEY
] = str(ex)
_mark_all_downstream_executions_as_skipped(
session=session, execution=execution
)
Expand Down