From b65064516f07613f10c31d97394126ccf11a36ac Mon Sep 17 00:00:00 2001 From: Mikhail Kuklin <57541345+MikhailKuklin@users.noreply.github.com> Date: Thu, 4 Jun 2026 16:56:08 +0300 Subject: [PATCH 1/2] Use wall-clock time for SQL timeout (not accumulated sleep counter) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The polling loop in SQLExecutor.execute() incremented an `elapsed` counter by `poll_interval` each iteration, which only tracks sleep time and ignores how long each `get_statement` RPC takes. Under warehouse load, get_statement can take several seconds per call, so each iteration is much longer than poll_interval. The counter undercounts real wall clock, and the configured timeout fires later than intended — or, for sufficiently slow RPCs, the query completes naturally before the timeout ever fires. Empirically reproduced (2026-06-04): a query with `timeout=900` on a loaded warehouse ran 1999 seconds wall clock before completing naturally, never hitting the timeout — the counter only reached ~666. Fix: use `time.monotonic()` for the timeout boundary. The timeout now fires at real wall clock regardless of RPC latency. Validated against the fix: same query, same timeout, fired at 903s wall clock with the statement reaching `CANCELED`. Error message also now reports actual elapsed time, not the configured limit. The error message changes from: 'SQL query timed out after {timeout} seconds...' to: 'SQL query timed out after {elapsed_wall:.1f} seconds (limit: {timeout}s)...' so users can tell at a glance whether the timeout fired close to the configured boundary or whether something delayed cancellation. --- .../sql/sql_utils/executor.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/databricks-tools-core/databricks_tools_core/sql/sql_utils/executor.py b/databricks-tools-core/databricks_tools_core/sql/sql_utils/executor.py index 73d4b420..1dbe966b 100644 --- a/databricks-tools-core/databricks_tools_core/sql/sql_utils/executor.py +++ b/databricks-tools-core/databricks_tools_core/sql/sql_utils/executor.py @@ -107,11 +107,19 @@ def execute( statement_id = response.statement_id logger.debug(f"Statement submitted with ID: {statement_id}") - # Poll for completion + # Poll for completion. + # + # Use time.monotonic() for the timeout boundary instead of incrementing + # a counter by poll_interval each iteration. The counter approach + # tracks only sleep time and ignores how long each get_statement RPC + # takes — under warehouse load, get_statement can take several seconds + # per call, so the counter undercounts wall clock and the configured + # timeout fires much later than intended (or, for very slow RPCs, + # never fires before the statement completes naturally). poll_interval = 2 - elapsed = 0 + start_time = time.monotonic() - while elapsed < timeout: + while time.monotonic() - start_time < timeout: try: status = self.client.statement_execution.get_statement(statement_id=statement_id) except Exception as e: @@ -136,12 +144,12 @@ def execute( # Still running, wait and poll again time.sleep(poll_interval) - elapsed += poll_interval # Timeout reached - cancel the statement self._cancel_statement(statement_id) + elapsed_wall = time.monotonic() - start_time raise SQLExecutionError( - f"SQL query timed out after {timeout} seconds and was canceled. " + f"SQL query timed out after {elapsed_wall:.1f} seconds (limit: {timeout}s) and was canceled. " f"Consider increasing the timeout or optimizing the query. " f"Statement ID: {statement_id}" ) @@ -185,3 +193,4 @@ def _cancel_statement(self, statement_id: str) -> None: logger.debug(f"Canceled statement {statement_id}") except Exception as e: logger.warning(f"Failed to cancel statement {statement_id}: {e}") + From f870874d31e157b819c4305f5578110c717ce2ed Mon Sep 17 00:00:00 2001 From: Mikhail Kuklin <57541345+MikhailKuklin@users.noreply.github.com> Date: Thu, 4 Jun 2026 16:57:42 +0300 Subject: [PATCH 2/2] Fix trailing whitespace in previous commit --- .../databricks_tools_core/sql/sql_utils/executor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/databricks-tools-core/databricks_tools_core/sql/sql_utils/executor.py b/databricks-tools-core/databricks_tools_core/sql/sql_utils/executor.py index 1dbe966b..6219dd61 100644 --- a/databricks-tools-core/databricks_tools_core/sql/sql_utils/executor.py +++ b/databricks-tools-core/databricks_tools_core/sql/sql_utils/executor.py @@ -193,4 +193,3 @@ def _cancel_statement(self, statement_id: str) -> None: logger.debug(f"Canceled statement {statement_id}") except Exception as e: logger.warning(f"Failed to cancel statement {statement_id}: {e}") -