From 9b1190853667851a699994a44ffd995388f2cc9f Mon Sep 17 00:00:00 2001
From: Jason Robert <jasont.robert@gmail.com>
Date: Fri, 15 May 2026 09:41:19 -0400
Subject: [PATCH] fix(copilot): tolerate SDK metadata parsing errors when
 listing models

Copilot SDK >=0.3.0 eagerly parses every model in its `models.list`
response with dataclass `from_dict` helpers. When any required field is
missing the parser raises `ValueError` and the entire listing fails.
`claude-opus-4.7-1m-internal` ships a `billing` object without the
required `multiplier` field, which makes `ModelBilling.from_dict`
raise `ValueError("Missing required field 'multiplier' in
ModelBilling")`.

Both `get_max_prompt_tokens` and `_validate_reasoning_effort_for_model`
caught only `(TimeoutError, ProviderError, OSError, RuntimeError)` at
the SDK boundary, so the `ValueError` leaked through and surfaced as
`Dialog turn failed: ...` after exhausting the retry loop. This was
particularly disruptive for workflows that configure
`reasoning.effort` on the Copilot provider, since the validation step
runs before `create_session` and blocks every agent invocation.

Broaden both catches to `Exception` (still excludes
`asyncio.CancelledError`/`KeyboardInterrupt`/`SystemExit`) and
document the rationale at both call sites. The behavior matches the
existing docstring contract: "capability metadata must never block a
workflow that the SDK might otherwise accept."

Updated `test_unexpected_exception_propagates` (which asserted the
opposite contract) into `test_value_error_from_sdk_parser_returns_none`
and added `test_value_error_from_sdk_parser_skips_validation` covering
the reasoning-effort path with the exact upstream error message.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 CHANGELOG.md                         | 23 +++++++++++++
 src/conductor/providers/copilot.py   | 22 ++++++++++--
 tests/test_providers/test_copilot.py | 50 ++++++++++++++++++++++++----
 3 files changed, 87 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2bec70a..895003b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased](https://github.com/microsoft/conductor/compare/v0.1.16...HEAD)
 
+### Fixed
+- Workflows that configure `reasoning.effort` (or workflow-wide
+  `runtime.default_reasoning_effort`) on the Copilot provider were broken
+  for **every named Copilot model** when running against
+  `github-copilot-sdk` 0.3.0. The SDK's `models.list` response includes a
+  `billing` object on every model, but none of them currently ship the
+  `multiplier` field that the SDK's `ModelBilling.from_dict` parser
+  treats as required — so every model in the response triggers
+  `ValueError("Missing required field 'multiplier' in ModelBilling")`,
+  which kills the entire `list_models()` call. The error then leaked
+  through the narrow `except` tuple in
+  `_validate_reasoning_effort_for_model` (and `get_max_prompt_tokens`),
+  poisoned the retry loop, and surfaced as `Dialog turn failed: …` after
+  three wasted attempts. (`get_max_prompt_tokens` was rescued by the
+  engine's outer `except Exception`, so context-window metadata was
+  silently unavailable rather than fatal.)
+  Both metadata methods now catch any `Exception` raised at the SDK
+  boundary and treat the failure as "metadata unavailable" — validation
+  is skipped permissively and the configured `reasoning_effort` is
+  forwarded to `create_session` as before.
+  `asyncio.CancelledError`/`KeyboardInterrupt`/`SystemExit` (all
+  `BaseException` subclasses) still propagate.
+
 ## [0.1.16](https://github.com/microsoft/conductor/compare/v0.1.15...v0.1.16) - 2026-05-14
 
 ### Added
diff --git a/src/conductor/providers/copilot.py b/src/conductor/providers/copilot.py
index 03c3304..c270761 100644
--- a/src/conductor/providers/copilot.py
+++ b/src/conductor/providers/copilot.py
@@ -1935,13 +1935,22 @@ async def get_max_prompt_tokens(self, model: str) -> int | None:
         Returns ``None`` in mock-handler mode, when the SDK is unavailable,
         when no match is found, or when the SDK call fails — context-window
         metadata must never block workflow execution.
+
+        Catches ``Exception`` (not ``BaseException``) at the SDK boundary so
+        ``asyncio.CancelledError``/``KeyboardInterrupt``/``SystemExit`` still
+        propagate. The broad catch is required because Copilot SDK >=0.3.0
+        eagerly parses every entry in the ``models.list`` response with
+        dataclass ``from_dict`` helpers that raise ``ValueError`` on any
+        missing required field — e.g. ``ModelBilling`` requires ``multiplier``,
+        and certain models (such as ``claude-opus-4.7-1m-internal``) ship a
+        ``billing`` object without one, which kills the entire listing.
         """
         if self._mock_handler is not None or not COPILOT_SDK_AVAILABLE:
             return None
         try:
             await self._ensure_client_started()
             models = await self._client.list_models()
-        except (TimeoutError, ProviderError, OSError, RuntimeError) as e:
+        except Exception as e:
             logger.debug("Failed to list Copilot models for %r: %s", model, e)
             return None
         by_id = {info.id: info for info in models}
@@ -1967,6 +1976,15 @@ async def _validate_reasoning_effort_for_model(
         — capability metadata must never block a workflow that the SDK might
         otherwise accept.
 
+        Catches ``Exception`` (not ``BaseException``) at the SDK boundary so
+        ``asyncio.CancelledError``/``KeyboardInterrupt``/``SystemExit`` still
+        propagate. The broad catch is required because Copilot SDK >=0.3.0
+        eagerly parses every entry in the ``models.list`` response with
+        dataclass ``from_dict`` helpers that raise ``ValueError`` on any
+        missing required field — e.g. ``ModelBilling`` requires ``multiplier``,
+        and certain models (such as ``claude-opus-4.7-1m-internal``) ship a
+        ``billing`` object without one, which kills the entire listing.
+
         Skipped entirely in mock-handler mode and when the SDK is not
         installed.
         """
@@ -1975,7 +1993,7 @@ async def _validate_reasoning_effort_for_model(
         try:
             await self._ensure_client_started()
             models = await self._client.list_models()
-        except (TimeoutError, ProviderError, OSError, RuntimeError) as e:
+        except Exception as e:
             logger.debug(
                 "Failed to list Copilot models for reasoning_effort validation of %r: %s",
                 model,
diff --git a/tests/test_providers/test_copilot.py b/tests/test_providers/test_copilot.py
index 7985aa2..c75533d 100644
--- a/tests/test_providers/test_copilot.py
+++ b/tests/test_providers/test_copilot.py
@@ -916,16 +916,23 @@ async def list_models() -> list[Any]:
         assert await provider.get_max_prompt_tokens("gpt-4o") == 128000
 
     @pytest.mark.asyncio
-    async def test_unexpected_exception_propagates(self) -> None:
-        """Non-SDK exceptions (programming errors) are not swallowed by the
-        provider — they bubble up so the engine's outer safety net handles them."""
+    async def test_value_error_from_sdk_parser_returns_none(self) -> None:
+        """SDK schema-parsing failures (e.g. ``ValueError`` from
+        ``ModelBilling.from_dict`` when the API omits the ``multiplier``
+        field) must be soft-swallowed — context-window metadata must
+        never block workflow execution.
+
+        Regression for github-copilot-sdk 0.3.0, where ``list_models()``
+        eagerly parses every model and a single malformed entry (observed
+        for ``claude-opus-4.7-1m-internal``) raises ``ValueError`` and
+        kills the whole call.
+        """
 
         async def list_models() -> list[Any]:
-            raise ValueError("bug")
+            raise ValueError("Missing required field 'multiplier' in ModelBilling")
 
         provider = self._provider_with_list_models(list_models)
-        with pytest.raises(ValueError):
-            await provider.get_max_prompt_tokens("gpt-4o")
+        assert await provider.get_max_prompt_tokens("gpt-4o") is None
 
     @pytest.mark.asyncio
     async def test_alias_resolves_via_match_model_id(self) -> None:
@@ -1106,6 +1113,37 @@ async def list_models() -> list[Any]:
         await provider.execute(agent=agent, context={}, rendered_prompt="Plan")
         assert captured["create_session_kwargs"]["reasoning_effort"] == "xhigh"
 
+    @pytest.mark.asyncio
+    async def test_value_error_from_sdk_parser_skips_validation(
+        self, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        """SDK schema-parsing failures during ``list_models()`` must not block
+        execution — validation is skipped permissively and the configured
+        ``reasoning_effort`` is still forwarded to ``create_session``.
+
+        Regression for github-copilot-sdk 0.3.0: ``ModelBilling.from_dict``
+        raises ``ValueError("Missing required field 'multiplier' in
+        ModelBilling")`` for models like ``claude-opus-4.7-1m-internal``,
+        which previously leaked through the narrow except tuple in
+        ``_validate_reasoning_effort_for_model`` and surfaced as a
+        ``Dialog turn failed: …`` error after exhausting the retry loop.
+        """
+        from conductor.config.schema import ReasoningConfig
+
+        async def list_models() -> list[Any]:
+            raise ValueError("Missing required field 'multiplier' in ModelBilling")
+
+        captured: dict[str, Any] = {}
+        provider = await self._build_provider(captured, monkeypatch, list_models_impl=list_models)
+        agent = AgentDef(
+            name="planner",
+            model="claude-opus-4.7-1m-internal",
+            prompt="Plan",
+            reasoning=ReasoningConfig(effort="xhigh"),
+        )
+        await provider.execute(agent=agent, context={}, rendered_prompt="Plan")
+        assert captured["create_session_kwargs"]["reasoning_effort"] == "xhigh"
+
     @pytest.mark.asyncio
     async def test_validation_error_not_retried_in_execute_with_retry(
         self, monkeypatch: pytest.MonkeyPatch