From 9b1190853667851a699994a44ffd995388f2cc9f Mon Sep 17 00:00:00 2001 From: Jason Robert Date: Fri, 15 May 2026 09:41:19 -0400 Subject: [PATCH] fix(copilot): tolerate SDK metadata parsing errors when listing models Copilot SDK >=0.3.0 eagerly parses every model in its `models.list` response with dataclass `from_dict` helpers. When any required field is missing the parser raises `ValueError` and the entire listing fails. `claude-opus-4.7-1m-internal` ships a `billing` object without the required `multiplier` field, which makes `ModelBilling.from_dict` raise `ValueError("Missing required field 'multiplier' in ModelBilling")`. Both `get_max_prompt_tokens` and `_validate_reasoning_effort_for_model` caught only `(TimeoutError, ProviderError, OSError, RuntimeError)` at the SDK boundary, so the `ValueError` leaked through and surfaced as `Dialog turn failed: ...` after exhausting the retry loop. This was particularly disruptive for workflows that configure `reasoning.effort` on the Copilot provider, since the validation step runs before `create_session` and blocks every agent invocation. Broaden both catches to `Exception` (still excludes `asyncio.CancelledError`/`KeyboardInterrupt`/`SystemExit`) and document the rationale at both call sites. The behavior matches the existing docstring contract: "capability metadata must never block a workflow that the SDK might otherwise accept." Updated `test_unexpected_exception_propagates` (which asserted the opposite contract) into `test_value_error_from_sdk_parser_returns_none` and added `test_value_error_from_sdk_parser_skips_validation` covering the reasoning-effort path with the exact upstream error message. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CHANGELOG.md | 23 +++++++++++++ src/conductor/providers/copilot.py | 22 ++++++++++-- tests/test_providers/test_copilot.py | 50 ++++++++++++++++++++++++---- 3 files changed, 87 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2bec70a..895003b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased](https://github.com/microsoft/conductor/compare/v0.1.16...HEAD) +### Fixed +- Workflows that configure `reasoning.effort` (or workflow-wide + `runtime.default_reasoning_effort`) on the Copilot provider were broken + for **every named Copilot model** when running against + `github-copilot-sdk` 0.3.0. The SDK's `models.list` response includes a + `billing` object on every model, but none of them currently ship the + `multiplier` field that the SDK's `ModelBilling.from_dict` parser + treats as required — so every model in the response triggers + `ValueError("Missing required field 'multiplier' in ModelBilling")`, + which kills the entire `list_models()` call. The error then leaked + through the narrow `except` tuple in + `_validate_reasoning_effort_for_model` (and `get_max_prompt_tokens`), + poisoned the retry loop, and surfaced as `Dialog turn failed: …` after + three wasted attempts. (`get_max_prompt_tokens` was rescued by the + engine's outer `except Exception`, so context-window metadata was + silently unavailable rather than fatal.) + Both metadata methods now catch any `Exception` raised at the SDK + boundary and treat the failure as "metadata unavailable" — validation + is skipped permissively and the configured `reasoning_effort` is + forwarded to `create_session` as before. + `asyncio.CancelledError`/`KeyboardInterrupt`/`SystemExit` (all + `BaseException` subclasses) still propagate. + ## [0.1.16](https://github.com/microsoft/conductor/compare/v0.1.15...v0.1.16) - 2026-05-14 ### Added diff --git a/src/conductor/providers/copilot.py b/src/conductor/providers/copilot.py index 03c3304..c270761 100644 --- a/src/conductor/providers/copilot.py +++ b/src/conductor/providers/copilot.py @@ -1935,13 +1935,22 @@ async def get_max_prompt_tokens(self, model: str) -> int | None: Returns ``None`` in mock-handler mode, when the SDK is unavailable, when no match is found, or when the SDK call fails — context-window metadata must never block workflow execution. + + Catches ``Exception`` (not ``BaseException``) at the SDK boundary so + ``asyncio.CancelledError``/``KeyboardInterrupt``/``SystemExit`` still + propagate. The broad catch is required because Copilot SDK >=0.3.0 + eagerly parses every entry in the ``models.list`` response with + dataclass ``from_dict`` helpers that raise ``ValueError`` on any + missing required field — e.g. ``ModelBilling`` requires ``multiplier``, + and certain models (such as ``claude-opus-4.7-1m-internal``) ship a + ``billing`` object without one, which kills the entire listing. """ if self._mock_handler is not None or not COPILOT_SDK_AVAILABLE: return None try: await self._ensure_client_started() models = await self._client.list_models() - except (TimeoutError, ProviderError, OSError, RuntimeError) as e: + except Exception as e: logger.debug("Failed to list Copilot models for %r: %s", model, e) return None by_id = {info.id: info for info in models} @@ -1967,6 +1976,15 @@ async def _validate_reasoning_effort_for_model( — capability metadata must never block a workflow that the SDK might otherwise accept. + Catches ``Exception`` (not ``BaseException``) at the SDK boundary so + ``asyncio.CancelledError``/``KeyboardInterrupt``/``SystemExit`` still + propagate. The broad catch is required because Copilot SDK >=0.3.0 + eagerly parses every entry in the ``models.list`` response with + dataclass ``from_dict`` helpers that raise ``ValueError`` on any + missing required field — e.g. ``ModelBilling`` requires ``multiplier``, + and certain models (such as ``claude-opus-4.7-1m-internal``) ship a + ``billing`` object without one, which kills the entire listing. + Skipped entirely in mock-handler mode and when the SDK is not installed. """ @@ -1975,7 +1993,7 @@ async def _validate_reasoning_effort_for_model( try: await self._ensure_client_started() models = await self._client.list_models() - except (TimeoutError, ProviderError, OSError, RuntimeError) as e: + except Exception as e: logger.debug( "Failed to list Copilot models for reasoning_effort validation of %r: %s", model, diff --git a/tests/test_providers/test_copilot.py b/tests/test_providers/test_copilot.py index 7985aa2..c75533d 100644 --- a/tests/test_providers/test_copilot.py +++ b/tests/test_providers/test_copilot.py @@ -916,16 +916,23 @@ async def list_models() -> list[Any]: assert await provider.get_max_prompt_tokens("gpt-4o") == 128000 @pytest.mark.asyncio - async def test_unexpected_exception_propagates(self) -> None: - """Non-SDK exceptions (programming errors) are not swallowed by the - provider — they bubble up so the engine's outer safety net handles them.""" + async def test_value_error_from_sdk_parser_returns_none(self) -> None: + """SDK schema-parsing failures (e.g. ``ValueError`` from + ``ModelBilling.from_dict`` when the API omits the ``multiplier`` + field) must be soft-swallowed — context-window metadata must + never block workflow execution. + + Regression for github-copilot-sdk 0.3.0, where ``list_models()`` + eagerly parses every model and a single malformed entry (observed + for ``claude-opus-4.7-1m-internal``) raises ``ValueError`` and + kills the whole call. + """ async def list_models() -> list[Any]: - raise ValueError("bug") + raise ValueError("Missing required field 'multiplier' in ModelBilling") provider = self._provider_with_list_models(list_models) - with pytest.raises(ValueError): - await provider.get_max_prompt_tokens("gpt-4o") + assert await provider.get_max_prompt_tokens("gpt-4o") is None @pytest.mark.asyncio async def test_alias_resolves_via_match_model_id(self) -> None: @@ -1106,6 +1113,37 @@ async def list_models() -> list[Any]: await provider.execute(agent=agent, context={}, rendered_prompt="Plan") assert captured["create_session_kwargs"]["reasoning_effort"] == "xhigh" + @pytest.mark.asyncio + async def test_value_error_from_sdk_parser_skips_validation( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """SDK schema-parsing failures during ``list_models()`` must not block + execution — validation is skipped permissively and the configured + ``reasoning_effort`` is still forwarded to ``create_session``. + + Regression for github-copilot-sdk 0.3.0: ``ModelBilling.from_dict`` + raises ``ValueError("Missing required field 'multiplier' in + ModelBilling")`` for models like ``claude-opus-4.7-1m-internal``, + which previously leaked through the narrow except tuple in + ``_validate_reasoning_effort_for_model`` and surfaced as a + ``Dialog turn failed: …`` error after exhausting the retry loop. + """ + from conductor.config.schema import ReasoningConfig + + async def list_models() -> list[Any]: + raise ValueError("Missing required field 'multiplier' in ModelBilling") + + captured: dict[str, Any] = {} + provider = await self._build_provider(captured, monkeypatch, list_models_impl=list_models) + agent = AgentDef( + name="planner", + model="claude-opus-4.7-1m-internal", + prompt="Plan", + reasoning=ReasoningConfig(effort="xhigh"), + ) + await provider.execute(agent=agent, context={}, rendered_prompt="Plan") + assert captured["create_session_kwargs"]["reasoning_effort"] == "xhigh" + @pytest.mark.asyncio async def test_validation_error_not_retried_in_execute_with_retry( self, monkeypatch: pytest.MonkeyPatch