From 09eeae7d8d95d67db19865e435592c4a23118ed0 Mon Sep 17 00:00:00 2001 From: Nabin Mulepati Date: Fri, 8 May 2026 18:15:07 -0600 Subject: [PATCH] refactor: remove ModelProviderRegistry.default and require ModelConfig.provider (#590) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Final cleanup after the #589 deprecation cycle. Providers are now a named list referenced by name end-to-end — no hidden registry default, no YAML default leaking into DataDesigner construction (#588), and no ambiguity about which provider wins. ModelConfig.provider becomes a required str. ModelProviderRegistry.default and the associated validators, get_default_provider_name() helper, CLI "Change default provider" workflow, and the "Default" column in `data-designer config list` are removed. resolve_model_provider_registry() drops its default_provider_name kwarg, and DataDesigner.__init__ no longer threads a YAML default through registry construction. Pydantic v2's extra="ignore" default lets existing on-disk YAMLs with default: keys load cleanly, so users on the deprecation cycle migrate without breakage. --- .../configure-model-settings-with-the-cli.md | 9 - docs/concepts/models/custom-model-settings.md | 7 - .../concepts/models/default-model-settings.md | 7 - docs/concepts/models/model-configs.md | 5 +- docs/concepts/models/model-providers.md | 8 - .../configure-model-settings-with-the-cli.mdx | 7 - .../concepts/models/custom-model-settings.mdx | 6 +- .../models/default-model-settings.mdx | 4 - .../pages/concepts/models/model-configs.mdx | 6 +- .../pages/concepts/models/model-providers.mdx | 4 - .../config/default_model_settings.py | 26 --- .../src/data_designer/config/models.py | 25 +-- .../data_designer/config/testing/fixtures.py | 1 + .../tests/config/test_config_builder.py | 3 + .../config/test_default_model_settings.py | 59 ------ .../tests/config/test_models.py | 36 +--- .../data_designer/engine/model_provider.py | 72 +------ .../tests/engine/test_model_provider.py | 139 ++----------- .../src/data_designer/cli/README.md | 4 - .../src/data_designer/cli/commands/list.py | 7 +- .../cli/controllers/provider_controller.py | 45 +---- .../data_designer/cli/forms/model_builder.py | 12 +- .../cli/repositories/provider_repository.py | 32 +-- .../cli/services/provider_service.py | 31 +-- .../cli/utils/agent_introspection.py | 20 +- .../cli/utils/agent_text_formatter.py | 37 +--- .../data_designer/interface/data_designer.py | 56 +----- packages/data-designer/tests/cli/conftest.py | 2 +- .../cli/controllers/test_model_controller.py | 2 +- .../controllers/test_provider_controller.py | 54 +---- .../tests/cli/forms/test_model_builder.py | 26 ++- .../repositories/test_provider_repository.py | 78 +------- .../cli/services/test_provider_service.py | 30 +-- .../test_agent_introspection_integration.py | 14 +- .../cli/utils/test_agent_text_formatter.py | 11 +- .../tests/interface/test_data_designer.py | 189 +----------------- scripts/benchmarks/benchmark_engine_v2.py | 2 +- 37 files changed, 153 insertions(+), 923 deletions(-) diff --git a/docs/concepts/models/configure-model-settings-with-the-cli.md b/docs/concepts/models/configure-model-settings-with-the-cli.md index e7c1b5edd..c4a273c3f 100644 --- a/docs/concepts/models/configure-model-settings-with-the-cli.md +++ b/docs/concepts/models/configure-model-settings-with-the-cli.md @@ -65,14 +65,6 @@ data-designer config providers **Delete all providers**: Remove all providers and their associated models. -**Change default provider**: Set which provider is used by default. This option is only available when multiple providers are configured. - -!!! warning "Deprecated: 'Change default provider' workflow" - The "Change default provider" workflow is **deprecated** and will be removed in a future - release alongside the registry-level default. Specify `provider=` explicitly on each - `ModelConfig` instead — the workflow now emits a `DeprecationWarning` when entered. - See [issue #589](https://github.com/NVIDIA-NeMo/DataDesigner/issues/589). - ## Managing Model Configurations Run the interactive model configuration command: @@ -117,7 +109,6 @@ data-designer config list This command displays: - **Model Providers**: All configured providers with their endpoints (API keys are masked) -- **Default Provider**: The currently selected default provider _(deprecated; see issue #589)_ - **Model Configurations**: All configured models with their settings ## Resetting Configurations diff --git a/docs/concepts/models/custom-model-settings.md b/docs/concepts/models/custom-model-settings.md index be73ae408..42e32d15c 100644 --- a/docs/concepts/models/custom-model-settings.md +++ b/docs/concepts/models/custom-model-settings.md @@ -90,13 +90,6 @@ preview_result.display_sample_record() !!! note "Default Providers Always Available" When you only specify `model_configs`, the default model providers (NVIDIA, OpenAI, and OpenRouter) are still available. You only need to create custom providers if you want to connect to different endpoints or modify provider settings. -!!! warning "Always specify `provider=` on `ModelConfig`" - Leaving `provider` unset (or passing `provider=None`) on `ModelConfig` is **deprecated**. - The legacy "implicit default provider" routing — used when `provider` is omitted — emits - a `DeprecationWarning` and will be removed in a future release. Always reference the - intended provider by name, as the examples below do. See - [issue #589](https://github.com/NVIDIA-NeMo/DataDesigner/issues/589). - !!! tip "Mixing Custom and Default Models" When you provide custom `model_configs` to `DataDesignerConfigBuilder`, they **replace** the defaults entirely. To use custom model configs in addition to the default configs, use the add_model_config method: diff --git a/docs/concepts/models/default-model-settings.md b/docs/concepts/models/default-model-settings.md index 50c957c93..450c7d135 100644 --- a/docs/concepts/models/default-model-settings.md +++ b/docs/concepts/models/default-model-settings.md @@ -110,13 +110,6 @@ Both methods operate on the same files, ensuring consistency across your entire !!! warning "Hosted Provider Data Handling" The default model providers call hosted endpoints operated by NVIDIA, OpenAI, OpenRouter, or their upstream providers. Provider terms and privacy practices apply independently of Data Designer, and free or trial endpoints may log request data for security, operations, or product improvement. Do not submit confidential information or personal data, including faces, voices, screenshots, regulated data, or other sensitive content, unless the selected provider and endpoint are approved for your use case. -!!! warning "Deprecated: implicit default provider routing" - The `default:` key in `~/.data-designer/model_providers.yaml` and the registry-level - "default provider" concept are **deprecated** and will be removed in a future release. - Specify `provider=` explicitly on every `ModelConfig` instead — the built-in defaults - above already do this, and a `DeprecationWarning` is now emitted whenever the legacy - routing is exercised. See [issue #589](https://github.com/NVIDIA-NeMo/DataDesigner/issues/589). - !!! tip "Environment Variables" Store your API keys in environment variables rather than hardcoding them in your scripts: diff --git a/docs/concepts/models/model-configs.md b/docs/concepts/models/model-configs.md index 888a7bdca..61854002b 100644 --- a/docs/concepts/models/model-configs.md +++ b/docs/concepts/models/model-configs.md @@ -15,9 +15,12 @@ The `ModelConfig` class has the following fields: | `alias` | `str` | Yes | Unique identifier for this model configuration (e.g., `"my-text-model"`, `"reasoning-model"`) | | `model` | `str` | Yes | Model identifier as recognized by the provider (e.g., `"nvidia/nemotron-3-nano-30b-a3b"`, `"gpt-4"`) | | `inference_parameters` | `InferenceParamsT` | No | Controls model behavior during generation. Use `ChatCompletionInferenceParams` for text/code/structured generation or `EmbeddingInferenceParams` for embeddings. Defaults to `ChatCompletionInferenceParams()` if not provided. The generation type is automatically determined by the inference parameters type. See [Inference Parameters](inference-parameters.md) for details. | -| `provider` | `str` | No | Reference to the name of the Provider to use (e.g., `"nvidia"`, `"openai"`, `"openrouter"`). If not specified, one set as the default provider, which may resolve to the first provider if there are more than one | +| `provider` | `str` | Yes | Reference to the name of the Provider to use (e.g., `"nvidia"`, `"openai"`, `"openrouter"`). | | `skip_health_check` | `bool` | No | Whether to skip the health check for this model. Defaults to `False`. Set to `True` to skip health checks when you know the model is accessible or want to defer validation. | +!!! warning "Upgrade note" + Every `ModelConfig` must now specify `provider`. Existing `model_configs.yaml` entries from older releases that omit `provider` or set it to `null` must be updated with an explicit provider name before loading. Agent tooling that parses `data-designer agent context` should read each model alias item's `provider` field; the top-level `default_provider` and per-item `configured_provider` / `effective_provider` fields are no longer emitted. + ## Examples diff --git a/docs/concepts/models/model-providers.md b/docs/concepts/models/model-providers.md index f8625ae9b..9d397a87a 100644 --- a/docs/concepts/models/model-providers.md +++ b/docs/concepts/models/model-providers.md @@ -6,14 +6,6 @@ Model providers are external services that host and serve models. Data Designer A `ModelProvider` defines how Data Designer connects to a provider's API endpoint. When you create a `ModelConfig`, you reference a provider by name, and Data Designer uses that provider's settings to make API calls to the appropriate endpoint. -!!! warning "Deprecated: implicit default provider routing" - Earlier versions of Data Designer let you omit `provider=` on `ModelConfig` and - fall back to a registry-level default — including the `default:` key in - `~/.data-designer/model_providers.yaml`. That implicit routing is **deprecated** - and will be removed in a future release. Always reference a provider by name on - every `ModelConfig`. A `DeprecationWarning` is now emitted when the legacy path - is exercised. See [issue #589](https://github.com/NVIDIA-NeMo/DataDesigner/issues/589). - ## ModelProvider Configuration The `ModelProvider` class has the following fields: diff --git a/fern/versions/latest/pages/concepts/models/configure-model-settings-with-the-cli.mdx b/fern/versions/latest/pages/concepts/models/configure-model-settings-with-the-cli.mdx index aa467edf9..ed177a092 100644 --- a/fern/versions/latest/pages/concepts/models/configure-model-settings-with-the-cli.mdx +++ b/fern/versions/latest/pages/concepts/models/configure-model-settings-with-the-cli.mdx @@ -74,12 +74,6 @@ data-designer config providers **Delete all providers**: Remove all providers and their associated models. -**Change default provider**: Set which provider is used by default. This option is only available when multiple providers are configured. - - - The "Change default provider" workflow is **deprecated** and will be removed in a future release alongside the registry-level default. Specify `provider=` explicitly on each `ModelConfig` instead — the workflow now emits a `DeprecationWarning` when entered. See [issue #589](https://github.com/NVIDIA-NeMo/DataDesigner/issues/589). - - ## Managing Model Configurations Run the interactive model configuration command: @@ -128,7 +122,6 @@ data-designer config list This command displays: - **Model Providers**: All configured providers with their endpoints (API keys are masked) -- **Default Provider**: The currently selected default provider _(deprecated; see [issue #589](https://github.com/NVIDIA-NeMo/DataDesigner/issues/589))_ - **Model Configurations**: All configured models with their settings ## Resetting Configurations diff --git a/fern/versions/latest/pages/concepts/models/custom-model-settings.mdx b/fern/versions/latest/pages/concepts/models/custom-model-settings.mdx index 22dfafbac..d4e741059 100644 --- a/fern/versions/latest/pages/concepts/models/custom-model-settings.mdx +++ b/fern/versions/latest/pages/concepts/models/custom-model-settings.mdx @@ -94,9 +94,9 @@ preview_result.display_sample_record() When you only specify `model_configs`, the default model providers (NVIDIA, OpenAI, and OpenRouter) are still available. You only need to create custom providers if you want to connect to different endpoints or modify provider settings. - - Leaving `provider` unset (or passing `provider=None`) on `ModelConfig` is **deprecated**. The legacy "implicit default provider" routing — used when `provider` is omitted — emits a `DeprecationWarning` and will be removed in a future release. Always reference the intended provider by name, as the examples below do. See [issue #589](https://github.com/NVIDIA-NeMo/DataDesigner/issues/589). - + + Every custom `ModelConfig` must reference the intended provider by name. The examples below use the built-in `nvidia` provider. + When you provide custom `model_configs` to `DataDesignerConfigBuilder`, they **replace** the defaults entirely. To use custom model configs in addition to the default configs, use the add_model_config method: diff --git a/fern/versions/latest/pages/concepts/models/default-model-settings.mdx b/fern/versions/latest/pages/concepts/models/default-model-settings.mdx index 803573770..a52688e50 100644 --- a/fern/versions/latest/pages/concepts/models/default-model-settings.mdx +++ b/fern/versions/latest/pages/concepts/models/default-model-settings.mdx @@ -116,10 +116,6 @@ Both methods operate on the same files, ensuring consistency across your entire The default model providers call hosted endpoints operated by NVIDIA, OpenAI, OpenRouter, or their upstream providers. Provider terms and privacy practices apply independently of Data Designer, and free or trial endpoints may log request data for security, operations, or product improvement. Do not submit confidential information or personal data, including faces, voices, screenshots, regulated data, or other sensitive content, unless the selected provider and endpoint are approved for your use case. - - The `default:` key in `~/.data-designer/model_providers.yaml` and the registry-level "default provider" concept are **deprecated** and will be removed in a future release. Specify `provider=` explicitly on every `ModelConfig` instead — the built-in defaults above already do this, and a `DeprecationWarning` is now emitted whenever the legacy routing is exercised. See [issue #589](https://github.com/NVIDIA-NeMo/DataDesigner/issues/589). - - Store your API keys in environment variables rather than hardcoding them in your scripts: diff --git a/fern/versions/latest/pages/concepts/models/model-configs.mdx b/fern/versions/latest/pages/concepts/models/model-configs.mdx index a784b5746..7024aac88 100644 --- a/fern/versions/latest/pages/concepts/models/model-configs.mdx +++ b/fern/versions/latest/pages/concepts/models/model-configs.mdx @@ -18,9 +18,13 @@ The `ModelConfig` class has the following fields: | `alias` | `str` | Yes | Unique identifier for this model configuration (e.g., `"my-text-model"`, `"reasoning-model"`) | | `model` | `str` | Yes | Model identifier as recognized by the provider (e.g., `"nvidia/nemotron-3-nano-30b-a3b"`, `"gpt-4"`) | | `inference_parameters` | `InferenceParamsT` | No | Controls model behavior during generation. Use `ChatCompletionInferenceParams` for text/code/structured generation or `EmbeddingInferenceParams` for embeddings. Defaults to `ChatCompletionInferenceParams()` if not provided. The generation type is automatically determined by the inference parameters type. See [Inference Parameters](/concepts/models/inference-parameters) for details. | -| `provider` | `str` | No | Reference to the name of the Provider to use (e.g., `"nvidia"`, `"openai"`, `"openrouter"`). If not specified, one set as the default provider, which may resolve to the first provider if there are more than one | +| `provider` | `str` | Yes | Reference to the name of the Provider to use (e.g., `"nvidia"`, `"openai"`, `"openrouter"`). | | `skip_health_check` | `bool` | No | Whether to skip the health check for this model. Defaults to `False`. Set to `True` to skip health checks when you know the model is accessible or want to defer validation. | + +Every `ModelConfig` must now specify `provider`. Existing `model_configs.yaml` entries from older releases that omit `provider` or set it to `null` must be updated with an explicit provider name before loading. Agent tooling that parses `data-designer agent context` should read each model alias item's `provider` field; the top-level `default_provider` and per-item `configured_provider` / `effective_provider` fields are no longer emitted. + + ## Examples diff --git a/fern/versions/latest/pages/concepts/models/model-providers.mdx b/fern/versions/latest/pages/concepts/models/model-providers.mdx index 02bb4115b..a51b80d84 100644 --- a/fern/versions/latest/pages/concepts/models/model-providers.mdx +++ b/fern/versions/latest/pages/concepts/models/model-providers.mdx @@ -9,10 +9,6 @@ Model providers are external services that host and serve models. Data Designer A `ModelProvider` defines how Data Designer connects to a provider's API endpoint. When you create a `ModelConfig`, you reference a provider by name, and Data Designer uses that provider's settings to make API calls to the appropriate endpoint. - - Earlier versions of Data Designer let you omit `provider=` on `ModelConfig` and fall back to a registry-level default — including the `default:` key in `~/.data-designer/model_providers.yaml`. That implicit routing is **deprecated** and will be removed in a future release. Always reference a provider by name on every `ModelConfig`. A `DeprecationWarning` is now emitted when the legacy path is exercised. See [issue #589](https://github.com/NVIDIA-NeMo/DataDesigner/issues/589). - - ## ModelProvider Configuration The `ModelProvider` class has the following fields: diff --git a/packages/data-designer-config/src/data_designer/config/default_model_settings.py b/packages/data-designer-config/src/data_designer/config/default_model_settings.py index 8a0366733..8185c3468 100644 --- a/packages/data-designer-config/src/data_designer/config/default_model_settings.py +++ b/packages/data-designer-config/src/data_designer/config/default_model_settings.py @@ -24,7 +24,6 @@ PREDEFINED_PROVIDERS_MODEL_MAP, ) from data_designer.config.utils.io_helpers import load_config_file, save_config_file -from data_designer.config.utils.warning_helpers import warn_at_caller logger = logging.getLogger(__name__) @@ -95,31 +94,6 @@ def get_default_providers() -> list[ModelProvider]: return [] -def get_default_provider_name() -> str | None: - """Return the YAML's ``default:`` provider name, if set. - - Deprecated: this function and the underlying YAML key are deprecated and - will be removed in a future release. Specify ``provider=`` explicitly on - each ``ModelConfig`` instead. See issue #589. - """ - default = _get_default_providers_file_content(MODEL_PROVIDERS_FILE_PATH).get("default") - if default is not None: - # ``warn_at_caller`` (rather than ``warnings.warn(stacklevel=2)``) so the - # warning attributes to the user's call site rather than this library - # module. The only real call path is ``DataDesigner.__init__``, which - # is itself a ``data_designer`` frame; under default Python filters, - # library-attributed ``DeprecationWarning`` entries are silenced - # (``ignore::DeprecationWarning``), so library attribution = invisible - # warning. See PR #594 review. - warn_at_caller( - f"The 'default:' key in {MODEL_PROVIDERS_FILE_PATH} is deprecated and will " - "be removed in a future release. Remove it and specify provider= explicitly " - "on each ModelConfig instead. See issue #589.", - DeprecationWarning, - ) - return default - - def resolve_seed_default_model_settings() -> None: if not MODEL_CONFIGS_FILE_PATH.exists(): logger.debug( diff --git a/packages/data-designer-config/src/data_designer/config/models.py b/packages/data-designer-config/src/data_designer/config/models.py index 482f78308..f6352fa09 100644 --- a/packages/data-designer-config/src/data_designer/config/models.py +++ b/packages/data-designer-config/src/data_designer/config/models.py @@ -31,7 +31,6 @@ load_image_path_to_base64, ) from data_designer.config.utils.io_helpers import smart_load_yaml -from data_designer.config.utils.warning_helpers import warn_at_caller logger = logging.getLogger(__name__) @@ -504,17 +503,15 @@ class ModelConfig(ConfigBase): model: Model identifier (e.g., from build.nvidia.com or other providers). inference_parameters: Inference parameters for the model (temperature, top_p, max_tokens, etc.). The generation_type is determined by the type of inference_parameters. - provider: Name of the model provider. Required in a future release. Leaving - ``provider`` unset (or ``None``) currently routes through the registry's - implicit default and is **deprecated**; specify ``provider=`` explicitly. - See issue #589. + provider: Name of the model provider. Must match the ``name`` field of a + ``ModelProvider`` registered with the surrounding ``DataDesigner`` instance. skip_health_check: Whether to skip the health check for this model. Defaults to False. """ alias: str model: str inference_parameters: InferenceParamsT = Field(default_factory=ChatCompletionInferenceParams) - provider: str | None = None + provider: str skip_health_check: bool = False @property @@ -539,22 +536,6 @@ def _convert_inference_parameters(cls, value: Any) -> Any: return ChatCompletionInferenceParams(**value) return value - @model_validator(mode="after") - def _warn_on_implicit_provider(self) -> Self: - if self.provider is None: - # Use ``warn_at_caller`` so the warning is attributed to the user's - # ``ModelConfig(...)`` / ``model_validate(...)`` call rather than a - # pydantic-internal frame. Without this, every call dedupes to the - # same pydantic line and only the first emission is shown. See - # PR #594 review. - warn_at_caller( - f"ModelConfig.provider=None is deprecated and will be required in a future release. " - f"Specify provider= explicitly on ModelConfig(alias={self.alias!r}, ...). " - "See issue #589.", - DeprecationWarning, - ) - return self - class ModelProvider(ConfigBase): """Configuration for a custom model provider. diff --git a/packages/data-designer-config/src/data_designer/config/testing/fixtures.py b/packages/data-designer-config/src/data_designer/config/testing/fixtures.py index 8fece3bf8..8735e28b5 100644 --- a/packages/data-designer-config/src/data_designer/config/testing/fixtures.py +++ b/packages/data-designer-config/src/data_designer/config/testing/fixtures.py @@ -32,6 +32,7 @@ def stub_data_designer_config_str() -> str: model_configs: - alias: my_own_code_model model: openai/meta/llama-3.3-70b-instruct + provider: openai inference_parameters: temperature: distribution_type: uniform diff --git a/packages/data-designer-config/tests/config/test_config_builder.py b/packages/data-designer-config/tests/config/test_config_builder.py index 01580ae5d..28e3de8c2 100644 --- a/packages/data-designer-config/tests/config/test_config_builder.py +++ b/packages/data-designer-config/tests/config/test_config_builder.py @@ -196,6 +196,7 @@ def test_from_config_auto_wraps_bare_dict() -> None: { "alias": "test-model", "model": "openai/meta/llama-3.3-70b-instruct", + "provider": "openai", } ], "columns": [ @@ -219,6 +220,7 @@ def test_from_config_passthrough_when_already_wrapped() -> None: { "alias": "test-model", "model": "openai/meta/llama-3.3-70b-instruct", + "provider": "openai", } ], "columns": [ @@ -253,6 +255,7 @@ def test_from_config_auto_wraps_bare_json_file() -> None: { "alias": "test-model", "model": "openai/meta/llama-3.3-70b-instruct", + "provider": "openai", } ], "columns": [ diff --git a/packages/data-designer-config/tests/config/test_default_model_settings.py b/packages/data-designer-config/tests/config/test_default_model_settings.py index 4821d0c94..24c98af51 100644 --- a/packages/data-designer-config/tests/config/test_default_model_settings.py +++ b/packages/data-designer-config/tests/config/test_default_model_settings.py @@ -2,7 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 import json -import warnings from pathlib import Path from unittest.mock import patch @@ -14,7 +13,6 @@ get_builtin_model_providers, get_default_inference_parameters, get_default_model_configs, - get_default_provider_name, get_default_providers, get_providers_with_missing_api_keys, resolve_seed_default_model_settings, @@ -146,63 +144,6 @@ def test_get_default_providers_path_does_not_exist(): get_default_providers() -def test_get_default_provider_name_with_default_key(tmp_path: Path): - """When the YAML carries a non-None ``default:``, the function must - return that value AND emit a ``DeprecationWarning`` (regression for #589). - """ - providers_file_path = tmp_path / "providers.yaml" - providers_file_path.write_text( - json.dumps(dict(providers=[p.model_dump() for p in get_builtin_model_providers()], default="nvidia")) - ) - with patch("data_designer.config.default_model_settings.MODEL_PROVIDERS_FILE_PATH", new=providers_file_path): - with pytest.warns(DeprecationWarning, match="'default:' key.*is deprecated"): - assert get_default_provider_name() == "nvidia" - - -def test_get_default_provider_name_without_default_key(tmp_path: Path): - """Pin the post-deprecation happy path: a YAML without ``default:`` must - return ``None`` and NOT emit a ``DeprecationWarning``. - """ - providers_file_path = tmp_path / "providers.yaml" - providers_file_path.write_text(json.dumps({"providers": [p.model_dump() for p in get_builtin_model_providers()]})) - with patch("data_designer.config.default_model_settings.MODEL_PROVIDERS_FILE_PATH", new=providers_file_path): - with warnings.catch_warnings(): - warnings.simplefilter("error", DeprecationWarning) - assert get_default_provider_name() is None - - -def test_get_default_provider_name_warning_attributes_to_user_frame(tmp_path: Path): - """Regression for PR #594 review (andreatgretel): the YAML-default warning - must attribute to the user's call site, not to ``default_model_settings.py``. - Python's default filter ignores library-attributed ``DeprecationWarning`` - entries, so the previous ``stacklevel=2`` attribution rendered the warning - invisible under default filters on the only real call path - (``DataDesigner.__init__``). See issue #589. - """ - providers_file_path = tmp_path / "providers.yaml" - providers_file_path.write_text( - json.dumps(dict(providers=[p.model_dump() for p in get_builtin_model_providers()], default="nvidia")) - ) - with patch("data_designer.config.default_model_settings.MODEL_PROVIDERS_FILE_PATH", new=providers_file_path): - with warnings.catch_warnings(record=True) as caught: - warnings.simplefilter("always", DeprecationWarning) - assert get_default_provider_name() == "nvidia" - - matches = [w for w in caught if "'default:' key" in str(w.message)] - assert len(matches) == 1, [str(w.message) for w in caught] - assert matches[0].filename == __file__, ( - f"Warning attributed to {matches[0].filename!r} (line {matches[0].lineno}) " - f"instead of the test file. Library-attributed DeprecationWarnings are " - f"silenced under default filters." - ) - - -def test_get_default_provider_name_path_does_not_exist(): - with patch("data_designer.config.default_model_settings.MODEL_PROVIDERS_FILE_PATH", new=Path("non_existent_path")): - with pytest.raises(FileNotFoundError, match=r"Default model providers file not found at 'non_existent_path'"): - get_default_provider_name() - - def test_get_nvidia_api_key(): with patch("data_designer.config.utils.visualization.os.getenv", return_value="nvidia_api_key"): assert get_nvidia_api_key() == "nvidia_api_key" diff --git a/packages/data-designer-config/tests/config/test_models.py b/packages/data-designer-config/tests/config/test_models.py index c5bacd818..6cd192024 100644 --- a/packages/data-designer-config/tests/config/test_models.py +++ b/packages/data-designer-config/tests/config/test_models.py @@ -4,7 +4,6 @@ import base64 import json import tempfile -import warnings from collections import Counter from pathlib import Path @@ -482,38 +481,25 @@ def test_model_config_construction(): assert model_config.generation_type == GenerationType.IMAGE -def test_model_config_provider_none_emits_deprecation_warning(): - """Regression for #589: omitting ``provider=`` (or passing ``provider=None``) - on a ``ModelConfig`` is deprecated; construction must emit a - ``DeprecationWarning`` pointing users at the explicit-provider migration. +def test_model_config_provider_required(): + """Regression for #590: ``provider`` is required on ``ModelConfig`` — + construction without it (or with ``provider=None``) must raise. """ - with pytest.warns(DeprecationWarning, match="ModelConfig.provider=None is deprecated"): - ModelConfig(alias="legacy", model="legacy-model") + with pytest.raises(ValidationError, match="provider"): + ModelConfig(alias="legacy", model="legacy-model") # type: ignore[call-arg] - with pytest.warns(DeprecationWarning, match="ModelConfig.provider=None is deprecated"): - ModelConfig(alias="legacy", model="legacy-model", provider=None) + with pytest.raises(ValidationError, match="provider"): + ModelConfig(alias="legacy", model="legacy-model", provider=None) # type: ignore[arg-type] -def test_model_config_provider_none_via_model_validate_emits_deprecation_warning(): - """Regression for #589 / PR #594 review: deserialising legacy on-disk configs - via ``ModelConfig.model_validate(...)`` must surface the same - ``DeprecationWarning`` as direct construction. Both paths funnel through - the same validator today, so this pin protects against a future refactor - that, e.g., only runs the validator on construction and not on revalidation. +def test_model_config_provider_required_via_model_validate(): + """Regression for #590: deserialising legacy on-disk configs via + ``ModelConfig.model_validate(...)`` without ``provider:`` must raise. """ - with pytest.warns(DeprecationWarning, match="ModelConfig.provider=None is deprecated"): + with pytest.raises(ValidationError, match="provider"): ModelConfig.model_validate({"alias": "legacy", "model": "legacy-model"}) -def test_model_config_with_provider_does_not_warn(): - """Pin the post-deprecation happy path: specifying ``provider=`` must not - emit any deprecation warning. - """ - with warnings.catch_warnings(): - warnings.simplefilter("error", DeprecationWarning) - ModelConfig(alias="modern", model="modern-model", provider="some-provider") - - def test_model_config_generation_type_from_dict(): # Test that generation_type in dict is used to create the right inference params type model_config = ModelConfig.model_validate( diff --git a/packages/data-designer-engine/src/data_designer/engine/model_provider.py b/packages/data-designer-engine/src/data_designer/engine/model_provider.py index d3d8dc7be..566de79e1 100644 --- a/packages/data-designer-engine/src/data_designer/engine/model_provider.py +++ b/packages/data-designer-engine/src/data_designer/engine/model_provider.py @@ -5,21 +5,21 @@ from functools import cached_property -from pydantic import BaseModel, field_validator, model_validator -from typing_extensions import Self +from pydantic import BaseModel, field_validator from data_designer.config.mcp import MCPProviderT from data_designer.config.models import ModelProvider -from data_designer.config.utils.warning_helpers import warn_at_caller from data_designer.engine.errors import NoModelProvidersError, UnknownProviderError class ModelProviderRegistry(BaseModel): + """Registry of model providers. + + Inherits from ``BaseModel`` directly so pydantic's default ``extra="ignore"`` + drops legacy ``default:`` keys from older serialized registries. + """ + providers: list[ModelProvider] - default: str | None = None - """Deprecated: registry-level default provider. Will be removed in a future - release; specify ``provider=`` explicitly on each ``ModelConfig`` instead. - See issue #589.""" @field_validator("providers", mode="after") @classmethod @@ -42,73 +42,21 @@ def validate_providers_have_unique_names(cls, v: list[ModelProvider]) -> list[Mo raise ValueError(f"Model providers must have unique names, found duplicates: {dupes}") return v - @model_validator(mode="after") - def check_implicit_default(self) -> Self: - if self.default is None and len(self.providers) != 1: - raise ValueError("A default provider must be specified if multiple model providers are defined") - return self - - @model_validator(mode="after") - def check_default_exists(self) -> Self: - if self.default and self.default not in self._providers_dict: - raise ValueError(f"Specified default {self.default!r} not found in providers list") - return self - - @model_validator(mode="after") - def _warn_on_explicit_default(self) -> Self: - # Fires only when the caller actually passed a non-None ``default=``. - # The ``model_fields_set`` guard distinguishes "caller opted into the - # deprecated field" from "field at its default value of None", and the - # ``self.default is not None`` clause additionally lets callers - # explicitly opt *out* via ``default=None`` without tripping the - # warning. ``resolve_model_provider_registry`` avoids passing - # ``default=`` in the single-provider case so common construction paths - # stay quiet. ``warn_at_caller`` keeps attribution and dedup correct - # under pydantic's validator dispatch. See issue #589 / PR #594 review. - if "default" in self.model_fields_set and self.default is not None: - warn_at_caller( - "ModelProviderRegistry.default is deprecated and will be removed in a " - "future release. Specify provider= explicitly on each ModelConfig " - "instead of relying on a registry-level default. See issue #589.", - DeprecationWarning, - ) - return self - - def get_default_provider_name(self) -> str: - return self.default or self.providers[0].name - @cached_property def _providers_dict(self) -> dict[str, ModelProvider]: return {p.name: p for p in self.providers} - def get_provider(self, name: str | None) -> ModelProvider: - if name is None: - name = self.get_default_provider_name() - + def get_provider(self, name: str) -> ModelProvider: try: return self._providers_dict[name] except KeyError: raise UnknownProviderError(f"No provider named {name!r} registered") -def resolve_model_provider_registry( - model_providers: list[ModelProvider], default_provider_name: str | None = None -) -> ModelProviderRegistry: +def resolve_model_provider_registry(model_providers: list[ModelProvider]) -> ModelProviderRegistry: if len(model_providers) == 0: raise NoModelProvidersError("At least one model provider must be defined") - # In the single-provider case, the registry's ``get_default_provider_name`` - # falls back to ``providers[0].name`` when ``default`` is unset, so we can - # avoid passing ``default=`` and keep the common construction path quiet - # under the #589 deprecation warning. The multi-provider case still - # requires ``default`` (per ``check_implicit_default``); callers who supply - # multiple providers with no explicit default fall back to first-wins, - # matching the contract pinned in #588. - if len(model_providers) == 1 and default_provider_name is None: - return ModelProviderRegistry(providers=model_providers) - return ModelProviderRegistry( - providers=model_providers, - default=default_provider_name or model_providers[0].name, - ) + return ModelProviderRegistry(providers=model_providers) class MCPProviderRegistry(BaseModel): diff --git a/packages/data-designer-engine/tests/engine/test_model_provider.py b/packages/data-designer-engine/tests/engine/test_model_provider.py index 72006f824..1bd6fff45 100644 --- a/packages/data-designer-engine/tests/engine/test_model_provider.py +++ b/packages/data-designer-engine/tests/engine/test_model_provider.py @@ -1,8 +1,6 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -import warnings - import pytest from data_designer.config.mcp import LocalStdioMCPProvider @@ -27,47 +25,18 @@ def stub_bar_provider(): def test_must_have_at_least_one_provider(): - with pytest.raises(ValueError): - ModelProviderRegistry(providers=[], default="a") - with pytest.raises(ValueError): ModelProviderRegistry(providers=[]) -def test_defined_default_must_exist(stub_foo_provider: ModelProvider): - with pytest.raises(ValueError): - ModelProviderRegistry(providers=[stub_foo_provider], default="bar") - - -def test_multiple_providers_requires_explicit_default( - stub_foo_provider: ModelProvider, stub_bar_provider: ModelProvider -): - with pytest.raises(ValueError): - ModelProviderRegistry(providers=[stub_foo_provider, stub_bar_provider]) - - -def test_implicit_default(stub_foo_provider: ModelProvider): - registry = ModelProviderRegistry(providers=[stub_foo_provider]) - - assert registry.get_provider(None) == stub_foo_provider - - def test_no_duplicate_provider_names(stub_foo_provider: ModelProvider): with pytest.raises(ValueError): - ModelProviderRegistry(providers=[stub_foo_provider, stub_foo_provider], default="foo") + ModelProviderRegistry(providers=[stub_foo_provider, stub_foo_provider]) def test_get_provider(stub_foo_provider: ModelProvider, stub_bar_provider: ModelProvider): - # Multi-provider construction with an explicit default exercises the #589 - # deprecation path; wrap so this test stays green if the project ever runs - # with ``-W error::DeprecationWarning``. - with pytest.warns(DeprecationWarning, match="ModelProviderRegistry.default is deprecated"): - registry = ModelProviderRegistry( - providers=[stub_foo_provider, stub_bar_provider], - default="foo", - ) - - assert registry.get_provider(None) == stub_foo_provider + registry = ModelProviderRegistry(providers=[stub_foo_provider, stub_bar_provider]) + assert registry.get_provider("foo") == stub_foo_provider assert registry.get_provider("bar") == stub_bar_provider @@ -80,23 +49,20 @@ def test_resolve_model_provider_registry(stub_foo_provider: ModelProvider) -> No registry = resolve_model_provider_registry([stub_foo_provider]) assert len(registry.providers) == 1 - assert registry.get_default_provider_name() == "foo" + assert registry.get_provider("foo") == stub_foo_provider -def test_resolve_model_provider_registry_with_explicit_default( +def test_resolve_model_provider_registry_multiple_providers( stub_foo_provider: ModelProvider, stub_bar_provider: ModelProvider ) -> None: - """Test resolve_model_provider_registry with explicit default. - - The multi-provider/explicit-default path is the deprecated one (see #589), - so the construction emits a ``DeprecationWarning``. Wrap the call in - ``pytest.warns`` so this test stays green if the project ever runs under - ``-W error::DeprecationWarning``. + """Multi-provider registries no longer require a registry-level default — + callers route by name on each ``ModelConfig``. See issue #590. """ - with pytest.warns(DeprecationWarning, match="ModelProviderRegistry.default is deprecated"): - registry = resolve_model_provider_registry([stub_foo_provider, stub_bar_provider], default_provider_name="bar") + registry = resolve_model_provider_registry([stub_foo_provider, stub_bar_provider]) - assert registry.get_default_provider_name() == "bar" + assert len(registry.providers) == 2 + assert registry.get_provider("foo") == stub_foo_provider + assert registry.get_provider("bar") == stub_bar_provider def test_resolve_model_provider_registry_empty_error() -> None: @@ -105,84 +71,15 @@ def test_resolve_model_provider_registry_empty_error() -> None: resolve_model_provider_registry([]) -def test_explicit_default_emits_deprecation_warning(stub_foo_provider: ModelProvider) -> None: - """Regression for #589: passing ``default=`` explicitly to ``ModelProviderRegistry`` - must emit a ``DeprecationWarning``. The registry-level default field is on its - way out; users should specify ``provider=`` per ``ModelConfig`` instead. +def test_registry_ignores_legacy_default_key(stub_foo_provider: ModelProvider) -> None: + """Regression for #590: pydantic v2's ``extra="ignore"`` default silently + drops the legacy ``default:`` key carried over from older serialised + registries, so ``model_validate`` of an old payload still loads cleanly. """ - with pytest.warns(DeprecationWarning, match="ModelProviderRegistry.default is deprecated"): - ModelProviderRegistry(providers=[stub_foo_provider], default="foo") + registry = ModelProviderRegistry.model_validate({"providers": [stub_foo_provider.model_dump()], "default": "foo"}) - -def test_no_default_does_not_emit_deprecation_warning(stub_foo_provider: ModelProvider) -> None: - """Pin the post-deprecation happy path: omitting ``default=`` (single-provider - case) must NOT emit a warning, since callers haven't opted into the deprecated - field. - """ - with warnings.catch_warnings(): - warnings.simplefilter("error", DeprecationWarning) - ModelProviderRegistry(providers=[stub_foo_provider]) - - -def test_explicit_default_none_does_not_emit_deprecation_warning(stub_foo_provider: ModelProvider) -> None: - """Pin the predicate tightening from PR #594 review: passing ``default=None`` - explicitly is semantically equivalent to omitting it (caller is opting *out* - of a registry-level default), so the deprecation must NOT fire. - """ - with warnings.catch_warnings(): - warnings.simplefilter("error", DeprecationWarning) - ModelProviderRegistry(providers=[stub_foo_provider], default=None) - - -def test_explicit_default_warning_attributes_to_user_frame( - stub_foo_provider: ModelProvider, stub_bar_provider: ModelProvider -) -> None: - """Regression for PR #594 review (andreatgretel): the ``default=`` deprecation - warning must attribute to the *user's* call site, not the pydantic-internal - or ``data_designer`` library frame that emits it. Library-attributed - ``DeprecationWarning`` entries are silenced under Python's default - ``ignore::DeprecationWarning`` filter, so attribution determines whether - the warning is actually visible. - - Construction goes through ``resolve_model_provider_registry`` so the walk - has to escape both pydantic (validator dispatch) and ``data_designer`` - (the helper that builds the registry) before landing on the test frame. - """ - with warnings.catch_warnings(record=True) as caught: - warnings.simplefilter("always", DeprecationWarning) - resolve_model_provider_registry([stub_foo_provider, stub_bar_provider], default_provider_name="bar") - - matches = [w for w in caught if "ModelProviderRegistry.default is deprecated" in str(w.message)] - assert len(matches) == 1, [str(w.message) for w in caught] - assert matches[0].filename == __file__, ( - f"Warning attributed to {matches[0].filename!r} (line {matches[0].lineno}) " - f"instead of the test file. Library-attributed DeprecationWarnings are " - f"silenced under default filters." - ) - - -def test_resolve_single_provider_quiet_under_deprecation(stub_foo_provider: ModelProvider) -> None: - """Pin the q3 tweak: ``resolve_model_provider_registry`` skips ``default=`` - in the single-provider case so common construction paths stay quiet under - the #589 deprecation warning. - """ - with warnings.catch_warnings(): - warnings.simplefilter("error", DeprecationWarning) - registry = resolve_model_provider_registry([stub_foo_provider]) - - assert registry.get_default_provider_name() == "foo" - - -def test_resolve_multi_provider_emits_deprecation_warning( - stub_foo_provider: ModelProvider, stub_bar_provider: ModelProvider -) -> None: - """Multi-provider registries currently require ``default``, so - ``resolve_model_provider_registry`` keeps passing it. That construction - path is the deprecated one users should migrate off; the warning fires - accordingly. - """ - with pytest.warns(DeprecationWarning, match="ModelProviderRegistry.default is deprecated"): - resolve_model_provider_registry([stub_foo_provider, stub_bar_provider]) + assert len(registry.providers) == 1 + assert not hasattr(registry, "default") def test_mcp_provider_registry_empty() -> None: diff --git a/packages/data-designer/src/data_designer/cli/README.md b/packages/data-designer/src/data_designer/cli/README.md index ab0447902..19d153a83 100644 --- a/packages/data-designer/src/data_designer/cli/README.md +++ b/packages/data-designer/src/data_designer/cli/README.md @@ -88,7 +88,6 @@ The CLI follows a **layered architecture** pattern, separating concerns into dis - Validate business rules (e.g., unique names, required fields) - Implement CRUD operations with validation - Coordinate between multiple repositories when needed - - Handle default management (e.g., default provider selection) - **Files**: - `mcp_provider_service.py`: MCP provider configuration business logic - `model_service.py`: Model configuration business logic @@ -105,7 +104,6 @@ The CLI follows a **layered architecture** pattern, separating concerns into dis - `delete()`: Delete single item - `delete_by_aliases()`: Batch delete (models only) - `find_by_provider()`: Find models by provider (models only) -- `set_default()`, `get_default()`: Manage default provider (providers only) #### 4. **Repositories** (`repositories/`) - **Purpose**: Handle data persistence and read-only reference metadata @@ -191,7 +189,6 @@ providers: endpoint: https://api.openai.com/v1 provider_type: openai api_key: OPENAI_API_KEY -default: nvidia ``` ### `~/.data-designer/model_configs.yaml` @@ -297,7 +294,6 @@ data-designer config providers # - Update an existing provider # - Delete a provider (with associated model cleanup) # - Delete all providers -# - Change default provider ``` ### Configure Models diff --git a/packages/data-designer/src/data_designer/cli/commands/list.py b/packages/data-designer/src/data_designer/cli/commands/list.py index d83797c17..d4ee430a7 100644 --- a/packages/data-designer/src/data_designer/cli/commands/list.py +++ b/packages/data-designer/src/data_designer/cli/commands/list.py @@ -92,12 +92,8 @@ def display_providers(provider_repo: ProviderRepository) -> None: table.add_column("Endpoint", style=NordColor.NORD4.value) table.add_column("Type", style=NordColor.NORD9.value, no_wrap=True) table.add_column("API Key", style=NordColor.NORD7.value) - table.add_column("Default", style=NordColor.NORD13.value, justify="center") - - default_name = provider_registry.default or provider_registry.providers[0].name for provider in provider_registry.providers: - is_default = "✓" if provider.name == default_name else "" api_key_display = _mask_api_key(provider.api_key) table.add_row( @@ -105,7 +101,6 @@ def display_providers(provider_repo: ProviderRepository) -> None: provider.endpoint, provider.provider_type, api_key_display, - is_default, ) console.print(table) @@ -145,7 +140,7 @@ def display_models(model_repo: ModelRepository) -> None: table.add_row( mc.alias, mc.model, - mc.provider or "(default)", + mc.provider, params_display, ) diff --git a/packages/data-designer/src/data_designer/cli/controllers/provider_controller.py b/packages/data-designer/src/data_designer/cli/controllers/provider_controller.py index c72087036..f445393f0 100644 --- a/packages/data-designer/src/data_designer/cli/controllers/provider_controller.py +++ b/packages/data-designer/src/data_designer/cli/controllers/provider_controller.py @@ -27,7 +27,6 @@ print_warning, select_with_arrows, ) -from data_designer.config.utils.warning_helpers import warn_at_caller if TYPE_CHECKING: from data_designer.engine.model_provider import ModelProvider @@ -70,7 +69,6 @@ def run(self) -> None: "update": self._handle_update, "delete": self._handle_delete, "delete_all": self._handle_delete_all, - "change_default": self._handle_change_default, } handler = mode_handlers.get(mode) @@ -113,14 +111,9 @@ def _select_mode(self) -> str | None: "update": "Update an existing provider", "delete": "Delete a provider", "delete_all": "Delete all providers", + "exit": "Exit without changes", } - # Only show change_default if multiple providers - if len(self.service.list_all()) > 1: - options["change_default"] = "Change default provider" - - options["exit"] = "Exit without changes" - result = select_with_arrows( options, "What would you like to do?", @@ -287,42 +280,6 @@ def _handle_delete_all(self) -> None: except Exception as e: print_error(f"Failed to delete all providers: {e}") - def _handle_change_default(self) -> None: - """Handle changing the default provider.""" - deprecation_msg = ( - "The 'Change default provider' workflow is deprecated and will be removed " - "in a future release. Specify provider= explicitly on each ModelConfig " - "instead of relying on a registry-level default. See issue #589." - ) - print_warning(deprecation_msg) - # ``print_warning`` always shows the user the message in the console, - # but ``warnings.warn`` is what's observable to programmatic callers - # (``pytest.warns``, ``filterwarnings("error", ...)``). With - # ``stacklevel=2`` attribution lands on the menu dispatcher in this - # same module — a ``data_designer.cli.*`` frame — and Python's default - # ``ignore::DeprecationWarning`` filter silences it. ``warn_at_caller`` - # walks past every ``data_designer.*`` frame so the warning attributes - # to the user's call site and stays visible. See PR #594 review. - warn_at_caller(deprecation_msg, DeprecationWarning) - console.print() - - providers = self.service.list_all() - current_default = self.service.get_default() - - print_info(f"Current default: {current_default}") - console.print() - - # Select new default - selected_name = self._select_provider(providers, "Select new default provider", default=current_default) - if selected_name is None: - return - if selected_name and selected_name != current_default: - try: - self.service.set_default(selected_name) - print_success(f"Default provider changed to '{selected_name}'") - except ValueError as e: - print_error(f"Failed to change default: {e}") - def _select_provider(self, providers: list[ModelProvider], prompt: str, default: str | None = None) -> str | None: """Helper to select a provider from list.""" options = {p.name: f"{p.name} ({p.endpoint})" for p in providers} diff --git a/packages/data-designer/src/data_designer/cli/forms/model_builder.py b/packages/data-designer/src/data_designer/cli/forms/model_builder.py index e1d637df3..321813a5c 100644 --- a/packages/data-designer/src/data_designer/cli/forms/model_builder.py +++ b/packages/data-designer/src/data_designer/cli/forms/model_builder.py @@ -287,13 +287,21 @@ def run(self, initial_data: dict[str, Any] | None = None) -> ModelConfig | None: def build_config(self, form_data: dict[str, Any]) -> ModelConfig: """Build ModelConfig from form data.""" - # Determine provider if "provider" in form_data: provider = form_data["provider"] elif len(self.available_providers) == 1: provider = self.available_providers[0] + elif self.available_providers: + raise ValueError( + "Cannot build ModelConfig: multiple providers are configured but none " + "was specified in form data. Specify provider explicitly." + ) else: - provider = None + raise ValueError( + "Cannot build ModelConfig: no provider specified in form data and no " + "available providers configured. Add a provider first via " + "`data-designer config providers`." + ) # Get generation type (from form data, used to determine which inference params to create) generation_type = form_data.get("generation_type", GenerationType.CHAT_COMPLETION) diff --git a/packages/data-designer/src/data_designer/cli/repositories/provider_repository.py b/packages/data-designer/src/data_designer/cli/repositories/provider_repository.py index f815ef127..b088300e8 100644 --- a/packages/data-designer/src/data_designer/cli/repositories/provider_repository.py +++ b/packages/data-designer/src/data_designer/cli/repositories/provider_repository.py @@ -11,14 +11,16 @@ from data_designer.config.models import ModelProvider from data_designer.config.utils.constants import MODEL_PROVIDERS_FILE_NAME from data_designer.config.utils.io_helpers import load_config_file, save_config_file -from data_designer.config.utils.warning_helpers import warn_at_caller class ModelProviderRegistry(BaseModel): - """Registry for model provider configurations.""" + """Registry for model provider configurations. + + Inherits from ``BaseModel`` directly so pydantic's default ``extra="ignore"`` + drops legacy ``default:`` keys from older on-disk YAMLs. + """ providers: list[ModelProvider] - default: str | None = None class ProviderRepository(ConfigRepository[ModelProviderRegistry]): @@ -30,7 +32,12 @@ def config_file(self) -> Path: return self.config_dir / MODEL_PROVIDERS_FILE_NAME def load(self) -> ModelProviderRegistry | None: - """Load provider configuration from file.""" + """Load provider configuration from file. + + ``extra="ignore"`` (pydantic v2 default) silently drops any legacy + ``default:`` key carried over from older YAMLs, so existing on-disk + configs continue to load cleanly after #590 dropped the field. + """ if not self.exists(): return None @@ -39,23 +46,6 @@ def load(self) -> ModelProviderRegistry | None: except Exception: return None - # Emit the deprecation warning *outside* the validation try/except below. - # ``DeprecationWarning`` is an ``Exception`` subclass, so under - # ``filterwarnings("error", DeprecationWarning)`` a warn raised inside - # the catch-all would be silently swallowed and ``load`` would drop the - # registry. ``warn_at_caller`` (rather than ``warnings.warn(stacklevel=2)``) - # so the warning attributes to the user's call site rather than a - # ``data_designer.cli.*`` frame; under default Python filters, - # library-attributed ``DeprecationWarning`` entries are silenced - # (``ignore::DeprecationWarning``). See PR #594 review. - if config_dict.get("default") is not None: - warn_at_caller( - f"The 'default:' key in {self.config_file} is deprecated and will " - "be removed in a future release. Remove it and specify provider= " - "explicitly on each ModelConfig instead. See issue #589.", - DeprecationWarning, - ) - try: return ModelProviderRegistry.model_validate(config_dict) except Exception: diff --git a/packages/data-designer/src/data_designer/cli/services/provider_service.py b/packages/data-designer/src/data_designer/cli/services/provider_service.py index 02bb05daa..be32fea37 100644 --- a/packages/data-designer/src/data_designer/cli/services/provider_service.py +++ b/packages/data-designer/src/data_designer/cli/services/provider_service.py @@ -29,7 +29,7 @@ def add(self, provider: ModelProvider) -> None: Raises: ValueError: If provider name already exists """ - registry = self.repository.load() or ModelProviderRegistry(providers=[], default=None) + registry = self.repository.load() or ModelProviderRegistry(providers=[]) if any(p.name == provider.name for p in registry.providers): raise ValueError(f"Provider '{provider.name}' already exists") @@ -61,9 +61,6 @@ def update(self, original_name: str, updated_provider: ModelProvider) -> None: registry.providers[index] = updated_provider - if registry.default == original_name and updated_provider.name != original_name: - registry.default = updated_provider.name - self.repository.save(registry) def delete(self, name: str) -> None: @@ -81,33 +78,7 @@ def delete(self, name: str) -> None: registry.providers = [p for p in registry.providers if p.name != name] - if registry.default == name: - registry.default = registry.providers[0].name if registry.providers else None - if registry.providers: self.repository.save(registry) else: self.repository.delete() - - def set_default(self, name: str) -> None: - """Set the default provider. - - Raises: - ValueError: If provider not found - """ - registry = self.repository.load() - if not registry: - raise ValueError("No providers configured") - - if not any(p.name == name for p in registry.providers): - raise ValueError(f"Provider '{name}' not found") - - registry.default = name - self.repository.save(registry) - - def get_default(self) -> str | None: - """Get the default provider name.""" - registry = self.repository.load() - if not registry: - return None - return registry.default or (registry.providers[0].name if registry.providers else None) diff --git a/packages/data-designer/src/data_designer/cli/utils/agent_introspection.py b/packages/data-designer/src/data_designer/cli/utils/agent_introspection.py index b91f5d4d3..d77176abc 100644 --- a/packages/data-designer/src/data_designer/cli/utils/agent_introspection.py +++ b/packages/data-designer/src/data_designer/cli/utils/agent_introspection.py @@ -170,38 +170,29 @@ def get_model_aliases_state(config_dir: Path) -> dict[str, Any]: return { "model_config_present": False, "provider_config_present": provider_registry is not None, - "default_provider": None if provider_registry is None else provider_registry.default, "items": items, } providers_by_name: dict[str, Any] = {} missing_key_names: set[str] = set() - default_provider: str | None = None if provider_registry is not None: providers_by_name = {p.name: p for p in provider_registry.providers} - default_provider = provider_registry.default or ( - provider_registry.providers[0].name if provider_registry.providers else None - ) missing_key_names = {p.name for p in get_providers_with_missing_api_keys(provider_registry.providers)} for mc in sorted(model_registry.model_configs, key=lambda m: m.alias): - effective = mc.provider or default_provider usable = True reason: str | None = None - if effective is None: - usable, reason = False, "No model provider is configured." - elif effective not in providers_by_name: - usable, reason = False, f"Provider {effective!r} is not configured." - elif effective in missing_key_names: - usable, reason = False, f"Provider {effective!r} is missing an API key." + if mc.provider not in providers_by_name: + usable, reason = False, f"Provider {mc.provider!r} is not configured." + elif mc.provider in missing_key_names: + usable, reason = False, f"Provider {mc.provider!r} is missing an API key." items.append( { "model_alias": mc.alias, "model": mc.model, "generation_type": getattr(mc.generation_type, "value", str(mc.generation_type)), - "configured_provider": mc.provider, - "effective_provider": effective, + "provider": mc.provider, "usable": usable, "reason": reason, } @@ -210,7 +201,6 @@ def get_model_aliases_state(config_dir: Path) -> dict[str, Any]: return { "model_config_present": True, "provider_config_present": provider_registry is not None, - "default_provider": default_provider, "items": items, } diff --git a/packages/data-designer/src/data_designer/cli/utils/agent_text_formatter.py b/packages/data-designer/src/data_designer/cli/utils/agent_text_formatter.py index b138c0821..4745db141 100644 --- a/packages/data-designer/src/data_designer/cli/utils/agent_text_formatter.py +++ b/packages/data-designer/src/data_designer/cli/utils/agent_text_formatter.py @@ -60,16 +60,11 @@ def format_types_text(data: dict[str, Any]) -> str: def format_model_aliases_text(state: dict[str, Any]) -> str: - """Format model aliases as a text table with provider summary.""" - lines: list[str] = [f"default_provider: {state.get('default_provider') or '(none)'}", ""] - lines.append( - _format_table( - state.get("items", []), - ["model_alias", "model", "generation_type", "effective_provider", "usable", "reason"], - column_labels={"effective_provider": "provider"}, - ) + """Format model aliases as a text table.""" + return _format_table( + state.get("items", []), + ["model_alias", "model", "generation_type", "provider", "usable", "reason"], ) - return "\n".join(lines) def format_persona_datasets_text(state: dict[str, Any]) -> str: @@ -87,21 +82,14 @@ def _format_family_header(info: dict[str, Any]) -> str: return "\n".join(lines) -def _format_table( - items: list[dict[str, Any]], - columns: list[str], - *, - column_labels: dict[str, str] | None = None, -) -> str: - labels = {col: (column_labels or {}).get(col, col) for col in columns} - +def _format_table(items: list[dict[str, Any]], columns: list[str]) -> str: if not items: return "(no items)" - col_widths = {col: max(len(labels[col]), max(len(_cell(row.get(col))) for row in items)) for col in columns} + col_widths = {col: max(len(col), max(len(_cell(row.get(col))) for row in items)) for col in columns} lines: list[str] = [] - lines.append(" ".join(f"{labels[col]:<{col_widths[col]}}" for col in columns)) + lines.append(" ".join(f"{col:<{col_widths[col]}}" for col in columns)) lines.append(" ".join("-" * col_widths[col] for col in columns)) for row in items: lines.append(" ".join(f"{_cell(row.get(col)):<{col_widths[col]}}" for col in columns)) @@ -117,15 +105,10 @@ def _format_model_aliases_context(state: dict[str, Any]) -> str: "No usable model aliases. Tell the user the issue and that they need to configure models" " -- for example, using `data-designer config models` and `data-designer config providers`." ) - lines: list[str] = [f"default_provider: {state.get('default_provider') or '(none)'}", ""] - lines.append( - _format_table( - usable, - ["model_alias", "model", "generation_type", "effective_provider"], - column_labels={"effective_provider": "provider"}, - ) + return _format_table( + usable, + ["model_alias", "model", "generation_type", "provider"], ) - return "\n".join(lines) def _strip_config_prefix(path: str) -> str: diff --git a/packages/data-designer/src/data_designer/interface/data_designer.py b/packages/data-designer/src/data_designer/interface/data_designer.py index ed42d6e0b..dae041859 100644 --- a/packages/data-designer/src/data_designer/interface/data_designer.py +++ b/packages/data-designer/src/data_designer/interface/data_designer.py @@ -17,7 +17,6 @@ from data_designer.config.data_designer_config import DataDesignerConfig from data_designer.config.default_model_settings import ( get_default_model_configs, - get_default_provider_name, get_default_providers, get_providers_with_missing_api_keys, resolve_seed_default_model_settings, @@ -159,52 +158,9 @@ def __init__( self._throttle_manager: ThrottleManager = self._create_throttle_manager() self._managed_assets_path = Path(managed_assets_path or MANAGED_ASSETS_PATH) self._person_reader = person_reader - # Only consult the YAML's `default:` key when we are also falling back to - # the YAML's `providers:` list. A user-supplied `model_providers` list - # owns its own default (first wins), so the YAML default must not leak - # in and either (a) hard-fail validation when the YAML names a provider - # absent from the supplied list or (b) silently override the - # documented first-wins ordering. See issue #588. - if model_providers is None: - self._model_providers = self._resolve_model_providers(None) - default_provider_name = get_default_provider_name() - else: - self._model_providers = self._resolve_model_providers(model_providers) - default_provider_name = None + self._model_providers = self._resolve_model_providers(model_providers) self._mcp_providers = mcp_providers or [] - # Suppress ``ModelProviderRegistry._warn_on_explicit_default`` whenever - # *we* are filling ``default=`` on the user's behalf rather than the - # user actively opting into the deprecated registry-level default. Two - # such cases: - # 1. ``model_providers is None`` — the caller passed nothing, so we - # load the YAML's ``providers:`` list and (in the multi-provider - # case) ``resolve_model_provider_registry`` synthesises - # ``default=providers[0].name`` to satisfy ``check_implicit_default``. - # The fresh-install YAML ships three providers and no ``default:`` - # key, so this fires for every default ``DataDesigner()`` - # construction. The user has no actionable lever here, and the - # warning's "Specify provider= on each ModelConfig" remediation - # doesn't apply when they haven't built a ``ModelConfig`` at all. - # 2. ``default_provider_name is not None`` — the YAML carried a - # ``default:`` key and ``get_default_provider_name`` already - # emitted the YAML-level ``DeprecationWarning``. The registry - # warning would fire for the same root cause, so suppress it to - # avoid double-warning. See PR #594 review. - # Users who hand-construct a multi-provider list in Python still see - # the warning (they wrote the multi-provider intent themselves), and - # users who hand-construct ``ModelProviderRegistry(default=...)`` - # directly always see it — those are the entry points #589 targets. - library_synthesised_default = model_providers is None or default_provider_name is not None - with warnings.catch_warnings(): - if library_synthesised_default: - warnings.filterwarnings( - "ignore", - message="ModelProviderRegistry.default is deprecated", - category=DeprecationWarning, - ) - self._model_provider_registry = resolve_model_provider_registry( - self._model_providers, default_provider_name - ) + self._model_provider_registry = resolve_model_provider_registry(self._model_providers) self._seed_reader_registry = SeedReaderRegistry(readers=seed_readers or DEFAULT_SEED_READERS) @property @@ -579,11 +535,9 @@ def model_provider_registry(self) -> ModelProviderRegistry: """Get the resolved model provider registry. Returns: - The ModelProviderRegistry containing the providers and default - resolved at construction time. The default is taken from the - first user-supplied provider when ``model_providers`` was passed - to the constructor; otherwise from the YAML's ``default:`` key - when set, falling back to the first provider in the YAML list. + The ModelProviderRegistry containing the providers resolved at + construction time, either the user-supplied ``model_providers`` + list or the providers loaded from the YAML. """ return self._model_provider_registry diff --git a/packages/data-designer/tests/cli/conftest.py b/packages/data-designer/tests/cli/conftest.py index 020ffc113..256bf3d42 100644 --- a/packages/data-designer/tests/cli/conftest.py +++ b/packages/data-designer/tests/cli/conftest.py @@ -96,7 +96,7 @@ def stub_model_service(tmp_path: Path, stub_model_configs: list[ModelConfig]) -> @pytest.fixture def stub_provider_service(tmp_path: Path, stub_model_providers: list[ModelProvider]) -> ProviderService: repository = ProviderRepository(tmp_path) - repository.save(ModelProviderRegistry(providers=stub_model_providers, default=stub_model_providers[0].name)) + repository.save(ModelProviderRegistry(providers=stub_model_providers)) return ProviderService(repository) diff --git a/packages/data-designer/tests/cli/controllers/test_model_controller.py b/packages/data-designer/tests/cli/controllers/test_model_controller.py index d6a5ac165..3eba44da0 100644 --- a/packages/data-designer/tests/cli/controllers/test_model_controller.py +++ b/packages/data-designer/tests/cli/controllers/test_model_controller.py @@ -16,7 +16,7 @@ def controller(tmp_path: Path, stub_model_providers: list) -> ModelController: """Create a controller instance for testing.""" provider_repo = ProviderRepository(tmp_path) - provider_repo.save(ModelProviderRegistry(providers=stub_model_providers, default=stub_model_providers[0].name)) + provider_repo.save(ModelProviderRegistry(providers=stub_model_providers)) return ModelController(tmp_path) diff --git a/packages/data-designer/tests/cli/controllers/test_provider_controller.py b/packages/data-designer/tests/cli/controllers/test_provider_controller.py index 265f393a3..b69d617a0 100644 --- a/packages/data-designer/tests/cli/controllers/test_provider_controller.py +++ b/packages/data-designer/tests/cli/controllers/test_provider_controller.py @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -import warnings from pathlib import Path from unittest.mock import MagicMock, patch @@ -24,9 +23,7 @@ def controller_with_providers( controller: ProviderController, stub_model_providers: list[ModelProvider] ) -> ProviderController: """Create a controller instance with existing providers.""" - controller.repository.save( - ModelProviderRegistry(providers=stub_model_providers, default=stub_model_providers[0].name) - ) + controller.repository.save(ModelProviderRegistry(providers=stub_model_providers)) return controller @@ -192,55 +189,6 @@ def test_run_updates_provider( assert controller_with_providers.service.get_by_name("test-provider-1") is None -@patch("data_designer.cli.controllers.provider_controller.select_with_arrows") -def test_run_changes_default_provider( - mock_select: MagicMock, - controller_with_providers: ProviderController, -) -> None: - """Test run can change the default provider through change_default mode.""" - mock_select.side_effect = ["change_default", "test-provider-2"] - - # Verify initial default - assert controller_with_providers.service.get_default() == "test-provider-1" - - controller_with_providers.run() - - # Verify default was actually changed - assert controller_with_providers.service.get_default() == "test-provider-2" - - -@patch("data_designer.cli.controllers.provider_controller.select_with_arrows") -def test_handle_change_default_emits_deprecation_warning( - mock_select: MagicMock, - controller_with_providers: ProviderController, -) -> None: - """Regression for #589: entering the 'Change default provider' workflow - must emit a ``DeprecationWarning`` so users see the migration nudge before - setting another value that's also slated for removal. - - Also pins the attribution contract from PR #594 review: the warning must - land on the caller's frame (this test file), not on a - ``data_designer.cli.*`` library frame. Library attribution falls under - Python's default ``ignore::DeprecationWarning`` filter and would silently - suppress the user-facing nudge for any caller that isn't using - ``simplefilter("always")``. - """ - mock_select.side_effect = ["change_default", "test-provider-2"] - - with warnings.catch_warnings(record=True) as caught: - warnings.simplefilter("always") - controller_with_providers.run() - - deprecations = [ - w - for w in caught - if issubclass(w.category, DeprecationWarning) - and "'Change default provider' workflow is deprecated" in str(w.message) - ] - assert len(deprecations) == 1 - assert deprecations[0].filename == __file__ - - @patch("data_designer.cli.controllers.provider_controller.confirm_action", return_value=False) @patch("data_designer.cli.controllers.provider_controller.select_with_arrows") def test_run_respects_delete_cancellation( diff --git a/packages/data-designer/tests/cli/forms/test_model_builder.py b/packages/data-designer/tests/cli/forms/test_model_builder.py index 1c276054a..f52ff44f8 100644 --- a/packages/data-designer/tests/cli/forms/test_model_builder.py +++ b/packages/data-designer/tests/cli/forms/test_model_builder.py @@ -244,8 +244,8 @@ def test_build_config_infers_single_available_provider() -> None: assert config.provider == "openai" -def test_build_config_sets_provider_none_when_unavailable() -> None: - """Test build_config sets provider to None when no providers available.""" +def test_build_config_raises_when_no_providers_available() -> None: + """Test build_config raises ValueError when no providers available and none specified.""" builder = ModelFormBuilder(available_providers=[]) form_data = { "alias": "my-model", @@ -257,9 +257,25 @@ def test_build_config_sets_provider_none_when_unavailable() -> None: }, } - config = builder.build_config(form_data) + with pytest.raises(ValueError, match="no provider specified"): + builder.build_config(form_data) - assert config.provider is None + +def test_build_config_raises_when_multiple_providers_missing_selection() -> None: + """Test build_config asks for an explicit provider when several are available.""" + builder = ModelFormBuilder(available_providers=["openai", "anthropic"]) + form_data = { + "alias": "my-model", + "model": "gpt-4", + "inference_parameters": { + "temperature": 0.7, + "top_p": 0.9, + "max_tokens": 2048, + }, + } + + with pytest.raises(ValueError, match="multiple providers are configured"): + builder.build_config(form_data) def test_build_config_creates_valid_model_config() -> None: @@ -289,7 +305,7 @@ def test_build_config_creates_valid_model_config() -> None: def test_build_config_converts_max_tokens_to_int() -> None: """Test build_config handles numeric values in inference parameters.""" - builder = ModelFormBuilder() + builder = ModelFormBuilder(available_providers=["openai"]) form_data = { "alias": "my-model", "model": "gpt-4", diff --git a/packages/data-designer/tests/cli/repositories/test_provider_repository.py b/packages/data-designer/tests/cli/repositories/test_provider_repository.py index e62a59ccd..60b8067fd 100644 --- a/packages/data-designer/tests/cli/repositories/test_provider_repository.py +++ b/packages/data-designer/tests/cli/repositories/test_provider_repository.py @@ -1,11 +1,8 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -import warnings from pathlib import Path -import pytest - from data_designer.cli.repositories.provider_repository import ModelProviderRegistry, ProviderRepository from data_designer.config.models import ModelProvider from data_designer.config.utils.constants import MODEL_PROVIDERS_FILE_NAME @@ -23,10 +20,6 @@ def test_load_does_not_exist(): def test_load_exists(tmp_path: Path, stub_model_providers: list[ModelProvider]): - # Roundtrip test for the load/save cycle. We deliberately leave ``default`` - # unset so this test does not exercise the deprecated YAML ``default:`` path - # — that path is covered by ``test_load_with_yaml_default_emits_deprecation_warning`` - # below. See issue #589. providers_file_path = tmp_path / MODEL_PROVIDERS_FILE_NAME save_config_file( providers_file_path, @@ -38,76 +31,27 @@ def test_load_exists(tmp_path: Path, stub_model_providers: list[ModelProvider]): def test_save(tmp_path: Path, stub_model_providers: list[ModelProvider]): - # As above, leave ``default`` unset so the roundtrip stays clear of the - # YAML-default deprecation. See issue #589. repository = ProviderRepository(tmp_path) repository.save(ModelProviderRegistry(providers=stub_model_providers)) assert repository.load() is not None assert repository.load().providers == stub_model_providers -def test_load_with_yaml_default_emits_deprecation_warning( - tmp_path: Path, stub_model_providers: list[ModelProvider] -) -> None: - """Regression for #589: when the on-disk providers YAML carries a non-None - ``default:`` key, ``ProviderRepository.load`` must emit a - ``DeprecationWarning`` so users see the migration nudge regardless of which - entry point reads the file. - """ - providers_file_path = tmp_path / MODEL_PROVIDERS_FILE_NAME - save_config_file( - providers_file_path, - ModelProviderRegistry(providers=stub_model_providers, default=stub_model_providers[0].name).model_dump(), - ) - repository = ProviderRepository(tmp_path) - - with pytest.warns(DeprecationWarning, match="'default:' key.*is deprecated"): - registry = repository.load() - assert registry is not None - assert registry.default == stub_model_providers[0].name - - -def test_load_with_yaml_default_attributes_warning_to_caller( - tmp_path: Path, stub_model_providers: list[ModelProvider] -) -> None: - """Regression for PR #594 review: the YAML-default ``DeprecationWarning`` - must attribute to the *caller's* frame (this test file), not to a - ``data_designer.cli.*`` library frame. Library-attributed - ``DeprecationWarning`` entries fall under Python's default - ``ignore::DeprecationWarning`` filter and are silenced, so attribution at - a library frame == invisible warning. ``warn_at_caller`` keeps this - visible; a regression to ``warnings.warn(stacklevel=2)`` would land on - ``provider_repository.py`` and silently break the user nudge. +def test_load_silently_ignores_legacy_default_key(tmp_path: Path, stub_model_providers: list[ModelProvider]) -> None: + """Regression for #590: pydantic v2's ``extra="ignore"`` default silently + drops the legacy ``default:`` key from older on-disk YAMLs, so existing + user configs continue to load cleanly after the field was removed. """ providers_file_path = tmp_path / MODEL_PROVIDERS_FILE_NAME save_config_file( providers_file_path, - ModelProviderRegistry(providers=stub_model_providers, default=stub_model_providers[0].name).model_dump(), + { + "providers": [p.model_dump() for p in stub_model_providers], + "default": stub_model_providers[0].name, + }, ) repository = ProviderRepository(tmp_path) - - with warnings.catch_warnings(record=True) as caught: - warnings.simplefilter("always") - repository.load() - - deprecations = [w for w in caught if issubclass(w.category, DeprecationWarning)] - assert len(deprecations) == 1 - assert deprecations[0].filename == __file__ - - -def test_load_without_yaml_default_does_not_warn(tmp_path: Path, stub_model_providers: list[ModelProvider]) -> None: - """Pin the post-deprecation happy path: a YAML without a ``default:`` key - must load cleanly with no ``DeprecationWarning``. - """ - providers_file_path = tmp_path / MODEL_PROVIDERS_FILE_NAME - save_config_file( - providers_file_path, - ModelProviderRegistry(providers=stub_model_providers).model_dump(exclude_none=True), - ) - repository = ProviderRepository(tmp_path) - - with warnings.catch_warnings(): - warnings.simplefilter("error", DeprecationWarning) - registry = repository.load() + registry = repository.load() assert registry is not None - assert registry.default is None + assert registry.providers == stub_model_providers + assert not hasattr(registry, "default") diff --git a/packages/data-designer/tests/cli/services/test_provider_service.py b/packages/data-designer/tests/cli/services/test_provider_service.py index 53921d3e6..2390eb9ab 100644 --- a/packages/data-designer/tests/cli/services/test_provider_service.py +++ b/packages/data-designer/tests/cli/services/test_provider_service.py @@ -98,34 +98,6 @@ def test_delete_last_provider(stub_provider_service: ProviderService, stub_model assert stub_provider_service.list_all() == [] -def test_set_default(stub_provider_service: ProviderService, stub_model_providers: list[ModelProvider]): - stub_provider_service.set_default("test-provider-2") - assert stub_provider_service.get_default() == "test-provider-2" - - -def test_set_default_no_registry(tmp_path: Path): - """Test set_default when no registry exists.""" - service = ProviderService(ProviderRepository(tmp_path)) - with pytest.raises(ValueError, match="No providers configured"): - service.set_default("test-provider-1") - - -def test_set_default_nonexistent_provider(stub_provider_service: ProviderService): - """Test set_default with a provider that doesn't exist.""" - with pytest.raises(ValueError, match="Provider 'nonexistent' not found"): - stub_provider_service.set_default("nonexistent") - - -def test_get_default(stub_provider_service: ProviderService, stub_model_providers: list[ModelProvider]): - assert stub_provider_service.get_default() == "test-provider-1" - - -def test_get_default_no_registry(tmp_path: Path): - """Test get_default when no registry exists.""" - service = ProviderService(ProviderRepository(tmp_path)) - assert service.get_default() is None - - def test_delete_provider_with_associated_models( tmp_path: Path, stub_model_providers: list[ModelProvider], stub_model_configs: list[ModelConfig] ): @@ -140,7 +112,7 @@ def test_delete_provider_with_associated_models( from data_designer.cli.repositories.provider_repository import ModelProviderRegistry, ProviderRepository provider_repo = ProviderRepository(tmp_path) - provider_repo.save(ModelProviderRegistry(providers=stub_model_providers, default="test-provider-1")) + provider_repo.save(ModelProviderRegistry(providers=stub_model_providers)) model_repo = ModelRepository(tmp_path) model_repo.save(ModelConfigRegistry(model_configs=stub_model_configs)) diff --git a/packages/data-designer/tests/cli/utils/test_agent_introspection_integration.py b/packages/data-designer/tests/cli/utils/test_agent_introspection_integration.py index 6a0297346..3d92daf2e 100644 --- a/packages/data-designer/tests/cli/utils/test_agent_introspection_integration.py +++ b/packages/data-designer/tests/cli/utils/test_agent_introspection_integration.py @@ -33,7 +33,6 @@ def test_get_model_aliases_state_reports_provider_status(tmp_path: Path) -> None api_key="MISSING_PROVIDER_KEY", ), ], - default="provider-a", ) ) @@ -44,7 +43,7 @@ def test_get_model_aliases_state_reports_provider_status(tmp_path: Path) -> None ModelConfig( alias="alpha", model="model-alpha", - provider=None, + provider="provider-a", inference_parameters=ChatCompletionInferenceParams(), ), ModelConfig( @@ -67,14 +66,12 @@ def test_get_model_aliases_state_reports_provider_status(tmp_path: Path) -> None assert payload["model_config_present"] is True assert payload["provider_config_present"] is True - assert payload["default_provider"] == "provider-a" assert payload["items"] == [ { "model_alias": "alpha", "model": "model-alpha", "generation_type": "chat-completion", - "configured_provider": None, - "effective_provider": "provider-a", + "provider": "provider-a", "usable": True, "reason": None, }, @@ -82,8 +79,7 @@ def test_get_model_aliases_state_reports_provider_status(tmp_path: Path) -> None "model_alias": "beta", "model": "model-beta", "generation_type": "chat-completion", - "configured_provider": "provider-b", - "effective_provider": "provider-b", + "provider": "provider-b", "usable": False, "reason": "Provider 'provider-b' is missing an API key.", }, @@ -91,8 +87,7 @@ def test_get_model_aliases_state_reports_provider_status(tmp_path: Path) -> None "model_alias": "gamma", "model": "model-gamma", "generation_type": "chat-completion", - "configured_provider": "provider-missing", - "effective_provider": "provider-missing", + "provider": "provider-missing", "usable": False, "reason": "Provider 'provider-missing' is not configured.", }, @@ -105,7 +100,6 @@ def test_get_model_aliases_state_handles_missing_local_files(tmp_path: Path) -> assert payload == { "model_config_present": False, "provider_config_present": False, - "default_provider": None, "items": [], } diff --git a/packages/data-designer/tests/cli/utils/test_agent_text_formatter.py b/packages/data-designer/tests/cli/utils/test_agent_text_formatter.py index 8e686e792..ab08e947b 100644 --- a/packages/data-designer/tests/cli/utils/test_agent_text_formatter.py +++ b/packages/data-designer/tests/cli/utils/test_agent_text_formatter.py @@ -26,7 +26,7 @@ def test_format_context_text_includes_config_module_path() -> None: "columns": [{"type": "a", "description": "A thing."}], }, "state": { - "model_aliases": {"default_provider": None, "items": []}, + "model_aliases": {"items": []}, "persona_datasets": {"items": []}, }, "operations": [{"command_pattern": "agent context", "description": "Bootstrap payload."}], @@ -52,7 +52,6 @@ def test_format_context_text_no_usable_aliases_shows_warning() -> None: "types": {}, "state": { "model_aliases": { - "default_provider": "nvidia", "items": [{"model_alias": "bad", "usable": False, "reason": "missing key"}], }, "persona_datasets": {"items": []}, @@ -120,13 +119,12 @@ def test_format_types_text_empty_items() -> None: def test_format_model_aliases_text_with_items() -> None: state: dict[str, Any] = { - "default_provider": "nvidia", "items": [ { "model_alias": "test", "model": "meta/llama-3", "generation_type": "chat", - "effective_provider": "nvidia", + "provider": "nvidia", "usable": True, "reason": None, }, @@ -134,16 +132,15 @@ def test_format_model_aliases_text_with_items() -> None: } result = format_model_aliases_text(state) - assert "default_provider: nvidia" in result assert "test" in result assert "meta/llama-3" in result + assert "nvidia" in result def test_format_model_aliases_text_empty() -> None: - state: dict[str, Any] = {"default_provider": None, "items": []} + state: dict[str, Any] = {"items": []} result = format_model_aliases_text(state) - assert "default_provider: (none)" in result assert "(no items)" in result diff --git a/packages/data-designer/tests/interface/test_data_designer.py b/packages/data-designer/tests/interface/test_data_designer.py index 0c5f6d1fe..d46813a04 100644 --- a/packages/data-designer/tests/interface/test_data_designer.py +++ b/packages/data-designer/tests/interface/test_data_designer.py @@ -501,78 +501,13 @@ def test_init_with_path_object(stub_artifact_path, stub_model_providers): assert designer is not None -def test_init_user_supplied_providers_ignore_unrelated_yaml_default( +def test_init_no_user_providers_loads_from_yaml( stub_artifact_path: Path, - stub_model_providers: list[ModelProvider], stub_managed_assets_path: Path, ) -> None: - """Regression for #588: a YAML ``default:`` that names a provider absent - from a user-supplied ``model_providers`` list must not leak into - construction. - - Pre-fix this raised ``ValidationError: Specified default 'unrelated' not - found in providers list``. - """ - with patch.object(dd_mod, "get_default_provider_name", return_value="unrelated"): - data_designer = DataDesigner( - artifact_path=stub_artifact_path, - model_providers=stub_model_providers, - secret_resolver=PlaintextResolver(), - managed_assets_path=stub_managed_assets_path, - ) - - assert data_designer.model_provider_registry.get_default_provider_name() == "stub-model-provider" - - -def test_init_user_supplied_providers_preserve_first_wins_over_yaml_default( - stub_artifact_path: Path, - stub_managed_assets_path: Path, -) -> None: - """Regression for #588: when the YAML ``default:`` matches a user-supplied - provider that isn't first in the list, the documented ``model_providers[0]`` - "first wins" behavior must not be silently overridden. - """ - user_providers = [ - ModelProvider( - name="first-provider", - endpoint="https://first.example.com/v1", - api_key="FIRST_API_KEY", - ), - ModelProvider( - name="second-provider", - endpoint="https://second.example.com/v1", - api_key="SECOND_API_KEY", - ), - ] - - # Multi-provider construction (user-supplied list of length > 1) still - # passes ``default=`` to ``ModelProviderRegistry`` — that's the deprecated - # path under #589 — so the registry-level deprecation fires here. - with ( - patch.object(dd_mod, "get_default_provider_name", return_value="second-provider"), - pytest.warns(DeprecationWarning, match="ModelProviderRegistry.default is deprecated"), - ): - data_designer = DataDesigner( - artifact_path=stub_artifact_path, - model_providers=user_providers, - secret_resolver=PlaintextResolver(), - managed_assets_path=stub_managed_assets_path, - ) - - assert data_designer.model_provider_registry.get_default_provider_name() == "first-provider" - - -def test_init_no_user_providers_uses_yaml_default( - stub_artifact_path: Path, - stub_managed_assets_path: Path, -) -> None: - """Pin the unchanged YAML-fallback path: when the caller omits - ``model_providers``, DataDesigner consults both ``providers:`` and - ``default:`` from the YAML. - - The fix in #588 only changes the user-supplied branch; this test locks the - YAML-fallback branch's contract so a future refactor can't silently regress - it. + """When the caller omits ``model_providers``, DataDesigner falls back to + the YAML's ``providers:`` list. The legacy ``default:`` key is no longer + consulted (see issue #590). """ yaml_providers = [ ModelProvider( @@ -587,126 +522,14 @@ def test_init_no_user_providers_uses_yaml_default( ), ] - with ( - patch.object(dd_mod, "get_default_providers", return_value=yaml_providers), - patch.object(dd_mod, "get_default_provider_name", return_value="yaml-second"), - ): + with patch.object(dd_mod, "get_default_providers", return_value=yaml_providers): data_designer = DataDesigner( artifact_path=stub_artifact_path, secret_resolver=PlaintextResolver(), managed_assets_path=stub_managed_assets_path, ) - assert data_designer.model_provider_registry.get_default_provider_name() == "yaml-second" - - -def test_init_yaml_default_emits_single_deprecation_warning( - stub_artifact_path: Path, - stub_managed_assets_path: Path, -) -> None: - """Regression for PR #594 review: when ``DataDesigner()`` falls back to the - YAML's ``providers:`` and ``default:``, the user should see a single - ``DeprecationWarning`` (the YAML one) rather than a duplicate cascade where - ``ModelProviderRegistry._warn_on_explicit_default`` also fires for the same - root cause. See issue #589. - """ - yaml_providers = [ - ModelProvider( - name="yaml-first", - endpoint="https://yaml-first.example.com/v1", - api_key="yaml-first-key", - ), - ModelProvider( - name="yaml-second", - endpoint="https://yaml-second.example.com/v1", - api_key="yaml-second-key", - ), - ] - - with warnings.catch_warnings(record=True) as caught: - warnings.simplefilter("always", DeprecationWarning) - with ( - patch.object(dd_mod, "get_default_providers", return_value=yaml_providers), - patch.object(dd_mod, "get_default_provider_name") as mock_get_default, - ): - mock_get_default.side_effect = lambda: ( - warnings.warn( - "The 'default:' key in /fake/path is deprecated and will " - "be removed in a future release. Remove it and specify provider= " - "explicitly on each ModelConfig instead. See issue #589.", - DeprecationWarning, - stacklevel=2, - ) - or "yaml-second" - ) - DataDesigner( - artifact_path=stub_artifact_path, - secret_resolver=PlaintextResolver(), - managed_assets_path=stub_managed_assets_path, - ) - - deprecation_messages = [str(w.message) for w in caught if issubclass(w.category, DeprecationWarning)] - yaml_default_warnings = [m for m in deprecation_messages if "'default:' key" in m] - registry_default_warnings = [m for m in deprecation_messages if "ModelProviderRegistry.default is deprecated" in m] - assert len(yaml_default_warnings) == 1, deprecation_messages - assert registry_default_warnings == [], ( - "Registry-level deprecation should be suppressed in the YAML-fallback path " - "to avoid two warnings for the same root cause." - ) - - -def test_init_no_user_providers_no_yaml_default_stays_quiet( - stub_artifact_path: Path, - stub_managed_assets_path: Path, -) -> None: - """Pin the bare-``DataDesigner()`` happy path: when the caller passes - nothing and the YAML carries multiple ``providers:`` but no ``default:`` - key, ``resolve_model_provider_registry`` synthesises - ``default=providers[0].name`` to satisfy ``check_implicit_default``. The - user did not opt into the deprecated registry-level default — the library - filled it in on their behalf — so ``_warn_on_explicit_default`` must stay - quiet. The fresh-install YAML ships exactly this shape (3 providers, no - ``default:``), so a regression here is what every user sees on their first - ``DataDesigner()`` call. - - Counterpart to ``test_init_user_supplied_providers_preserve_first_wins_over_yaml_default``, - which pins that the warning *does* fire when the caller hand-builds a - multi-provider list themselves (they wrote the multi-provider intent, so - the deprecation nudge applies). - """ - yaml_providers = [ - ModelProvider( - name="yaml-first", - endpoint="https://yaml-first.example.com/v1", - api_key="yaml-first-key", - ), - ModelProvider( - name="yaml-second", - endpoint="https://yaml-second.example.com/v1", - api_key="yaml-second-key", - ), - ] - - with warnings.catch_warnings(record=True) as caught: - warnings.simplefilter("always", DeprecationWarning) - with ( - patch.object(dd_mod, "get_default_providers", return_value=yaml_providers), - patch.object(dd_mod, "get_default_provider_name", return_value=None), - ): - data_designer = DataDesigner( - artifact_path=stub_artifact_path, - secret_resolver=PlaintextResolver(), - managed_assets_path=stub_managed_assets_path, - ) - - deprecation_messages = [str(w.message) for w in caught if issubclass(w.category, DeprecationWarning)] - registry_default_warnings = [m for m in deprecation_messages if "ModelProviderRegistry.default is deprecated" in m] - assert registry_default_warnings == [], ( - "Library-synthesised default must not emit the registry-level deprecation; " - f"the user did not opt into it. Saw: {deprecation_messages}" - ) - # Behavioral pin: first-wins still resolves correctly. - assert data_designer.model_provider_registry.get_default_provider_name() == "yaml-first" + assert [p.name for p in data_designer.model_provider_registry.providers] == ["yaml-first", "yaml-second"] def test_run_config_setting_persists(stub_artifact_path, stub_model_providers): diff --git a/scripts/benchmarks/benchmark_engine_v2.py b/scripts/benchmarks/benchmark_engine_v2.py index 2b4d19523..74c201993 100644 --- a/scripts/benchmarks/benchmark_engine_v2.py +++ b/scripts/benchmarks/benchmark_engine_v2.py @@ -619,7 +619,7 @@ def _run_single_benchmark(settings: BenchmarkSettings, engine_mode: str) -> Benc endpoint="https://mock.local/mcp", api_key="mock-mcp-key", ) - model_provider_registry = resolve_model_provider_registry([provider], default_provider_name=provider.name) + model_provider_registry = resolve_model_provider_registry([provider]) secret_resolver = CompositeResolver([EnvironmentResolver(), PlaintextResolver()]) with tempfile.TemporaryDirectory() as temp_dir: