diff --git a/docs/scenarios/format.md b/docs/scenarios/format.md index c46a764..d7ed8fb 100644 --- a/docs/scenarios/format.md +++ b/docs/scenarios/format.md @@ -10,11 +10,12 @@ This document provides a complete reference for the MCProbe test scenario YAML f ## Schema Overview -A test scenario consists of six top-level sections: +A test scenario consists of seven top-level sections: ```yaml name: string # Required: Scenario identifier description: string # Required: What this scenario tests +skip: bool | string # Optional: Skip scenario (true or reason string) synthetic_user: {...} # Required: User simulation config evaluation: {...} # Required: Success/failure criteria tags: [...] # Optional: Classification tags @@ -45,6 +46,17 @@ config: {...} # Optional: Per-scenario LLM overrides - **Description:** Detailed explanation of what behavior this scenario tests - **Example:** `"Tests the agent's ability to handle ambiguous weather queries by asking for clarification about the city"` +#### `skip` +- **Type:** `bool | string` +- **Required:** No +- **Default:** `null` (not skipped; equivalent to `false`) +- **Description:** Skip this scenario during test execution. Set to `true` to skip without a reason, or provide a string explanation for why the scenario is skipped. +- **Examples:** + - `skip: true` - Skip without reason + - `skip: "Waiting on analytics API implementation"` + - `skip: "Feature not ready for testing"` +- **Pytest Integration:** When set to `true` or a non-empty string, the scenario is reported as SKIPPED in pytest output; a reason string, if provided, is shown as the skip reason + #### `synthetic_user` - **Type:** `SyntheticUserConfig` object - **Required:** Yes @@ -378,6 +390,9 @@ description: | ask appropriate clarifying questions, and provide accurate results with proper error handling. +# Optional: Skip this scenario +# skip: true # or skip: "Reason for skipping" + # Optional: Per-scenario LLM configuration overrides config: judge: @@ -486,9 +501,40 @@ MCProbe validates scenarios according to these rules: 4. 
**Non-Empty Lists:** `correctness_criteria` must contain at least one item 5. **Enum Values:** Patience, verbosity, and expertise must use valid enum values 6. **Tool Names:** Tool names in `tool_call_criteria` must be non-empty strings +7. **Skip Field:** If provided, `skip` must be a boolean or a string (an empty string is treated as not skipped) ## Common Patterns +### Skipping Scenarios + +Skip scenarios that are not ready or are temporarily disabled: + +```yaml +name: Future Analytics Feature +description: Tests analytics dashboard integration +skip: "Waiting on analytics API implementation" + +synthetic_user: + persona: A data analyst + initial_query: "Show me the analytics dashboard" + +evaluation: + correctness_criteria: + - "Dashboard displays correct metrics" +``` + +Or skip without a reason (the required `evaluation` section is omitted here for brevity): + +```yaml +name: Experimental Feature +description: Tests experimental functionality +skip: true + +synthetic_user: + persona: A power user + initial_query: "Enable experimental mode" +``` + ### Testing Clarification ```yaml synthetic_user: diff --git a/src/mcprobe/models/scenario.py b/src/mcprobe/models/scenario.py index 4b2ca91..2c38b5a 100644 --- a/src/mcprobe/models/scenario.py +++ b/src/mcprobe/models/scenario.py @@ -114,6 +114,7 @@ class TestScenario(BaseModel): name: str = Field(..., min_length=1) description: str = Field(..., min_length=1) + skip: bool | str | None = None # True or reason string to skip this scenario synthetic_user: SyntheticUserConfig evaluation: EvaluationConfig tags: list[str] = Field(default_factory=list) diff --git a/src/mcprobe/pytest_plugin/plugin.py b/src/mcprobe/pytest_plugin/plugin.py index fb6b7d1..0387023 100644 --- a/src/mcprobe/pytest_plugin/plugin.py +++ b/src/mcprobe/pytest_plugin/plugin.py @@ -113,6 +113,11 @@ def __init__( def runtest(self) -> None: """Execute the test scenario.""" + # Skip if scenario is marked as skipped + if self.scenario.skip: + reason = self.scenario.skip if isinstance(self.scenario.skip, str) else "" + pytest.skip(reason) + # Get 
configuration from pytest options pytest_config = self.config diff --git a/tests/unit/test_scenario_skip.py b/tests/unit/test_scenario_skip.py new file mode 100644 index 0000000..05ca04e --- /dev/null +++ b/tests/unit/test_scenario_skip.py @@ -0,0 +1,63 @@ +"""Tests for scenario skip functionality.""" + +from mcprobe.models.scenario import ( + ClarificationBehavior, + EvaluationConfig, + SyntheticUserConfig, + TestScenario, +) + + +def _make_scenario(**kwargs: object) -> TestScenario: + """Create a minimal TestScenario with overrides.""" + defaults: dict[str, object] = { + "name": "Test Scenario", + "description": "A test", + "synthetic_user": SyntheticUserConfig( + persona="User", + initial_query="Hello", + clarification_behavior=ClarificationBehavior(), + max_turns=5, + ), + "evaluation": EvaluationConfig( + correctness_criteria=["Responds"], + ), + } + defaults.update(kwargs) + return TestScenario(**defaults) # type: ignore[arg-type] + + +class TestScenarioSkip: + """Tests for the skip field on TestScenario.""" + + def test_skip_defaults_to_none(self) -> None: + """Skip field is None when not specified.""" + scenario = _make_scenario() + assert scenario.skip is None + + def test_skip_true(self) -> None: + """Skip field accepts True.""" + scenario = _make_scenario(skip=True) + assert scenario.skip is True + + def test_skip_false(self) -> None: + """Skip field accepts False (treated as not skipped).""" + scenario = _make_scenario(skip=False) + assert scenario.skip is False + # False is falsy, so skip check won't trigger + assert not scenario.skip + + def test_skip_with_reason_string(self) -> None: + """Skip field accepts a reason string.""" + scenario = _make_scenario(skip="Feature not ready yet") + assert scenario.skip == "Feature not ready yet" + + def test_skip_truthy_for_bool_and_string(self) -> None: + """Both True and non-empty string are truthy for skip checks.""" + assert _make_scenario(skip=True).skip + assert _make_scenario(skip="reason").skip + + def 
test_skip_falsy_for_none_and_false(self) -> None: + """None and False are falsy for skip checks.""" + assert not _make_scenario(skip=None).skip + assert not _make_scenario(skip=False).skip