From 694119bd8b8c753e500710369b60efa01bf1d290 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Thu, 4 Jun 2026 09:57:21 -0500 Subject: [PATCH 01/36] fix: update execute() to execute_prompt() across codebase --- notebooks/example.ipynb | 20 ++++++-------------- tests/integration/services/test_json_task.py | 12 ++++++------ 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/notebooks/example.ipynb b/notebooks/example.ipynb index a9e6259..2d82478 100644 --- a/notebooks/example.ipynb +++ b/notebooks/example.ipynb @@ -145,7 +145,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "9ebc2c42", "metadata": {}, "outputs": [ @@ -172,7 +172,7 @@ } ], "source": [ - "result = service.execute(\n", + "result = service.execute_prompt(\n", " prompt_name=\"mondo_id_classification\",\n", " prompt_version=\"v1\",\n", " payload={\"text\": \"melanoma\"},\n", @@ -191,7 +191,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "39284fa7", "metadata": {}, "outputs": [ @@ -215,7 +215,7 @@ } ], "source": [ - "result = service.execute(\n", + "result = service.execute_prompt(\n", " prompt_name=\"mondo_id_classification\",\n", " prompt_version=\"v1\",\n", " payload={\"text\": \"melanoma\"},\n", @@ -223,19 +223,11 @@ ")\n", "result" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7cb6de0e", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "wags-llm (3.13.5)", + "display_name": "wags-llm", "language": "python", "name": "python3" }, @@ -249,7 +241,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.5" + "version": "3.11.14" } }, "nbformat": 4, diff --git a/tests/integration/services/test_json_task.py b/tests/integration/services/test_json_task.py index 49f3cac..4d833bc 100644 --- a/tests/integration/services/test_json_task.py +++ b/tests/integration/services/test_json_task.py @@ -89,7 
+89,7 @@ def test_run_success(): prompt_registry=registry, ) - result = service.execute( + result = service.execute_prompt( prompt_name="test_task", prompt_version="v1", payload={"text": "hello"}, @@ -112,13 +112,13 @@ def test_run_uses_cache(): cache=cache, ) - result1 = service.execute( + result1 = service.execute_prompt( prompt_name="test_task", prompt_version="v1", payload={"x": 1}, response_model=ResultModel, ) - result2 = service.execute( + result2 = service.execute_prompt( prompt_name="test_task", prompt_version="v1", payload={"x": 1}, @@ -143,13 +143,13 @@ def test_run_cache_miss_for_different_payload(): cache=cache, ) - service.execute( + service.execute_prompt( prompt_name="test_task", prompt_version="v1", payload={"x": 1}, response_model=ResultModel, ) - service.execute( + service.execute_prompt( prompt_name="test_task", prompt_version="v1", payload={"x": 2}, @@ -170,7 +170,7 @@ def test_run_validation_error(): ) with pytest.raises(RuntimeError, match="Task failed"): - service.execute( + service.execute_prompt( prompt_name="test_task", prompt_version="v1", payload={"text": "hello"}, From 158684ed5229039efa8bcc8c2f0315b31c22919f Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Thu, 4 Jun 2026 09:59:16 -0500 Subject: [PATCH 02/36] feat: add skill engineering support --- src/wags_llm/services/structured_task.py | 88 +++++++++++++++++++++--- src/wags_llm/skills/__init__.py | 13 ++++ src/wags_llm/skills/base.py | 66 ++++++++++++++++++ src/wags_llm/skills/registry.py | 59 ++++++++++++++++ 4 files changed, 215 insertions(+), 11 deletions(-) create mode 100644 src/wags_llm/skills/__init__.py create mode 100644 src/wags_llm/skills/base.py create mode 100644 src/wags_llm/skills/registry.py diff --git a/src/wags_llm/services/structured_task.py b/src/wags_llm/services/structured_task.py index 3a59d07..78afb82 100644 --- a/src/wags_llm/services/structured_task.py +++ b/src/wags_llm/services/structured_task.py @@ -24,7 +24,18 @@ from wags_llm.cache.base import 
BaseCache from wags_llm.client.base import LLMJsonClient from wags_llm.client.exceptions import LLMClientError -from wags_llm.prompts.registry import PromptRegistry, build_empty_registry +from wags_llm.prompts.registry import ( + PromptRegistry, +) +from wags_llm.prompts.registry import ( + build_empty_registry as build_empty_prompt_registry, +) +from wags_llm.skills.registry import ( + SkillRegistry, +) +from wags_llm.skills.registry import ( + build_empty_registry as build_empty_skill_registry, +) _logger = logging.getLogger(__name__) @@ -38,6 +49,7 @@ def __init__( self, client: LLMJsonClient, prompt_registry: PromptRegistry | None = None, + skill_registry: SkillRegistry | None = None, cache: BaseCache | None = None, ) -> None: """Initialize the structured task runner. @@ -47,10 +59,61 @@ def __init__( :param cache: Optional cache for storing and retrieving task results. """ self.client = client - self.prompt_registry = prompt_registry or build_empty_registry() + self.prompt_registry = prompt_registry or build_empty_prompt_registry() + self.skill_registry = skill_registry or build_empty_skill_registry() self.cache = cache - def execute( + def execute_skill( + self, + skill_name: str, + skill_version: str, + payload: Mapping[str, Any], + response_model: type[BaseModel], + ) -> BaseModel: + """Execute a skill and return validated output. + + :param skill_name: Registered skill name. + :param skill_version: Registered skill version. + :param payload: JSON-serializable task data. + :param response_model: Pydantic model for validation. + :return: Validated task result. + :raise RuntimeError: If execution or validation fails. 
+ """ + skill = self.skill_registry.get(skill_name, skill_version) + + if self.cache is not None: + cache_key = self._cache_key( + name=skill_name, + version=skill_version, + payload=payload, + ) + cached = self.cache.get(cache_key) + if cached is not None: + return response_model.model_validate(cached) + else: + cache_key = None + + try: + # NOTE: no need to change this, unless we would like to change the attribute names, but skill.md file is a "system prompt". + invoke_json_response = self.client.invoke_json( + system_prompt=skill.load_skill(), + user_prompt=skill.build_user_prompt(payload=payload), + json_schema=response_model.model_json_schema(), + ) + + result = response_model.model_validate(invoke_json_response.parsed_json) + + if self.cache is not None and cache_key is not None: + self.cache.set(cache_key, result.model_dump()) + + except (LLMClientError, ValidationError) as exc: + msg = f"Task failed: {exc}" + _logger.exception(msg) + raise RuntimeError(msg) from exc + else: + return result + + def execute_prompt( self, prompt_name: str, prompt_version: str, @@ -70,8 +133,8 @@ def execute( if self.cache is not None: cache_key = self._cache_key( - prompt_name=prompt_name, - prompt_version=prompt_version, + name=prompt_name, + version=prompt_version, payload=payload, ) cached = self.cache.get(cache_key) @@ -99,24 +162,27 @@ def execute( else: return result + # NOTE: name and version were originally prompt_name and prompt_version. + # # They were renamed to be generic so that _cache_key() can be shared + # # between execute_prompt() and execute_skill(). def _cache_key( self, - prompt_name: str, - prompt_version: str, + name: str, + version: str, payload: Mapping[str, Any], ) -> str: """Build a cache key for a task run. - :param prompt_name: Registered prompt name. - :param prompt_version: Registered prompt version. + :param name: Registered name. + :param version: Registered version. :param payload: JSON-serializable task data. :return: Stable cache key. 
""" cache_payload = { "payload": dict(payload), "model": self.client.model_id, - "prompt_name": prompt_name, - "prompt_version": prompt_version, + "name": name, + "version": version, } normalized = json.dumps(cache_payload, sort_keys=True, separators=(",", ":")) cache_key = hashlib.sha256(normalized.encode("utf-8")).hexdigest() diff --git a/src/wags_llm/skills/__init__.py b/src/wags_llm/skills/__init__.py new file mode 100644 index 0000000..cbe0258 --- /dev/null +++ b/src/wags_llm/skills/__init__.py @@ -0,0 +1,13 @@ +"""Skill interfaces and registry. + +Define and manage versioned skill templates. +""" + +from wags_llm.skills.base import BaseSkillTemplate +from wags_llm.skills.registry import SkillRegistry, build_empty_registry + +__all__ = [ + "BaseSkillTemplate", + "SkillRegistry", + "build_empty_registry", +] diff --git a/src/wags_llm/skills/base.py b/src/wags_llm/skills/base.py new file mode 100644 index 0000000..fda015e --- /dev/null +++ b/src/wags_llm/skills/base.py @@ -0,0 +1,66 @@ +"""Skill interface. + +Users extend this to define new skill inputs. +""" + +import logging +from abc import ABC, abstractmethod +from collections.abc import Mapping +from pathlib import Path +from typing import Any + +logger = logging.getLogger(__name__) + + +# TODO: ask maintainers if version is needed. +class BaseSkillTemplate(ABC): + """Base skill template. + + :var skill_path: Path to the skill `.md` file. + :var version: Skill version. + """ + + skill_path: str + version: str + + @property + def name(self) -> str: + """Derive skill name from the file stem. + + :return: Skill name string. + """ + return Path(self.skill_path).stem + + # TODO: discuss with maintainers - should BaseSkillTemplate have a + # build_system_prompt() that calls load_skill()? This would make skills + # and prompts share a common interface. + def load_skill(self) -> str: + """Load skill instructions from file. + + :return: Skill instruction string. 
+ :raise FileNotFoundError: If skill_path does not exist. + """ + file_path = Path(self.skill_path) + logger.debug("Loading skill from path: %s", file_path) + + try: + content = file_path.read_text(encoding="utf-8") + except FileNotFoundError as exc: + msg = f"Skill path not found: {file_path}" + raise FileNotFoundError(msg) from exc + + logger.info("Loaded skill from path: %s", file_path) + return content + + @abstractmethod + def build_user_prompt(self, payload: Mapping[str, Any]) -> str: + """Build the user prompt. + + # TODO: discuss with maintainers. Should input formatting instructions + # live in build_user_prompt() or directly in the skill .md file? + # If they live in the .md file, we would need to revisit the user_prompt + # in execute_skill() and potentially remove this method entirely. + + :param payload: JSON-serializable task data. + :return: User prompt string. + """ diff --git a/src/wags_llm/skills/registry.py b/src/wags_llm/skills/registry.py new file mode 100644 index 0000000..89cabcb --- /dev/null +++ b/src/wags_llm/skills/registry.py @@ -0,0 +1,59 @@ +"""skill registry. + +Maps (name, version) -> skill instance. + +Users typically: +* create skills in their project +* register them here or pass a custom registry +""" + +import logging + +from wags_llm.skills.base import BaseSkillTemplate + +_logger = logging.getLogger(__name__) + + +# TODO: discuss with maintainers - registry key is currently (name, version) +# where name is derived from the file stem. Recommend keeping name as the +# key rather than skill_path since skill_path is an internal implementation +# detail and should not be exposed as the public identifier. +class SkillRegistry: + """Store and retrieve skill.""" + + def __init__(self) -> None: + """Initialize an empty skill registry.""" + self._skills: dict[tuple[str, str], BaseSkillTemplate] = {} + + def register(self, skill: BaseSkillTemplate) -> None: + """Register a skill. + + :param skill: skill instance to register. 
+ """ + _logger.debug( + "Registering skill: name='%s', version='%s'", skill.name, skill.version + ) + self._skills[(skill.name, skill.version)] = skill + + def get(self, name: str, version: str) -> BaseSkillTemplate: + """Retrieve a skill by name and version. + + :param name: Skill name. + :param version: Skill version. + :return: Registered skill. + :raise KeyError: If skill is not found. + """ + try: + return self._skills[(name, version)] + except KeyError as exc: + msg = f"Skill not found: ({name}, {version})" + _logger.exception(msg) + raise KeyError(msg) from exc + + +def build_empty_registry() -> SkillRegistry: + """Create an empty skill registry. + + :return: New SkillRegistry instance. + """ + return SkillRegistry() From f0331d263b0bd000a344c55df810ad7aafdabb50 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Thu, 4 Jun 2026 10:00:30 -0500 Subject: [PATCH 03/36] test: add unit tests for skills --- tests/unit/skills/entity_detection.md | 1 + tests/unit/skills/test_skill_json_task.py | 210 ++++++++++++++++++++++ tests/unit/skills/test_skill_registry.py | 42 +++++ 3 files changed, 253 insertions(+) create mode 100644 tests/unit/skills/entity_detection.md create mode 100644 tests/unit/skills/test_skill_json_task.py create mode 100644 tests/unit/skills/test_skill_registry.py diff --git a/tests/unit/skills/entity_detection.md b/tests/unit/skills/entity_detection.md new file mode 100644 index 0000000..9d30ed8 --- /dev/null +++ b/tests/unit/skills/entity_detection.md @@ -0,0 +1 @@ +Extract named medical entities from the provided abstract. 
diff --git a/tests/unit/skills/test_skill_json_task.py b/tests/unit/skills/test_skill_json_task.py new file mode 100644 index 0000000..98da1f1 --- /dev/null +++ b/tests/unit/skills/test_skill_json_task.py @@ -0,0 +1,210 @@ +"""Test that StructuredTaskRunner works correctly for skills""" + +from typing import Any + +import pytest +from pydantic import BaseModel + +from wags_llm.cache.in_memory import InMemoryCache +from wags_llm.client.base import InvokeJsonResponse, LLMJsonClient +from wags_llm.services.structured_task import StructuredTaskRunner +from wags_llm.skills.base import BaseSkillTemplate +from wags_llm.skills.registry import SkillRegistry + +# NOTE: DummyClient, BadClient, and ResultModel are copied from +# tests/unit/services/test_structured_task.py for easier code review. +# TODO: discuss with maintainer if we need to move to test_json_task.py + + +class DummySkill(BaseSkillTemplate): + """Simple skill for service tests.""" + + skill_path = "tests/unit/skills/entity_detection.md" + version = "v1" + + def build_user_prompt(self, payload) -> str: + """Build the user prompt.""" + return f"Payload: {payload}" + + +# NOTE: new added to test missing skill file +class MissingFileSkill(BaseSkillTemplate): + """Missing skill file for service tests.""" + + skill_path = "tests/unit/skills/does_not_exist.md" + version = "v1" + + def build_user_prompt(self, payload) -> str: + """Build the user prompt.""" + return f"Payload: {payload}" + + +class DummyClient(LLMJsonClient): + """Fake client that returns a valid response.""" + + model_id = "dummy-model" + + def __init__(self): + """Initialize the fake client.""" + self.calls = 0 + + def invoke_json( + self, + system_prompt: str, # noqa: ARG002 + user_prompt: str, # noqa: ARG002 + json_schema: dict[str, Any] | None = None, # noqa: ARG002 + ) -> InvokeJsonResponse: + """Return a fixed JSON response.""" + self.calls += 1 + return InvokeJsonResponse( + parsed_json={"value": 1}, + raw_text='{"value": 1}', + ) + + +class 
BadClient: + """Fake client that returns an invalid response shape.""" + + model_id = "dummy-model" + + def __init__(self): + """Initialize the fake client.""" + self.calls = 0 + + def invoke_json( + self, + system_prompt: str, # noqa: ARG002 + user_prompt: str, # noqa: ARG002 + json_schema: dict[str, Any] | None = None, # noqa: ARG002 + ) -> InvokeJsonResponse: + """Return an invalid JSON shape.""" + self.calls += 1 + return InvokeJsonResponse( + parsed_json={"wrong": "shape"}, + raw_text='{"wrong": "shape"}', + ) + + +class ResultModel(BaseModel): + """Response model for service tests.""" + + value: int + + +def test_execute_skill_success(): + """Test that execute_skill works correctly.""" + registry = SkillRegistry() + registry.register(DummySkill()) + + service = StructuredTaskRunner( + client=DummyClient(), + skill_registry=registry, + ) + + result = service.execute_skill( + skill_name="entity_detection", + skill_version="v1", + payload={"text": "hello"}, + response_model=ResultModel, + ) + + assert result.value == 1 + + +def test_execute_skill_file_not_found(): + """Test that execute_skill raises FileNotFoundError when skill file does not exist.""" + + registry = SkillRegistry() + registry.register(MissingFileSkill()) + + service = StructuredTaskRunner( + client=DummyClient(), + skill_registry=registry, + ) + + with pytest.raises(FileNotFoundError): + service.execute_skill( + skill_name="does_not_exist", + skill_version="v1", + payload={"text": "hello"}, + response_model=ResultModel, + ) + + +def test_execute_skill_uses_cache(): + """Test that execute_skill works correctly with cache.""" + registry = SkillRegistry() + registry.register(DummySkill()) + client = DummyClient() + cache = InMemoryCache() + + service = StructuredTaskRunner( + client=client, + skill_registry=registry, + cache=cache, + ) + + result1 = service.execute_skill( + skill_name="entity_detection", + skill_version="v1", + payload={"x": 1}, + response_model=ResultModel, + ) + result2 = 
service.execute_skill( + skill_name="entity_detection", + skill_version="v1", + payload={"x": 1}, + response_model=ResultModel, + ) + + assert result1.value == 1 + assert result2.value == 1 + assert client.calls == 1 + + +def test_execute_skill_cache_miss_for_different_payload(): + """Test that execute_skill cache misses on different payload.""" + registry = SkillRegistry() + registry.register(DummySkill()) + client = DummyClient() + cache = InMemoryCache() + + service = StructuredTaskRunner( + client=client, + skill_registry=registry, + cache=cache, + ) + + service.execute_skill( + skill_name="entity_detection", + skill_version="v1", + payload={"x": 1}, + response_model=ResultModel, + ) + service.execute_skill( + skill_name="entity_detection", + skill_version="v1", + payload={"x": 2}, + response_model=ResultModel, + ) + + assert client.calls == 2 + + +def test_execute_skill_validation_error(): + """Test that execute_skill raises RuntimeError when response validation fails.""" + registry = SkillRegistry() + registry.register(DummySkill()) + + service = StructuredTaskRunner( + client=BadClient(), + skill_registry=registry, + ) + + with pytest.raises(RuntimeError, match="Task failed"): + service.execute_skill( + skill_name="entity_detection", + skill_version="v1", + payload={"text": "hello"}, + response_model=ResultModel, + ) diff --git a/tests/unit/skills/test_skill_registry.py b/tests/unit/skills/test_skill_registry.py new file mode 100644 index 0000000..e7a04bf --- /dev/null +++ b/tests/unit/skills/test_skill_registry.py @@ -0,0 +1,42 @@ +import re +from collections.abc import Mapping +from typing import Any + +import pytest + +from wags_llm.skills.base import BaseSkillTemplate +from wags_llm.skills.registry import SkillRegistry, build_empty_registry + + +class DummySkill(BaseSkillTemplate): + skill_path = "skills/entity_detection.md" + version = "v1" + + def build_user_prompt(self, payload: Mapping[str, Any]) -> str: + """Build the user prompt. 
+ + :param payload: JSON-serializable task data. + + Example: + payload = {"text": "hello"} + + :return: User prompt string. + """ + return f"Payload: {payload}" + + +def test_register_and_get_skill(): + registry = SkillRegistry() + skill = DummySkill() + + registry.register(skill) + + assert registry.get("entity_detection", "v1") is skill + + +def test_build_empty_registry(): + registry = build_empty_registry() + with pytest.raises( + KeyError, match=re.escape("'Skill not found: (entity_detection, v1)'") + ): + assert registry.get("entity_detection", "v1") From 42501e154c6c7a0516e14a1f51123860c6b36ce9 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Thu, 4 Jun 2026 10:44:34 -0500 Subject: [PATCH 04/36] chore: update notes and comments for maintainer review --- src/wags_llm/services/structured_task.py | 9 ++++++--- src/wags_llm/skills/base.py | 5 +++-- src/wags_llm/skills/registry.py | 2 +- tests/unit/skills/test_skill_json_task.py | 9 +++++---- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/wags_llm/services/structured_task.py b/src/wags_llm/services/structured_task.py index 78afb82..92d5738 100644 --- a/src/wags_llm/services/structured_task.py +++ b/src/wags_llm/services/structured_task.py @@ -94,7 +94,11 @@ def execute_skill( cache_key = None try: - # NOTE: no need to change this, unless we would like to change the attribute names, but skill.md file is a "system prompt". + # NOTE: system_prompt and user_prompt are parameter names from invoke_json() + # and do not need to change. The skill .md file serves the same purpose as + # a system prompt since it contains instructions for the LLM to follow. + # Changing these parameter names in invoke_json() would break both + # execute_prompt() and execute_skill(). 
invoke_json_response = self.client.invoke_json( system_prompt=skill.load_skill(), user_prompt=skill.build_user_prompt(payload=payload), @@ -163,8 +167,7 @@ def execute_prompt( return result # NOTE: name and version were originally prompt_name and prompt_version. - # # They were renamed to be generic so that _cache_key() can be shared - # # between execute_prompt() and execute_skill(). + # They were renamed to be generic so that _cache_key() can be shared between execute_prompt() and execute_skill(). def _cache_key( self, name: str, diff --git a/src/wags_llm/skills/base.py b/src/wags_llm/skills/base.py index fda015e..ad7fde7 100644 --- a/src/wags_llm/skills/base.py +++ b/src/wags_llm/skills/base.py @@ -31,9 +31,10 @@ def name(self) -> str: """ return Path(self.skill_path).stem - # TODO: discuss with maintainers - should BaseSkillTemplate have a + # NOTE: discuss with maintainers - should BaseSkillTemplate have a # build_system_prompt() that calls load_skill()? This would make skills - # and prompts share a common interface. + # and prompts share a common interface. build_system_prompt() would simply + # be a wrapper that calls load_skill() under the hood. def load_skill(self) -> str: """Load skill instructions from file. diff --git a/src/wags_llm/skills/registry.py b/src/wags_llm/skills/registry.py index 89cabcb..3eb49c9 100644 --- a/src/wags_llm/skills/registry.py +++ b/src/wags_llm/skills/registry.py @@ -14,7 +14,7 @@ _logger = logging.getLogger(__name__) -# TODO: discuss with maintainers - registry key is currently (name, version) +# NOTE: discuss with maintainers - registry key is currently (name, version) # where name is derived from the file stem. Recommend keeping name as the # key rather than skill_path since skill_path is an internal implementation # detail and should not be exposed as the public identifier. 
diff --git a/tests/unit/skills/test_skill_json_task.py b/tests/unit/skills/test_skill_json_task.py index 98da1f1..e0b8935 100644 --- a/tests/unit/skills/test_skill_json_task.py +++ b/tests/unit/skills/test_skill_json_task.py @@ -11,11 +11,12 @@ from wags_llm.skills.base import BaseSkillTemplate from wags_llm.skills.registry import SkillRegistry -# NOTE: DummyClient, BadClient, and ResultModel are copied from -# tests/unit/services/test_structured_task.py for easier code review. -# TODO: discuss with maintainer if we need to move to test_json_task.py - +# NOTE: DummyClient, BadClient, and ResultModel are copied from +# tests/integration/services/test_json_task.py for easier code review. +# This file follows the same pattern as test_json_task.py but lives under +# unit/skills/ to keep all skill related tests together. +# Discuss with maintainer if this should move to integration/services/. class DummySkill(BaseSkillTemplate): """Simple skill for service tests.""" From 076b70e97a948c5a88f33bf6dca27ef202941744 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Thu, 4 Jun 2026 10:53:32 -0500 Subject: [PATCH 05/36] fix: update skill_path in test_skill_registry.py to correct path --- tests/unit/skills/test_skill_registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/skills/test_skill_registry.py b/tests/unit/skills/test_skill_registry.py index e7a04bf..0afdc92 100644 --- a/tests/unit/skills/test_skill_registry.py +++ b/tests/unit/skills/test_skill_registry.py @@ -9,7 +9,7 @@ class DummySkill(BaseSkillTemplate): - skill_path = "skills/entity_detection.md" + skill_path = "tests/unit/skills/entity_detection.md" version = "v1" def build_user_prompt(self, payload: Mapping[str, Any]) -> str: From 6dc4393db6e530c82fcb73f8aa23d286ac4b01af Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Thu, 4 Jun 2026 16:42:55 -0500 Subject: [PATCH 06/36] fix: clean up build_user_prompt docstring in BaseSkillTemplate --- src/wags_llm/skills/base.py | 5 ----- 1 
file changed, 5 deletions(-) diff --git a/src/wags_llm/skills/base.py b/src/wags_llm/skills/base.py index ad7fde7..4e9e6af 100644 --- a/src/wags_llm/skills/base.py +++ b/src/wags_llm/skills/base.py @@ -57,11 +57,6 @@ def load_skill(self) -> str: def build_user_prompt(self, payload: Mapping[str, Any]) -> str: """Build the user prompt. - # TODO: discuss with maintainers. Should input formatting instructions - # live in build_user_prompt() or directly in the skill .md file? - # If they live in the .md file, we would need to revisit the user_prompt - # in execute_skill() and potentially remove this method entirely. - :param payload: JSON-serializable task data. :return: User prompt string. """ From 584e18cc9f0e815dc5b6220014f160de1cfbacbc Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Wed, 10 Jun 2026 10:50:50 -0500 Subject: [PATCH 07/36] refactor: add _check_cache helper and CacheCheckResult to StructuredTaskRunner --- src/wags_llm/services/structured_task.py | 99 ++++++++++++++++-------- 1 file changed, 65 insertions(+), 34 deletions(-) diff --git a/src/wags_llm/services/structured_task.py b/src/wags_llm/services/structured_task.py index 92d5738..a4c45a8 100644 --- a/src/wags_llm/services/structured_task.py +++ b/src/wags_llm/services/structured_task.py @@ -42,6 +42,17 @@ MAX_LOG_CHARS = int(getenv("MAX_LOG_CHARS", "500")) +class CacheCheckResult(BaseModel): + """Result of a cache lookup. + + :var cache_key: The cache key for this request. None if caching is disabled. + :var cached: The validated cached result. None if no cached result was found. + """ + + cache_key: str | None + cached: Any | None + + class StructuredTaskRunner: """Run structured LLM tasks.""" @@ -56,6 +67,7 @@ def __init__( :param client: LLM client used to execute prompts. :param prompt_registry: Registry used to resolve prompts. + :param skill_registry: Registry used to resolve skills. :param cache: Optional cache for storing and retrieving task results. 
""" self.client = client @@ -81,34 +93,27 @@ def execute_skill( """ skill = self.skill_registry.get(skill_name, skill_version) - if self.cache is not None: - cache_key = self._cache_key( - name=skill_name, - version=skill_version, - payload=payload, - ) - cached = self.cache.get(cache_key) - if cached is not None: - return response_model.model_validate(cached) - else: - cache_key = None + cache_result = self._check_cache( + name=skill_name, + version=skill_version, + payload=payload, + response_model=response_model, + ) + + if cache_result.cached is not None: + return cache_result.cached try: - # NOTE: system_prompt and user_prompt are parameter names from invoke_json() - # and do not need to change. The skill .md file serves the same purpose as - # a system prompt since it contains instructions for the LLM to follow. - # Changing these parameter names in invoke_json() would break both - # execute_prompt() and execute_skill(). invoke_json_response = self.client.invoke_json( - system_prompt=skill.load_skill(), + system_prompt=skill.build_system_prompt(), user_prompt=skill.build_user_prompt(payload=payload), json_schema=response_model.model_json_schema(), ) result = response_model.model_validate(invoke_json_response.parsed_json) - if self.cache is not None and cache_key is not None: - self.cache.set(cache_key, result.model_dump()) + if self.cache is not None and cache_result.cache_key is not None: + self.cache.set(cache_result.cache_key, result.model_dump()) except (LLMClientError, ValidationError) as exc: msg = f"Task failed: {exc}" @@ -135,17 +140,14 @@ def execute_prompt( """ prompt = self.prompt_registry.get(prompt_name, prompt_version) - if self.cache is not None: - cache_key = self._cache_key( - name=prompt_name, - version=prompt_version, - payload=payload, - ) - cached = self.cache.get(cache_key) - if cached is not None: - return response_model.model_validate(cached) - else: - cache_key = None + cache_result = self._check_cache( + name=prompt_name, + 
version=prompt_version, + payload=payload, + response_model=response_model, + ) + if cache_result.cached is not None: + return cache_result.cached try: invoke_json_response = self.client.invoke_json( @@ -156,8 +158,8 @@ def execute_prompt( result = response_model.model_validate(invoke_json_response.parsed_json) - if self.cache is not None and cache_key is not None: - self.cache.set(cache_key, result.model_dump()) + if self.cache is not None and cache_result.cache_key is not None: + self.cache.set(cache_result.cache_key, result.model_dump()) except (LLMClientError, ValidationError) as exc: msg = f"Task failed: {exc}" @@ -166,8 +168,6 @@ def execute_prompt( else: return result - # NOTE: name and version were originally prompt_name and prompt_version. - # They were renamed to be generic so that _cache_key() can be shared between execute_prompt() and execute_skill(). def _cache_key( self, name: str, @@ -200,3 +200,34 @@ def _cache_key( "Cache lookup using key='%s' (for cache_payload=%s)", cache_key, cache_ctx ) return cache_key + + def _check_cache( + self, + name: str, + version: str, + payload: Mapping[str, Any], + response_model: type[BaseModel], + ) -> CacheCheckResult: + """Check cache for an existing result. + + :param name: Registered name. + :param version: Registered version. + :param payload: JSON-serializable task data. + :param response_model: Pydantic model for validation. + :return: Tuple of (cache_key, validated result). Result is None on cache miss. 
+ """ + if self.cache is not None: + cache_key = self._cache_key( + name=name, + version=version, + payload=payload, + ) + cached = self.cache.get(cache_key) + if cached is not None: + return CacheCheckResult( + cache_key=cache_key, cached=response_model.model_validate(cached) + ) + else: + cache_key = None + + return CacheCheckResult(cache_key=cache_key, cached=None) From 72380af1cef666e8e1bb320632d11dad67764877 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Wed, 10 Jun 2026 10:51:58 -0500 Subject: [PATCH 08/36] chore: rename entity_detection.md to test_skill_v1.md and update format --- tests/unit/skills/entity_detection.md | 1 - tests/unit/skills/test_skill_v1.md | 55 +++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) delete mode 100644 tests/unit/skills/entity_detection.md create mode 100644 tests/unit/skills/test_skill_v1.md diff --git a/tests/unit/skills/entity_detection.md b/tests/unit/skills/entity_detection.md deleted file mode 100644 index 9d30ed8..0000000 --- a/tests/unit/skills/entity_detection.md +++ /dev/null @@ -1 +0,0 @@ -Extract named medical entities from the provided abstract. diff --git a/tests/unit/skills/test_skill_v1.md b/tests/unit/skills/test_skill_v1.md new file mode 100644 index 0000000..c14b955 --- /dev/null +++ b/tests/unit/skills/test_skill_v1.md @@ -0,0 +1,55 @@ +--- +name: test-skill +description: A helpful assistant that processes text input and returns a JSON + object with a value field set to 1. +--- + +# Test Skill + +## Overview + +A simple skill that processes text input and returns a structured JSON response. + +## When to Use + +Use when you need to process a text input and receive a standardized JSON output with a value field. + +## Instructions + +You are a helpful assistant. Return a JSON object with a `value` field set to `1`. 
+ +## Input Format + +The input will be a JSON object with the following structure: + +```json +{ + "text": "the text to process" +} +``` + +## Output Format + +Return a JSON object matching the provided schema: + +```json +{ + "value": 1 +} +``` + +## Examples + +### Input +```json +{ + "text": "hello" +} +``` + +### Output +```json +{ + "value": 1 +} +``` From 22d1f9ac6c982858de97211f834f0fa95582334d Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Wed, 10 Jun 2026 10:54:03 -0500 Subject: [PATCH 09/36] chore: clean up comments in SkillRegistry --- src/wags_llm/skills/registry.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/wags_llm/skills/registry.py b/src/wags_llm/skills/registry.py index 3eb49c9..0c91b2a 100644 --- a/src/wags_llm/skills/registry.py +++ b/src/wags_llm/skills/registry.py @@ -14,10 +14,6 @@ _logger = logging.getLogger(__name__) -# NOTE: discuss with maintainers - registry key is currently (name, version) -# where name is derived from the file stem. Recommend keeping name as the -# key rather than skill_path since skill_path is an internal implementation -# detail and should not be exposed as the public identifier. class SkillRegistry: """Store and retrieve skill.""" From 300d0d576550851fdaea4166d51e760774f64858 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Wed, 10 Jun 2026 10:54:27 -0500 Subject: [PATCH 10/36] feat: derive name/version from filename via regex and add build_system_prompt() --- src/wags_llm/skills/base.py | 74 ++++++++++++++++++++++++++++--------- 1 file changed, 57 insertions(+), 17 deletions(-) diff --git a/src/wags_llm/skills/base.py b/src/wags_llm/skills/base.py index 4e9e6af..e6fb5bf 100644 --- a/src/wags_llm/skills/base.py +++ b/src/wags_llm/skills/base.py @@ -4,6 +4,7 @@ """ import logging +import re from abc import ABC, abstractmethod from collections.abc import Mapping from pathlib import Path @@ -12,16 +13,25 @@ logger = logging.getLogger(__name__) -# TODO: ask maintainers if version is needed. 
+class SkillTemplateError(Exception): + """Raise custom exceptions for SkillTemplateError.""" + + class BaseSkillTemplate(ABC): """Base skill template. :var skill_path: Path to the skill `.md` file. - :var version: Skill version. """ - skill_path: str - version: str + skill_path: Path + + _skill_file_pattern = re.compile(r"^(?P<name>.+)_(?P<version>[^_]+)\.md$") + + def __init__(self) -> None: + """Initialize the skill template and validate the skill filename format. + :raise SkillTemplateError: If skill_path does not follow the required format. + """ + self._skill_filename_match = self._validate_skill_filename() @property def name(self) -> str: @@ -29,30 +39,47 @@ def name(self) -> str: :return: Skill name string. """ - return Path(self.skill_path).stem + return self._skill_filename_match.group("name") + + @property + def version(self) -> str: + """Derive skill version from the file stem. + + :return: Skill version string. + """ + return self._skill_filename_match.group("version") - # NOTE: discuss with maintainers - should BaseSkillTemplate have a - # build_system_prompt() that calls load_skill()? This would make skills - # and prompts share a common interface. build_system_prompt() would simply - # be a wrapper that calls load_skill() under the hood. def load_skill(self) -> str: """Load skill instructions from file. :return: Skill instruction string. - :raise FileNotFoundError: If skill_path does not exist. + :raise ValueError: If skill filename does not follow the required format. + :raise SkillTemplateError: If skill_path does not exist or If skill file cannot be read.
""" - file_path = Path(self.skill_path) - logger.debug("Loading skill from path: %s", file_path) + logger.debug("Loading skill from path: %s", self.skill_path) + if not self.skill_path.exists(): + msg = f"Skill path not found: {self.skill_path}" + raise SkillTemplateError(msg) try: - content = file_path.read_text(encoding="utf-8") - except FileNotFoundError as exc: - msg = f"Skill path not found: {file_path}" - raise FileNotFoundError(msg) from exc + content = self.skill_path.read_text(encoding="utf-8") + except OSError as exc: + msg = f"Failed to read skill file: {self.skill_path}" + logger.exception(msg) + raise SkillTemplateError(msg) from exc - logger.info("Loaded skill from path: %s", file_path) + logger.info("Loaded skill from path: %s", self.skill_path) return content + def build_system_prompt(self) -> str: + """Build the system prompt by loading instructions from the skill file. + + :return: Skill instruction string. + :raise ValueError: If skill filename does not follow the required format. + :raise SkillTemplateError: If skill_path does not exist or if skill file cannot be read. + """ + return self.load_skill() + @abstractmethod def build_user_prompt(self, payload: Mapping[str, Any]) -> str: """Build the user prompt. @@ -60,3 +87,16 @@ def build_user_prompt(self, payload: Mapping[str, Any]) -> str: :param payload: JSON-serializable task data. :return: User prompt string. """ + + def _validate_skill_filename(self) -> re.Match: + """Parse the skill filename to extract name and version. + + :return: Regex match object. + :raise SkillTemplateError: If filename does not follow the required format. 
+ """ + name = self.skill_path.name + match = self._skill_file_pattern.search(name) + if not match: + msg = f"Skill filename must follow the format '{{skill_name}}_{{version}}.md', got: '{self.skill_path.name}'" + raise SkillTemplateError(msg) + return match From 6f73db781b7ce98adc4bb23686948e8c44c346c6 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Wed, 10 Jun 2026 10:55:14 -0500 Subject: [PATCH 11/36] test: update skill tests based on PR feedback --- tests/unit/skills/test_skill_json_task.py | 29 +++++++++-------------- tests/unit/skills/test_skill_registry.py | 25 ++++++++++++++----- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/tests/unit/skills/test_skill_json_task.py b/tests/unit/skills/test_skill_json_task.py index e0b8935..68cceed 100644 --- a/tests/unit/skills/test_skill_json_task.py +++ b/tests/unit/skills/test_skill_json_task.py @@ -1,5 +1,6 @@ """Test that StructuredTaskRunner works correctly for skills""" +from pathlib import Path from typing import Any import pytest @@ -8,32 +9,24 @@ from wags_llm.cache.in_memory import InMemoryCache from wags_llm.client.base import InvokeJsonResponse, LLMJsonClient from wags_llm.services.structured_task import StructuredTaskRunner -from wags_llm.skills.base import BaseSkillTemplate +from wags_llm.skills.base import BaseSkillTemplate, SkillTemplateError from wags_llm.skills.registry import SkillRegistry -# NOTE: DummyClient, BadClient, and ResultModel are copied from -# tests/integration/services/test_json_task.py for easier code review. -# This file follows the same pattern as test_json_task.py but lives under -# unit/skills/ to keep all skill related tests together. -# Discuss with maintainer if this should move to integration/services/. 
class DummySkill(BaseSkillTemplate): """Simple skill for service tests.""" - skill_path = "tests/unit/skills/entity_detection.md" - version = "v1" + skill_path = Path("tests/unit/skills/test_skill_v1.md") def build_user_prompt(self, payload) -> str: """Build the user prompt.""" return f"Payload: {payload}" -# NOTE: new added to test missing skill file class MissingFileSkill(BaseSkillTemplate): """Missing skill file for service tests.""" - skill_path = "tests/unit/skills/does_not_exist.md" - version = "v1" + skill_path = Path("tests/unit/skills/does_not_exist_v1.md") def build_user_prompt(self, payload) -> str: """Build the user prompt.""" @@ -103,7 +96,7 @@ def test_execute_skill_success(): ) result = service.execute_skill( - skill_name="entity_detection", + skill_name="test_skill", skill_version="v1", payload={"text": "hello"}, response_model=ResultModel, @@ -123,7 +116,7 @@ def test_execute_skill_file_not_found(): skill_registry=registry, ) - with pytest.raises(FileNotFoundError): + with pytest.raises(SkillTemplateError): service.execute_skill( skill_name="does_not_exist", skill_version="v1", @@ -146,13 +139,13 @@ def test_execute_skill_uses_cache(): ) result1 = service.execute_skill( - skill_name="entity_detection", + skill_name="test_skill", skill_version="v1", payload={"x": 1}, response_model=ResultModel, ) result2 = service.execute_skill( - skill_name="entity_detection", + skill_name="test_skill", skill_version="v1", payload={"x": 1}, response_model=ResultModel, @@ -177,13 +170,13 @@ def test_execute_skill_cache_miss_for_different_payload(): ) service.execute_skill( - skill_name="entity_detection", + skill_name="test_skill", skill_version="v1", payload={"x": 1}, response_model=ResultModel, ) service.execute_skill( - skill_name="entity_detection", + skill_name="test_skill", skill_version="v1", payload={"x": 2}, response_model=ResultModel, @@ -204,7 +197,7 @@ def test_execute_skill_validation_error(): with pytest.raises(RuntimeError, match="Task failed"): 
service.execute_skill( - skill_name="entity_detection", + skill_name="test_skill", skill_version="v1", payload={"text": "hello"}, response_model=ResultModel, diff --git a/tests/unit/skills/test_skill_registry.py b/tests/unit/skills/test_skill_registry.py index 0afdc92..c58282d 100644 --- a/tests/unit/skills/test_skill_registry.py +++ b/tests/unit/skills/test_skill_registry.py @@ -1,16 +1,16 @@ import re from collections.abc import Mapping +from pathlib import Path from typing import Any import pytest -from wags_llm.skills.base import BaseSkillTemplate +from wags_llm.skills.base import BaseSkillTemplate, SkillTemplateError from wags_llm.skills.registry import SkillRegistry, build_empty_registry class DummySkill(BaseSkillTemplate): - skill_path = "tests/unit/skills/entity_detection.md" - version = "v1" + skill_path = Path("tests/unit/skills/test_skill_v1.md") def build_user_prompt(self, payload: Mapping[str, Any]) -> str: """Build the user prompt. @@ -31,12 +31,25 @@ def test_register_and_get_skill(): registry.register(skill) - assert registry.get("entity_detection", "v1") is skill + assert registry.get("test_skill", "v1") is skill def test_build_empty_registry(): registry = build_empty_registry() with pytest.raises( - KeyError, match=re.escape("'Skill not found: (entity_detection, v1)'") + KeyError, match=re.escape("'Skill not found: (test_skill, v1)'") ): - assert registry.get("entity_detection", "v1") + assert registry.get("test_skill", "v1") + + +def test_invalid_skill_filename(): + """Test that an invalid skill filename raises SkillTemplateError.""" + + class InvalidSkill(BaseSkillTemplate): + skill_path = Path("tests/unit/skills/invalid.md") + + def build_user_prompt(self, payload) -> str: + return f"Payload: {payload}" + + with pytest.raises(SkillTemplateError): + _ = InvalidSkill().name From 2152e05208dbd0c0ca9e7376b0d53e8f985c3520 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Thu, 11 Jun 2026 09:12:18 -0500 Subject: [PATCH 12/36] refactor: update 
CacheLookupResult based on PR feedback --- src/wags_llm/services/structured_task.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/wags_llm/services/structured_task.py b/src/wags_llm/services/structured_task.py index a4c45a8..68e1b0b 100644 --- a/src/wags_llm/services/structured_task.py +++ b/src/wags_llm/services/structured_task.py @@ -42,7 +42,7 @@ MAX_LOG_CHARS = int(getenv("MAX_LOG_CHARS", "500")) -class CacheCheckResult(BaseModel): +class CacheLookupResult(BaseModel): """Result of a cache lookup. :var cache_key: The cache key for this request. None if caching is disabled. @@ -50,7 +50,7 @@ class CacheCheckResult(BaseModel): """ cache_key: str | None - cached: Any | None + cached: BaseModel | None class StructuredTaskRunner: @@ -207,14 +207,14 @@ def _check_cache( version: str, payload: Mapping[str, Any], response_model: type[BaseModel], - ) -> CacheCheckResult: + ) -> CacheLookupResult: """Check cache for an existing result. :param name: Registered name. :param version: Registered version. :param payload: JSON-serializable task data. :param response_model: Pydantic model for validation. - :return: Tuple of (cache_key, validated result). Result is None on cache miss. + :return: Container with the cache key and cached object (validated result). Cached object is None on cache miss. 
""" if self.cache is not None: cache_key = self._cache_key( @@ -224,10 +224,10 @@ def _check_cache( ) cached = self.cache.get(cache_key) if cached is not None: - return CacheCheckResult( + return CacheLookupResult( cache_key=cache_key, cached=response_model.model_validate(cached) ) else: cache_key = None - return CacheCheckResult(cache_key=cache_key, cached=None) + return CacheLookupResult(cache_key=cache_key, cached=None) From 47771d0a34fe93e806aa2f76929fa417a5288e37 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Thu, 11 Jun 2026 09:28:16 -0500 Subject: [PATCH 13/36] fix: handle UnicodeDecodeError in load_skill and rename _get_skill_name_and_version --- src/wags_llm/skills/base.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/wags_llm/skills/base.py b/src/wags_llm/skills/base.py index e6fb5bf..759e4ca 100644 --- a/src/wags_llm/skills/base.py +++ b/src/wags_llm/skills/base.py @@ -20,7 +20,10 @@ class SkillTemplateError(Exception): class BaseSkillTemplate(ABC): """Base skill template. - :var skill_path: Path to the skill `.md` file. + :var skill_path: Path to the skill `.md` file. Must follow the format + {skill_name}_{version}.md (e.g. entity_detection_v1.md). + If the filename does not follow this format, a SkillTemplateError + will be raised on initialization. """ skill_path: Path @@ -29,9 +32,10 @@ class BaseSkillTemplate(ABC): def __init__(self) -> None: """Initialize the skill template and validate the skill filename format. + :raise SkillTemplateError: If skill_path does not follow the required format. """ - self._skill_filename_match = self._validate_skill_filename() + self._skill_filename_match = self._get_skill_name_and_version() @property def name(self) -> str: @@ -53,8 +57,8 @@ def load_skill(self) -> str: """Load skill instructions from file. :return: Skill instruction string. - :raise ValueError: If skill filename does not follow the required format. 
- :raise SkillTemplateError: If skill_path does not exist or If skill file cannot be read. + :raise SkillTemplateError: If skill_path does not exist, if the file + contains invalid UTF-8, or if the file cannot be read. """ logger.debug("Loading skill from path: %s", self.skill_path) if not self.skill_path.exists(): @@ -63,6 +67,10 @@ def load_skill(self) -> str: try: content = self.skill_path.read_text(encoding="utf-8") + except UnicodeDecodeError as exc: + msg = f"Skill file is not valid UTF-8: {self.skill_path}" + logger.exception(msg) + raise SkillTemplateError(msg) from exc except OSError as exc: msg = f"Failed to read skill file: {self.skill_path}" logger.exception(msg) @@ -75,7 +83,6 @@ def build_system_prompt(self) -> str: """Build the system prompt by loading instructions from the skill file. :return: Skill instruction string. - :raise ValueError: If skill filename does not follow the required format. :raise SkillTemplateError: If skill_path does not exist or if skill file cannot be read. """ return self.load_skill() @@ -88,7 +95,7 @@ def build_user_prompt(self, payload: Mapping[str, Any]) -> str: :return: User prompt string. """ - def _validate_skill_filename(self) -> re.Match: + def _get_skill_name_and_version(self) -> tuple[str, str]: """Parse the skill filename to extract name and version. :return: Regex match object. 
From ddd220ea7fc42a0065e5395f3b4a3a4341bc3d96 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Mon, 15 Jun 2026 10:46:06 -0500 Subject: [PATCH 14/36] feat: add unified registry replacing PromptRegistry and SkillRegistry --- src/wags_llm/registry/__init__.py | 11 ++++++ src/wags_llm/registry/base.py | 58 +++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 src/wags_llm/registry/__init__.py create mode 100644 src/wags_llm/registry/base.py diff --git a/src/wags_llm/registry/__init__.py b/src/wags_llm/registry/__init__.py new file mode 100644 index 0000000..e9faa20 --- /dev/null +++ b/src/wags_llm/registry/__init__.py @@ -0,0 +1,11 @@ +"""Prompt interfaces and registry. + +Define and manage versioned prompt templates. +""" + +from wags_llm.registry.base import Registry, build_empty_registry + +__all__ = [ + "Registry", + "build_empty_registry", +] diff --git a/src/wags_llm/registry/base.py b/src/wags_llm/registry/base.py new file mode 100644 index 0000000..1b5b874 --- /dev/null +++ b/src/wags_llm/registry/base.py @@ -0,0 +1,58 @@ +"""Registry. + +Maps (name, version) -> template instance. + +Users typically: +* create prompts or skills in their project +* register them here or pass a custom registry +""" + +import logging + +from wags_llm.templates.base import PromptTemplate +from wags_llm.templates.skill_template import SkillTemplate + +_logger = logging.getLogger(__name__) + + +class Registry: + """Store and retrieve prompt and skill templates.""" + + def __init__(self) -> None: + """Initialize an empty template registry.""" + self._templates: dict[tuple[str, str], PromptTemplate | SkillTemplate] = {} + + def register(self, template: PromptTemplate | SkillTemplate) -> None: + """Register a template. + + :param template: Template instance to register. 
+ """ + _logger.debug( + "Registering template: name='%s', version='%s'", + template.name, + template.version, + ) + self._templates[(template.name, template.version)] = template + + def get(self, name: str, version: str) -> PromptTemplate | SkillTemplate: + """Retrieve a template by name and version. + + :param name: Template name. + :param version: Template version. + :return: Registered template. + :raise KeyError: If template not found. + """ + try: + return self._templates[(name, version)] + except KeyError as exc: + msg = f"Template not found: ({name}, {version})" + _logger.exception(msg) + raise KeyError(msg) from exc + + +def build_empty_registry() -> Registry: + """Create an empty registry. + + :return: New Registry instance. + """ + return Registry() From 124eea474eac3ed5da928c301e49013a245af367 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Mon, 15 Jun 2026 10:49:39 -0500 Subject: [PATCH 15/36] feat: add unified templates directory replacing prompts and skills directories --- src/wags_llm/templates/__init__.py | 12 +++ src/wags_llm/templates/base.py | 36 ++++++++ src/wags_llm/templates/skill_template.py | 100 +++++++++++++++++++++++ 3 files changed, 148 insertions(+) create mode 100644 src/wags_llm/templates/__init__.py create mode 100644 src/wags_llm/templates/base.py create mode 100644 src/wags_llm/templates/skill_template.py diff --git a/src/wags_llm/templates/__init__.py b/src/wags_llm/templates/__init__.py new file mode 100644 index 0000000..521c6a9 --- /dev/null +++ b/src/wags_llm/templates/__init__.py @@ -0,0 +1,12 @@ +"""Prompt interfaces and registry. + +Define and manage versioned prompt templates. 
+""" + +from wags_llm.templates.base import PromptTemplate +from wags_llm.templates.skill_template import SkillTemplate + +__all__ = [ + "PromptTemplate", + "SkillTemplate", +] diff --git a/src/wags_llm/templates/base.py b/src/wags_llm/templates/base.py new file mode 100644 index 0000000..e00158d --- /dev/null +++ b/src/wags_llm/templates/base.py @@ -0,0 +1,36 @@ +"""Prompt interface. + +Users extend this to define new tasks. +""" + +from abc import ABC, abstractmethod +from collections.abc import Mapping +from typing import Any + + +class PromptTemplate(ABC): + """Base prompt template. + + :var name: Prompt name. + :var version: Prompt version. + """ + + name: str + version: str + + @abstractmethod + def build_system_prompt(self) -> str: + """Build the system prompt. + + Defines global instructions for the task (e.g., output format, constraints). + + :return: System prompt string. + """ + + @abstractmethod + def build_user_prompt(self, payload: Mapping[str, Any]) -> str: + """Build the user prompt. + + :param payload: JSON-serializable task data used to construct the prompt. + :return: User prompt string. + """ diff --git a/src/wags_llm/templates/skill_template.py b/src/wags_llm/templates/skill_template.py new file mode 100644 index 0000000..7a31f05 --- /dev/null +++ b/src/wags_llm/templates/skill_template.py @@ -0,0 +1,100 @@ +"""Skill interface. + +Users extend this to define new skill inputs. +""" + +import logging +import re +from pathlib import Path + +from wags_llm.templates.base import PromptTemplate + +logger = logging.getLogger(__name__) + + +class SkillTemplateError(Exception): + """Raise custom exceptions for SkillTemplateError.""" + + +class SkillTemplate(PromptTemplate): + """Base skill template. + + :var skill_path: Path to the skill `.md` file. Must follow the format + {skill_name}_{version}.md (e.g. entity_detection_v1.md). + If the filename does not follow this format, a SkillTemplateError + will be raised on initialization. 
+ """ + + skill_path: Path + + _skill_file_pattern = re.compile(r"^(?P<name>.+)_(?P<version>[^_]+)\.md$") + + def __init__(self) -> None: + """Initialize the skill template and validate the skill filename format. + + :raise SkillTemplateError: If skill_path does not follow the required format. + """ + self._skill_filename_match = self._get_skill_name_and_version() + + @property + def name(self) -> str: + """Derive skill name from the file stem. + + :return: Skill name string. + """ + return self._skill_filename_match.group("name") + + @property + def version(self) -> str: + """Derive skill version from the file stem. + + :return: Skill version string. + """ + return self._skill_filename_match.group("version") + + def load_skill(self) -> str: + """Load skill instructions from file. + + :return: Skill instruction string. + :raise SkillTemplateError: If skill_path does not exist, if the file + contains invalid UTF-8, or if the file cannot be read. + """ + logger.debug("Loading skill from path: %s", self.skill_path) + if not self.skill_path.exists(): + msg = f"Skill path not found: {self.skill_path}" + raise SkillTemplateError(msg) + + try: + content = self.skill_path.read_text(encoding="utf-8") + except UnicodeDecodeError as exc: + msg = f"Skill file is not valid UTF-8: {self.skill_path}" + logger.exception(msg) + raise SkillTemplateError(msg) from exc + except OSError as exc: + msg = f"Failed to read skill file: {self.skill_path}" + logger.exception(msg) + raise SkillTemplateError(msg) from exc + + logger.info("Loaded skill from path: %s", self.skill_path) + return content + + def build_system_prompt(self) -> str: + """Build the system prompt by loading instructions from the skill file. + + :return: Skill instruction string. + :raise SkillTemplateError: If skill_path does not exist or if skill file cannot be read. + """ + return self.load_skill() + + def _get_skill_name_and_version(self) -> tuple[str, str]: + """Parse the skill filename to extract name and version.
+ + :return: Regex match object. + :raise SkillTemplateError: If filename does not follow the required format. + """ + name = self.skill_path.name + match = self._skill_file_pattern.search(name) + if not match: + msg = f"Skill filename must follow the format '{{skill_name}}_{{version}}.md', got: '{self.skill_path.name}'" + raise SkillTemplateError(msg) + return match From 9bd8891c2f965b9a28e97282268c002ebee6d1ec Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Mon, 15 Jun 2026 10:50:31 -0500 Subject: [PATCH 16/36] refactor: remove prompts and skills directories in favor of unified templates and registry --- src/wags_llm/prompts/__init__.py | 13 ---- src/wags_llm/prompts/base.py | 36 ---------- src/wags_llm/prompts/registry.py | 55 ---------------- src/wags_llm/skills/__init__.py | 13 ---- src/wags_llm/skills/base.py | 109 ------------------------------- src/wags_llm/skills/registry.py | 55 ---------------- 6 files changed, 281 deletions(-) delete mode 100644 src/wags_llm/prompts/__init__.py delete mode 100644 src/wags_llm/prompts/base.py delete mode 100644 src/wags_llm/prompts/registry.py delete mode 100644 src/wags_llm/skills/__init__.py delete mode 100644 src/wags_llm/skills/base.py delete mode 100644 src/wags_llm/skills/registry.py diff --git a/src/wags_llm/prompts/__init__.py b/src/wags_llm/prompts/__init__.py deleted file mode 100644 index 33aa4a3..0000000 --- a/src/wags_llm/prompts/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -"""Prompt interfaces and registry. - -Define and manage versioned prompt templates. -""" - -from wags_llm.prompts.base import BasePromptTemplate -from wags_llm.prompts.registry import PromptRegistry, build_empty_registry - -__all__ = [ - "BasePromptTemplate", - "PromptRegistry", - "build_empty_registry", -] diff --git a/src/wags_llm/prompts/base.py b/src/wags_llm/prompts/base.py deleted file mode 100644 index 63b8792..0000000 --- a/src/wags_llm/prompts/base.py +++ /dev/null @@ -1,36 +0,0 @@ -"""Prompt interface. 
- -Users extend this to define new tasks. -""" - -from abc import ABC, abstractmethod -from collections.abc import Mapping -from typing import Any - - -class BasePromptTemplate(ABC): - """Base prompt template. - - :var name: Prompt name. - :var version: Prompt version. - """ - - name: str - version: str - - @abstractmethod - def build_system_prompt(self) -> str: - """Build the system prompt. - - Defines global instructions for the task (e.g., output format, constraints). - - :return: System prompt string. - """ - - @abstractmethod - def build_user_prompt(self, payload: Mapping[str, Any]) -> str: - """Build the user prompt. - - :param payload: JSON-serializable task data used to construct the prompt. - :return: User prompt string. - """ diff --git a/src/wags_llm/prompts/registry.py b/src/wags_llm/prompts/registry.py deleted file mode 100644 index a0cefab..0000000 --- a/src/wags_llm/prompts/registry.py +++ /dev/null @@ -1,55 +0,0 @@ -"""Prompt registry. - -Maps (name, version) -> prompt instance. - -Users typically: -* create prompts in their project -* register them here or pass a custom registry -""" - -import logging - -from wags_llm.prompts.base import BasePromptTemplate - -_logger = logging.getLogger(__name__) - - -class PromptRegistry: - """Store and retrieve prompts.""" - - def __init__(self) -> None: - """Initialize an empty prompt registry.""" - self._prompts: dict[tuple[str, str], BasePromptTemplate] = {} - - def register(self, prompt: BasePromptTemplate) -> None: - """Register a prompt. - - :param prompt: Prompt instance to register. - """ - _logger.debug( - "Registering prompt: name='%s', version='%s'", prompt.name, prompt.version - ) - self._prompts[(prompt.name, prompt.version)] = prompt - - def get(self, name: str, version: str) -> BasePromptTemplate: - """Retrieve a prompt. - - :param name: Prompt name. - :param version: Prompt version. - :return: Registered prompt. - :raise KeyError: If prompt is not found. 
- """ - try: - return self._prompts[(name, version)] - except KeyError as exc: - msg = f"Prompt not found: ({name}, {version})" - _logger.exception(msg) - raise KeyError(msg) from exc - - -def build_empty_registry() -> PromptRegistry: - """Create an empty prompt registry. - - :return: New PromptRegistry instance. - """ - return PromptRegistry() diff --git a/src/wags_llm/skills/__init__.py b/src/wags_llm/skills/__init__.py deleted file mode 100644 index cbe0258..0000000 --- a/src/wags_llm/skills/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -"""Skill interfaces and registry. - -Define and manage versioned skill templates. -""" - -from wags_llm.skills.base import BaseSkillTemplate -from wags_llm.skills.registry import SkillRegistry, build_empty_registry - -__all__ = [ - "BaseSkillTemplate", - "SkillRegistry", - "build_empty_registry", -] diff --git a/src/wags_llm/skills/base.py b/src/wags_llm/skills/base.py deleted file mode 100644 index 759e4ca..0000000 --- a/src/wags_llm/skills/base.py +++ /dev/null @@ -1,109 +0,0 @@ -"""Skill interface. - -Users extend this to define new skill inputs. -""" - -import logging -import re -from abc import ABC, abstractmethod -from collections.abc import Mapping -from pathlib import Path -from typing import Any - -logger = logging.getLogger(__name__) - - -class SkillTemplateError(Exception): - """Raise custom exceptions for SkillTemplateError.""" - - -class BaseSkillTemplate(ABC): - """Base skill template. - - :var skill_path: Path to the skill `.md` file. Must follow the format - {skill_name}_{version}.md (e.g. entity_detection_v1.md). - If the filename does not follow this format, a SkillTemplateError - will be raised on initialization. - """ - - skill_path: Path - - _skill_file_pattern = re.compile(r"^(?P<name>.+)_(?P<version>[^_]+)\.md$") - - def __init__(self) -> None: - """Initialize the skill template and validate the skill filename format. - - :raise SkillTemplateError: If skill_path does not follow the required format.
- """ - self._skill_filename_match = self._get_skill_name_and_version() - - @property - def name(self) -> str: - """Derive skill name from the file stem. - - :return: Skill name string. - """ - return self._skill_filename_match.group("name") - - @property - def version(self) -> str: - """Derive skill version from the file stem. - - :return: Skill version string. - """ - return self._skill_filename_match.group("version") - - def load_skill(self) -> str: - """Load skill instructions from file. - - :return: Skill instruction string. - :raise SkillTemplateError: If skill_path does not exist, if the file - contains invalid UTF-8, or if the file cannot be read. - """ - logger.debug("Loading skill from path: %s", self.skill_path) - if not self.skill_path.exists(): - msg = f"Skill path not found: {self.skill_path}" - raise SkillTemplateError(msg) - - try: - content = self.skill_path.read_text(encoding="utf-8") - except UnicodeDecodeError as exc: - msg = f"Skill file is not valid UTF-8: {self.skill_path}" - logger.exception(msg) - raise SkillTemplateError(msg) from exc - except OSError as exc: - msg = f"Failed to read skill file: {self.skill_path}" - logger.exception(msg) - raise SkillTemplateError(msg) from exc - - logger.info("Loaded skill from path: %s", self.skill_path) - return content - - def build_system_prompt(self) -> str: - """Build the system prompt by loading instructions from the skill file. - - :return: Skill instruction string. - :raise SkillTemplateError: If skill_path does not exist or if skill file cannot be read. - """ - return self.load_skill() - - @abstractmethod - def build_user_prompt(self, payload: Mapping[str, Any]) -> str: - """Build the user prompt. - - :param payload: JSON-serializable task data. - :return: User prompt string. - """ - - def _get_skill_name_and_version(self) -> tuple[str, str]: - """Parse the skill filename to extract name and version. - - :return: Regex match object. 
- :raise SkillTemplateError: If filename does not follow the required format. - """ - name = self.skill_path.name - match = self._skill_file_pattern.search(name) - if not match: - msg = f"Skill filename must follow the format '{{skill_name}}_{{version}}.md', got: '{self.skill_path.name}'" - raise SkillTemplateError(msg) - return match diff --git a/src/wags_llm/skills/registry.py b/src/wags_llm/skills/registry.py deleted file mode 100644 index 0c91b2a..0000000 --- a/src/wags_llm/skills/registry.py +++ /dev/null @@ -1,55 +0,0 @@ -"""skill registry. - -Maps (name, version) -> skill instance. - -Users typically: -* create skills in their project -* register them here or pass a custom registry -""" - -import logging - -from wags_llm.skills.base import BaseSkillTemplate - -_logger = logging.getLogger(__name__) - - -class SkillRegistry: - """Store and retrieve skill.""" - - def __init__(self) -> None: - """Initialize an empty skill registry.""" - self._skills: dict[tuple[str, str], BaseSkillTemplate] = {} - - def register(self, skill: BaseSkillTemplate) -> None: - """Register a skill. - - :param skill: skill instance to register. - """ - _logger.debug( - "Registering skill: name='%s', version='%s'", skill.name, skill.version - ) - self._skills[(skill.name, skill.version)] = skill - - def get(self, name: str, version: str) -> BaseSkillTemplate: - """Retrieve a skill by name and version. - - :param name: Skill name. - :param version: Skill version. - :return: Registered skill. - :raise KeyError: If skill is not found. - """ - try: - return self._skills[(name, version)] - except KeyError as exc: - msg = f"Skill not found: ({name}, {version})" - _logger.exception(msg) - raise KeyError(msg) from exc - - -def build_empty_registry() -> SkillRegistry: - """Create an empty skill registry. - - :return: New SkillRegistry instance. 
- """ - return SkillRegistry() From 18b7aff92a2a64deb5bf223602f4692d91062bce Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Mon, 15 Jun 2026 10:50:58 -0500 Subject: [PATCH 17/36] refactor: update StructuredTaskRunner to use unified registry --- src/wags_llm/services/structured_task.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/src/wags_llm/services/structured_task.py b/src/wags_llm/services/structured_task.py index 68e1b0b..def69c4 100644 --- a/src/wags_llm/services/structured_task.py +++ b/src/wags_llm/services/structured_task.py @@ -24,18 +24,10 @@ from wags_llm.cache.base import BaseCache from wags_llm.client.base import LLMJsonClient from wags_llm.client.exceptions import LLMClientError -from wags_llm.prompts.registry import ( - PromptRegistry, -) -from wags_llm.prompts.registry import ( +from wags_llm.registry import Registry +from wags_llm.registry import ( build_empty_registry as build_empty_prompt_registry, ) -from wags_llm.skills.registry import ( - SkillRegistry, -) -from wags_llm.skills.registry import ( - build_empty_registry as build_empty_skill_registry, -) _logger = logging.getLogger(__name__) @@ -59,20 +51,17 @@ class StructuredTaskRunner: def __init__( self, client: LLMJsonClient, - prompt_registry: PromptRegistry | None = None, - skill_registry: SkillRegistry | None = None, + prompt_registry: Registry | None = None, cache: BaseCache | None = None, ) -> None: """Initialize the structured task runner. :param client: LLM client used to execute prompts. :param prompt_registry: Registry used to resolve prompts. - :param skill_registry: Registry used to resolve skills. :param cache: Optional cache for storing and retrieving task results. """ self.client = client self.prompt_registry = prompt_registry or build_empty_prompt_registry() - self.skill_registry = skill_registry or build_empty_skill_registry() self.cache = cache def execute_skill( @@ -91,7 +80,7 @@ def execute_skill( :return: Validated task result. 
:raise RuntimeError: If execution or validation fails. """ - skill = self.skill_registry.get(skill_name, skill_version) + skill = self.prompt_registry.get(skill_name, skill_version) cache_result = self._check_cache( name=skill_name, From 28aac05d9fc464f5db559ab127ef3e68544ed8c3 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Mon, 15 Jun 2026 10:51:53 -0500 Subject: [PATCH 18/36] test: update tests to reflect new templates and registry structure --- tests/integration/services/test_json_task.py | 14 +++++----- tests/unit/prompts/test_registry.py | 12 ++++----- tests/unit/skills/test_skill_json_task.py | 28 ++++++++++---------- tests/unit/skills/test_skill_registry.py | 12 ++++----- 4 files changed, 33 insertions(+), 33 deletions(-) diff --git a/tests/integration/services/test_json_task.py b/tests/integration/services/test_json_task.py index 4d833bc..64de0aa 100644 --- a/tests/integration/services/test_json_task.py +++ b/tests/integration/services/test_json_task.py @@ -7,12 +7,12 @@ from wags_llm.cache.in_memory import InMemoryCache from wags_llm.client.base import InvokeJsonResponse, LLMJsonClient -from wags_llm.prompts.base import BasePromptTemplate -from wags_llm.prompts.registry import PromptRegistry +from wags_llm.registry import Registry from wags_llm.services.structured_task import StructuredTaskRunner +from wags_llm.templates.base import PromptTemplate -class DummyPrompt(BasePromptTemplate): +class DummyPrompt(PromptTemplate): """Simple prompt for service tests.""" name = "test_task" @@ -81,7 +81,7 @@ class ResultModel(BaseModel): def test_run_success(): """Test that run method works correctly""" - registry = PromptRegistry() + registry = Registry() registry.register(DummyPrompt()) service = StructuredTaskRunner( @@ -101,7 +101,7 @@ def test_run_success(): def test_run_uses_cache(): """Test that run method works correctly with cache""" - registry = PromptRegistry() + registry = Registry() registry.register(DummyPrompt()) client = DummyClient() cache = 
InMemoryCache() @@ -132,7 +132,7 @@ def test_run_uses_cache(): def test_run_cache_miss_for_different_payload(): """Test that run method works correctly with cache that uses different payload""" - registry = PromptRegistry() + registry = Registry() registry.register(DummyPrompt()) client = DummyClient() cache = InMemoryCache() @@ -161,7 +161,7 @@ def test_run_cache_miss_for_different_payload(): def test_run_validation_error(): """Test that run raises error when response validation fails.""" - registry = PromptRegistry() + registry = Registry() registry.register(DummyPrompt()) service = StructuredTaskRunner( diff --git a/tests/unit/prompts/test_registry.py b/tests/unit/prompts/test_registry.py index f8f65cd..1d9e803 100644 --- a/tests/unit/prompts/test_registry.py +++ b/tests/unit/prompts/test_registry.py @@ -1,4 +1,4 @@ -"""Test that PromptRegistry works correctly""" +"""Test that Registry works correctly""" import re from collections.abc import Mapping @@ -6,11 +6,11 @@ import pytest -from wags_llm.prompts.base import BasePromptTemplate -from wags_llm.prompts.registry import PromptRegistry, build_empty_registry +from wags_llm.registry import Registry, build_empty_registry +from wags_llm.templates.base import PromptTemplate -class DummyPrompt(BasePromptTemplate): +class DummyPrompt(PromptTemplate): """Simple prompt for registry tests.""" name = "test_task" @@ -38,7 +38,7 @@ def build_user_prompt(self, payload: Mapping[str, Any]) -> str: def test_register_and_get_prompt(): """Register and retrieve a prompt.""" - registry = PromptRegistry() + registry = Registry() prompt = DummyPrompt() registry.register(prompt) @@ -50,6 +50,6 @@ def test_build_empty_registry(): """Test that build_empty_registry works correctly and prompt registry raises KeyError when no prompts are registered""" registry = build_empty_registry() with pytest.raises( - KeyError, match=re.escape("'Prompt not found: (test_task, v1)'") + KeyError, match=re.escape("'Template not found: (test_task, v1)'") 
): assert registry.get("test_task", "v1") diff --git a/tests/unit/skills/test_skill_json_task.py b/tests/unit/skills/test_skill_json_task.py index 68cceed..488df5a 100644 --- a/tests/unit/skills/test_skill_json_task.py +++ b/tests/unit/skills/test_skill_json_task.py @@ -8,12 +8,12 @@ from wags_llm.cache.in_memory import InMemoryCache from wags_llm.client.base import InvokeJsonResponse, LLMJsonClient +from wags_llm.registry import Registry from wags_llm.services.structured_task import StructuredTaskRunner -from wags_llm.skills.base import BaseSkillTemplate, SkillTemplateError -from wags_llm.skills.registry import SkillRegistry +from wags_llm.templates.skill_template import SkillTemplate, SkillTemplateError -class DummySkill(BaseSkillTemplate): +class DummySkill(SkillTemplate): """Simple skill for service tests.""" skill_path = Path("tests/unit/skills/test_skill_v1.md") @@ -23,7 +23,7 @@ def build_user_prompt(self, payload) -> str: return f"Payload: {payload}" -class MissingFileSkill(BaseSkillTemplate): +class MissingFileSkill(SkillTemplate): """Missing skill file for service tests.""" skill_path = Path("tests/unit/skills/does_not_exist_v1.md") @@ -87,12 +87,12 @@ class ResultModel(BaseModel): def test_execute_skill_success(): """Test that execute_skill works correctly.""" - registry = SkillRegistry() + registry = Registry() registry.register(DummySkill()) service = StructuredTaskRunner( client=DummyClient(), - skill_registry=registry, + prompt_registry=registry, ) result = service.execute_skill( @@ -108,12 +108,12 @@ def test_execute_skill_success(): def test_execute_skill_file_not_found(): """Test that execute_skill raises FileNotFoundError when skill file does not exist.""" - registry = SkillRegistry() + registry = Registry() registry.register(MissingFileSkill()) service = StructuredTaskRunner( client=DummyClient(), - skill_registry=registry, + prompt_registry=registry, ) with pytest.raises(SkillTemplateError): @@ -127,14 +127,14 @@ def 
test_execute_skill_file_not_found(): def test_execute_skill_uses_cache(): """Test that execute_skill works correctly with cache.""" - registry = SkillRegistry() + registry = Registry() registry.register(DummySkill()) client = DummyClient() cache = InMemoryCache() service = StructuredTaskRunner( client=client, - skill_registry=registry, + prompt_registry=registry, cache=cache, ) @@ -158,14 +158,14 @@ def test_execute_skill_uses_cache(): def test_execute_skill_cache_miss_for_different_payload(): """Test that execute_skill cache misses on different payload.""" - registry = SkillRegistry() + registry = Registry() registry.register(DummySkill()) client = DummyClient() cache = InMemoryCache() service = StructuredTaskRunner( client=client, - skill_registry=registry, + prompt_registry=registry, cache=cache, ) @@ -187,12 +187,12 @@ def test_execute_skill_cache_miss_for_different_payload(): def test_execute_skill_validation_error(): """Test that execute_skill raises RuntimeError when response validation fails.""" - registry = SkillRegistry() + registry = Registry() registry.register(DummySkill()) service = StructuredTaskRunner( client=BadClient(), - skill_registry=registry, + prompt_registry=registry, ) with pytest.raises(RuntimeError, match="Task failed"): diff --git a/tests/unit/skills/test_skill_registry.py b/tests/unit/skills/test_skill_registry.py index c58282d..626b19b 100644 --- a/tests/unit/skills/test_skill_registry.py +++ b/tests/unit/skills/test_skill_registry.py @@ -5,11 +5,11 @@ import pytest -from wags_llm.skills.base import BaseSkillTemplate, SkillTemplateError -from wags_llm.skills.registry import SkillRegistry, build_empty_registry +from wags_llm.registry import Registry, build_empty_registry +from wags_llm.templates.skill_template import SkillTemplate, SkillTemplateError -class DummySkill(BaseSkillTemplate): +class DummySkill(SkillTemplate): skill_path = Path("tests/unit/skills/test_skill_v1.md") def build_user_prompt(self, payload: Mapping[str, Any]) -> 
str: @@ -26,7 +26,7 @@ def build_user_prompt(self, payload: Mapping[str, Any]) -> str: def test_register_and_get_skill(): - registry = SkillRegistry() + registry = Registry() skill = DummySkill() registry.register(skill) @@ -37,7 +37,7 @@ def test_register_and_get_skill(): def test_build_empty_registry(): registry = build_empty_registry() with pytest.raises( - KeyError, match=re.escape("'Skill not found: (test_skill, v1)'") + KeyError, match=re.escape("'Template not found: (test_skill, v1)'") ): assert registry.get("test_skill", "v1") @@ -45,7 +45,7 @@ def test_build_empty_registry(): def test_invalid_skill_filename(): """Test that an invalid skill filename raises SkillTemplateError.""" - class InvalidSkill(BaseSkillTemplate): + class InvalidSkill(SkillTemplate): skill_path = Path("tests/unit/skills/invalid.md") def build_user_prompt(self, payload) -> str: From 6446ada2a7a702a0e60d5a973761d991ef7d3f91 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Mon, 15 Jun 2026 10:52:03 -0500 Subject: [PATCH 19/36] chore: update example notebook to reflect new structure --- notebooks/example.ipynb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/notebooks/example.ipynb b/notebooks/example.ipynb index 2d82478..486236f 100644 --- a/notebooks/example.ipynb +++ b/notebooks/example.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "b5fe35fe", "metadata": {}, "outputs": [], @@ -24,9 +24,9 @@ "\n", "from wags_llm.cache import InMemoryCache\n", "from wags_llm.client.bedrock import BedrockClaudeJsonClient\n", - "from wags_llm.prompts.base import BasePromptTemplate\n", - "from wags_llm.prompts.registry import PromptRegistry\n", + "from wags_llm.registry.base import Registry\n", "from wags_llm.services.structured_task import StructuredTaskRunner\n", + "from wags_llm.templates.base import PromptTemplate\n", "\n", "logging.basicConfig(\n", " stream=sys.stdout,\n", @@ -46,12 +46,12 @@ }, { "cell_type": 
"code", - "execution_count": 2, + "execution_count": null, "id": "ba6504ff", "metadata": {}, "outputs": [], "source": [ - "class MyPrompt(BasePromptTemplate):\n", + "class MyPrompt(PromptTemplate):\n", " name = \"mondo_id_classification\"\n", " version = \"v1\"\n", "\n", @@ -112,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "5b14b5ce", "metadata": {}, "outputs": [ @@ -133,7 +133,7 @@ " profile_name=\"dev-account\",\n", ")\n", "\n", - "registry = PromptRegistry()\n", + "registry = Registry()\n", "registry.register(MyPrompt())\n", "\n", "service = StructuredTaskRunner(\n", From 3d742f32e32124502c046cb0fe86a57c3015dcae Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Mon, 15 Jun 2026 10:59:04 -0500 Subject: [PATCH 20/36] refactor: update _get_skill_name_and_version to return tuple[str, str] --- src/wags_llm/skills/base.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/wags_llm/skills/base.py b/src/wags_llm/skills/base.py index 759e4ca..9c8fa08 100644 --- a/src/wags_llm/skills/base.py +++ b/src/wags_llm/skills/base.py @@ -35,7 +35,7 @@ def __init__(self) -> None: :raise SkillTemplateError: If skill_path does not follow the required format. """ - self._skill_filename_match = self._get_skill_name_and_version() + self._name, self._version = self._get_skill_name_and_version() @property def name(self) -> str: @@ -43,7 +43,7 @@ def name(self) -> str: :return: Skill name string. """ - return self._skill_filename_match.group("name") + return self._name @property def version(self) -> str: @@ -51,7 +51,7 @@ def version(self) -> str: :return: Skill version string. """ - return self._skill_filename_match.group("version") + return self._version def load_skill(self) -> str: """Load skill instructions from file. @@ -83,7 +83,8 @@ def build_system_prompt(self) -> str: """Build the system prompt by loading instructions from the skill file. :return: Skill instruction string. 
- :raise SkillTemplateError: If skill_path does not exist or if skill file cannot be read. + :raise SkillTemplateError: If skill_path does not exist, if the file + contains invalid UTF-8, or if the file cannot be read. """ return self.load_skill() @@ -98,7 +99,7 @@ def build_user_prompt(self, payload: Mapping[str, Any]) -> str: def _get_skill_name_and_version(self) -> tuple[str, str]: """Parse the skill filename to extract name and version. - :return: Regex match object. + :return: Tuple of (name, version) strings. :raise SkillTemplateError: If filename does not follow the required format. """ name = self.skill_path.name @@ -106,4 +107,4 @@ def _get_skill_name_and_version(self) -> tuple[str, str]: if not match: msg = f"Skill filename must follow the format '{{skill_name}}_{{version}}.md', got: '{self.skill_path.name}'" raise SkillTemplateError(msg) - return match + return match.group("name"), match.group("version") From c8cff35354be35f8d33dd9fd7c9892fc555055cd Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Tue, 16 Jun 2026 19:49:35 -0500 Subject: [PATCH 21/36] Use full skill path in filename error message --- src/wags_llm/skills/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wags_llm/skills/base.py b/src/wags_llm/skills/base.py index 9c8fa08..ac06579 100644 --- a/src/wags_llm/skills/base.py +++ b/src/wags_llm/skills/base.py @@ -105,6 +105,6 @@ def _get_skill_name_and_version(self) -> tuple[str, str]: name = self.skill_path.name match = self._skill_file_pattern.search(name) if not match: - msg = f"Skill filename must follow the format '{{skill_name}}_{{version}}.md', got: '{self.skill_path.name}'" + msg = f"Skill filename must follow the format '{{skill_name}}_{{version}}.md', got path: '{self.skill_path}'" raise SkillTemplateError(msg) return match.group("name"), match.group("version") From ab912990a26241b33d90e7e477ec6fe05a85459a Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Thu, 18 Jun 2026 14:43:22 -0500 Subject: [PATCH 
22/36] refactor: simplify StructuredTaskRunner with unified _execute() method --- src/wags_llm/services/structured_task.py | 90 ++++++++++++------------ 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/src/wags_llm/services/structured_task.py b/src/wags_llm/services/structured_task.py index def69c4..dd880c8 100644 --- a/src/wags_llm/services/structured_task.py +++ b/src/wags_llm/services/structured_task.py @@ -1,14 +1,14 @@ -"""Run LLM prompts and return schema-validated structured outputs. +"""Run LLM prompts or skills and return schema-validated structured outputs. Inputs: -- prompt +- prompt or skill name and version - context + payload - response model (Pydantic) Returns validated output. Users extend by: -- writing prompts +- writing prompts or defining skills - defining response models (Pydantic) """ @@ -17,17 +17,14 @@ import logging from collections.abc import Mapping from os import getenv -from typing import Any +from typing import Any, Literal from pydantic import BaseModel, ValidationError from wags_llm.cache.base import BaseCache from wags_llm.client.base import LLMJsonClient from wags_llm.client.exceptions import LLMClientError -from wags_llm.registry import Registry -from wags_llm.registry import ( - build_empty_registry as build_empty_prompt_registry, -) +from wags_llm.registry import Registry, build_empty_registry _logger = logging.getLogger(__name__) @@ -51,17 +48,17 @@ class StructuredTaskRunner: def __init__( self, client: LLMJsonClient, - prompt_registry: Registry | None = None, + registry: Registry | None = None, cache: BaseCache | None = None, ) -> None: """Initialize the structured task runner. - :param client: LLM client used to execute prompts. - :param prompt_registry: Registry used to resolve prompts. + :param client: LLM client used to execute prompts or skills. + :param registry: Registry used to resolve prompts or skills. :param cache: Optional cache for storing and retrieving task results. 
""" self.client = client - self.prompt_registry = prompt_registry or build_empty_prompt_registry() + self.registry = registry or build_empty_registry() self.cache = cache def execute_skill( @@ -77,40 +74,17 @@ def execute_skill( :param skill_version: Registered skill version. :param payload: JSON-serializable task data. :param response_model: Pydantic model for validation. - :return: Validated task result. + :return: Validated skill result. :raise RuntimeError: If execution or validation fails. """ - skill = self.prompt_registry.get(skill_name, skill_version) - - cache_result = self._check_cache( + return self._execute( name=skill_name, version=skill_version, payload=payload, response_model=response_model, + kind="Skill", ) - if cache_result.cached is not None: - return cache_result.cached - - try: - invoke_json_response = self.client.invoke_json( - system_prompt=skill.build_system_prompt(), - user_prompt=skill.build_user_prompt(payload=payload), - json_schema=response_model.model_json_schema(), - ) - - result = response_model.model_validate(invoke_json_response.parsed_json) - - if self.cache is not None and cache_result.cache_key is not None: - self.cache.set(cache_result.cache_key, result.model_dump()) - - except (LLMClientError, ValidationError) as exc: - msg = f"Task failed: {exc}" - _logger.exception(msg) - raise RuntimeError(msg) from exc - else: - return result - def execute_prompt( self, prompt_name: str, @@ -118,20 +92,46 @@ def execute_prompt( payload: Mapping[str, Any], response_model: type[BaseModel], ) -> BaseModel: - """Execute a task and return validated output. + """Execute a prompt and return validated output. :param prompt_name: Registered prompt name. :param prompt_version: Registered prompt version. :param payload: JSON-serializable task data. :param response_model: Pydantic model for validation. + :return: Validated prompt result. + :raise RuntimeError: If execution or validation fails. 
+ """ + return self._execute( + name=prompt_name, + version=prompt_version, + payload=payload, + response_model=response_model, + kind="Prompt", + ) + + def _execute( + self, + name: str, + version: str, + payload: Mapping[str, Any], + response_model: type[BaseModel], + kind: Literal["Skill", "Prompt"], + ) -> BaseModel: + """Execute a task and return validated output. + + :param name: Registered task name. + :param version: Registered task version. + :param payload: JSON-serializable task data. + :param response_model: Pydantic model for validation. + :param kind: Display label for the registered task type, either "Skill" or "Prompt". :return: Validated task result. :raise RuntimeError: If execution or validation fails. """ - prompt = self.prompt_registry.get(prompt_name, prompt_version) + registered_task = self.registry.get(name, version) cache_result = self._check_cache( - name=prompt_name, - version=prompt_version, + name=name, + version=version, payload=payload, response_model=response_model, ) @@ -140,8 +140,8 @@ def execute_prompt( try: invoke_json_response = self.client.invoke_json( - system_prompt=prompt.build_system_prompt(), - user_prompt=prompt.build_user_prompt(payload=payload), + system_prompt=registered_task.build_system_prompt(), + user_prompt=registered_task.build_user_prompt(payload=payload), json_schema=response_model.model_json_schema(), ) @@ -151,7 +151,7 @@ def execute_prompt( self.cache.set(cache_result.cache_key, result.model_dump()) except (LLMClientError, ValidationError) as exc: - msg = f"Task failed: {exc}" + msg = f"{kind} execution failed for {name} version {version}: {exc}" _logger.exception(msg) raise RuntimeError(msg) from exc else: From 75b5204c77dfc5678463dc8c87d2b93f7e1323ae Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Thu, 18 Jun 2026 14:43:31 -0500 Subject: [PATCH 23/36] chore: add test skill fixture using semantic versioning --- tests/unit/skills/test_skill_0.1.0.md | 55 +++++++++++++++++++++++++++ 1 file changed, 55 
insertions(+) create mode 100644 tests/unit/skills/test_skill_0.1.0.md diff --git a/tests/unit/skills/test_skill_0.1.0.md b/tests/unit/skills/test_skill_0.1.0.md new file mode 100644 index 0000000..c14b955 --- /dev/null +++ b/tests/unit/skills/test_skill_0.1.0.md @@ -0,0 +1,55 @@ +--- +name: test-skill +description: A helpful assistant that processes text input and returns a JSON + object with a value field set to 1. +--- + +# Test Skill + +## Overview + +A simple skill that processes text input and returns a structured JSON response. + +## When to Use + +Use when you need to process a text input and receive a standardized JSON output with a value field. + +## Instructions + +You are a helpful assistant. Return a JSON object with a `value` field set to `1`. + +## Input Format + +The input will be a JSON object with the following structure: + +```json +{ + "text": "the text to process" +} +``` + +## Output Format + +Return a JSON object matching the provided schema: + +```json +{ + "value": 1 +} +``` + +## Examples + +### Input +```json +{ + "text": "hello" +} +``` + +### Output +```json +{ + "value": 1 +} +``` From 30f4c8e0bda9c7a8a0106f3e9fc00d55e5c40f93 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Thu, 18 Jun 2026 14:43:47 -0500 Subject: [PATCH 24/36] test: update skill tests to use semantic versioning format --- tests/unit/skills/test_skill_json_task.py | 30 +++++++++++------------ tests/unit/skills/test_skill_registry.py | 8 +++--- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/tests/unit/skills/test_skill_json_task.py b/tests/unit/skills/test_skill_json_task.py index 488df5a..a35c80b 100644 --- a/tests/unit/skills/test_skill_json_task.py +++ b/tests/unit/skills/test_skill_json_task.py @@ -16,7 +16,7 @@ class DummySkill(SkillTemplate): """Simple skill for service tests.""" - skill_path = Path("tests/unit/skills/test_skill_v1.md") + skill_path = Path("tests/unit/skills/test_skill_0.1.0.md") def build_user_prompt(self, payload) -> str: 
"""Build the user prompt.""" @@ -26,7 +26,7 @@ def build_user_prompt(self, payload) -> str: class MissingFileSkill(SkillTemplate): """Missing skill file for service tests.""" - skill_path = Path("tests/unit/skills/does_not_exist_v1.md") + skill_path = Path("tests/unit/skills/does_not_exist_0.1.0.md") def build_user_prompt(self, payload) -> str: """Build the user prompt.""" @@ -92,12 +92,12 @@ def test_execute_skill_success(): service = StructuredTaskRunner( client=DummyClient(), - prompt_registry=registry, + registry=registry, ) result = service.execute_skill( skill_name="test_skill", - skill_version="v1", + skill_version="0.1.0", payload={"text": "hello"}, response_model=ResultModel, ) @@ -113,13 +113,13 @@ def test_execute_skill_file_not_found(): service = StructuredTaskRunner( client=DummyClient(), - prompt_registry=registry, + registry=registry, ) with pytest.raises(SkillTemplateError): service.execute_skill( skill_name="does_not_exist", - skill_version="v1", + skill_version="0.1.0", payload={"text": "hello"}, response_model=ResultModel, ) @@ -134,19 +134,19 @@ def test_execute_skill_uses_cache(): service = StructuredTaskRunner( client=client, - prompt_registry=registry, + registry=registry, cache=cache, ) result1 = service.execute_skill( skill_name="test_skill", - skill_version="v1", + skill_version="0.1.0", payload={"x": 1}, response_model=ResultModel, ) result2 = service.execute_skill( skill_name="test_skill", - skill_version="v1", + skill_version="0.1.0", payload={"x": 1}, response_model=ResultModel, ) @@ -165,19 +165,19 @@ def test_execute_skill_cache_miss_for_different_payload(): service = StructuredTaskRunner( client=client, - prompt_registry=registry, + registry=registry, cache=cache, ) service.execute_skill( skill_name="test_skill", - skill_version="v1", + skill_version="0.1.0", payload={"x": 1}, response_model=ResultModel, ) service.execute_skill( skill_name="test_skill", - skill_version="v1", + skill_version="0.1.0", payload={"x": 2}, 
response_model=ResultModel, ) @@ -192,13 +192,13 @@ def test_execute_skill_validation_error(): service = StructuredTaskRunner( client=BadClient(), - prompt_registry=registry, + registry=registry, ) - with pytest.raises(RuntimeError, match="Task failed"): + with pytest.raises(RuntimeError, match="Skill execution failed"): service.execute_skill( skill_name="test_skill", - skill_version="v1", + skill_version="0.1.0", payload={"text": "hello"}, response_model=ResultModel, ) diff --git a/tests/unit/skills/test_skill_registry.py b/tests/unit/skills/test_skill_registry.py index 626b19b..b40ded0 100644 --- a/tests/unit/skills/test_skill_registry.py +++ b/tests/unit/skills/test_skill_registry.py @@ -10,7 +10,7 @@ class DummySkill(SkillTemplate): - skill_path = Path("tests/unit/skills/test_skill_v1.md") + skill_path = Path("tests/unit/skills/test_skill_0.1.0.md") def build_user_prompt(self, payload: Mapping[str, Any]) -> str: """Build the user prompt. @@ -31,15 +31,15 @@ def test_register_and_get_skill(): registry.register(skill) - assert registry.get("test_skill", "v1") is skill + assert registry.get("test_skill", "0.1.0") is skill def test_build_empty_registry(): registry = build_empty_registry() with pytest.raises( - KeyError, match=re.escape("'Template not found: (test_skill, v1)'") + KeyError, match=re.escape("'Template not found: (test_skill, 0.1.0)'") ): - assert registry.get("test_skill", "v1") + assert registry.get("test_skill", "0.1.0") def test_invalid_skill_filename(): From fdcef2a9d7b6e622977c07fd7635897ff8ad4755 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Thu, 18 Jun 2026 14:44:01 -0500 Subject: [PATCH 25/36] chore: update integration tests and notebook to reflect new structure --- notebooks/example.ipynb | 2 +- tests/integration/services/test_json_task.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/notebooks/example.ipynb b/notebooks/example.ipynb index 486236f..8f434b3 100644 --- a/notebooks/example.ipynb +++ 
b/notebooks/example.ipynb @@ -138,7 +138,7 @@ "\n", "service = StructuredTaskRunner(\n", " client=client,\n", - " prompt_registry=registry,\n", + " registry=registry,\n", " cache=InMemoryCache(),\n", ")" ] diff --git a/tests/integration/services/test_json_task.py b/tests/integration/services/test_json_task.py index 64de0aa..92fb791 100644 --- a/tests/integration/services/test_json_task.py +++ b/tests/integration/services/test_json_task.py @@ -86,7 +86,7 @@ def test_run_success(): service = StructuredTaskRunner( client=DummyClient(), - prompt_registry=registry, + registry=registry, ) result = service.execute_prompt( @@ -108,7 +108,7 @@ def test_run_uses_cache(): service = StructuredTaskRunner( client=client, - prompt_registry=registry, + registry=registry, cache=cache, ) @@ -139,7 +139,7 @@ def test_run_cache_miss_for_different_payload(): service = StructuredTaskRunner( client=client, - prompt_registry=registry, + registry=registry, cache=cache, ) @@ -166,10 +166,10 @@ def test_run_validation_error(): service = StructuredTaskRunner( client=BadClient(), - prompt_registry=registry, + registry=registry, ) - with pytest.raises(RuntimeError, match="Task failed"): + with pytest.raises(RuntimeError, match="Prompt execution failed"): service.execute_prompt( prompt_name="test_task", prompt_version="v1", From f73d43edd2c1acfb70bc60e2f9f53f0886de0c82 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Wed, 24 Jun 2026 11:22:17 -0500 Subject: [PATCH 26/36] chore: update module docstrings for clarity --- src/wags_llm/registry/__init__.py | 4 ++-- src/wags_llm/registry/base.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/wags_llm/registry/__init__.py b/src/wags_llm/registry/__init__.py index e9faa20..47766ee 100644 --- a/src/wags_llm/registry/__init__.py +++ b/src/wags_llm/registry/__init__.py @@ -1,6 +1,6 @@ -"""Prompt interfaces and registry. +"""Registry module. -Define and manage versioned prompt templates. 
+Store and retrieve versioned prompt and skill templates. """ from wags_llm.registry.base import Registry, build_empty_registry diff --git a/src/wags_llm/registry/base.py b/src/wags_llm/registry/base.py index 1b5b874..3855970 100644 --- a/src/wags_llm/registry/base.py +++ b/src/wags_llm/registry/base.py @@ -1,6 +1,7 @@ """Registry. Maps (name, version) -> template instance. +Template instances can be either prompts or skills. Users typically: * create prompts or skills in their project From 59f728e4eb5019c107c9b63c7493675b9b8abb3d Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Wed, 24 Jun 2026 11:22:28 -0500 Subject: [PATCH 27/36] feat: implement TaskKind enum for structured task execution --- src/wags_llm/services/structured_task.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/wags_llm/services/structured_task.py b/src/wags_llm/services/structured_task.py index dd880c8..407df3e 100644 --- a/src/wags_llm/services/structured_task.py +++ b/src/wags_llm/services/structured_task.py @@ -16,8 +16,9 @@ import json import logging from collections.abc import Mapping +from enum import Enum from os import getenv -from typing import Any, Literal +from typing import Any from pydantic import BaseModel, ValidationError @@ -31,6 +32,13 @@ MAX_LOG_CHARS = int(getenv("MAX_LOG_CHARS", "500")) +class TaskKind(Enum): + """Enum for task types supported by StructuredTaskRunner.""" + + SKILL = "Skill" + PROMPT = "Prompt" + + class CacheLookupResult(BaseModel): """Result of a cache lookup. 
@@ -82,7 +90,7 @@ def execute_skill( version=skill_version, payload=payload, response_model=response_model, - kind="Skill", + kind=TaskKind.SKILL, ) def execute_prompt( @@ -106,7 +114,7 @@ def execute_prompt( version=prompt_version, payload=payload, response_model=response_model, - kind="Prompt", + kind=TaskKind.PROMPT, ) def _execute( @@ -115,7 +123,7 @@ def _execute( version: str, payload: Mapping[str, Any], response_model: type[BaseModel], - kind: Literal["Skill", "Prompt"], + kind: TaskKind, ) -> BaseModel: """Execute a task and return validated output. @@ -151,7 +159,7 @@ def _execute( self.cache.set(cache_result.cache_key, result.model_dump()) except (LLMClientError, ValidationError) as exc: - msg = f"{kind} execution failed for {name} version {version}: {exc}" + msg = f"{kind.value} execution failed for {name} version {version}: {exc}" _logger.exception(msg) raise RuntimeError(msg) from exc else: From be6eef2dd4cb7be7ac653f5e560a16316e9a7a46 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Wed, 24 Jun 2026 20:42:17 -0500 Subject: [PATCH 28/36] fix: update notebook examples to match refactored code structure --- notebooks/example.ipynb | 2 +- notebooks/skills.ipynb | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/notebooks/example.ipynb b/notebooks/example.ipynb index 8f434b3..a2cbd6b 100644 --- a/notebooks/example.ipynb +++ b/notebooks/example.ipynb @@ -227,7 +227,7 @@ ], "metadata": { "kernelspec": { - "display_name": "wags-llm", + "display_name": "wags-llm (3.11.14)", "language": "python", "name": "python3" }, diff --git a/notebooks/skills.ipynb b/notebooks/skills.ipynb index 3396ee4..028ab54 100644 --- a/notebooks/skills.ipynb +++ b/notebooks/skills.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "e9161ef9", "metadata": {}, "outputs": [], @@ -26,9 +26,9 @@ "from pydantic import BaseModel, ConfigDict\n", "\n", "from wags_llm.client.bedrock import 
BedrockClaudeJsonClient\n", + "from wags_llm.registry.base import Registry\n", "from wags_llm.services.structured_task import StructuredTaskRunner\n", - "from wags_llm.skills.base import BaseSkillTemplate\n", - "from wags_llm.skills.registry import SkillRegistry\n", + "from wags_llm.templates.skill_template import SkillTemplate\n", "\n", "logging.basicConfig(\n", " stream=sys.stdout,\n", @@ -45,7 +45,7 @@ "metadata": {}, "outputs": [], "source": [ - "class VariantCurationSkill(BaseSkillTemplate):\n", + "class VariantCurationSkill(SkillTemplate):\n", " skill_path = Path(\"skills/variant_curation_0.1.0.md\")\n", "\n", " def build_user_prompt(self, payload: Mapping[str, Any]) -> str:\n", @@ -96,7 +96,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "c44eeb11", "metadata": {}, "outputs": [ @@ -111,7 +111,7 @@ } ], "source": [ - "registry = SkillRegistry()\n", + "registry = Registry()\n", "registry.register(skill)\n", "\n", "MODEL_ID = \"us.anthropic.claude-sonnet-4-6\"\n", @@ -129,13 +129,13 @@ "\n", "task_runner = StructuredTaskRunner(\n", " client=llm_client,\n", - " skill_registry=registry,\n", + " registry=registry,\n", ")" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "d045cf30", "metadata": {}, "outputs": [ @@ -201,7 +201,7 @@ ], "metadata": { "kernelspec": { - "display_name": "wags-llm (3.13.5)", + "display_name": "wags-llm (3.11.14)", "language": "python", "name": "python3" }, @@ -215,7 +215,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.5" + "version": "3.11.14" } }, "nbformat": 4, From 8966f74f92594967bd7c3538dc6f6b2afd757dd6 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Thu, 25 Jun 2026 12:49:17 -0500 Subject: [PATCH 29/36] Add task type to registry keys --- src/wags_llm/registry/__init__.py | 3 +- src/wags_llm/registry/base.py | 57 +++++++++++++++++++++--- src/wags_llm/services/structured_task.py | 22 +++------ 3 files 
changed, 59 insertions(+), 23 deletions(-) diff --git a/src/wags_llm/registry/__init__.py b/src/wags_llm/registry/__init__.py index 47766ee..f1f66db 100644 --- a/src/wags_llm/registry/__init__.py +++ b/src/wags_llm/registry/__init__.py @@ -3,9 +3,10 @@ Store and retrieve versioned prompt and skill templates. """ -from wags_llm.registry.base import Registry, build_empty_registry +from wags_llm.registry.base import Registry, TaskType, build_empty_registry __all__ = [ "Registry", + "TaskType", "build_empty_registry", ] diff --git a/src/wags_llm/registry/base.py b/src/wags_llm/registry/base.py index 3855970..81df788 100644 --- a/src/wags_llm/registry/base.py +++ b/src/wags_llm/registry/base.py @@ -1,6 +1,6 @@ """Registry. -Maps (name, version) -> template instance. +Maps (name, version, task_type) -> template instance. Template instances can be either prompts or skills. Users typically: @@ -9,6 +9,7 @@ """ import logging +from enum import Enum from wags_llm.templates.base import PromptTemplate from wags_llm.templates.skill_template import SkillTemplate @@ -16,40 +17,82 @@ _logger = logging.getLogger(__name__) +class TaskType(Enum): + """Enum for task types supported by StructuredTaskRunner.""" + + SKILL = "skill" + PROMPT = "prompt" + + class Registry: """Store and retrieve prompt and skill templates.""" def __init__(self) -> None: """Initialize an empty template registry.""" - self._templates: dict[tuple[str, str], PromptTemplate | SkillTemplate] = {} + self._templates: dict[ + tuple[str, str, TaskType], PromptTemplate | SkillTemplate + ] = {} def register(self, template: PromptTemplate | SkillTemplate) -> None: """Register a template. :param template: Template instance to register. 
""" + task_type = self._get_task_type(template) + + key = (template.name, template.version, task_type) + _logger.debug( - "Registering template: name='%s', version='%s'", + "Registering template: name='%s', version='%s', task_type='%s'", template.name, template.version, + task_type.value, ) - self._templates[(template.name, template.version)] = template - def get(self, name: str, version: str) -> PromptTemplate | SkillTemplate: + if key in self._templates: + msg = f"Template already registered:({template.name}, {template.version}, {task_type.value})" + _logger.error(msg) + raise ValueError(msg) + + self._templates[key] = template + + def get( + self, + name: str, + version: str, + task_type: TaskType, + ) -> PromptTemplate | SkillTemplate: """Retrieve a template by name and version. :param name: Template name. :param version: Template version. + :param task_type: Template type. :return: Registered template. :raise KeyError: If template not found. """ + key = (name, version, task_type) + try: - return self._templates[(name, version)] + return self._templates[key] except KeyError as exc: - msg = f"Template not found: ({name}, {version})" + msg = f"Template not found: ({name}, {version}, {task_type.value})" _logger.exception(msg) raise KeyError(msg) from exc + def _get_task_type(self, template: PromptTemplate | SkillTemplate) -> TaskType: + """Determine the task type for a template instance. + + :param template: Template instance to inspect. + :return: Task type corresponding to the template. + :raise TypeError: If the template type is unsupported. + """ + if isinstance(template, SkillTemplate): + return TaskType.SKILL + if isinstance(template, PromptTemplate): + return TaskType.PROMPT + msg = f"Unsupported template type: {type(template)}" + raise TypeError(msg) + def build_empty_registry() -> Registry: """Create an empty registry. 
diff --git a/src/wags_llm/services/structured_task.py b/src/wags_llm/services/structured_task.py index 407df3e..475748d 100644 --- a/src/wags_llm/services/structured_task.py +++ b/src/wags_llm/services/structured_task.py @@ -16,7 +16,6 @@ import json import logging from collections.abc import Mapping -from enum import Enum from os import getenv from typing import Any @@ -25,20 +24,13 @@ from wags_llm.cache.base import BaseCache from wags_llm.client.base import LLMJsonClient from wags_llm.client.exceptions import LLMClientError -from wags_llm.registry import Registry, build_empty_registry +from wags_llm.registry import Registry, TaskType, build_empty_registry _logger = logging.getLogger(__name__) MAX_LOG_CHARS = int(getenv("MAX_LOG_CHARS", "500")) -class TaskKind(Enum): - """Enum for task types supported by StructuredTaskRunner.""" - - SKILL = "Skill" - PROMPT = "Prompt" - - class CacheLookupResult(BaseModel): """Result of a cache lookup. @@ -90,7 +82,7 @@ def execute_skill( version=skill_version, payload=payload, response_model=response_model, - kind=TaskKind.SKILL, + task_type=TaskType.SKILL, ) def execute_prompt( @@ -114,7 +106,7 @@ def execute_prompt( version=prompt_version, payload=payload, response_model=response_model, - kind=TaskKind.PROMPT, + task_type=TaskType.PROMPT, ) def _execute( @@ -123,7 +115,7 @@ def _execute( version: str, payload: Mapping[str, Any], response_model: type[BaseModel], - kind: TaskKind, + task_type: TaskType, ) -> BaseModel: """Execute a task and return validated output. @@ -131,11 +123,11 @@ def _execute( :param version: Registered task version. :param payload: JSON-serializable task data. :param response_model: Pydantic model for validation. - :param kind: Display label for the registered task type, either "Skill" or "Prompt". + :param task_type: Registered task type, either skill or prompt. :return: Validated task result. :raise RuntimeError: If execution or validation fails. 
""" - registered_task = self.registry.get(name, version) + registered_task = self.registry.get(name, version, task_type) cache_result = self._check_cache( name=name, @@ -159,7 +151,7 @@ def _execute( self.cache.set(cache_result.cache_key, result.model_dump()) except (LLMClientError, ValidationError) as exc: - msg = f"{kind.value} execution failed for {name} version {version}: {exc}" + msg = f"{task_type.value} execution failed for {name} version {version}: {exc}" _logger.exception(msg) raise RuntimeError(msg) from exc else: From 26fec9a3b733282edb1cece8e586fae6620e55e6 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Thu, 25 Jun 2026 12:50:23 -0500 Subject: [PATCH 30/36] Update tests for task-type registry lookup --- tests/integration/services/test_json_task.py | 2 +- tests/unit/prompts/test_registry.py | 63 ++++++++++++++++++-- tests/unit/prompts/test_registry_0.1.0.md | 55 +++++++++++++++++ tests/unit/skills/test_skill_json_task.py | 2 +- tests/unit/skills/test_skill_registry.py | 10 ++-- 5 files changed, 120 insertions(+), 12 deletions(-) create mode 100644 tests/unit/prompts/test_registry_0.1.0.md diff --git a/tests/integration/services/test_json_task.py b/tests/integration/services/test_json_task.py index 92fb791..6826e4b 100644 --- a/tests/integration/services/test_json_task.py +++ b/tests/integration/services/test_json_task.py @@ -169,7 +169,7 @@ def test_run_validation_error(): registry=registry, ) - with pytest.raises(RuntimeError, match="Prompt execution failed"): + with pytest.raises(RuntimeError, match="prompt execution failed"): service.execute_prompt( prompt_name="test_task", prompt_version="v1", diff --git a/tests/unit/prompts/test_registry.py b/tests/unit/prompts/test_registry.py index 1d9e803..ecfa533 100644 --- a/tests/unit/prompts/test_registry.py +++ b/tests/unit/prompts/test_registry.py @@ -2,19 +2,41 @@ import re from collections.abc import Mapping + +# NEW +from pathlib import Path from typing import Any import pytest -from wags_llm.registry 
import Registry, build_empty_registry +from wags_llm.registry import Registry, TaskType, build_empty_registry from wags_llm.templates.base import PromptTemplate +from wags_llm.templates.skill_template import SkillTemplate + + +class DummySkill(SkillTemplate): + """Simple skill for registry tests.""" + + skill_path = Path("tests/unit/prompts/test_registry_0.1.0.md") + + def build_user_prompt(self, payload: Mapping[str, Any]) -> str: + """Build the user prompt. + + :param payload: JSON-serializable task data. + + Example: + payload = {"text": "hello"} + + :return: User prompt string. + """ + return f"Payload: {payload}" class DummyPrompt(PromptTemplate): """Simple prompt for registry tests.""" - name = "test_task" - version = "v1" + name = "test_registry" + version = "0.1.0" def build_system_prompt(self) -> str: """Build the system prompt. @@ -43,13 +65,42 @@ def test_register_and_get_prompt(): registry.register(prompt) - assert registry.get("test_task", "v1") is prompt + assert registry.get("test_registry", "0.1.0", TaskType.PROMPT) is prompt def test_build_empty_registry(): """Test that build_empty_registry works correctly and prompt registry raises KeyError when no prompts are registered""" registry = build_empty_registry() with pytest.raises( - KeyError, match=re.escape("'Template not found: (test_task, v1)'") + KeyError, + match=re.escape("'Template not found: (test_registry, 0.1.0, prompt)'"), + ): + assert registry.get("test_registry", "0.1.0", TaskType.PROMPT) + + +def test_prompt_and_skill_can_share_name_and_version(): + """Register prompt and skill with same name/version.""" + registry = Registry() + + prompt = DummyPrompt() + skill = DummySkill() + + registry.register(prompt) + registry.register(skill) + + assert registry.get("test_registry", "0.1.0", TaskType.PROMPT) is prompt + assert registry.get("test_registry", "0.1.0", TaskType.SKILL) is skill + + +def test_registering_duplicate_skill_raises_value_error(): + """Raise ValueError when registering 
duplicate skill name/version.""" + registry = Registry() + skill = DummySkill() + + registry.register(skill) + + with pytest.raises( + ValueError, + match=re.escape("Template already registered:(test_registry, 0.1.0, skill)"), ): - assert registry.get("test_task", "v1") + registry.register(DummySkill()) diff --git a/tests/unit/prompts/test_registry_0.1.0.md b/tests/unit/prompts/test_registry_0.1.0.md new file mode 100644 index 0000000..adc4542 --- /dev/null +++ b/tests/unit/prompts/test_registry_0.1.0.md @@ -0,0 +1,55 @@ +--- +name: test-registry +description: A helpful assistant that processes text input and returns a JSON + object with a value field set to 1. +--- + +# Test Skill + +## Overview + +A simple skill that processes text input and returns a structured JSON response. + +## When to Use + +Use when you need to process a text input and receive a standardized JSON output with a value field. + +## Instructions + +You are a helpful assistant. Return a JSON object with a `value` field set to `1`. 
+ +## Input Format + +The input will be a JSON object with the following structure: + +```json +{ + "text": "the text to process" +} +``` + +## Output Format + +Return a JSON object matching the provided schema: + +```json +{ + "value": 1 +} +``` + +## Examples + +### Input +```json +{ + "text": "hello" +} +``` + +### Output +```json +{ + "value": 1 +} +``` diff --git a/tests/unit/skills/test_skill_json_task.py b/tests/unit/skills/test_skill_json_task.py index a35c80b..8accb50 100644 --- a/tests/unit/skills/test_skill_json_task.py +++ b/tests/unit/skills/test_skill_json_task.py @@ -195,7 +195,7 @@ def test_execute_skill_validation_error(): registry=registry, ) - with pytest.raises(RuntimeError, match="Skill execution failed"): + with pytest.raises(RuntimeError, match="skill execution failed"): service.execute_skill( skill_name="test_skill", skill_version="0.1.0", diff --git a/tests/unit/skills/test_skill_registry.py b/tests/unit/skills/test_skill_registry.py index b40ded0..53bbe18 100644 --- a/tests/unit/skills/test_skill_registry.py +++ b/tests/unit/skills/test_skill_registry.py @@ -5,11 +5,13 @@ import pytest -from wags_llm.registry import Registry, build_empty_registry +from wags_llm.registry import Registry, TaskType, build_empty_registry from wags_llm.templates.skill_template import SkillTemplate, SkillTemplateError class DummySkill(SkillTemplate): + """Simple skill for registry tests.""" + skill_path = Path("tests/unit/skills/test_skill_0.1.0.md") def build_user_prompt(self, payload: Mapping[str, Any]) -> str: @@ -31,15 +33,15 @@ def test_register_and_get_skill(): registry.register(skill) - assert registry.get("test_skill", "0.1.0") is skill + assert registry.get("test_skill", "0.1.0", TaskType.SKILL) is skill def test_build_empty_registry(): registry = build_empty_registry() with pytest.raises( - KeyError, match=re.escape("'Template not found: (test_skill, 0.1.0)'") + KeyError, match=re.escape("'Template not found: (test_skill, 0.1.0, skill)'") ): - assert 
registry.get("test_skill", "0.1.0") + assert registry.get("test_skill", "0.1.0", TaskType.SKILL) def test_invalid_skill_filename(): From a3845891bdf47fbd62f312c4f272c513425e5f73 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Tue, 30 Jun 2026 14:38:55 -0500 Subject: [PATCH 31/36] test: move shared example file to tests/examples --- .../test_example_0.1.0.md} | 0 tests/unit/prompts/test_registry_0.1.0.md | 55 ------------------- 2 files changed, 55 deletions(-) rename tests/{unit/skills/test_skill_0.1.0.md => examples/test_example_0.1.0.md} (100%) delete mode 100644 tests/unit/prompts/test_registry_0.1.0.md diff --git a/tests/unit/skills/test_skill_0.1.0.md b/tests/examples/test_example_0.1.0.md similarity index 100% rename from tests/unit/skills/test_skill_0.1.0.md rename to tests/examples/test_example_0.1.0.md diff --git a/tests/unit/prompts/test_registry_0.1.0.md b/tests/unit/prompts/test_registry_0.1.0.md deleted file mode 100644 index adc4542..0000000 --- a/tests/unit/prompts/test_registry_0.1.0.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -name: test-registry -description: A helpful assistant that processes text input and returns a JSON - object with a value field set to 1. ---- - -# Test Skill - -## Overview - -A simple skill that processes text input and returns a structured JSON response. - -## When to Use - -Use when you need to process a text input and receive a standardized JSON output with a value field. - -## Instructions - -You are a helpful assistant. Return a JSON object with a `value` field set to `1`. 
- -## Input Format - -The input will be a JSON object with the following structure: - -```json -{ - "text": "the text to process" -} -``` - -## Output Format - -Return a JSON object matching the provided schema: - -```json -{ - "value": 1 -} -``` - -## Examples - -### Input -```json -{ - "text": "hello" -} -``` - -### Output -```json -{ - "value": 1 -} -``` From 6c6406446d6cd73de2e1a08f3b2459491cbeffb3 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Tue, 30 Jun 2026 14:40:09 -0500 Subject: [PATCH 32/36] test: combine prompt and skill registry tests --- .../prompts => registry}/test_registry.py | 72 ++++++++++++------- tests/unit/skills/test_skill_registry.py | 57 --------------- tests/unit/skills/test_skill_v1.md | 55 -------------- 3 files changed, 47 insertions(+), 137 deletions(-) rename tests/{unit/prompts => registry}/test_registry.py (59%) delete mode 100644 tests/unit/skills/test_skill_registry.py delete mode 100644 tests/unit/skills/test_skill_v1.md diff --git a/tests/unit/prompts/test_registry.py b/tests/registry/test_registry.py similarity index 59% rename from tests/unit/prompts/test_registry.py rename to tests/registry/test_registry.py index ecfa533..4a60834 100644 --- a/tests/unit/prompts/test_registry.py +++ b/tests/registry/test_registry.py @@ -2,22 +2,29 @@ import re from collections.abc import Mapping - -# NEW from pathlib import Path from typing import Any import pytest -from wags_llm.registry import Registry, TaskType, build_empty_registry -from wags_llm.templates.base import PromptTemplate -from wags_llm.templates.skill_template import SkillTemplate +from wags_llm.registry import Registry, build_empty_registry +from wags_llm.templates import TemplateType +from wags_llm.templates.prompt_template import PromptTemplate +from wags_llm.templates.skill_template import SkillTemplate, SkillTemplateError -class DummySkill(SkillTemplate): - """Simple skill for registry tests.""" +class DummyPrompt(PromptTemplate): + """Simple prompt for registry 
tests.""" - skill_path = Path("tests/unit/prompts/test_registry_0.1.0.md") + name = "test_example" + version = "0.1.0" + + def build_system_prompt(self) -> str: + """Build the system prompt. + + :return: System prompt string. + """ + return "Return valid JSON only." def build_user_prompt(self, payload: Mapping[str, Any]) -> str: """Build the user prompt. @@ -32,18 +39,10 @@ def build_user_prompt(self, payload: Mapping[str, Any]) -> str: return f"Payload: {payload}" -class DummyPrompt(PromptTemplate): - """Simple prompt for registry tests.""" - - name = "test_registry" - version = "0.1.0" - - def build_system_prompt(self) -> str: - """Build the system prompt. +class DummySkill(SkillTemplate): + """Simple skill for registry tests.""" - :return: System prompt string. - """ - return "Return valid JSON only." + skill_path = Path("tests/examples/test_example_0.1.0.md") def build_user_prompt(self, payload: Mapping[str, Any]) -> str: """Build the user prompt. @@ -65,7 +64,17 @@ def test_register_and_get_prompt(): registry.register(prompt) - assert registry.get("test_registry", "0.1.0", TaskType.PROMPT) is prompt + assert registry.get("test_example", "0.1.0", TemplateType.PROMPT) is prompt + + +def test_register_and_get_skill(): + """Register and retrieve a skill.""" + registry = Registry() + skill = DummySkill() + + registry.register(skill) + + assert registry.get("test_example", "0.1.0", TemplateType.SKILL) is skill def test_build_empty_registry(): @@ -73,9 +82,22 @@ def test_build_empty_registry(): registry = build_empty_registry() with pytest.raises( KeyError, - match=re.escape("'Template not found: (test_registry, 0.1.0, prompt)'"), + match=re.escape("'Template not found: (test_example, 0.1.0, prompt)'"), ): - assert registry.get("test_registry", "0.1.0", TaskType.PROMPT) + assert registry.get("test_example", "0.1.0", TemplateType.PROMPT) + + +def test_invalid_skill_filename(): + """Test that an invalid skill filename raises SkillTemplateError.""" + + class 
InvalidSkill(SkillTemplate): + skill_path = Path("tests/examples/invalid.md") + + def build_user_prompt(self, payload) -> str: + return f"Payload: {payload}" + + with pytest.raises(SkillTemplateError): + _ = InvalidSkill().name def test_prompt_and_skill_can_share_name_and_version(): @@ -88,8 +110,8 @@ def test_prompt_and_skill_can_share_name_and_version(): registry.register(prompt) registry.register(skill) - assert registry.get("test_registry", "0.1.0", TaskType.PROMPT) is prompt - assert registry.get("test_registry", "0.1.0", TaskType.SKILL) is skill + assert registry.get("test_example", "0.1.0", TemplateType.PROMPT) is prompt + assert registry.get("test_example", "0.1.0", TemplateType.SKILL) is skill def test_registering_duplicate_skill_raises_value_error(): @@ -101,6 +123,6 @@ def test_registering_duplicate_skill_raises_value_error(): with pytest.raises( ValueError, - match=re.escape("Template already registered:(test_registry, 0.1.0, skill)"), + match=re.escape("Template already registered:(test_example, 0.1.0, skill)"), ): registry.register(DummySkill()) diff --git a/tests/unit/skills/test_skill_registry.py b/tests/unit/skills/test_skill_registry.py deleted file mode 100644 index 53bbe18..0000000 --- a/tests/unit/skills/test_skill_registry.py +++ /dev/null @@ -1,57 +0,0 @@ -import re -from collections.abc import Mapping -from pathlib import Path -from typing import Any - -import pytest - -from wags_llm.registry import Registry, TaskType, build_empty_registry -from wags_llm.templates.skill_template import SkillTemplate, SkillTemplateError - - -class DummySkill(SkillTemplate): - """Simple skill for registry tests.""" - - skill_path = Path("tests/unit/skills/test_skill_0.1.0.md") - - def build_user_prompt(self, payload: Mapping[str, Any]) -> str: - """Build the user prompt. - - :param payload: JSON-serializable task data. - - Example: - payload = {"text": "hello"} - - :return: User prompt string. 
- """ - return f"Payload: {payload}" - - -def test_register_and_get_skill(): - registry = Registry() - skill = DummySkill() - - registry.register(skill) - - assert registry.get("test_skill", "0.1.0", TaskType.SKILL) is skill - - -def test_build_empty_registry(): - registry = build_empty_registry() - with pytest.raises( - KeyError, match=re.escape("'Template not found: (test_skill, 0.1.0, skill)'") - ): - assert registry.get("test_skill", "0.1.0", TaskType.SKILL) - - -def test_invalid_skill_filename(): - """Test that an invalid skill filename raises SkillTemplateError.""" - - class InvalidSkill(SkillTemplate): - skill_path = Path("tests/unit/skills/invalid.md") - - def build_user_prompt(self, payload) -> str: - return f"Payload: {payload}" - - with pytest.raises(SkillTemplateError): - _ = InvalidSkill().name diff --git a/tests/unit/skills/test_skill_v1.md b/tests/unit/skills/test_skill_v1.md deleted file mode 100644 index c14b955..0000000 --- a/tests/unit/skills/test_skill_v1.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -name: test-skill -description: A helpful assistant that processes text input and returns a JSON - object with a value field set to 1. ---- - -# Test Skill - -## Overview - -A simple skill that processes text input and returns a structured JSON response. - -## When to Use - -Use when you need to process a text input and receive a standardized JSON output with a value field. - -## Instructions - -You are a helpful assistant. Return a JSON object with a `value` field set to `1`. 
- -## Input Format - -The input will be a JSON object with the following structure: - -```json -{ - "text": "the text to process" -} -``` - -## Output Format - -Return a JSON object matching the provided schema: - -```json -{ - "value": 1 -} -``` - -## Examples - -### Input -```json -{ - "text": "hello" -} -``` - -### Output -```json -{ - "value": 1 -} -``` From a52d93c95d73f22efb748358a0de4a5a4180104a Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Tue, 30 Jun 2026 14:43:13 -0500 Subject: [PATCH 33/36] refactor: split prompt template into base template and updated paths --- src/wags_llm/services/structured_task.py | 15 ++++++++------- src/wags_llm/templates/__init__.py | 5 ++++- src/wags_llm/templates/base.py | 18 +++++++++++++----- src/wags_llm/templates/prompt_template.py | 15 +++++++++++++++ src/wags_llm/templates/skill_template.py | 7 ++++--- tests/integration/services/test_json_task.py | 2 +- tests/unit/skills/test_skill_json_task.py | 16 ++++++++-------- 7 files changed, 53 insertions(+), 25 deletions(-) create mode 100644 src/wags_llm/templates/prompt_template.py diff --git a/src/wags_llm/services/structured_task.py b/src/wags_llm/services/structured_task.py index 475748d..b0d79bc 100644 --- a/src/wags_llm/services/structured_task.py +++ b/src/wags_llm/services/structured_task.py @@ -24,7 +24,8 @@ from wags_llm.cache.base import BaseCache from wags_llm.client.base import LLMJsonClient from wags_llm.client.exceptions import LLMClientError -from wags_llm.registry import Registry, TaskType, build_empty_registry +from wags_llm.registry import Registry, build_empty_registry +from wags_llm.templates import TemplateType _logger = logging.getLogger(__name__) @@ -82,7 +83,7 @@ def execute_skill( version=skill_version, payload=payload, response_model=response_model, - task_type=TaskType.SKILL, + template_type=TemplateType.SKILL, ) def execute_prompt( @@ -106,7 +107,7 @@ def execute_prompt( version=prompt_version, payload=payload, response_model=response_model, - 
task_type=TaskType.PROMPT, + template_type=TemplateType.PROMPT, ) def _execute( @@ -115,7 +116,7 @@ def _execute( version: str, payload: Mapping[str, Any], response_model: type[BaseModel], - task_type: TaskType, + template_type: TemplateType, ) -> BaseModel: """Execute a task and return validated output. @@ -123,11 +124,11 @@ def _execute( :param version: Registered task version. :param payload: JSON-serializable task data. :param response_model: Pydantic model for validation. - :param task_type: Registered task type, either skill or prompt. + :param template_type: Registered template type, either skill or prompt. :return: Validated task result. :raise RuntimeError: If execution or validation fails. """ - registered_task = self.registry.get(name, version, task_type) + registered_task = self.registry.get(name, version, template_type) cache_result = self._check_cache( name=name, @@ -151,7 +152,7 @@ def _execute( self.cache.set(cache_result.cache_key, result.model_dump()) except (LLMClientError, ValidationError) as exc: - msg = f"{task_type.value} execution failed for {name} version {version}: {exc}" + msg = f"{template_type.value} execution failed for {name} version {version}: {exc}" _logger.exception(msg) raise RuntimeError(msg) from exc else: diff --git a/src/wags_llm/templates/__init__.py b/src/wags_llm/templates/__init__.py index 521c6a9..ead7b4f 100644 --- a/src/wags_llm/templates/__init__.py +++ b/src/wags_llm/templates/__init__.py @@ -3,10 +3,13 @@ Define and manage versioned prompt templates. 
""" -from wags_llm.templates.base import PromptTemplate +from wags_llm.templates.base import BaseTemplate, TemplateType +from wags_llm.templates.prompt_template import PromptTemplate from wags_llm.templates.skill_template import SkillTemplate __all__ = [ + "BaseTemplate", "PromptTemplate", "SkillTemplate", + "TemplateType", ] diff --git a/src/wags_llm/templates/base.py b/src/wags_llm/templates/base.py index e00158d..5b30def 100644 --- a/src/wags_llm/templates/base.py +++ b/src/wags_llm/templates/base.py @@ -1,18 +1,26 @@ -"""Prompt interface. +"""Base template interface. Users extend this to define new tasks. """ from abc import ABC, abstractmethod from collections.abc import Mapping +from enum import Enum from typing import Any -class PromptTemplate(ABC): - """Base prompt template. +class TemplateType(Enum): + """Enum for template types supported by StructuredTaskRunner.""" - :var name: Prompt name. - :var version: Prompt version. + SKILL = "skill" + PROMPT = "prompt" + + +class BaseTemplate(ABC): + """Base template. + + :var name: Template name. + :var version: Template version. """ name: str diff --git a/src/wags_llm/templates/prompt_template.py b/src/wags_llm/templates/prompt_template.py new file mode 100644 index 0000000..b2fe43d --- /dev/null +++ b/src/wags_llm/templates/prompt_template.py @@ -0,0 +1,15 @@ +"""Prompt interface. + +Users extend this to define new tasks. +""" + +from wags_llm.templates.base import BaseTemplate, TemplateType + + +class PromptTemplate(BaseTemplate): + """Prompt template. 
+ + :var template_type: Identifies this as a prompt template; always set to TemplateType.PROMPT + """ + + template_type = TemplateType.PROMPT diff --git a/src/wags_llm/templates/skill_template.py b/src/wags_llm/templates/skill_template.py index 3ff14bd..35475cf 100644 --- a/src/wags_llm/templates/skill_template.py +++ b/src/wags_llm/templates/skill_template.py @@ -7,7 +7,7 @@ import re from pathlib import Path -from wags_llm.templates.base import PromptTemplate +from wags_llm.templates.base import BaseTemplate, TemplateType logger = logging.getLogger(__name__) @@ -16,17 +16,18 @@ class SkillTemplateError(Exception): """Raise custom exceptions for SkillTemplateError.""" -class SkillTemplate(PromptTemplate): +class SkillTemplate(BaseTemplate): """Base skill template. :var skill_path: Path to the skill `.md` file. Must follow the format {skill_name}_{version}.md (e.g. entity_detection_v1.md). If the filename does not follow this format, a SkillTemplateError will be raised on initialization. 
+ :var template_type: Identifies this as a skill template; always set to TemplateType.SKILL """ skill_path: Path - + template_type = TemplateType.SKILL _skill_file_pattern = re.compile(r"^(?P.+)_(?P[^_]+)\.md$") def __init__(self) -> None: diff --git a/tests/integration/services/test_json_task.py b/tests/integration/services/test_json_task.py index 6826e4b..65ccedb 100644 --- a/tests/integration/services/test_json_task.py +++ b/tests/integration/services/test_json_task.py @@ -9,7 +9,7 @@ from wags_llm.client.base import InvokeJsonResponse, LLMJsonClient from wags_llm.registry import Registry from wags_llm.services.structured_task import StructuredTaskRunner -from wags_llm.templates.base import PromptTemplate +from wags_llm.templates.prompt_template import PromptTemplate class DummyPrompt(PromptTemplate): diff --git a/tests/unit/skills/test_skill_json_task.py b/tests/unit/skills/test_skill_json_task.py index 8accb50..efdefcf 100644 --- a/tests/unit/skills/test_skill_json_task.py +++ b/tests/unit/skills/test_skill_json_task.py @@ -16,7 +16,7 @@ class DummySkill(SkillTemplate): """Simple skill for service tests.""" - skill_path = Path("tests/unit/skills/test_skill_0.1.0.md") + skill_path = Path("tests/examples/test_example_0.1.0.md") def build_user_prompt(self, payload) -> str: """Build the user prompt.""" @@ -26,7 +26,7 @@ def build_user_prompt(self, payload) -> str: class MissingFileSkill(SkillTemplate): """Missing skill file for service tests.""" - skill_path = Path("tests/unit/skills/does_not_exist_0.1.0.md") + skill_path = Path("tests/examples/does_not_exist_0.1.0.md") def build_user_prompt(self, payload) -> str: """Build the user prompt.""" @@ -96,7 +96,7 @@ def test_execute_skill_success(): ) result = service.execute_skill( - skill_name="test_skill", + skill_name="test_example", skill_version="0.1.0", payload={"text": "hello"}, response_model=ResultModel, @@ -139,13 +139,13 @@ def test_execute_skill_uses_cache(): ) result1 = service.execute_skill( - 
skill_name="test_skill", + skill_name="test_example", skill_version="0.1.0", payload={"x": 1}, response_model=ResultModel, ) result2 = service.execute_skill( - skill_name="test_skill", + skill_name="test_example", skill_version="0.1.0", payload={"x": 1}, response_model=ResultModel, @@ -170,13 +170,13 @@ def test_execute_skill_cache_miss_for_different_payload(): ) service.execute_skill( - skill_name="test_skill", + skill_name="test_example", skill_version="0.1.0", payload={"x": 1}, response_model=ResultModel, ) service.execute_skill( - skill_name="test_skill", + skill_name="test_example", skill_version="0.1.0", payload={"x": 2}, response_model=ResultModel, @@ -197,7 +197,7 @@ def test_execute_skill_validation_error(): with pytest.raises(RuntimeError, match="skill execution failed"): service.execute_skill( - skill_name="test_skill", + skill_name="test_example", skill_version="0.1.0", payload={"text": "hello"}, response_model=ResultModel, From f109c0b296dd2979427ec8e4430b1e9dfebc6c8e Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Tue, 30 Jun 2026 14:45:36 -0500 Subject: [PATCH 34/36] feat: add template type to registry for duplicate skill and prompt entries --- src/wags_llm/registry/__init__.py | 3 +- src/wags_llm/registry/base.py | 61 ++++++++++++++----------------- 2 files changed, 29 insertions(+), 35 deletions(-) diff --git a/src/wags_llm/registry/__init__.py b/src/wags_llm/registry/__init__.py index f1f66db..47766ee 100644 --- a/src/wags_llm/registry/__init__.py +++ b/src/wags_llm/registry/__init__.py @@ -3,10 +3,9 @@ Store and retrieve versioned prompt and skill templates. 
""" -from wags_llm.registry.base import Registry, TaskType, build_empty_registry +from wags_llm.registry.base import Registry, build_empty_registry __all__ = [ "Registry", - "TaskType", "build_empty_registry", ] diff --git a/src/wags_llm/registry/base.py b/src/wags_llm/registry/base.py index 81df788..37cf38e 100644 --- a/src/wags_llm/registry/base.py +++ b/src/wags_llm/registry/base.py @@ -1,6 +1,6 @@ """Registry. -Maps (name, version, task_type) -> template instance. +Maps (name, version, TemplateType) -> template instance. Template instances can be either prompts or skills. Users typically: @@ -9,19 +9,20 @@ """ import logging -from enum import Enum +from types import MappingProxyType -from wags_llm.templates.base import PromptTemplate +from wags_llm.templates.base import TemplateType +from wags_llm.templates.prompt_template import PromptTemplate from wags_llm.templates.skill_template import SkillTemplate _logger = logging.getLogger(__name__) - -class TaskType(Enum): - """Enum for task types supported by StructuredTaskRunner.""" - - SKILL = "skill" - PROMPT = "prompt" +_TEMPLATE_CLASS_TO_TYPE = MappingProxyType( + { + SkillTemplate: TemplateType.SKILL, + PromptTemplate: TemplateType.PROMPT, + } +) class Registry: @@ -30,27 +31,35 @@ class Registry: def __init__(self) -> None: """Initialize an empty template registry.""" self._templates: dict[ - tuple[str, str, TaskType], PromptTemplate | SkillTemplate + tuple[str, str, TemplateType], PromptTemplate | SkillTemplate ] = {} def register(self, template: PromptTemplate | SkillTemplate) -> None: """Register a template. :param template: Template instance to register. + :raise TypeError: If the template type is unsupported. + :raise ValueError: If a template with the same name, version, and template type is already registered. 
""" - task_type = self._get_task_type(template) + for cls, mapped_type in _TEMPLATE_CLASS_TO_TYPE.items(): + if isinstance(template, cls): + template_type = mapped_type + break + else: + msg = f"Unsupported template type: {type(template)}" + raise TypeError(msg) - key = (template.name, template.version, task_type) + key = (template.name, template.version, template_type) _logger.debug( - "Registering template: name='%s', version='%s', task_type='%s'", + "Registering template: name='%s', version='%s', template_type='%s'", template.name, template.version, - task_type.value, + template_type.value, ) if key in self._templates: - msg = f"Template already registered:({template.name}, {template.version}, {task_type.value})" + msg = f"Template already registered:({template.name}, {template.version}, {template_type.value})" _logger.error(msg) raise ValueError(msg) @@ -60,39 +69,25 @@ def get( self, name: str, version: str, - task_type: TaskType, + template_type: TemplateType, ) -> PromptTemplate | SkillTemplate: """Retrieve a template by name and version. :param name: Template name. :param version: Template version. - :param task_type: Template type. + :param template_type: Template type. :return: Registered template. :raise KeyError: If template not found. """ - key = (name, version, task_type) + key = (name, version, template_type) try: return self._templates[key] except KeyError as exc: - msg = f"Template not found: ({name}, {version}, {task_type.value})" + msg = f"Template not found: ({name}, {version}, {template_type.value})" _logger.exception(msg) raise KeyError(msg) from exc - def _get_task_type(self, template: PromptTemplate | SkillTemplate) -> TaskType: - """Determine the task type for a template instance. - - :param template: Template instance to inspect. - :return: Task type corresponding to the template. - :raise TypeError: If the template type is unsupported. 
- """ - if isinstance(template, SkillTemplate): - return TaskType.SKILL - if isinstance(template, PromptTemplate): - return TaskType.PROMPT - msg = f"Unsupported template type: {type(template)}" - raise TypeError(msg) - def build_empty_registry() -> Registry: """Create an empty registry. From 3041117ba5d92075c72edc9fafbee00c9b56c735 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Wed, 1 Jul 2026 09:54:39 -0500 Subject: [PATCH 35/36] docs: clean up template and registry documentation --- src/wags_llm/registry/base.py | 2 +- src/wags_llm/templates/__init__.py | 2 +- src/wags_llm/templates/base.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/wags_llm/registry/base.py b/src/wags_llm/registry/base.py index 37cf38e..632e3d2 100644 --- a/src/wags_llm/registry/base.py +++ b/src/wags_llm/registry/base.py @@ -71,7 +71,7 @@ def get( version: str, template_type: TemplateType, ) -> PromptTemplate | SkillTemplate: - """Retrieve a template by name and version. + """Retrieve a template by name, version, and template type. :param name: Template name. :param version: Template version. diff --git a/src/wags_llm/templates/__init__.py b/src/wags_llm/templates/__init__.py index ead7b4f..860ede4 100644 --- a/src/wags_llm/templates/__init__.py +++ b/src/wags_llm/templates/__init__.py @@ -1,6 +1,6 @@ """Prompt interfaces and registry. -Define and manage versioned prompt templates. +Define and manage versioned prompt and skill templates. 
""" from wags_llm.templates.base import BaseTemplate, TemplateType diff --git a/src/wags_llm/templates/base.py b/src/wags_llm/templates/base.py index 5b30def..fe63b6b 100644 --- a/src/wags_llm/templates/base.py +++ b/src/wags_llm/templates/base.py @@ -5,11 +5,11 @@ from abc import ABC, abstractmethod from collections.abc import Mapping -from enum import Enum +from enum import StrEnum from typing import Any -class TemplateType(Enum): +class TemplateType(StrEnum): """Enum for template types supported by StructuredTaskRunner.""" SKILL = "skill" From 2f3a83e62b4ef7402ddebb7f4c1db7734dd4b5c4 Mon Sep 17 00:00:00 2001 From: SalemBajjali Date: Wed, 1 Jul 2026 11:15:57 -0500 Subject: [PATCH 36/36] fix: correct import path for PromptTemplate --- notebooks/example.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/example.ipynb b/notebooks/example.ipynb index a2cbd6b..bd2f414 100644 --- a/notebooks/example.ipynb +++ b/notebooks/example.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "b5fe35fe", "metadata": {}, "outputs": [], @@ -26,7 +26,7 @@ "from wags_llm.client.bedrock import BedrockClaudeJsonClient\n", "from wags_llm.registry.base import Registry\n", "from wags_llm.services.structured_task import StructuredTaskRunner\n", - "from wags_llm.templates.base import PromptTemplate\n", + "from wags_llm.templates.prompt_template import PromptTemplate\n", "\n", "logging.basicConfig(\n", " stream=sys.stdout,\n",