From 7ce33cbfc510457078178ec028b1a098eaab1e76 Mon Sep 17 00:00:00 2001 From: Brian McMahon Date: Thu, 28 May 2026 09:33:12 -0700 Subject: [PATCH] refactor(judge-batch): delegate build_batch_request to lib chokepoint (L334) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `evals/judge.py::build_batch_request` now delegates payload construction to `alpha_engine_lib.anthropic_payload.build_batches_request_params` (L334 second-consumer chokepoint, shipped in lib v0.41.0). Drops the inline `{custom_id, params}` dict construction; same wire shape, same behavior. The chokepoint enforces the server-tool ⊥ assistant-prefill invariant on the embedded `params` dict — so a future RubricEval extension that adds a server-side tool (e.g. `web_search` for citation lookup) can't silently reach Anthropic's HTTP 400 the way morning-signal did in May. Lib pin v0.34.0 → v0.41.0 in lockstep across: - requirements.txt - Dockerfile (main research Lambda image) - Dockerfile.alerts (research-alerts Lambda image) ROADMAP: **L334** part 2/2 — consumer migration. Part 1 = alpha-engine-lib PR #85 (build_batches_request_params + v0.40.1 → v0.41.0). **Merge-blocked on lib PR #85 landing + the v0.41.0 git tag.** Suite: 1609 passed. Co-Authored-By: Claude Opus 4.7 (1M context) --- Dockerfile | 2 +- Dockerfile.alerts | 2 +- evals/judge.py | 42 ++++++++++++++++++++++-------------------- requirements.txt | 2 +- 4 files changed, 25 insertions(+), 23 deletions(-) diff --git a/Dockerfile b/Dockerfile index d15d4365..fe5c2ccb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,7 +28,7 @@ RUN microdnf install -y git && microdnf clean all # Research Lambda invocation). Treat `Dockerfile` + `Dockerfile.alerts` # + `requirements.txt` as one tri-state pin that must move in lockstep. COPY requirements.txt ${LAMBDA_TASK_ROOT}/ -RUN pip install --no-cache-dir "alpha-engine-lib[arcticdb,flow_doctor,rag] @ git+https://github.com/cipher813/alpha-engine-lib@v0.34.0" && \ +RUN pip install --no-cache-dir "alpha-engine-lib[arcticdb,flow_doctor,rag] @ git+https://github.com/cipher813/alpha-engine-lib@v0.41.0" && \ grep -vE "^#|^$|^pytest|^python-dotenv|^boto3|^botocore|^s3transfer|^alpha-engine-lib" requirements.txt > /tmp/req-lambda.txt && \ pip install --no-cache-dir -r /tmp/req-lambda.txt && \ rm -rf /root/.cache/pip /tmp/req-lambda.txt diff --git a/Dockerfile.alerts b/Dockerfile.alerts index 9a245638..ac3acc42 100644 --- a/Dockerfile.alerts +++ b/Dockerfile.alerts @@ -13,7 +13,7 @@ FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.12 # v0.1.0 — but moving the alerts pin in lockstep avoids two-track lib # state across this repo's Lambda images. COPY requirements-alerts.txt ${LAMBDA_TASK_ROOT}/ -RUN pip install --no-cache-dir "alpha-engine-lib[flow_doctor] @ git+https://github.com/cipher813/alpha-engine-lib@v0.34.0" && \ +RUN pip install --no-cache-dir "alpha-engine-lib[flow_doctor] @ git+https://github.com/cipher813/alpha-engine-lib@v0.41.0" && \ pip install --no-cache-dir -r requirements-alerts.txt && \ rm -rf /root/.cache/pip diff --git a/evals/judge.py b/evals/judge.py index 43ad1827..df3b94dd 100644 --- a/evals/judge.py +++ b/evals/judge.py @@ -472,6 +472,14 @@ def build_batch_request( ``with_structured_output`` call shape — same structured-output semantics, just transported via the Batches API). + Delegates payload construction to + ``alpha_engine_lib.anthropic_payload.build_batches_request_params`` + (L334 chokepoint — second consumer of the lib's anthropic_payload + substrate after morning-signal). The chokepoint enforces the + server-tool ⊥ assistant-prefill invariant on the embedded + ``params`` dict so future RubricEval extensions that add a server + tool (web_search etc.) can't reach Anthropic's HTTP 400. + Raises ``ValueError`` if ``artifact.agent_id`` has no rubric mapped — callers must pre-filter via ``resolve_rubric_for_agent``. @@ -479,6 +487,8 @@ def build_batch_request( this function is invoked (the skip artifact is persisted client-side without spending a batch slot). """ + from alpha_engine_lib.anthropic_payload import build_batches_request_params + rubric_name = resolve_rubric_for_agent(artifact.agent_id) if rubric_name is None: raise ValueError( @@ -491,26 +501,18 @@ def build_batch_request( rendered = _render_rubric(artifact, loaded_prompt) tool_spec = _build_rubric_tool_spec() - return { - "custom_id": custom_id, - "params": { - "model": judge_model, - "max_tokens": max_tokens, - "tools": [tool_spec], - # Force the model to call the rubric tool — equivalent to - # ``with_structured_output(...)``'s ``tool_choice`` posture. - # Without this the model can decide to emit prose, which - # would fall through every parser in this module. - "tool_choice": {"type": "tool", "name": _RUBRIC_TOOL_NAME}, - "messages": [ - {"role": "user", "content": rendered}, - ], - # ``metadata.user_id`` is reserved for end-user identification - # in Anthropic's contract; we pass the rubric+version pair - # via ``metadata`` for batch-side observability without - # putting it on a schema-validated field. - }, - } + # Force the model to call the rubric tool — equivalent to + # ``with_structured_output(...)``'s ``tool_choice`` posture. Without + # this the model can decide to emit prose, which would fall through + # every parser in this module. + return build_batches_request_params( + custom_id=custom_id, + model=judge_model, + max_tokens=max_tokens, + user_content=rendered, + tools=[tool_spec], + tool_choice={"type": "tool", "name": _RUBRIC_TOOL_NAME}, + ) def parse_batch_message( diff --git a/requirements.txt b/requirements.txt index 48b5e2e6..653a901f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,7 +31,7 @@ pydantic~=2.7 # graph/research_graph rag.is_available probe) # Lib is now public — no PAT required. Lambda Dockerfile installs via the # git URL; CI pulls directly. -alpha-engine-lib[arcticdb,flow_doctor,rag] @ git+https://github.com/cipher813/alpha-engine-lib@v0.34.0 +alpha-engine-lib[arcticdb,flow_doctor,rag] @ git+https://github.com/cipher813/alpha-engine-lib@v0.41.0 # Scheduling / calendar exchange-calendars~=4.5