diff --git a/Dockerfile b/Dockerfile index d15d436..fe5c2cc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,7 +28,7 @@ RUN microdnf install -y git && microdnf clean all # Research Lambda invocation). Treat `Dockerfile` + `Dockerfile.alerts` # + `requirements.txt` as one tri-state pin that must move in lockstep. COPY requirements.txt ${LAMBDA_TASK_ROOT}/ -RUN pip install --no-cache-dir "alpha-engine-lib[arcticdb,flow_doctor,rag] @ git+https://github.com/cipher813/alpha-engine-lib@v0.34.0" && \ +RUN pip install --no-cache-dir "alpha-engine-lib[arcticdb,flow_doctor,rag] @ git+https://github.com/cipher813/alpha-engine-lib@v0.41.0" && \ grep -vE "^#|^$|^pytest|^python-dotenv|^boto3|^botocore|^s3transfer|^alpha-engine-lib" requirements.txt > /tmp/req-lambda.txt && \ pip install --no-cache-dir -r /tmp/req-lambda.txt && \ rm -rf /root/.cache/pip /tmp/req-lambda.txt diff --git a/Dockerfile.alerts b/Dockerfile.alerts index 9a24563..ac3acc4 100644 --- a/Dockerfile.alerts +++ b/Dockerfile.alerts @@ -13,7 +13,7 @@ FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.12 # v0.1.0 — but moving the alerts pin in lockstep avoids two-track lib # state across this repo's Lambda images. COPY requirements-alerts.txt ${LAMBDA_TASK_ROOT}/ -RUN pip install --no-cache-dir "alpha-engine-lib[flow_doctor] @ git+https://github.com/cipher813/alpha-engine-lib@v0.34.0" && \ +RUN pip install --no-cache-dir "alpha-engine-lib[flow_doctor] @ git+https://github.com/cipher813/alpha-engine-lib@v0.41.0" && \ pip install --no-cache-dir -r requirements-alerts.txt && \ rm -rf /root/.cache/pip diff --git a/evals/judge.py b/evals/judge.py index 43ad182..df3b94d 100644 --- a/evals/judge.py +++ b/evals/judge.py @@ -472,6 +472,14 @@ def build_batch_request( ``with_structured_output`` call shape — same structured-output semantics, just transported via the Batches API). 
+ Delegates payload construction to + ``alpha_engine_lib.anthropic_payload.build_batches_request_params`` + (L334 chokepoint — second consumer of the lib's anthropic_payload + substrate after morning-signal). The chokepoint enforces the + server-tool ⊥ assistant-prefill (mutually exclusive) invariant on the + embedded ``params`` dict so future RubricEval extensions that add a + server tool (web_search etc.) can't trigger Anthropic's HTTP 400. + Raises ``ValueError`` if ``artifact.agent_id`` has no rubric mapped — callers must pre-filter via ``resolve_rubric_for_agent``. @@ -479,6 +487,8 @@ def build_batch_request( this function is invoked (the skip artifact is persisted client-side without spending a batch slot). """ + from alpha_engine_lib.anthropic_payload import build_batches_request_params + rubric_name = resolve_rubric_for_agent(artifact.agent_id) if rubric_name is None: raise ValueError( @@ -491,26 +501,18 @@ def build_batch_request( rendered = _render_rubric(artifact, loaded_prompt) tool_spec = _build_rubric_tool_spec() - return { - "custom_id": custom_id, - "params": { - "model": judge_model, - "max_tokens": max_tokens, - "tools": [tool_spec], - # Force the model to call the rubric tool — equivalent to - # ``with_structured_output(...)``'s ``tool_choice`` posture. - # Without this the model can decide to emit prose, which - # would fall through every parser in this module. - "tool_choice": {"type": "tool", "name": _RUBRIC_TOOL_NAME}, - "messages": [ - {"role": "user", "content": rendered}, - ], - # ``metadata.user_id`` is reserved for end-user identification - # in Anthropic's contract; we pass the rubric+version pair - # via ``metadata`` for batch-side observability without - # putting it on a schema-validated field. - }, - } + # Force the model to call the rubric tool — equivalent to + # ``with_structured_output(...)``'s ``tool_choice`` posture. Without + # this the model can decide to emit prose, which would fall through + # every parser in this module. 
+ return build_batches_request_params( + custom_id=custom_id, + model=judge_model, + max_tokens=max_tokens, + user_content=rendered, + tools=[tool_spec], + tool_choice={"type": "tool", "name": _RUBRIC_TOOL_NAME}, + ) def parse_batch_message( diff --git a/requirements.txt b/requirements.txt index 48b5e2e..653a901 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,7 +31,7 @@ pydantic~=2.7 # graph/research_graph rag.is_available probe) # Lib is now public — no PAT required. Lambda Dockerfile installs via the # git URL; CI pulls directly. -alpha-engine-lib[arcticdb,flow_doctor,rag] @ git+https://github.com/cipher813/alpha-engine-lib@v0.34.0 +alpha-engine-lib[arcticdb,flow_doctor,rag] @ git+https://github.com/cipher813/alpha-engine-lib@v0.41.0 # Scheduling / calendar exchange-calendars~=4.5