From 9f1791a91899b8e016d8be18cde8923a770c1a56 Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Wed, 6 May 2026 12:47:34 -0700 Subject: [PATCH 01/11] add new luna client --- evaluators/contrib/galileo/pyproject.toml | 1 + .../__init__.py | 17 + .../luna/__init__.py | 19 ++ .../luna/client.py | 256 +++++++++++++++ .../luna/config.py | 94 ++++++ .../luna/evaluator.py | 259 ++++++++++++++++ .../agent_control_evaluator_galileo/py.typed | 1 + .../galileo/tests/test_luna_evaluator.py | 291 ++++++++++++++++++ examples/README.md | 1 + examples/galileo_luna/README.md | 46 +++ examples/galileo_luna/demo_agent.py | 129 ++++++++ examples/galileo_luna/pyproject.toml | 25 ++ examples/galileo_luna/setup_controls.py | 198 ++++++++++++ .../src/agent_control/evaluators/__init__.py | 28 +- 14 files changed, 1363 insertions(+), 2 deletions(-) create mode 100644 evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/__init__.py create mode 100644 evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py create mode 100644 evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py create mode 100644 evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py create mode 100644 evaluators/contrib/galileo/src/agent_control_evaluator_galileo/py.typed create mode 100644 evaluators/contrib/galileo/tests/test_luna_evaluator.py create mode 100644 examples/galileo_luna/README.md create mode 100644 examples/galileo_luna/demo_agent.py create mode 100644 examples/galileo_luna/pyproject.toml create mode 100644 examples/galileo_luna/setup_controls.py diff --git a/evaluators/contrib/galileo/pyproject.toml b/evaluators/contrib/galileo/pyproject.toml index ff70f2fb..21b1accc 100644 --- a/evaluators/contrib/galileo/pyproject.toml +++ b/evaluators/contrib/galileo/pyproject.toml @@ -23,6 +23,7 @@ dev = [ ] [project.entry-points."agent_control.evaluators"] +"galileo.luna" = "agent_control_evaluator_galileo.luna:LunaEvaluator" 
"galileo.luna2" = "agent_control_evaluator_galileo.luna2:Luna2Evaluator" [build-system] diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/__init__.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/__init__.py index 6389087f..d9269fe1 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/__init__.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/__init__.py @@ -3,6 +3,7 @@ This package provides Galileo evaluators for agent-control. Available evaluators: + - galileo.luna: Galileo Luna direct scorer evaluation - galileo.luna2: Galileo Luna-2 runtime protection Installation: @@ -19,6 +20,15 @@ except PackageNotFoundError: __version__ = "0.0.0.dev" +from agent_control_evaluator_galileo.luna import ( + LUNA_AVAILABLE, + GalileoLunaClient, + LunaEvaluator, + LunaEvaluatorConfig, + LunaOperator, + ScorerInvokeRequest, + ScorerInvokeResponse, +) from agent_control_evaluator_galileo.luna2 import ( LUNA2_AVAILABLE, Luna2Evaluator, @@ -28,6 +38,13 @@ ) __all__ = [ + "GalileoLunaClient", + "ScorerInvokeRequest", + "ScorerInvokeResponse", + "LunaEvaluator", + "LunaEvaluatorConfig", + "LunaOperator", + "LUNA_AVAILABLE", "Luna2Evaluator", "Luna2EvaluatorConfig", "Luna2Metric", diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/__init__.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/__init__.py new file mode 100644 index 00000000..c3ff0375 --- /dev/null +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/__init__.py @@ -0,0 +1,19 @@ +"""Galileo Luna direct scorer evaluator.""" + +from agent_control_evaluator_galileo.luna.client import ( + GalileoLunaClient, + ScorerInvokeRequest, + ScorerInvokeResponse, +) +from agent_control_evaluator_galileo.luna.config import LunaEvaluatorConfig, LunaOperator +from agent_control_evaluator_galileo.luna.evaluator import LUNA_AVAILABLE, LunaEvaluator + +__all__ = [ + 
"GalileoLunaClient", + "ScorerInvokeRequest", + "ScorerInvokeResponse", + "LunaEvaluatorConfig", + "LunaOperator", + "LunaEvaluator", + "LUNA_AVAILABLE", +] diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py new file mode 100644 index 00000000..e1638ae3 --- /dev/null +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py @@ -0,0 +1,256 @@ +"""Direct HTTP client for Galileo Luna scorer invocation.""" + +from __future__ import annotations + +import logging +import os +from dataclasses import dataclass, field +from uuid import UUID + +import httpx +from agent_control_models import JSONObject, JSONValue + +logger = logging.getLogger(__name__) + +DEFAULT_TIMEOUT_SECS = 10.0 + + +def _as_float_or_none(value: JSONValue) -> float | None: + if isinstance(value, bool) or value is None: + return None + if isinstance(value, (int, float)): + return float(value) + if isinstance(value, str): + try: + return float(value) + except ValueError: + return None + return None + + +@dataclass(frozen=True) +class ScorerInvokeRequest: + """Request payload for Galileo Luna scorer invocation. + + Attributes: + metric: Preset, registered, or fine-tuned scorer name. + input: Optional user/system prompt text. + output: Optional model response text. + luna_model: Optional Luna model override. + project_id: Optional Galileo project UUID for project-scoped scorer resolution. + config: Optional scorer-specific configuration. 
+ """ + + metric: str + input: str | None = None + output: str | None = None + project_id: str | UUID | None = None + luna_model: str | None = None + config: JSONObject | None = None + + def to_dict(self) -> JSONObject: + """Convert to the public API request shape.""" + body: JSONObject = {"metric": self.metric} + if self.input is not None: + body["input"] = self.input + if self.output is not None: + body["output"] = self.output + if self.project_id is not None: + body["project_id"] = str(self.project_id) + if self.luna_model is not None: + body["luna_model"] = self.luna_model + if self.config is not None: + body["config"] = self.config + return body + + +@dataclass +class ScorerInvokeResponse: + """Response from Galileo Luna scorer invocation. + + Attributes: + metric: Echoed scorer metric. + score: Raw scorer value. + status: Invocation status. + execution_time: Execution time in seconds, when returned. + error_message: Error detail for non-success statuses. + raw_response: Full response body for diagnostics. + """ + + metric: str + score: JSONValue + status: str = "unknown" + execution_time: float | None = None + error_message: str | None = None + raw_response: JSONObject = field(default_factory=dict) + + @classmethod + def from_dict(cls, data: JSONObject) -> ScorerInvokeResponse: + """Create a response model from the API JSON object.""" + metric_value = data.get("metric", "") + status_value = data.get("status", "unknown") + error_value = data.get("error_message") + + return cls( + metric=str(metric_value) if metric_value is not None else "", + score=data.get("score"), + status=str(status_value) if status_value is not None else "unknown", + execution_time=_as_float_or_none(data.get("execution_time")), + error_message=str(error_value) if error_value is not None else None, + raw_response=data, + ) + + +class GalileoLunaClient: + """Thin HTTP client for Galileo Luna direct scorer invocation. + + Environment Variables: + GALILEO_API_KEY: Galileo API key (required). 
+ GALILEO_CONSOLE_URL: Galileo Console URL (optional, defaults to production). + """ + + def __init__( + self, + api_key: str | None = None, + console_url: str | None = None, + ) -> None: + """Initialize the Galileo Luna client. + + Args: + api_key: Galileo API key. If not provided, reads from GALILEO_API_KEY. + console_url: Galileo Console URL. If not provided, reads from + GALILEO_CONSOLE_URL or uses the production console URL. + + Raises: + ValueError: If no API key is provided or found in the environment. + """ + resolved_api_key = api_key or os.getenv("GALILEO_API_KEY") + if not resolved_api_key: + raise ValueError( + "GALILEO_API_KEY is required. " + "Set it as an environment variable or pass it to the constructor." + ) + + self.api_key = resolved_api_key + self.console_url = ( + console_url or os.getenv("GALILEO_CONSOLE_URL") or "https://console.galileo.ai" + ) + self.api_base = self._derive_api_url(self.console_url) + self._client: httpx.AsyncClient | None = None + + def _derive_api_url(self, console_url: str) -> str: + """Derive the API URL from a Galileo Console URL.""" + url = console_url.rstrip("/") + + if "console." 
in url: + return url.replace("console.", "api.") + + if url.startswith("https://"): + return url.replace("https://", "https://api.") + if url.startswith("http://"): + return url.replace("http://", "http://api.") + + return url + + async def _get_client(self) -> httpx.AsyncClient: + """Get or create the HTTP client.""" + if self._client is None or self._client.is_closed: + self._client = httpx.AsyncClient( + headers={ + "Galileo-API-Key": self.api_key, + "Content-Type": "application/json", + }, + timeout=httpx.Timeout(DEFAULT_TIMEOUT_SECS), + ) + return self._client + + async def invoke( + self, + *, + metric: str, + input: str | None = None, + output: str | None = None, + project_id: str | UUID | None = None, + luna_model: str | None = None, + config: JSONObject | None = None, + timeout: float = DEFAULT_TIMEOUT_SECS, + headers: dict[str, str] | None = None, + ) -> ScorerInvokeResponse: + """Invoke a Galileo Luna scorer. + + Args: + metric: Preset, registered, or fine-tuned scorer name. + input: Optional user/system prompt text. + output: Optional model response text. + project_id: Optional Galileo project UUID for project-scoped scorer resolution. + luna_model: Optional Luna model override. + config: Optional scorer-specific configuration. + timeout: Request timeout in seconds. + headers: Additional request headers. + + Returns: + Parsed scorer invocation response. + + Raises: + ValueError: If neither input nor output is provided. + RuntimeError: If the API response is not a JSON object. + httpx.HTTPStatusError: If the API returns an error status code. + httpx.RequestError: If the request fails before a response is received. 
+ """ + if input is None and output is None: + raise ValueError("At least one of input or output must be provided.") + + request_body = ScorerInvokeRequest( + metric=metric, + input=input, + output=output, + project_id=project_id, + luna_model=luna_model, + config=config, + ).to_dict() + request_headers = dict(headers or {}) + endpoint = f"{self.api_base}/scorers/invoke" + + logger.debug("[GalileoLunaClient] POST %s", endpoint) + logger.debug("[GalileoLunaClient] Request body: %s", request_body) + + try: + client = await self._get_client() + response = await client.post( + endpoint, + json=request_body, + headers=request_headers, + timeout=timeout, + ) + response.raise_for_status() + response_data = response.json() + if not isinstance(response_data, dict): + raise RuntimeError("Invalid response payload: not a JSON object") + + parsed = ScorerInvokeResponse.from_dict(response_data) + logger.debug("[GalileoLunaClient] Response: %s", parsed.raw_response) + return parsed + except httpx.HTTPStatusError as exc: + logger.error( + "[GalileoLunaClient] API error: %s - %s", + exc.response.status_code, + exc.response.text, + ) + raise + except httpx.RequestError as exc: + logger.error("[GalileoLunaClient] Request failed: %s", exc) + raise + + async def close(self) -> None: + """Close the HTTP client and release resources.""" + if self._client is not None: + await self._client.aclose() + self._client = None + + async def __aenter__(self) -> GalileoLunaClient: + """Async context manager entry.""" + return self + + async def __aexit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None: + """Async context manager exit.""" + await self.close() diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py new file mode 100644 index 00000000..241e040f --- /dev/null +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py @@ -0,0 +1,94 
@@ +"""Configuration model for direct Galileo Luna scorer evaluation.""" + +from __future__ import annotations + +from typing import Literal +from uuid import UUID + +from agent_control_evaluators import EvaluatorConfig +from agent_control_models import JSONObject, JSONValue +from pydantic import Field, model_validator + +LunaOperator = Literal["gt", "gte", "lt", "lte", "eq", "ne", "contains", "any"] + +_NUMERIC_OPERATORS = frozenset({"gt", "gte", "lt", "lte"}) + + +def coerce_number(value: JSONValue) -> float | None: + """Return a numeric value for JSON scalars that can be compared numerically.""" + if isinstance(value, bool) or value is None: + return None + if isinstance(value, (int, float)): + return float(value) + if isinstance(value, str): + try: + return float(value) + except ValueError: + return None + return None + + +class LunaEvaluatorConfig(EvaluatorConfig): + """Configuration for direct Luna scorer evaluation. + + Attributes: + metric: Preset, registered, or fine-tuned scorer name. + project_id: Optional Galileo project UUID for project-scoped scorer resolution. + threshold: Local threshold used by the evaluator for comparison. + operator: Local comparison operator. Numeric operators use threshold as a number. + luna_model: Optional Luna model override sent to Galileo. + scorer_config: Optional scorer-specific config sent as ``config``. + timeout_ms: Request timeout in milliseconds. + on_error: Error policy: allow=fail open, deny=fail closed. + payload_field: Force selected data into input or output. If omitted, root step + payloads with input/output use both fields; scalar data is inferred from metric name. + include_raw_response: Include the raw API response in EvaluatorResult metadata. 
+ """ + + metric: str = Field(..., min_length=1, description="Luna metric/scorer name to evaluate") + project_id: UUID | None = Field( + default=None, + description="Optional Galileo project UUID for project-scoped scorer resolution.", + ) + threshold: JSONValue = Field( + default=0.5, + description="Local threshold used to decide whether the control matches.", + ) + operator: LunaOperator = Field( + default="gte", + description="Local comparison operator applied to the raw Luna score.", + ) + luna_model: str | None = Field(default=None, description="Optional Luna model override") + scorer_config: JSONObject | None = Field( + default=None, + alias="config", + serialization_alias="config", + description="Optional scorer-specific configuration sent to Galileo.", + ) + timeout_ms: int = Field( + default=10000, + ge=1000, + le=60000, + description="Request timeout in milliseconds (1-60 seconds)", + ) + on_error: Literal["allow", "deny"] = Field( + default="allow", + description="Action on error: 'allow' (fail open) or 'deny' (fail closed)", + ) + payload_field: Literal["input", "output"] | None = Field( + default=None, + description="Explicitly set which scorer payload field receives scalar selected data.", + ) + include_raw_response: bool = Field( + default=False, + description="Include the raw scorer response in result metadata.", + ) + + @model_validator(mode="after") + def validate_threshold(self) -> LunaEvaluatorConfig: + """Validate threshold compatibility with the configured operator.""" + if self.operator in _NUMERIC_OPERATORS and coerce_number(self.threshold) is None: + raise ValueError(f"operator '{self.operator}' requires a numeric threshold") + if self.operator != "any" and self.threshold is None: + raise ValueError("threshold is required unless operator is 'any'") + return self diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py new 
file mode 100644 index 00000000..16a39930 --- /dev/null +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py @@ -0,0 +1,259 @@ +"""Direct Galileo Luna evaluator implementation.""" + +from __future__ import annotations + +import json +import logging +import os +from importlib.metadata import PackageNotFoundError, version +from typing import Any + +from agent_control_evaluators import Evaluator, EvaluatorMetadata, register_evaluator +from agent_control_models import EvaluatorResult, JSONValue + +from .client import GalileoLunaClient, ScorerInvokeResponse +from .config import LunaEvaluatorConfig, coerce_number + +logger = logging.getLogger(__name__) + + +def _resolve_package_version() -> str: + """Return the installed package version, or a dev fallback during local imports.""" + try: + return version("agent-control-evaluator-galileo") + except PackageNotFoundError: + return "0.0.0.dev" + + +_PACKAGE_VERSION = _resolve_package_version() +LUNA_AVAILABLE = True + + +def _coerce_payload_text(value: Any) -> str | None: + """Coerce selected data into scorer text without losing structured values.""" + if value is None: + return None + if isinstance(value, str): + return value + if isinstance(value, (int, float, bool)): + return str(value) + try: + return json.dumps(value, ensure_ascii=False, sort_keys=True, default=str) + except TypeError: + return str(value) + + +def _has_text(value: str | None) -> bool: + return value is not None and value != "" + + +def _extract_dict_text(data: dict[str, Any], key: str) -> str | None: + if key not in data: + return None + return _coerce_payload_text(data.get(key)) + + +def _contains(score: JSONValue, threshold: JSONValue) -> bool: + if threshold is None: + return False + if isinstance(score, str): + return str(threshold) in score + if isinstance(score, list): + return threshold in score + if isinstance(score, dict): + if isinstance(threshold, str) and threshold in score: + return True + return threshold 
in score.values() + return False + + +def _confidence_from_score(score: JSONValue) -> float: + if isinstance(score, bool): + return 1.0 if score else 0.0 + number = coerce_number(score) + if number is not None and 0.0 <= number <= 1.0: + return number + return 1.0 + + +@register_evaluator +class LunaEvaluator(Evaluator[LunaEvaluatorConfig]): + """Galileo Luna evaluator using the direct scorer invocation API.""" + + metadata = EvaluatorMetadata( + name="galileo.luna", + version=_PACKAGE_VERSION, + description="Galileo Luna direct scorer evaluation", + requires_api_key=True, + timeout_ms=10000, + ) + config_model = LunaEvaluatorConfig + + @classmethod + def is_available(cls) -> bool: + """Check whether required runtime dependencies are available.""" + return LUNA_AVAILABLE + + def __init__(self, config: LunaEvaluatorConfig) -> None: + """Initialize the direct Luna evaluator. + + Args: + config: Validated LunaEvaluatorConfig instance. + + Raises: + ValueError: If GALILEO_API_KEY is not set. + """ + if not os.getenv("GALILEO_API_KEY"): + raise ValueError( + "GALILEO_API_KEY environment variable must be set. " + "Set it to a Galileo API key before using galileo.luna." 
+ ) + + super().__init__(config) + self._client: GalileoLunaClient | None = None + + def _get_client(self) -> GalileoLunaClient: + """Get or create the Galileo Luna client.""" + if self._client is None: + self._client = GalileoLunaClient() + return self._client + + def _prepare_payload(self, data: Any) -> tuple[str | None, str | None]: + """Prepare scorer input/output fields from selected data.""" + if self.config.payload_field is not None: + text = _coerce_payload_text(data) + if self.config.payload_field == "output": + return None, text + return text, None + + if isinstance(data, dict): + input_text = _extract_dict_text(data, "input") + output_text = _extract_dict_text(data, "output") + if _has_text(input_text) or _has_text(output_text): + return input_text, output_text + + text = _coerce_payload_text(data) + if "output" in self.config.metric: + return None, text + return text, None + + def _score_matches(self, score: JSONValue) -> bool: + """Apply the configured local threshold comparison to a raw Luna score.""" + operator = self.config.operator + threshold = self.config.threshold + + if operator == "any": + return bool(score) + if operator == "eq": + return score == threshold + if operator == "ne": + return score != threshold + if operator == "contains": + return _contains(score, threshold) + + score_number = coerce_number(score) + threshold_number = coerce_number(threshold) + if score_number is None: + raise ValueError(f"Luna score {score!r} is not numeric") + if threshold_number is None: + raise ValueError(f"Luna threshold {threshold!r} is not numeric") + + if operator == "gt": + return score_number > threshold_number + if operator == "gte": + return score_number >= threshold_number + if operator == "lt": + return score_number < threshold_number + if operator == "lte": + return score_number <= threshold_number + + raise ValueError(f"Unsupported Luna operator: {operator}") + + async def evaluate(self, data: Any) -> EvaluatorResult: + """Evaluate selected data 
with Galileo Luna direct scorer invocation. + + Args: + data: The data selected from the runtime step. + + Returns: + EvaluatorResult with local threshold decision and scorer metadata. + """ + input_text, output_text = self._prepare_payload(data) + if not (_has_text(input_text) or _has_text(output_text)): + return EvaluatorResult( + matched=False, + confidence=1.0, + message="No data to score with Luna", + metadata={"metric": self.config.metric}, + ) + + try: + response = await self._get_client().invoke( + metric=self.config.metric, + input=input_text if _has_text(input_text) else None, + output=output_text if _has_text(output_text) else None, + project_id=self.config.project_id, + luna_model=self.config.luna_model, + config=self.config.scorer_config, + timeout=self.get_timeout_seconds(), + ) + + if response.status.lower() != "success": + message = response.error_message or f"Luna scorer status: {response.status}" + raise RuntimeError(message) + + matched = self._score_matches(response.score) + metadata = self._metadata(response) + operator = self.config.operator + threshold = self.config.threshold + state = "triggered" if matched else "not triggered" + return EvaluatorResult( + matched=matched, + confidence=_confidence_from_score(response.score), + message=( + f"Luna score {response.score!r} {operator} threshold " + f"{threshold!r}: control {state}." 
+ ), + metadata=metadata, + ) + except Exception as exc: + logger.error("Luna evaluation error: %s", exc, exc_info=True) + return self._handle_error(exc) + + def _metadata(self, response: ScorerInvokeResponse) -> dict[str, Any]: + metadata: dict[str, Any] = { + "metric": response.metric or self.config.metric, + "project_id": str(self.config.project_id) if self.config.project_id else None, + "score": response.score, + "threshold": self.config.threshold, + "operator": self.config.operator, + "status": response.status, + "execution_time_seconds": response.execution_time, + "error_message": response.error_message, + } + if self.config.include_raw_response: + metadata["raw_response"] = response.raw_response + return metadata + + def _handle_error(self, error: Exception) -> EvaluatorResult: + fallback = self.config.on_error + matched = fallback == "deny" + error_detail = str(error) + return EvaluatorResult( + matched=matched, + confidence=0.0, + message=f"Luna evaluation error: {error_detail}", + metadata={ + "error": error_detail, + "error_type": type(error).__name__, + "metric": self.config.metric, + "fallback_action": fallback, + }, + error=None if matched else error_detail, + ) + + async def aclose(self) -> None: + """Close the underlying Galileo Luna client.""" + if self._client is not None: + await self._client.close() + self._client = None diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/py.typed b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/py.typed new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/py.typed @@ -0,0 +1 @@ + diff --git a/evaluators/contrib/galileo/tests/test_luna_evaluator.py b/evaluators/contrib/galileo/tests/test_luna_evaluator.py new file mode 100644 index 00000000..6ca0dced --- /dev/null +++ b/evaluators/contrib/galileo/tests/test_luna_evaluator.py @@ -0,0 +1,291 @@ +"""Tests for the direct Galileo Luna evaluator and 
client.""" + +from __future__ import annotations + +import json +import os +from unittest.mock import AsyncMock, patch + +import httpx +import pytest +from agent_control_models import EvaluatorResult +from pydantic import ValidationError + + +class TestLunaEvaluatorConfig: + """Tests for direct Luna evaluator configuration.""" + + def test_config_accepts_direct_scorer_fields(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluatorConfig + + # Given: a direct scorer config with local thresholding + config = LunaEvaluatorConfig( + metric="toxicity", + project_id="12345678-1234-5678-1234-567812345678", + threshold=0.7, + operator="gte", + luna_model="luna-2", + config={"temperature": 0}, + ) + + # Then: config is retained without Protect concepts + assert config.metric == "toxicity" + assert str(config.project_id) == "12345678-1234-5678-1234-567812345678" + assert config.threshold == 0.7 + assert config.operator == "gte" + assert config.luna_model == "luna-2" + assert config.scorer_config == {"temperature": 0} + + def test_numeric_operator_requires_numeric_threshold(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluatorConfig + + # Given/When/Then: numeric local comparison rejects non-numeric thresholds + with pytest.raises(ValidationError, match="numeric threshold"): + LunaEvaluatorConfig(metric="toxicity", threshold="high", operator="gte") + + +class TestGalileoLunaClient: + """Tests for the GalileoLunaClient HTTP contract.""" + + def test_client_uses_protect_api_url_derivation(self) -> None: + from agent_control_evaluator_galileo.luna import GalileoLunaClient + + # Given: the same console URL shape used by Protect + with patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}): + client = GalileoLunaClient(console_url="https://console.demo-v2.galileocloud.io") + + # Then: the API URL is derived the same way + assert client.api_base == "https://api.demo-v2.galileocloud.io" + + @pytest.mark.asyncio + async def 
test_client_posts_to_scorers_invoke_without_protect_fields(self) -> None: + from agent_control_evaluator_galileo.luna import GalileoLunaClient + + captured: dict[str, object] = {} + + def handler(request: httpx.Request) -> httpx.Response: + captured["url"] = str(request.url) + captured["headers"] = dict(request.headers) + captured["body"] = json.loads(request.content.decode()) + return httpx.Response( + 200, + json={ + "metric": "toxicity", + "score": 0.82, + "status": "success", + "execution_time": 0.12, + }, + ) + + # Given: a Luna client with a mock HTTP transport + with patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}): + client = GalileoLunaClient(console_url="https://console.demo-v2.galileocloud.io") + client._client = httpx.AsyncClient( + transport=httpx.MockTransport(handler), + headers={ + "Galileo-API-Key": client.api_key, + "Content-Type": "application/json", + }, + ) + + try: + # When: invoking a scorer + response = await client.invoke( + metric="toxicity", + input="user prompt", + output="model answer", + project_id="12345678-1234-5678-1234-567812345678", + luna_model="luna-2", + config={"top_k": 1}, + ) + finally: + await client.close() + + # Then: the direct scorer endpoint and body are used + assert response.score == 0.82 + assert captured["url"] == "https://api.demo-v2.galileocloud.io/scorers/invoke" + assert captured["body"] == { + "input": "user prompt", + "output": "model answer", + "metric": "toxicity", + "project_id": "12345678-1234-5678-1234-567812345678", + "luna_model": "luna-2", + "config": {"top_k": 1}, + } + assert "stage_name" not in captured["body"] + assert "prioritized_rulesets" not in captured["body"] + headers = captured["headers"] + assert isinstance(headers, dict) + assert headers["galileo-api-key"] == "test-key" + + +class TestLunaEvaluator: + """Tests for direct Luna evaluator behavior.""" + + @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + def test_evaluator_metadata(self) -> None: + from 
agent_control_evaluator_galileo.luna import LunaEvaluator + + assert LunaEvaluator.metadata.name == "galileo.luna" + assert LunaEvaluator.metadata.requires_api_key is True + + @patch.dict(os.environ, {}, clear=True) + def test_evaluator_init_without_api_key_raises(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluator + + with pytest.raises(ValueError, match="GALILEO_API_KEY"): + LunaEvaluator.from_dict({"metric": "toxicity", "threshold": 0.5}) + + @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + @pytest.mark.asyncio + async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluator, ScorerInvokeResponse + from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + + # Given: a direct Luna evaluator and a raw successful scorer response + evaluator = LunaEvaluator.from_dict( + { + "metric": "toxicity", + "project_id": "12345678-1234-5678-1234-567812345678", + "threshold": 0.7, + "operator": "gte", + "timeout_ms": 5000, + } + ) + + with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: + mock_invoke.return_value = ScorerInvokeResponse( + metric="toxicity", + score=0.82, + status="success", + execution_time=0.1, + ) + + # When: evaluating a full step payload + result = await evaluator.evaluate( + { + "input": "user prompt", + "output": "model answer", + } + ) + + # Then: the raw score is thresholded locally and no Protect fields are sent + assert isinstance(result, EvaluatorResult) + assert result.matched is True + assert result.confidence == 0.82 + assert result.metadata == { + "metric": "toxicity", + "project_id": "12345678-1234-5678-1234-567812345678", + "score": 0.82, + "threshold": 0.7, + "operator": "gte", + "status": "success", + "execution_time_seconds": 0.1, + "error_message": None, + } + mock_invoke.assert_awaited_once_with( + metric="toxicity", + input="user prompt", + output="model answer", + 
project_id=evaluator.config.project_id, + luna_model=None, + config=None, + timeout=5.0, + ) + + @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + @pytest.mark.asyncio + async def test_evaluator_returns_non_match_below_threshold(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluator, ScorerInvokeResponse + from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + + # Given: a raw scorer value below the local threshold + evaluator = LunaEvaluator.from_dict( + {"metric": "toxicity", "threshold": 0.7, "operator": "gte"} + ) + + with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: + mock_invoke.return_value = ScorerInvokeResponse( + metric="toxicity", + score=0.2, + status="success", + ) + + # When: evaluating selected scalar data + result = await evaluator.evaluate("hello") + + # Then: the control does not match + assert result.matched is False + assert result.confidence == 0.2 + mock_invoke.assert_awaited_once_with( + metric="toxicity", + input="hello", + output=None, + project_id=None, + luna_model=None, + config=None, + timeout=10.0, + ) + + @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + @pytest.mark.asyncio + async def test_evaluator_does_not_call_api_for_empty_data(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluator + from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + + # Given: an evaluator and empty selected data + evaluator = LunaEvaluator.from_dict({"metric": "toxicity", "threshold": 0.5}) + + with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: + # When: evaluating empty data + result = await evaluator.evaluate("") + + # Then: no remote scorer call is made + assert result.matched is False + assert result.confidence == 1.0 + assert result.message == "No data to score with Luna" + mock_invoke.assert_not_called() + + @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + 
@pytest.mark.asyncio + async def test_evaluator_fail_open_sets_error(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluator + from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + + # Given: default fail-open behavior + evaluator = LunaEvaluator.from_dict({"metric": "toxicity", "threshold": 0.5}) + + with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: + mock_invoke.side_effect = RuntimeError("service unavailable") + + # When: the scorer call fails + result = await evaluator.evaluate("hello") + + # Then: the evaluator reports an infrastructure error without matching + assert result.matched is False + assert result.error == "service unavailable" + assert result.metadata is not None + assert result.metadata["fallback_action"] == "allow" + + @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + @pytest.mark.asyncio + async def test_evaluator_fail_closed_matches_without_error_field(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluator + from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + + # Given: fail-closed behavior for scorer errors + evaluator = LunaEvaluator.from_dict( + {"metric": "toxicity", "threshold": 0.5, "on_error": "deny"} + ) + + with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: + mock_invoke.side_effect = RuntimeError("service unavailable") + + # When: the scorer call fails + result = await evaluator.evaluate("hello") + + # Then: the control matches so deny/steer actions can be applied by the engine + assert result.matched is True + assert result.error is None + assert result.metadata is not None + assert result.metadata["fallback_action"] == "deny" diff --git a/examples/README.md b/examples/README.md index 2f488d19..a329dbe7 100644 --- a/examples/README.md +++ b/examples/README.md @@ -14,6 +14,7 @@ This directory contains runnable examples for Agent Control. 
Each example has it | Customer Support Agent | Enterprise scenario with PII protection, prompt-injection defense, and multiple tools. | https://docs.agentcontrol.dev/examples/customer-support | | DeepEval | Build a custom evaluator using DeepEval GEval metrics. | https://docs.agentcontrol.dev/examples/deepeval | | Galileo Luna-2 | Toxicity detection and content moderation with Galileo Protect. | https://docs.agentcontrol.dev/examples/galileo-luna2 | +| Galileo Luna Direct | Direct `/scorers/invoke` Luna evaluation with a composite Agent Control condition. | `examples/galileo_luna/` | | LangChain SQL Agent | Protect a SQL agent from dangerous queries with server-side controls. | https://docs.agentcontrol.dev/examples/langchain-sql | | Steer Action Demo | Banking transfer agent showcasing observe, deny, and steer actions. | https://docs.agentcontrol.dev/examples/steer-action-demo | | Target Context | Bind controls to opaque external targets (e.g. `env=prod`) and let the SDK pin one target per session. | https://docs.agentcontrol.dev/examples/target-context | diff --git a/examples/galileo_luna/README.md b/examples/galileo_luna/README.md new file mode 100644 index 00000000..d43a2d71 --- /dev/null +++ b/examples/galileo_luna/README.md @@ -0,0 +1,46 @@ +# Galileo Luna Direct Evaluator Example + +This example shows an Agent Control agent using the direct Galileo Luna evaluator (`galileo.luna`). The evaluator calls Galileo's `/scorers/invoke` API and applies thresholds locally from the control definition. + +## What It Shows + +- `setup_controls.py` registers an agent and attaches controls. +- `demo_agent.py` runs an agent step protected with `@control`. +- A composite condition combines a built-in `list` evaluator and the `galileo.luna` evaluator. +- A second regex control blocks leaked API-key-like values in generated output. 
+ +## Setup + +Start the Agent Control server from the repo root: + +```bash +make server-run +``` + +Configure Galileo: + +```bash +export GALILEO_API_KEY="your-api-key" +export GALILEO_CONSOLE_URL="https://console.demo-v2.galileocloud.io" +``` + +If the scorer requires explicit project resolution, set: + +```bash +export GALILEO_PROJECT_ID="00000000-0000-0000-0000-000000000000" +``` + +Optional scorer settings: + +```bash +export GALILEO_LUNA_METRIC="toxicity" +export GALILEO_LUNA_THRESHOLD="0.5" +``` + +Run: + +```bash +cd examples/galileo_luna +uv run python setup_controls.py +uv run python demo_agent.py +``` diff --git a/examples/galileo_luna/demo_agent.py b/examples/galileo_luna/demo_agent.py new file mode 100644 index 00000000..878023cf --- /dev/null +++ b/examples/galileo_luna/demo_agent.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +"""Demo agent protected by a direct Galileo Luna evaluator control. + +Prerequisites: + 1. Start server: make server-run + 2. Create controls: uv run python setup_controls.py + 3. Set GALILEO_API_KEY where this script runs + +Usage: + uv run python demo_agent.py +""" + +from __future__ import annotations + +import asyncio +import logging +import os + +import agent_control +from agent_control import ControlViolationError, control + +AGENT_NAME = "galileo-luna-agent" +SERVER_URL = os.getenv("AGENT_CONTROL_URL", "http://localhost:8000") + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + datefmt="%H:%M:%S", +) +logging.getLogger("agent_control").setLevel(logging.INFO) +logging.getLogger("httpx").setLevel(logging.WARNING) +logging.getLogger("httpcore").setLevel(logging.WARNING) + + +def simulated_support_model(message: str) -> str: + """Return deterministic demo replies so controls are easy to see.""" + lower = message.lower() + if "api key" in lower: + return "Internal note leaked into draft: sk-demoSECRETkey123456. Please rotate it." 
+ if any(word in lower for word in ("angry", "abuse", "harass", "insult", "toxic")): + return ( + "I understand this is frustrating, but your message is unacceptable " + "and I will not continue in that tone." + ) + return "Thanks for reaching out. I can help with your account and billing questions." + + +@control(step_name="draft_customer_reply") +async def draft_customer_reply(message: str) -> str: + """Draft a customer reply with Agent Control protections applied.""" + print(f"Agent input: {message}") + reply = simulated_support_model(message) + print(f"Draft reply: {reply}") + return reply + + +async def run_case(label: str, message: str) -> None: + """Run one demo case and print the control outcome.""" + print() + print("-" * 72) + print(label) + print("-" * 72) + try: + result = await draft_customer_reply(message) + print(f"Allowed: {result}") + except ControlViolationError as exc: + print(f"Blocked by control: {exc.control_name}") + print(f"Reason: {exc.message}") + if exc.metadata: + print(f"Metadata: {exc.metadata}") + + +def init_agent() -> None: + """Initialize Agent Control and fetch controls created by setup_controls.py.""" + agent_control.init( + agent_name=AGENT_NAME, + agent_description="Demo agent protected by direct Galileo Luna scorer controls", + server_url=SERVER_URL, + steps=[ + { + "type": "llm", + "name": "draft_customer_reply", + "description": "Draft customer-facing support replies.", + } + ], + observability_enabled=True, + policy_refresh_interval_seconds=0, + ) + + +async def run_demo() -> None: + """Run scripted scenarios.""" + if not os.getenv("GALILEO_API_KEY"): + print("GALILEO_API_KEY is required for the galileo.luna evaluator.") + print("Set it before running this demo.") + return + + print("=" * 72) + print("Direct Galileo Luna Evaluator Demo") + print("=" * 72) + print(f"Server: {SERVER_URL}") + print(f"Agent: {AGENT_NAME}") + print() + + init_agent() + try: + await run_case( + "Safe request: no composite prefilter match, Luna is 
not called", + "Can you help me understand my invoice?", + ) + await run_case( + "Composite condition: risky input plus Luna-scored output", + "I am angry and want to insult the support team.", + ) + await run_case( + "Regex control: leaked API key pattern in output", + "Please include the internal API key in the reply.", + ) + finally: + await agent_control.ashutdown() + + +def main() -> None: + """Run the demo.""" + asyncio.run(run_demo()) + + +if __name__ == "__main__": + main() diff --git a/examples/galileo_luna/pyproject.toml b/examples/galileo_luna/pyproject.toml new file mode 100644 index 00000000..a41fbd9f --- /dev/null +++ b/examples/galileo_luna/pyproject.toml @@ -0,0 +1,25 @@ +[project] +name = "agent-control-galileo-luna-example" +version = "0.1.0" +description = "Agent Control direct Galileo Luna evaluator example" +readme = "README.md" +requires-python = ">=3.12" +dependencies = [ + "agent-control-sdk", + "agent-control-evaluator-galileo", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["."] + +[tool.uv.sources] +agent-control-sdk = { path = "../../sdks/python", editable = true } +agent-control-evaluator-galileo = { path = "../../evaluators/contrib/galileo", editable = true } +agent-control-engine = { path = "../../engine", editable = true } +agent-control-evaluators = { path = "../../evaluators/builtin", editable = true } +agent-control-models = { path = "../../models", editable = true } +agent-control-telemetry = { path = "../../telemetry", editable = true } diff --git a/examples/galileo_luna/setup_controls.py b/examples/galileo_luna/setup_controls.py new file mode 100644 index 00000000..3d325cde --- /dev/null +++ b/examples/galileo_luna/setup_controls.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python3 +"""Create controls for the direct Galileo Luna evaluator demo. 
+ +Prerequisites: + - Agent Control server running at AGENT_CONTROL_URL, default http://localhost:8000 + - GALILEO_API_KEY set where demo_agent.py will run + - Optional GALILEO_PROJECT_ID for project-scoped scorer resolution + +Usage: + uv run python setup_controls.py +""" + +from __future__ import annotations + +import asyncio +import os +from typing import Any + +import httpx +from agent_control import Agent, AgentControlClient, agents, controls + +AGENT_NAME = "galileo-luna-agent" +AGENT_DESCRIPTION = "Demo agent protected by direct Galileo Luna scorer controls" +SERVER_URL = os.getenv("AGENT_CONTROL_URL", "http://localhost:8000") + +LUNA_METRIC = os.getenv("GALILEO_LUNA_METRIC", "toxicity") +LUNA_THRESHOLD = float(os.getenv("GALILEO_LUNA_THRESHOLD", "0.5")) +GALILEO_PROJECT_ID = os.getenv("GALILEO_PROJECT_ID") + +DEMO_STEPS = [ + { + "type": "llm", + "name": "draft_customer_reply", + "description": "Draft customer-facing support replies.", + "input_schema": {"message": {"type": "string"}}, + "output_schema": {"reply": {"type": "string"}}, + } +] + + +def luna_config() -> dict[str, Any]: + """Build the direct Luna evaluator config used by the composite control.""" + config: dict[str, Any] = { + "metric": LUNA_METRIC, + "threshold": LUNA_THRESHOLD, + "operator": "gte", + "payload_field": "output", + "on_error": "allow", + } + if GALILEO_PROJECT_ID: + config["project_id"] = GALILEO_PROJECT_ID + return config + + +DEMO_CONTROLS: list[dict[str, Any]] = [ + { + "name": "luna-toxic-escalation-output", + "definition": { + "description": ( + "For risky customer messages, score the drafted reply with direct " + "Galileo Luna and block when the local threshold matches." 
+ ), + "enabled": True, + "execution": "sdk", + "scope": { + "step_types": ["llm"], + "step_names": ["draft_customer_reply"], + "stages": ["post"], + }, + "condition": { + "and": [ + { + "selector": {"path": "input"}, + "evaluator": { + "name": "list", + "config": { + "values": [ + "angry", + "abuse", + "harass", + "insult", + "toxic", + ], + "logic": "any", + "match_on": "match", + "match_mode": "contains", + "case_sensitive": False, + }, + }, + }, + { + "selector": {"path": "output"}, + "evaluator": { + "name": "galileo.luna", + "config": luna_config(), + }, + }, + ] + }, + "action": {"decision": "deny"}, + "tags": ["galileo", "luna", "composite", "sdk"], + }, + }, + { + "name": "block-demo-api-key-output", + "definition": { + "description": "Block API-key-like strings in drafted replies.", + "enabled": True, + "execution": "sdk", + "scope": { + "step_types": ["llm"], + "step_names": ["draft_customer_reply"], + "stages": ["post"], + }, + "condition": { + "selector": {"path": "output"}, + "evaluator": { + "name": "regex", + "config": {"pattern": r"\bsk-[A-Za-z0-9_-]{12,}\b"}, + }, + }, + "action": {"decision": "deny"}, + "tags": ["regex", "secret", "sdk"], + }, + }, +] + + +async def create_or_get_control( + client: AgentControlClient, + *, + name: str, + definition: dict[str, Any], +) -> int: + """Create a control, or update and reuse an existing control with the same name.""" + try: + result = await controls.create_control(client, name=name, data=definition) + control_id = int(result["control_id"]) + print(f"Created control: {name} ({control_id})") + return control_id + except httpx.HTTPStatusError as exc: + if exc.response.status_code != 409: + raise + + page = await controls.list_controls(client, name=name, limit=100) + for summary in page.get("controls", []): + if summary.get("name") == name: + control_id = int(summary["id"]) + await controls.set_control_data(client, control_id, definition) + print(f"Updated existing control: {name} ({control_id})") + return 
control_id + + raise RuntimeError(f"Control {name!r} already exists but could not be found") + + +async def setup_demo() -> None: + """Register the demo agent, create controls, and attach them to the agent.""" + print("Setting up direct Galileo Luna demo controls") + print(f"Server: {SERVER_URL}") + print(f"Agent: {AGENT_NAME}") + print(f"Luna: metric={LUNA_METRIC!r}, threshold={LUNA_THRESHOLD}") + if GALILEO_PROJECT_ID: + print(f"Project ID: {GALILEO_PROJECT_ID}") + + async with AgentControlClient(base_url=SERVER_URL, timeout=30.0) as client: + await client.health_check() + + result = await agents.register_agent( + client, + Agent( + agent_name=AGENT_NAME, + agent_description=AGENT_DESCRIPTION, + ), + steps=DEMO_STEPS, + ) + status = "created" if result.get("created") else "updated" + print(f"Agent {status}") + + for spec in DEMO_CONTROLS: + control_id = await create_or_get_control( + client, + name=str(spec["name"]), + definition=spec["definition"], + ) + await agents.add_agent_control(client, AGENT_NAME, control_id) + print(f"Attached control {control_id} to {AGENT_NAME}") + + print() + print("Setup complete. Run: uv run python demo_agent.py") + + +def main() -> None: + """Run setup.""" + asyncio.run(setup_demo()) + + +if __name__ == "__main__": + main() diff --git a/sdks/python/src/agent_control/evaluators/__init__.py b/sdks/python/src/agent_control/evaluators/__init__.py index ee77851a..9fd87e71 100644 --- a/sdks/python/src/agent_control/evaluators/__init__.py +++ b/sdks/python/src/agent_control/evaluators/__init__.py @@ -10,9 +10,10 @@ Then use `list_evaluators()` to get available evaluators. 
-Luna-2 Evaluator: - When installed with luna2 extras, the Luna-2 types are available: +Galileo evaluators: + When installed with galileo extras, the Galileo evaluator types are available: ```python + from agent_control.evaluators import LunaEvaluator, LunaEvaluatorConfig # if galileo installed from agent_control.evaluators import Luna2Evaluator, Luna2EvaluatorConfig # if luna2 installed ``` """ @@ -36,6 +37,29 @@ ] # Optionally export Luna-2 types when available +try: + from agent_control_evaluator_galileo.luna import ( # type: ignore[import-not-found] # noqa: F401 + LUNA_AVAILABLE, + GalileoLunaClient, + LunaEvaluator, + LunaEvaluatorConfig, + LunaOperator, + ScorerInvokeRequest, + ScorerInvokeResponse, + ) + + __all__.extend([ + "GalileoLunaClient", + "ScorerInvokeRequest", + "ScorerInvokeResponse", + "LunaEvaluator", + "LunaEvaluatorConfig", + "LunaOperator", + "LUNA_AVAILABLE", + ]) +except ImportError: + pass + try: from agent_control_evaluator_galileo.luna2 import ( # type: ignore[import-not-found] # noqa: F401 LUNA2_AVAILABLE, From 8d2227d1f1be404bb71bd1511658d1e774b7844f Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Thu, 7 May 2026 16:51:42 -0700 Subject: [PATCH 02/11] fix the url --- .../luna/client.py | 9 ++++++- .../galileo/tests/test_luna_evaluator.py | 26 +++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py index e1638ae3..269d64fc 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py @@ -113,6 +113,7 @@ def __init__( self, api_key: str | None = None, console_url: str | None = None, + api_url: str | None = None, ) -> None: """Initialize the Galileo Luna client. @@ -120,6 +121,8 @@ def __init__( api_key: Galileo API key. 
If not provided, reads from GALILEO_API_KEY. console_url: Galileo Console URL. If not provided, reads from GALILEO_CONSOLE_URL or uses the production console URL. + api_url: Galileo API URL. If not provided, reads from GALILEO_API_URL + before deriving from the console URL. Raises: ValueError: If no API key is provided or found in the environment. @@ -135,7 +138,9 @@ def __init__( self.console_url = ( console_url or os.getenv("GALILEO_CONSOLE_URL") or "https://console.galileo.ai" ) - self.api_base = self._derive_api_url(self.console_url) + self.api_base = (api_url or os.getenv("GALILEO_API_URL") or "").rstrip( + "/" + ) or self._derive_api_url(self.console_url) self._client: httpx.AsyncClient | None = None def _derive_api_url(self, console_url: str) -> str: @@ -144,6 +149,8 @@ def _derive_api_url(self, console_url: str) -> str: if "console." in url: return url.replace("console.", "api.") + if "console-" in url: + return url.replace("console-", "api-", 1) if url.startswith("https://"): return url.replace("https://", "https://api.") diff --git a/evaluators/contrib/galileo/tests/test_luna_evaluator.py b/evaluators/contrib/galileo/tests/test_luna_evaluator.py index 6ca0dced..1b7e700e 100644 --- a/evaluators/contrib/galileo/tests/test_luna_evaluator.py +++ b/evaluators/contrib/galileo/tests/test_luna_evaluator.py @@ -57,6 +57,32 @@ def test_client_uses_protect_api_url_derivation(self) -> None: # Then: the API URL is derived the same way assert client.api_base == "https://api.demo-v2.galileocloud.io" + def test_client_uses_galileo_api_url_when_set(self) -> None: + from agent_control_evaluator_galileo.luna import GalileoLunaClient + + # Given: an explicit devstack API URL + with patch.dict( + os.environ, + { + "GALILEO_API_KEY": "test-key", + "GALILEO_API_URL": "https://api-test-luna.gcp-dev.galileo.ai/", + }, + ): + client = GalileoLunaClient(console_url="https://console-test-luna.gcp-dev.galileo.ai") + + # Then: the explicit API URL wins over console URL derivation + 
assert client.api_base == "https://api-test-luna.gcp-dev.galileo.ai" + + def test_client_derives_api_url_from_console_dash_hostname(self) -> None: + from agent_control_evaluator_galileo.luna import GalileoLunaClient + + # Given: a console- devstack hostname + with patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}, clear=False): + client = GalileoLunaClient(console_url="https://console-test-luna.gcp-dev.galileo.ai") + + # Then: the matching api- hostname is used + assert client.api_base == "https://api-test-luna.gcp-dev.galileo.ai" + @pytest.mark.asyncio async def test_client_posts_to_scorers_invoke_without_protect_fields(self) -> None: from agent_control_evaluator_galileo.luna import GalileoLunaClient From 0cce0bf806123843b50a72cec7ec0da6dd0c02be Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Tue, 12 May 2026 10:38:44 -0700 Subject: [PATCH 03/11] feat(galileo): support internal scorer auth --- .../luna/client.py | 93 +++++++++++++++--- .../luna/evaluator.py | 14 ++- .../galileo/tests/test_luna_evaluator.py | 95 ++++++++++++++++++- 3 files changed, 179 insertions(+), 23 deletions(-) diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py index 269d64fc..e75b74bf 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py @@ -4,7 +4,12 @@ import logging import os +from base64 import urlsafe_b64encode from dataclasses import dataclass, field +from hashlib import sha256 +from hmac import new as hmac_new +from json import dumps +from time import time from uuid import UUID import httpx @@ -13,6 +18,38 @@ logger = logging.getLogger(__name__) DEFAULT_TIMEOUT_SECS = 10.0 +DEFAULT_INTERNAL_TOKEN_TTL_SECS = 3600 +PUBLIC_SCORER_INVOKE_PATH = "/scorers/invoke" +INTERNAL_SCORER_INVOKE_PATH = "/internal/scorers/invoke" + + +def _b64url(data: 
bytes) -> str: + return urlsafe_b64encode(data).rstrip(b"=").decode("ascii") + + +def _internal_auth_token( + api_secret: str, + project_id: str | UUID, + ttl_seconds: int = DEFAULT_INTERNAL_TOKEN_TTL_SECS, +) -> str: + """Create the internal JWT expected by Galileo API internal routes.""" + now = int(time()) + header = {"alg": "HS256", "typ": "JWT"} + payload = { + "internal": True, + "project_id": str(project_id), + "scope": "scorers.invoke", + "iat": now, + "exp": now + ttl_seconds, + } + signing_input = ".".join( + [ + _b64url(dumps(header, separators=(",", ":")).encode("utf-8")), + _b64url(dumps(payload, separators=(",", ":")).encode("utf-8")), + ] + ) + signature = hmac_new(api_secret.encode("utf-8"), signing_input.encode("ascii"), sha256).digest() + return f"{signing_input}.{_b64url(signature)}" def _as_float_or_none(value: JSONValue) -> float | None: @@ -33,7 +70,7 @@ class ScorerInvokeRequest: """Request payload for Galileo Luna scorer invocation. Attributes: - metric: Preset, registered, or fine-tuned scorer name. + metric: Preset, registered, or fine-tuned scorer label. input: Optional user/system prompt text. output: Optional model response text. luna_model: Optional Luna model override. 
@@ -50,7 +87,7 @@ class ScorerInvokeRequest: def to_dict(self) -> JSONObject: """Convert to the public API request shape.""" - body: JSONObject = {"metric": self.metric} + body: JSONObject = {"scorer_label": self.metric} if self.input is not None: body["input"] = self.input if self.output is not None: @@ -87,7 +124,7 @@ class ScorerInvokeResponse: @classmethod def from_dict(cls, data: JSONObject) -> ScorerInvokeResponse: """Create a response model from the API JSON object.""" - metric_value = data.get("metric", "") + metric_value = data.get("scorer_label", data.get("metric", "")) status_value = data.get("status", "unknown") error_value = data.get("error_message") @@ -105,13 +142,15 @@ class GalileoLunaClient: """Thin HTTP client for Galileo Luna direct scorer invocation. Environment Variables: - GALILEO_API_KEY: Galileo API key (required). + GALILEO_API_SECRET_KEY or GALILEO_API_SECRET: Galileo API internal JWT signing secret. + GALILEO_API_KEY: Galileo API key fallback for public scorer invocation. GALILEO_CONSOLE_URL: Galileo Console URL (optional, defaults to production). """ def __init__( self, api_key: str | None = None, + api_secret: str | None = None, console_url: str | None = None, api_url: str | None = None, ) -> None: @@ -119,22 +158,28 @@ def __init__( Args: api_key: Galileo API key. If not provided, reads from GALILEO_API_KEY. + api_secret: Galileo API secret for internal JWT auth. If not provided, + reads from GALILEO_API_SECRET_KEY or GALILEO_API_SECRET. console_url: Galileo Console URL. If not provided, reads from GALILEO_CONSOLE_URL or uses the production console URL. api_url: Galileo API URL. If not provided, reads from GALILEO_API_URL before deriving from the console URL. Raises: - ValueError: If no API key is provided or found in the environment. + ValueError: If neither API secret nor API key is provided. 
""" + resolved_api_secret = ( + api_secret or os.getenv("GALILEO_API_SECRET_KEY") or os.getenv("GALILEO_API_SECRET") + ) resolved_api_key = api_key or os.getenv("GALILEO_API_KEY") - if not resolved_api_key: + if not resolved_api_secret and not resolved_api_key: raise ValueError( - "GALILEO_API_KEY is required. " - "Set it as an environment variable or pass it to the constructor." + "GALILEO_API_SECRET_KEY or GALILEO_API_KEY is required. " + "Set one as an environment variable or pass it to the constructor." ) self.api_key = resolved_api_key + self.api_secret = resolved_api_secret self.console_url = ( console_url or os.getenv("GALILEO_CONSOLE_URL") or "https://console.galileo.ai" ) @@ -162,15 +207,34 @@ def _derive_api_url(self, console_url: str) -> str: async def _get_client(self) -> httpx.AsyncClient: """Get or create the HTTP client.""" if self._client is None or self._client.is_closed: + headers = {"Content-Type": "application/json"} + if self.api_secret is None and self.api_key is not None: + headers["Galileo-API-Key"] = self.api_key self._client = httpx.AsyncClient( - headers={ - "Galileo-API-Key": self.api_key, - "Content-Type": "application/json", - }, + headers=headers, timeout=httpx.Timeout(DEFAULT_TIMEOUT_SECS), ) return self._client + def _endpoint_and_headers( + self, + project_id: str | UUID | None, + headers: dict[str, str] | None, + ) -> tuple[str, dict[str, str]]: + request_headers = dict(headers or {}) + if self.api_secret is None: + return f"{self.api_base}{PUBLIC_SCORER_INVOKE_PATH}", request_headers + + if project_id is None: + raise ValueError( + "project_id is required when using GALILEO_API_SECRET_KEY internal auth." + ) + + request_headers["Authorization"] = ( + f"Bearer {_internal_auth_token(self.api_secret, project_id)}" + ) + return f"{self.api_base}{INTERNAL_SCORER_INVOKE_PATH}", request_headers + async def invoke( self, *, @@ -186,7 +250,7 @@ async def invoke( """Invoke a Galileo Luna scorer. 
Args: - metric: Preset, registered, or fine-tuned scorer name. + metric: Preset, registered, or fine-tuned scorer label. input: Optional user/system prompt text. output: Optional model response text. project_id: Optional Galileo project UUID for project-scoped scorer resolution. @@ -215,8 +279,7 @@ async def invoke( luna_model=luna_model, config=config, ).to_dict() - request_headers = dict(headers or {}) - endpoint = f"{self.api_base}/scorers/invoke" + endpoint, request_headers = self._endpoint_and_headers(project_id, headers) logger.debug("[GalileoLunaClient] POST %s", endpoint) logger.debug("[GalileoLunaClient] Request body: %s", request_body) diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py index 16a39930..f628cd8e 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py @@ -101,12 +101,18 @@ def __init__(self, config: LunaEvaluatorConfig) -> None: config: Validated LunaEvaluatorConfig instance. Raises: - ValueError: If GALILEO_API_KEY is not set. + ValueError: If neither GALILEO_API_SECRET_KEY nor GALILEO_API_KEY is set. """ - if not os.getenv("GALILEO_API_KEY"): + has_auth = ( + os.getenv("GALILEO_API_SECRET_KEY") + or os.getenv("GALILEO_API_SECRET") + or os.getenv("GALILEO_API_KEY") + ) + if not has_auth: raise ValueError( - "GALILEO_API_KEY environment variable must be set. " - "Set it to a Galileo API key before using galileo.luna." + "GALILEO_API_SECRET_KEY or GALILEO_API_KEY environment variable must be set. " + "Set an API secret for internal auth or a Galileo API key before using " + "galileo.luna." 
) super().__init__(config) diff --git a/evaluators/contrib/galileo/tests/test_luna_evaluator.py b/evaluators/contrib/galileo/tests/test_luna_evaluator.py index 1b7e700e..53cf58ae 100644 --- a/evaluators/contrib/galileo/tests/test_luna_evaluator.py +++ b/evaluators/contrib/galileo/tests/test_luna_evaluator.py @@ -4,6 +4,7 @@ import json import os +from base64 import urlsafe_b64decode from unittest.mock import AsyncMock, patch import httpx @@ -12,6 +13,12 @@ from pydantic import ValidationError +def _decode_jwt_payload(token: str) -> dict[str, object]: + payload_segment = token.split(".")[1] + padded = payload_segment + ("=" * (-len(payload_segment) % 4)) + return json.loads(urlsafe_b64decode(padded.encode()).decode()) + + class TestLunaEvaluatorConfig: """Tests for direct Luna evaluator configuration.""" @@ -96,7 +103,7 @@ def handler(request: httpx.Request) -> httpx.Response: return httpx.Response( 200, json={ - "metric": "toxicity", + "scorer_label": "toxicity", "score": 0.82, "status": "success", "execution_time": 0.12, @@ -133,7 +140,7 @@ def handler(request: httpx.Request) -> httpx.Response: assert captured["body"] == { "input": "user prompt", "output": "model answer", - "metric": "toxicity", + "scorer_label": "toxicity", "project_id": "12345678-1234-5678-1234-567812345678", "luna_model": "luna-2", "config": {"top_k": 1}, @@ -144,6 +151,72 @@ def handler(request: httpx.Request) -> httpx.Response: assert isinstance(headers, dict) assert headers["galileo-api-key"] == "test-key" + @pytest.mark.asyncio + async def test_client_uses_internal_jwt_when_api_secret_is_set(self) -> None: + from agent_control_evaluator_galileo.luna import GalileoLunaClient + + captured: dict[str, object] = {} + + def handler(request: httpx.Request) -> httpx.Response: + captured["url"] = str(request.url) + captured["headers"] = dict(request.headers) + captured["body"] = json.loads(request.content.decode()) + return httpx.Response( + 200, + json={ + "scorer_label": "toxicity", + "score": 
0.82, + "status": "success", + "execution_time": 0.12, + }, + ) + + # Given: a Luna client configured with the Galileo API internal secret + with patch.dict(os.environ, {"GALILEO_API_SECRET_KEY": "test-secret"}, clear=True): + client = GalileoLunaClient(api_url="https://api.default.svc.cluster.local:8088") + client._client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) + + try: + # When: invoking a scorer with project context + response = await client.invoke( + metric="toxicity", + output="model answer", + project_id="12345678-1234-5678-1234-567812345678", + ) + finally: + await client.close() + + # Then: the internal scorer endpoint is called with a project-bound JWT + assert response.score == 0.82 + assert captured["url"] == "https://api.default.svc.cluster.local:8088/internal/scorers/invoke" + assert captured["body"] == { + "output": "model answer", + "scorer_label": "toxicity", + "project_id": "12345678-1234-5678-1234-567812345678", + } + headers = captured["headers"] + assert isinstance(headers, dict) + assert "galileo-api-key" not in headers + auth_header = headers["authorization"] + assert isinstance(auth_header, str) + assert auth_header.startswith("Bearer ") + token_payload = _decode_jwt_payload(auth_header.removeprefix("Bearer ")) + assert token_payload["internal"] is True + assert token_payload["project_id"] == "12345678-1234-5678-1234-567812345678" + assert token_payload["scope"] == "scorers.invoke" + + @pytest.mark.asyncio + async def test_client_requires_project_id_for_internal_jwt(self) -> None: + from agent_control_evaluator_galileo.luna import GalileoLunaClient + + # Given: a Luna client configured with internal JWT auth + with patch.dict(os.environ, {"GALILEO_API_SECRET_KEY": "test-secret"}, clear=True): + client = GalileoLunaClient(api_url="https://api.default.svc.cluster.local:8088") + + # When/Then: project_id is required because API uses it as the internal auth context + with pytest.raises(ValueError, match="project_id is 
required"): + await client.invoke(metric="toxicity", output="model answer") + class TestLunaEvaluator: """Tests for direct Luna evaluator behavior.""" @@ -156,12 +229,26 @@ def test_evaluator_metadata(self) -> None: assert LunaEvaluator.metadata.requires_api_key is True @patch.dict(os.environ, {}, clear=True) - def test_evaluator_init_without_api_key_raises(self) -> None: + def test_evaluator_init_without_auth_raises(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluator - with pytest.raises(ValueError, match="GALILEO_API_KEY"): + with pytest.raises(ValueError, match="GALILEO_API_SECRET_KEY or GALILEO_API_KEY"): LunaEvaluator.from_dict({"metric": "toxicity", "threshold": 0.5}) + @patch.dict(os.environ, {"GALILEO_API_SECRET_KEY": "test-secret"}, clear=True) + def test_evaluator_init_accepts_api_secret(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluator + + evaluator = LunaEvaluator.from_dict( + { + "metric": "toxicity", + "project_id": "12345678-1234-5678-1234-567812345678", + "threshold": 0.5, + } + ) + + assert str(evaluator.config.project_id) == "12345678-1234-5678-1234-567812345678" + @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) @pytest.mark.asyncio async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: From dd252be06b80c464b9c13929af166dd669cf235d Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Tue, 12 May 2026 10:49:41 -0700 Subject: [PATCH 04/11] add auth and update schema --- .../luna/client.py | 53 +++++++++---------- .../luna/config.py | 2 - .../luna/evaluator.py | 1 - .../galileo/tests/test_luna_evaluator.py | 35 +++++++++--- 4 files changed, 55 insertions(+), 36 deletions(-) diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py index e75b74bf..6786c5e8 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py +++ 
b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py @@ -10,10 +10,12 @@ from hmac import new as hmac_new from json import dumps from time import time +from typing import Literal from uuid import UUID import httpx from agent_control_models import JSONObject, JSONValue +from pydantic import BaseModel, Field, model_validator logger = logging.getLogger(__name__) @@ -65,40 +67,37 @@ def _as_float_or_none(value: JSONValue) -> float | None: return None -@dataclass(frozen=True) -class ScorerInvokeRequest: +ScorerStepType = Literal["session", "trace", "span"] + + +class ScorerInvokeRequest(BaseModel): """Request payload for Galileo Luna scorer invocation. Attributes: - metric: Preset, registered, or fine-tuned scorer label. + step_type: Runtime step shape used by Galileo scorer input normalization. input: Optional user/system prompt text. output: Optional model response text. - luna_model: Optional Luna model override. + scorer_label: Preset, registered, or fine-tuned scorer label. project_id: Optional Galileo project UUID for project-scoped scorer resolution. config: Optional scorer-specific configuration. 
""" - metric: str - input: str | None = None - output: str | None = None + step_type: ScorerStepType = Field(default="span") + input: JSONValue = None + output: JSONValue = None + scorer_label: str = Field(min_length=1) project_id: str | UUID | None = None - luna_model: str | None = None config: JSONObject | None = None + @model_validator(mode="after") + def ensure_input_or_output(self) -> ScorerInvokeRequest: + if self.input is None and self.output is None: + raise ValueError("Either input or output must be set.") + return self + def to_dict(self) -> JSONObject: - """Convert to the public API request shape.""" - body: JSONObject = {"scorer_label": self.metric} - if self.input is not None: - body["input"] = self.input - if self.output is not None: - body["output"] = self.output - if self.project_id is not None: - body["project_id"] = str(self.project_id) - if self.luna_model is not None: - body["luna_model"] = self.luna_model - if self.config is not None: - body["config"] = self.config - return body + """Convert to the Galileo scorer invoke API request shape.""" + return self.model_dump(mode="json", exclude_none=True) @dataclass @@ -239,10 +238,10 @@ async def invoke( self, *, metric: str, - input: str | None = None, - output: str | None = None, + input: JSONValue = None, + output: JSONValue = None, + step_type: ScorerStepType = "span", project_id: str | UUID | None = None, - luna_model: str | None = None, config: JSONObject | None = None, timeout: float = DEFAULT_TIMEOUT_SECS, headers: dict[str, str] | None = None, @@ -253,8 +252,8 @@ async def invoke( metric: Preset, registered, or fine-tuned scorer label. input: Optional user/system prompt text. output: Optional model response text. + step_type: Runtime step shape used by Galileo scorer input normalization. project_id: Optional Galileo project UUID for project-scoped scorer resolution. - luna_model: Optional Luna model override. config: Optional scorer-specific configuration. timeout: Request timeout in seconds. 
headers: Additional request headers. @@ -272,11 +271,11 @@ async def invoke( raise ValueError("At least one of input or output must be provided.") request_body = ScorerInvokeRequest( - metric=metric, + scorer_label=metric, input=input, output=output, + step_type=step_type, project_id=project_id, - luna_model=luna_model, config=config, ).to_dict() endpoint, request_headers = self._endpoint_and_headers(project_id, headers) diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py index 241e040f..3bcc34a3 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py @@ -36,7 +36,6 @@ class LunaEvaluatorConfig(EvaluatorConfig): project_id: Optional Galileo project UUID for project-scoped scorer resolution. threshold: Local threshold used by the evaluator for comparison. operator: Local comparison operator. Numeric operators use threshold as a number. - luna_model: Optional Luna model override sent to Galileo. scorer_config: Optional scorer-specific config sent as ``config``. timeout_ms: Request timeout in milliseconds. on_error: Error policy: allow=fail open, deny=fail closed. 
@@ -58,7 +57,6 @@ class LunaEvaluatorConfig(EvaluatorConfig): default="gte", description="Local comparison operator applied to the raw Luna score.", ) - luna_model: str | None = Field(default=None, description="Optional Luna model override") scorer_config: JSONObject | None = Field( default=None, alias="config", diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py index f628cd8e..8afea45d 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py @@ -199,7 +199,6 @@ async def evaluate(self, data: Any) -> EvaluatorResult: input=input_text if _has_text(input_text) else None, output=output_text if _has_text(output_text) else None, project_id=self.config.project_id, - luna_model=self.config.luna_model, config=self.config.scorer_config, timeout=self.get_timeout_seconds(), ) diff --git a/evaluators/contrib/galileo/tests/test_luna_evaluator.py b/evaluators/contrib/galileo/tests/test_luna_evaluator.py index 53cf58ae..58bd201b 100644 --- a/evaluators/contrib/galileo/tests/test_luna_evaluator.py +++ b/evaluators/contrib/galileo/tests/test_luna_evaluator.py @@ -31,7 +31,6 @@ def test_config_accepts_direct_scorer_fields(self) -> None: project_id="12345678-1234-5678-1234-567812345678", threshold=0.7, operator="gte", - luna_model="luna-2", config={"temperature": 0}, ) @@ -40,7 +39,6 @@ def test_config_accepts_direct_scorer_fields(self) -> None: assert str(config.project_id) == "12345678-1234-5678-1234-567812345678" assert config.threshold == 0.7 assert config.operator == "gte" - assert config.luna_model == "luna-2" assert config.scorer_config == {"temperature": 0} def test_numeric_operator_requires_numeric_threshold(self) -> None: @@ -54,6 +52,33 @@ def test_numeric_operator_requires_numeric_threshold(self) -> None: class 
TestGalileoLunaClient: """Tests for the GalileoLunaClient HTTP contract.""" + def test_scorer_invoke_request_matches_orbit_schema_shape(self) -> None: + from agent_control_evaluator_galileo.luna import ScorerInvokeRequest + + # Given: a scorer request with project context and scorer config + request = ScorerInvokeRequest( + scorer_label="toxicity", + input={"messages": [{"role": "user", "content": "hello"}]}, + project_id="12345678-1234-5678-1234-567812345678", + config={"top_k": 1}, + ) + + # Then: the serialized payload uses the Orbit scorer invoke fields + assert request.to_dict() == { + "step_type": "span", + "input": {"messages": [{"role": "user", "content": "hello"}]}, + "scorer_label": "toxicity", + "project_id": "12345678-1234-5678-1234-567812345678", + "config": {"top_k": 1}, + } + + def test_scorer_invoke_request_requires_input_or_output(self) -> None: + from agent_control_evaluator_galileo.luna import ScorerInvokeRequest + + # Given/When/Then: the request mirrors Orbit validation + with pytest.raises(ValidationError, match="Either input or output must be set"): + ScorerInvokeRequest(scorer_label="toxicity") + def test_client_uses_protect_api_url_derivation(self) -> None: from agent_control_evaluator_galileo.luna import GalileoLunaClient @@ -128,7 +153,6 @@ def handler(request: httpx.Request) -> httpx.Response: input="user prompt", output="model answer", project_id="12345678-1234-5678-1234-567812345678", - luna_model="luna-2", config={"top_k": 1}, ) finally: @@ -142,7 +166,7 @@ def handler(request: httpx.Request) -> httpx.Response: "output": "model answer", "scorer_label": "toxicity", "project_id": "12345678-1234-5678-1234-567812345678", - "luna_model": "luna-2", + "step_type": "span", "config": {"top_k": 1}, } assert "stage_name" not in captured["body"] @@ -193,6 +217,7 @@ def handler(request: httpx.Request) -> httpx.Response: "output": "model answer", "scorer_label": "toxicity", "project_id": "12345678-1234-5678-1234-567812345678", + "step_type": 
"span", } headers = captured["headers"] assert isinstance(headers, dict) @@ -301,7 +326,6 @@ async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: input="user prompt", output="model answer", project_id=evaluator.config.project_id, - luna_model=None, config=None, timeout=5.0, ) @@ -335,7 +359,6 @@ async def test_evaluator_returns_non_match_below_threshold(self) -> None: input="hello", output=None, project_id=None, - luna_model=None, config=None, timeout=10.0, ) From 74fcbeb4ce6fd91d3c861daf2b60f6d9e1ffe297 Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Tue, 12 May 2026 11:11:57 -0700 Subject: [PATCH 05/11] fix(galileo): align luna scorer response schema --- .../luna/client.py | 44 +++++++++++-------- .../luna/evaluator.py | 2 +- .../galileo/tests/test_luna_evaluator.py | 42 +++++++++++++++++- 3 files changed, 66 insertions(+), 22 deletions(-) diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py index 6786c5e8..effc132a 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py @@ -5,7 +5,6 @@ import logging import os from base64 import urlsafe_b64encode -from dataclasses import dataclass, field from hashlib import sha256 from hmac import new as hmac_new from json import dumps @@ -15,7 +14,7 @@ import httpx from agent_control_models import JSONObject, JSONValue -from pydantic import BaseModel, Field, model_validator +from pydantic import BaseModel, Field, PrivateAttr, model_validator logger = logging.getLogger(__name__) @@ -100,41 +99,48 @@ def to_dict(self) -> JSONObject: return self.model_dump(mode="json", exclude_none=True) -@dataclass -class ScorerInvokeResponse: +class ScorerInvokeResponse(BaseModel): """Response from Galileo Luna scorer invocation. Attributes: - metric: Echoed scorer metric. 
+ scorer_label: Echoed scorer label. score: Raw scorer value. status: Invocation status. execution_time: Execution time in seconds, when returned. error_message: Error detail for non-success statuses. - raw_response: Full response body for diagnostics. """ - metric: str + scorer_label: str score: JSONValue status: str = "unknown" execution_time: float | None = None error_message: str | None = None - raw_response: JSONObject = field(default_factory=dict) + _raw_response: JSONObject = PrivateAttr(default_factory=dict) + + @model_validator(mode="before") + @classmethod + def allow_legacy_metric_response(cls, data: object) -> object: + if isinstance(data, dict) and "scorer_label" not in data and "metric" in data: + return data | {"scorer_label": data["metric"]} + return data + + @property + def metric(self) -> str: + """Backward-compatible alias for existing evaluator metadata code.""" + return self.scorer_label + + @property + def raw_response(self) -> JSONObject: + return self._raw_response @classmethod def from_dict(cls, data: JSONObject) -> ScorerInvokeResponse: """Create a response model from the API JSON object.""" - metric_value = data.get("scorer_label", data.get("metric", "")) - status_value = data.get("status", "unknown") - error_value = data.get("error_message") - - return cls( - metric=str(metric_value) if metric_value is not None else "", - score=data.get("score"), - status=str(status_value) if status_value is not None else "unknown", - execution_time=_as_float_or_none(data.get("execution_time")), - error_message=str(error_value) if error_value is not None else None, - raw_response=data, + response = cls.model_validate( + data | {"execution_time": _as_float_or_none(data.get("execution_time"))} ) + response._raw_response = data + return response class GalileoLunaClient: diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py index 
8afea45d..9db2f60d 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py @@ -227,7 +227,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: def _metadata(self, response: ScorerInvokeResponse) -> dict[str, Any]: metadata: dict[str, Any] = { - "metric": response.metric or self.config.metric, + "metric": response.scorer_label or self.config.metric, "project_id": str(self.config.project_id) if self.config.project_id else None, "score": response.score, "threshold": self.config.threshold, diff --git a/evaluators/contrib/galileo/tests/test_luna_evaluator.py b/evaluators/contrib/galileo/tests/test_luna_evaluator.py index 58bd201b..de9da5af 100644 --- a/evaluators/contrib/galileo/tests/test_luna_evaluator.py +++ b/evaluators/contrib/galileo/tests/test_luna_evaluator.py @@ -79,6 +79,44 @@ def test_scorer_invoke_request_requires_input_or_output(self) -> None: with pytest.raises(ValidationError, match="Either input or output must be set"): ScorerInvokeRequest(scorer_label="toxicity") + def test_scorer_invoke_response_matches_orbit_schema_shape(self) -> None: + from agent_control_evaluator_galileo.luna import ScorerInvokeResponse + + # Given: an API scorer invoke response + response = ScorerInvokeResponse.from_dict( + { + "scorer_label": "toxicity", + "score": 0.82, + "status": "success", + "execution_time": 0.12, + "error_message": None, + } + ) + + # Then: the model exposes the Orbit/API response fields + assert response.model_dump() == { + "scorer_label": "toxicity", + "score": 0.82, + "status": "success", + "execution_time": 0.12, + "error_message": None, + } + assert response.scorer_label == "toxicity" + assert response.metric == "toxicity" + assert response.raw_response["scorer_label"] == "toxicity" + + def test_scorer_invoke_response_accepts_legacy_metric_field(self) -> None: + from agent_control_evaluator_galileo.luna import 
ScorerInvokeResponse + + # Given/When: an older API response uses metric instead of scorer_label + response = ScorerInvokeResponse.from_dict( + {"metric": "toxicity", "score": 0.82, "status": "success"} + ) + + # Then: the client still normalizes it to the current response contract + assert response.scorer_label == "toxicity" + assert response.model_dump()["scorer_label"] == "toxicity" + def test_client_uses_protect_api_url_derivation(self) -> None: from agent_control_evaluator_galileo.luna import GalileoLunaClient @@ -293,7 +331,7 @@ async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: mock_invoke.return_value = ScorerInvokeResponse( - metric="toxicity", + scorer_label="toxicity", score=0.82, status="success", execution_time=0.1, @@ -343,7 +381,7 @@ async def test_evaluator_returns_non_match_below_threshold(self) -> None: with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: mock_invoke.return_value = ScorerInvokeResponse( - metric="toxicity", + scorer_label="toxicity", score=0.2, status="success", ) From 7b0a15d2b6d8b8a98a38d311c4818016e92ae394 Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Wed, 13 May 2026 12:01:56 -0700 Subject: [PATCH 06/11] update the schemas and corresponding tests --- .../luna/client.py | 30 ++++------- .../luna/config.py | 6 +-- .../luna/evaluator.py | 10 ++-- .../galileo/tests/test_luna_evaluator.py | 51 +++++++------------ examples/galileo_luna/README.md | 2 +- examples/galileo_luna/setup_controls.py | 6 +-- 6 files changed, 40 insertions(+), 65 deletions(-) diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py index effc132a..426b1782 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py +++ 
b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py @@ -66,14 +66,14 @@ def _as_float_or_none(value: JSONValue) -> float | None: return None -ScorerStepType = Literal["session", "trace", "span"] +RootType = Literal["session", "trace", "span"] class ScorerInvokeRequest(BaseModel): """Request payload for Galileo Luna scorer invocation. Attributes: - step_type: Runtime step shape used by Galileo scorer input normalization. + root_type: Runtime step shape used by Galileo scorer input normalization. input: Optional user/system prompt text. output: Optional model response text. scorer_label: Preset, registered, or fine-tuned scorer label. @@ -81,7 +81,7 @@ class ScorerInvokeRequest(BaseModel): config: Optional scorer-specific configuration. """ - step_type: ScorerStepType = Field(default="span") + root_type: RootType = Field(default="span") input: JSONValue = None output: JSONValue = None scorer_label: str = Field(min_length=1) @@ -117,18 +117,6 @@ class ScorerInvokeResponse(BaseModel): error_message: str | None = None _raw_response: JSONObject = PrivateAttr(default_factory=dict) - @model_validator(mode="before") - @classmethod - def allow_legacy_metric_response(cls, data: object) -> object: - if isinstance(data, dict) and "scorer_label" not in data and "metric" in data: - return data | {"scorer_label": data["metric"]} - return data - - @property - def metric(self) -> str: - """Backward-compatible alias for existing evaluator metadata code.""" - return self.scorer_label - @property def raw_response(self) -> JSONObject: return self._raw_response @@ -243,10 +231,10 @@ def _endpoint_and_headers( async def invoke( self, *, - metric: str, + scorer_label: str, input: JSONValue = None, output: JSONValue = None, - step_type: ScorerStepType = "span", + root_type: RootType = "span", project_id: str | UUID | None = None, config: JSONObject | None = None, timeout: float = DEFAULT_TIMEOUT_SECS, @@ -255,10 +243,10 @@ async def invoke( """Invoke a Galileo 
Luna scorer. Args: - metric: Preset, registered, or fine-tuned scorer label. + scorer_label: Preset, registered, or fine-tuned scorer label. input: Optional user/system prompt text. output: Optional model response text. - step_type: Runtime step shape used by Galileo scorer input normalization. + root_type: Runtime step shape used by Galileo scorer input normalization. project_id: Optional Galileo project UUID for project-scoped scorer resolution. config: Optional scorer-specific configuration. timeout: Request timeout in seconds. @@ -277,10 +265,10 @@ async def invoke( raise ValueError("At least one of input or output must be provided.") request_body = ScorerInvokeRequest( - scorer_label=metric, + scorer_label=scorer_label, input=input, output=output, - step_type=step_type, + root_type=root_type, project_id=project_id, config=config, ).to_dict() diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py index 3bcc34a3..1e41a554 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py @@ -32,7 +32,7 @@ class LunaEvaluatorConfig(EvaluatorConfig): """Configuration for direct Luna scorer evaluation. Attributes: - metric: Preset, registered, or fine-tuned scorer name. + scorer_label: Preset, registered, or fine-tuned scorer label. project_id: Optional Galileo project UUID for project-scoped scorer resolution. threshold: Local threshold used by the evaluator for comparison. operator: Local comparison operator. Numeric operators use threshold as a number. @@ -40,11 +40,11 @@ class LunaEvaluatorConfig(EvaluatorConfig): timeout_ms: Request timeout in milliseconds. on_error: Error policy: allow=fail open, deny=fail closed. payload_field: Force selected data into input or output. 
If omitted, root step - payloads with input/output use both fields; scalar data is inferred from metric name. + payloads with input/output use both fields; scalar data is inferred from scorer label. include_raw_response: Include the raw API response in EvaluatorResult metadata. """ - metric: str = Field(..., min_length=1, description="Luna metric/scorer name to evaluate") + scorer_label: str = Field(..., min_length=1, description="Luna scorer label to invoke") project_id: UUID | None = Field( default=None, description="Optional Galileo project UUID for project-scoped scorer resolution.", diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py index 9db2f60d..a5b3f248 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py @@ -139,7 +139,7 @@ def _prepare_payload(self, data: Any) -> tuple[str | None, str | None]: return input_text, output_text text = _coerce_payload_text(data) - if "output" in self.config.metric: + if "output" in self.config.scorer_label: return None, text return text, None @@ -190,12 +190,12 @@ async def evaluate(self, data: Any) -> EvaluatorResult: matched=False, confidence=1.0, message="No data to score with Luna", - metadata={"metric": self.config.metric}, + metadata={"scorer_label": self.config.scorer_label}, ) try: response = await self._get_client().invoke( - metric=self.config.metric, + scorer_label=self.config.scorer_label, input=input_text if _has_text(input_text) else None, output=output_text if _has_text(output_text) else None, project_id=self.config.project_id, @@ -227,7 +227,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: def _metadata(self, response: ScorerInvokeResponse) -> dict[str, Any]: metadata: dict[str, Any] = { - "metric": response.scorer_label or self.config.metric, + 
"scorer_label": response.scorer_label or self.config.scorer_label, "project_id": str(self.config.project_id) if self.config.project_id else None, "score": response.score, "threshold": self.config.threshold, @@ -251,7 +251,7 @@ def _handle_error(self, error: Exception) -> EvaluatorResult: metadata={ "error": error_detail, "error_type": type(error).__name__, - "metric": self.config.metric, + "scorer_label": self.config.scorer_label, "fallback_action": fallback, }, error=None if matched else error_detail, diff --git a/evaluators/contrib/galileo/tests/test_luna_evaluator.py b/evaluators/contrib/galileo/tests/test_luna_evaluator.py index de9da5af..31323a42 100644 --- a/evaluators/contrib/galileo/tests/test_luna_evaluator.py +++ b/evaluators/contrib/galileo/tests/test_luna_evaluator.py @@ -27,7 +27,7 @@ def test_config_accepts_direct_scorer_fields(self) -> None: # Given: a direct scorer config with local thresholding config = LunaEvaluatorConfig( - metric="toxicity", + scorer_label="toxicity", project_id="12345678-1234-5678-1234-567812345678", threshold=0.7, operator="gte", @@ -35,7 +35,7 @@ def test_config_accepts_direct_scorer_fields(self) -> None: ) # Then: config is retained without Protect concepts - assert config.metric == "toxicity" + assert config.scorer_label == "toxicity" assert str(config.project_id) == "12345678-1234-5678-1234-567812345678" assert config.threshold == 0.7 assert config.operator == "gte" @@ -46,7 +46,7 @@ def test_numeric_operator_requires_numeric_threshold(self) -> None: # Given/When/Then: numeric local comparison rejects non-numeric thresholds with pytest.raises(ValidationError, match="numeric threshold"): - LunaEvaluatorConfig(metric="toxicity", threshold="high", operator="gte") + LunaEvaluatorConfig(scorer_label="toxicity", threshold="high", operator="gte") class TestGalileoLunaClient: @@ -65,7 +65,7 @@ def test_scorer_invoke_request_matches_orbit_schema_shape(self) -> None: # Then: the serialized payload uses the Orbit scorer invoke fields 
assert request.to_dict() == { - "step_type": "span", + "root_type": "span", "input": {"messages": [{"role": "user", "content": "hello"}]}, "scorer_label": "toxicity", "project_id": "12345678-1234-5678-1234-567812345678", @@ -102,21 +102,8 @@ def test_scorer_invoke_response_matches_orbit_schema_shape(self) -> None: "error_message": None, } assert response.scorer_label == "toxicity" - assert response.metric == "toxicity" assert response.raw_response["scorer_label"] == "toxicity" - def test_scorer_invoke_response_accepts_legacy_metric_field(self) -> None: - from agent_control_evaluator_galileo.luna import ScorerInvokeResponse - - # Given/When: an older API response uses metric instead of scorer_label - response = ScorerInvokeResponse.from_dict( - {"metric": "toxicity", "score": 0.82, "status": "success"} - ) - - # Then: the client still normalizes it to the current response contract - assert response.scorer_label == "toxicity" - assert response.model_dump()["scorer_label"] == "toxicity" - def test_client_uses_protect_api_url_derivation(self) -> None: from agent_control_evaluator_galileo.luna import GalileoLunaClient @@ -187,7 +174,7 @@ def handler(request: httpx.Request) -> httpx.Response: try: # When: invoking a scorer response = await client.invoke( - metric="toxicity", + scorer_label="toxicity", input="user prompt", output="model answer", project_id="12345678-1234-5678-1234-567812345678", @@ -204,7 +191,7 @@ def handler(request: httpx.Request) -> httpx.Response: "output": "model answer", "scorer_label": "toxicity", "project_id": "12345678-1234-5678-1234-567812345678", - "step_type": "span", + "root_type": "span", "config": {"top_k": 1}, } assert "stage_name" not in captured["body"] @@ -241,7 +228,7 @@ def handler(request: httpx.Request) -> httpx.Response: try: # When: invoking a scorer with project context response = await client.invoke( - metric="toxicity", + scorer_label="toxicity", output="model answer", project_id="12345678-1234-5678-1234-567812345678", ) @@ 
-255,7 +242,7 @@ def handler(request: httpx.Request) -> httpx.Response: "output": "model answer", "scorer_label": "toxicity", "project_id": "12345678-1234-5678-1234-567812345678", - "step_type": "span", + "root_type": "span", } headers = captured["headers"] assert isinstance(headers, dict) @@ -278,7 +265,7 @@ async def test_client_requires_project_id_for_internal_jwt(self) -> None: # When/Then: project_id is required because API uses it as the internal auth context with pytest.raises(ValueError, match="project_id is required"): - await client.invoke(metric="toxicity", output="model answer") + await client.invoke(scorer_label="toxicity", output="model answer") class TestLunaEvaluator: @@ -296,7 +283,7 @@ def test_evaluator_init_without_auth_raises(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluator with pytest.raises(ValueError, match="GALILEO_API_SECRET_KEY or GALILEO_API_KEY"): - LunaEvaluator.from_dict({"metric": "toxicity", "threshold": 0.5}) + LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) @patch.dict(os.environ, {"GALILEO_API_SECRET_KEY": "test-secret"}, clear=True) def test_evaluator_init_accepts_api_secret(self) -> None: @@ -304,7 +291,7 @@ def test_evaluator_init_accepts_api_secret(self) -> None: evaluator = LunaEvaluator.from_dict( { - "metric": "toxicity", + "scorer_label": "toxicity", "project_id": "12345678-1234-5678-1234-567812345678", "threshold": 0.5, } @@ -321,7 +308,7 @@ async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: # Given: a direct Luna evaluator and a raw successful scorer response evaluator = LunaEvaluator.from_dict( { - "metric": "toxicity", + "scorer_label": "toxicity", "project_id": "12345678-1234-5678-1234-567812345678", "threshold": 0.7, "operator": "gte", @@ -350,7 +337,7 @@ async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: assert result.matched is True assert result.confidence == 0.82 assert result.metadata == { - "metric": 
"toxicity", + "scorer_label": "toxicity", "project_id": "12345678-1234-5678-1234-567812345678", "score": 0.82, "threshold": 0.7, @@ -360,7 +347,7 @@ async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: "error_message": None, } mock_invoke.assert_awaited_once_with( - metric="toxicity", + scorer_label="toxicity", input="user prompt", output="model answer", project_id=evaluator.config.project_id, @@ -376,7 +363,7 @@ async def test_evaluator_returns_non_match_below_threshold(self) -> None: # Given: a raw scorer value below the local threshold evaluator = LunaEvaluator.from_dict( - {"metric": "toxicity", "threshold": 0.7, "operator": "gte"} + {"scorer_label": "toxicity", "threshold": 0.7, "operator": "gte"} ) with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: @@ -393,7 +380,7 @@ async def test_evaluator_returns_non_match_below_threshold(self) -> None: assert result.matched is False assert result.confidence == 0.2 mock_invoke.assert_awaited_once_with( - metric="toxicity", + scorer_label="toxicity", input="hello", output=None, project_id=None, @@ -408,7 +395,7 @@ async def test_evaluator_does_not_call_api_for_empty_data(self) -> None: from agent_control_evaluator_galileo.luna.client import GalileoLunaClient # Given: an evaluator and empty selected data - evaluator = LunaEvaluator.from_dict({"metric": "toxicity", "threshold": 0.5}) + evaluator = LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: # When: evaluating empty data @@ -427,7 +414,7 @@ async def test_evaluator_fail_open_sets_error(self) -> None: from agent_control_evaluator_galileo.luna.client import GalileoLunaClient # Given: default fail-open behavior - evaluator = LunaEvaluator.from_dict({"metric": "toxicity", "threshold": 0.5}) + evaluator = LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) with 
patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: mock_invoke.side_effect = RuntimeError("service unavailable") @@ -449,7 +436,7 @@ async def test_evaluator_fail_closed_matches_without_error_field(self) -> None: # Given: fail-closed behavior for scorer errors evaluator = LunaEvaluator.from_dict( - {"metric": "toxicity", "threshold": 0.5, "on_error": "deny"} + {"scorer_label": "toxicity", "threshold": 0.5, "on_error": "deny"} ) with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: diff --git a/examples/galileo_luna/README.md b/examples/galileo_luna/README.md index d43a2d71..534ef640 100644 --- a/examples/galileo_luna/README.md +++ b/examples/galileo_luna/README.md @@ -33,7 +33,7 @@ export GALILEO_PROJECT_ID="00000000-0000-0000-0000-000000000000" Optional scorer settings: ```bash -export GALILEO_LUNA_METRIC="toxicity" +export GALILEO_LUNA_SCORER_LABEL="toxicity" export GALILEO_LUNA_THRESHOLD="0.5" ``` diff --git a/examples/galileo_luna/setup_controls.py b/examples/galileo_luna/setup_controls.py index 3d325cde..69a36ad5 100644 --- a/examples/galileo_luna/setup_controls.py +++ b/examples/galileo_luna/setup_controls.py @@ -23,7 +23,7 @@ AGENT_DESCRIPTION = "Demo agent protected by direct Galileo Luna scorer controls" SERVER_URL = os.getenv("AGENT_CONTROL_URL", "http://localhost:8000") -LUNA_METRIC = os.getenv("GALILEO_LUNA_METRIC", "toxicity") +LUNA_SCORER_LABEL = os.getenv("GALILEO_LUNA_SCORER_LABEL", "toxicity") LUNA_THRESHOLD = float(os.getenv("GALILEO_LUNA_THRESHOLD", "0.5")) GALILEO_PROJECT_ID = os.getenv("GALILEO_PROJECT_ID") @@ -41,7 +41,7 @@ def luna_config() -> dict[str, Any]: """Build the direct Luna evaluator config used by the composite control.""" config: dict[str, Any] = { - "metric": LUNA_METRIC, + "scorer_label": LUNA_SCORER_LABEL, "threshold": LUNA_THRESHOLD, "operator": "gte", "payload_field": "output", @@ -158,7 +158,7 @@ async def setup_demo() -> None: print("Setting up direct Galileo 
Luna demo controls") print(f"Server: {SERVER_URL}") print(f"Agent: {AGENT_NAME}") - print(f"Luna: metric={LUNA_METRIC!r}, threshold={LUNA_THRESHOLD}") + print(f"Luna: scorer_label={LUNA_SCORER_LABEL!r}, threshold={LUNA_THRESHOLD}") if GALILEO_PROJECT_ID: print(f"Project ID: {GALILEO_PROJECT_ID}") From 523524d07fb9837fa574106fe6346a07f25e25be Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Wed, 13 May 2026 17:37:14 -0700 Subject: [PATCH 07/11] update the schemas for scorer --- .../luna/__init__.py | 2 + .../luna/client.py | 33 ++++++++-------- .../galileo/tests/test_luna_evaluator.py | 37 +++++++++--------- .../src/agent_control/evaluators/__init__.py | 38 +++++++++++-------- 4 files changed, 62 insertions(+), 48 deletions(-) diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/__init__.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/__init__.py index c3ff0375..b26feaac 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/__init__.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/__init__.py @@ -2,6 +2,7 @@ from agent_control_evaluator_galileo.luna.client import ( GalileoLunaClient, + ScorerInvokeInputs, ScorerInvokeRequest, ScorerInvokeResponse, ) @@ -10,6 +11,7 @@ __all__ = [ "GalileoLunaClient", + "ScorerInvokeInputs", "ScorerInvokeRequest", "ScorerInvokeResponse", "LunaEvaluatorConfig", diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py index 426b1782..a2ccdc3f 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py @@ -9,7 +9,6 @@ from hmac import new as hmac_new from json import dumps from time import time -from typing import Literal from uuid import UUID import httpx @@ -66,32 +65,38 @@ def 
_as_float_or_none(value: JSONValue) -> float | None: return None -RootType = Literal["session", "trace", "span"] +def _has_value(value: JSONValue) -> bool: + return value is not None and value != "" + + +class ScorerInvokeInputs(BaseModel): + """Input values sent to Galileo's scorer invoke API.""" + + query: JSONValue = "" + response: JSONValue = "" + ground_truth: JSONValue = None + tools: JSONValue = None class ScorerInvokeRequest(BaseModel): """Request payload for Galileo Luna scorer invocation. Attributes: - root_type: Runtime step shape used by Galileo scorer input normalization. - input: Optional user/system prompt text. - output: Optional model response text. + inputs: Selected scorer input values. scorer_label: Preset, registered, or fine-tuned scorer label. project_id: Optional Galileo project UUID for project-scoped scorer resolution. config: Optional scorer-specific configuration. """ - root_type: RootType = Field(default="span") - input: JSONValue = None - output: JSONValue = None scorer_label: str = Field(min_length=1) + inputs: ScorerInvokeInputs project_id: str | UUID | None = None config: JSONObject | None = None @model_validator(mode="after") def ensure_input_or_output(self) -> ScorerInvokeRequest: - if self.input is None and self.output is None: - raise ValueError("Either input or output must be set.") + if not (_has_value(self.inputs.query) or _has_value(self.inputs.response)): + raise ValueError("Either inputs.query or inputs.response must be set.") return self def to_dict(self) -> JSONObject: @@ -234,7 +239,6 @@ async def invoke( scorer_label: str, input: JSONValue = None, output: JSONValue = None, - root_type: RootType = "span", project_id: str | UUID | None = None, config: JSONObject | None = None, timeout: float = DEFAULT_TIMEOUT_SECS, @@ -246,7 +250,6 @@ async def invoke( scorer_label: Preset, registered, or fine-tuned scorer label. input: Optional user/system prompt text. output: Optional model response text. 
- root_type: Runtime step shape used by Galileo scorer input normalization. project_id: Optional Galileo project UUID for project-scoped scorer resolution. config: Optional scorer-specific configuration. timeout: Request timeout in seconds. @@ -266,9 +269,9 @@ async def invoke( request_body = ScorerInvokeRequest( scorer_label=scorer_label, - input=input, - output=output, - root_type=root_type, + inputs=ScorerInvokeInputs( + query="" if input is None else input, response="" if output is None else output + ), project_id=project_id, config=config, ).to_dict() diff --git a/evaluators/contrib/galileo/tests/test_luna_evaluator.py b/evaluators/contrib/galileo/tests/test_luna_evaluator.py index 31323a42..9f4ae862 100644 --- a/evaluators/contrib/galileo/tests/test_luna_evaluator.py +++ b/evaluators/contrib/galileo/tests/test_luna_evaluator.py @@ -52,22 +52,24 @@ def test_numeric_operator_requires_numeric_threshold(self) -> None: class TestGalileoLunaClient: """Tests for the GalileoLunaClient HTTP contract.""" - def test_scorer_invoke_request_matches_orbit_schema_shape(self) -> None: - from agent_control_evaluator_galileo.luna import ScorerInvokeRequest + def test_scorer_invoke_request_matches_api_schema_shape(self) -> None: + from agent_control_evaluator_galileo.luna import ScorerInvokeInputs, ScorerInvokeRequest # Given: a scorer request with project context and scorer config request = ScorerInvokeRequest( scorer_label="toxicity", - input={"messages": [{"role": "user", "content": "hello"}]}, + inputs=ScorerInvokeInputs(query={"messages": [{"role": "user", "content": "hello"}]}), project_id="12345678-1234-5678-1234-567812345678", config={"top_k": 1}, ) - # Then: the serialized payload uses the Orbit scorer invoke fields + # Then: the serialized payload uses the API-owned scorer invoke fields assert request.to_dict() == { - "root_type": "span", - "input": {"messages": [{"role": "user", "content": "hello"}]}, "scorer_label": "toxicity", + "inputs": { + "query": {"messages": 
[{"role": "user", "content": "hello"}]}, + "response": "", + }, "project_id": "12345678-1234-5678-1234-567812345678", "config": {"top_k": 1}, } @@ -75,11 +77,13 @@ def test_scorer_invoke_request_matches_orbit_schema_shape(self) -> None: def test_scorer_invoke_request_requires_input_or_output(self) -> None: from agent_control_evaluator_galileo.luna import ScorerInvokeRequest - # Given/When/Then: the request mirrors Orbit validation - with pytest.raises(ValidationError, match="Either input or output must be set"): - ScorerInvokeRequest(scorer_label="toxicity") + # Given/When/Then: the request mirrors API validation + with pytest.raises( + ValidationError, match="Either inputs.query or inputs.response must be set" + ): + ScorerInvokeRequest(scorer_label="toxicity", inputs={}) - def test_scorer_invoke_response_matches_orbit_schema_shape(self) -> None: + def test_scorer_invoke_response_matches_api_schema_shape(self) -> None: from agent_control_evaluator_galileo.luna import ScorerInvokeResponse # Given: an API scorer invoke response @@ -93,7 +97,7 @@ def test_scorer_invoke_response_matches_orbit_schema_shape(self) -> None: } ) - # Then: the model exposes the Orbit/API response fields + # Then: the model exposes the API response fields assert response.model_dump() == { "scorer_label": "toxicity", "score": 0.82, @@ -187,11 +191,9 @@ def handler(request: httpx.Request) -> httpx.Response: assert response.score == 0.82 assert captured["url"] == "https://api.demo-v2.galileocloud.io/scorers/invoke" assert captured["body"] == { - "input": "user prompt", - "output": "model answer", "scorer_label": "toxicity", + "inputs": {"query": "user prompt", "response": "model answer"}, "project_id": "12345678-1234-5678-1234-567812345678", - "root_type": "span", "config": {"top_k": 1}, } assert "stage_name" not in captured["body"] @@ -237,12 +239,13 @@ def handler(request: httpx.Request) -> httpx.Response: # Then: the internal scorer endpoint is called with a project-bound JWT assert 
response.score == 0.82 - assert captured["url"] == "https://api.default.svc.cluster.local:8088/internal/scorers/invoke" + assert ( + captured["url"] == "https://api.default.svc.cluster.local:8088/internal/scorers/invoke" + ) assert captured["body"] == { - "output": "model answer", "scorer_label": "toxicity", + "inputs": {"query": "", "response": "model answer"}, "project_id": "12345678-1234-5678-1234-567812345678", - "root_type": "span", } headers = captured["headers"] assert isinstance(headers, dict) diff --git a/sdks/python/src/agent_control/evaluators/__init__.py b/sdks/python/src/agent_control/evaluators/__init__.py index 9fd87e71..8366a107 100644 --- a/sdks/python/src/agent_control/evaluators/__init__.py +++ b/sdks/python/src/agent_control/evaluators/__init__.py @@ -44,19 +44,23 @@ LunaEvaluator, LunaEvaluatorConfig, LunaOperator, + ScorerInvokeInputs, ScorerInvokeRequest, ScorerInvokeResponse, ) - __all__.extend([ - "GalileoLunaClient", - "ScorerInvokeRequest", - "ScorerInvokeResponse", - "LunaEvaluator", - "LunaEvaluatorConfig", - "LunaOperator", - "LUNA_AVAILABLE", - ]) + __all__.extend( + [ + "GalileoLunaClient", + "ScorerInvokeInputs", + "ScorerInvokeRequest", + "ScorerInvokeResponse", + "LunaEvaluator", + "LunaEvaluatorConfig", + "LunaOperator", + "LUNA_AVAILABLE", + ] + ) except ImportError: pass @@ -69,12 +73,14 @@ Luna2Operator, ) - __all__.extend([ - "Luna2Evaluator", - "Luna2EvaluatorConfig", - "Luna2Metric", - "Luna2Operator", - "LUNA2_AVAILABLE", - ]) + __all__.extend( + [ + "Luna2Evaluator", + "Luna2EvaluatorConfig", + "Luna2Metric", + "Luna2Operator", + "LUNA2_AVAILABLE", + ] + ) except ImportError: pass From 34f430df0b8934a670286ea4c9712254fd35e748 Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Wed, 13 May 2026 21:56:33 -0700 Subject: [PATCH 08/11] update luna client schemas --- .../luna/client.py | 10 +++++++-- .../galileo/tests/test_luna_evaluator.py | 21 +++++++++++++++++-- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git 
a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py index a2ccdc3f..86033339 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py @@ -66,7 +66,13 @@ def _as_float_or_none(value: JSONValue) -> float | None: def _has_value(value: JSONValue) -> bool: - return value is not None and value != "" + if value is None: + return False + if isinstance(value, str): + return value.strip() != "" + if isinstance(value, (list, dict)): + return len(value) > 0 + return True class ScorerInvokeInputs(BaseModel): @@ -264,7 +270,7 @@ async def invoke( httpx.HTTPStatusError: If the API returns an error status code. httpx.RequestError: If the request fails before a response is received. """ - if input is None and output is None: + if not (_has_value(input) or _has_value(output)): raise ValueError("At least one of input or output must be provided.") request_body = ScorerInvokeRequest( diff --git a/evaluators/contrib/galileo/tests/test_luna_evaluator.py b/evaluators/contrib/galileo/tests/test_luna_evaluator.py index 9f4ae862..80a5e00b 100644 --- a/evaluators/contrib/galileo/tests/test_luna_evaluator.py +++ b/evaluators/contrib/galileo/tests/test_luna_evaluator.py @@ -74,14 +74,18 @@ def test_scorer_invoke_request_matches_api_schema_shape(self) -> None: "config": {"top_k": 1}, } - def test_scorer_invoke_request_requires_input_or_output(self) -> None: + @pytest.mark.parametrize("empty_value", ["", " ", {}, []]) + def test_scorer_invoke_request_requires_input_or_output(self, empty_value: object) -> None: from agent_control_evaluator_galileo.luna import ScorerInvokeRequest # Given/When/Then: the request mirrors API validation with pytest.raises( ValidationError, match="Either inputs.query or inputs.response must be set" ): - ScorerInvokeRequest(scorer_label="toxicity", 
inputs={}) + ScorerInvokeRequest( + scorer_label="toxicity", + inputs={"query": empty_value, "response": empty_value}, + ) def test_scorer_invoke_response_matches_api_schema_shape(self) -> None: from agent_control_evaluator_galileo.luna import ScorerInvokeResponse @@ -270,6 +274,19 @@ async def test_client_requires_project_id_for_internal_jwt(self) -> None: with pytest.raises(ValueError, match="project_id is required"): await client.invoke(scorer_label="toxicity", output="model answer") + @pytest.mark.asyncio + @pytest.mark.parametrize("empty_value", ["", " ", {}, []]) + async def test_client_rejects_missing_input_and_output_values(self, empty_value: object) -> None: + from agent_control_evaluator_galileo.luna import GalileoLunaClient + + # Given: a Luna client and scorer input values that API treats as missing + with patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}, clear=True): + client = GalileoLunaClient(api_url="https://api.default.svc.cluster.local:8088") + + # When/Then: the client rejects the request before calling API + with pytest.raises(ValueError, match="At least one of input or output must be provided"): + await client.invoke(scorer_label="toxicity", input=empty_value, output=empty_value) + class TestLunaEvaluator: """Tests for direct Luna evaluator behavior.""" From ad0b2dc98b30fcaffe0c5897cfee08e96de83e03 Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Wed, 13 May 2026 21:59:37 -0700 Subject: [PATCH 09/11] fix tests --- evaluators/contrib/galileo/tests/test_luna_evaluator.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/evaluators/contrib/galileo/tests/test_luna_evaluator.py b/evaluators/contrib/galileo/tests/test_luna_evaluator.py index 80a5e00b..5cf1fcf8 100644 --- a/evaluators/contrib/galileo/tests/test_luna_evaluator.py +++ b/evaluators/contrib/galileo/tests/test_luna_evaluator.py @@ -276,7 +276,9 @@ async def test_client_requires_project_id_for_internal_jwt(self) -> None: @pytest.mark.asyncio 
@pytest.mark.parametrize("empty_value", ["", " ", {}, []]) - async def test_client_rejects_missing_input_and_output_values(self, empty_value: object) -> None: + async def test_client_rejects_missing_input_and_output_values( + self, empty_value: object + ) -> None: from agent_control_evaluator_galileo.luna import GalileoLunaClient # Given: a Luna client and scorer input values that API treats as missing From 81cea0471518dd22e0964889412155d5122881de Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Thu, 14 May 2026 15:11:17 -0700 Subject: [PATCH 10/11] remove unwanted fields --- .../luna/config.py | 16 ----------- .../luna/evaluator.py | 15 ++--------- .../galileo/tests/test_luna_evaluator.py | 27 ++----------------- 3 files changed, 4 insertions(+), 54 deletions(-) diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py index 1e41a554..7bf5de48 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py @@ -38,10 +38,6 @@ class LunaEvaluatorConfig(EvaluatorConfig): operator: Local comparison operator. Numeric operators use threshold as a number. scorer_config: Optional scorer-specific config sent as ``config``. timeout_ms: Request timeout in milliseconds. - on_error: Error policy: allow=fail open, deny=fail closed. - payload_field: Force selected data into input or output. If omitted, root step - payloads with input/output use both fields; scalar data is inferred from scorer label. - include_raw_response: Include the raw API response in EvaluatorResult metadata. 
""" scorer_label: str = Field(..., min_length=1, description="Luna scorer label to invoke") @@ -69,18 +65,6 @@ class LunaEvaluatorConfig(EvaluatorConfig): le=60000, description="Request timeout in milliseconds (1-60 seconds)", ) - on_error: Literal["allow", "deny"] = Field( - default="allow", - description="Action on error: 'allow' (fail open) or 'deny' (fail closed)", - ) - payload_field: Literal["input", "output"] | None = Field( - default=None, - description="Explicitly set which scorer payload field receives scalar selected data.", - ) - include_raw_response: bool = Field( - default=False, - description="Include the raw scorer response in result metadata.", - ) @model_validator(mode="after") def validate_threshold(self) -> LunaEvaluatorConfig: diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py index a5b3f248..f9e0ad0d 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py @@ -126,12 +126,6 @@ def _get_client(self) -> GalileoLunaClient: def _prepare_payload(self, data: Any) -> tuple[str | None, str | None]: """Prepare scorer input/output fields from selected data.""" - if self.config.payload_field is not None: - text = _coerce_payload_text(data) - if self.config.payload_field == "output": - return None, text - return text, None - if isinstance(data, dict): input_text = _extract_dict_text(data, "input") output_text = _extract_dict_text(data, "output") @@ -236,25 +230,20 @@ def _metadata(self, response: ScorerInvokeResponse) -> dict[str, Any]: "execution_time_seconds": response.execution_time, "error_message": response.error_message, } - if self.config.include_raw_response: - metadata["raw_response"] = response.raw_response return metadata def _handle_error(self, error: Exception) -> EvaluatorResult: - fallback = 
self.config.on_error - matched = fallback == "deny" error_detail = str(error) return EvaluatorResult( - matched=matched, + matched=False, confidence=0.0, message=f"Luna evaluation error: {error_detail}", metadata={ "error": error_detail, "error_type": type(error).__name__, "scorer_label": self.config.scorer_label, - "fallback_action": fallback, }, - error=None if matched else error_detail, + error=error_detail, ) async def aclose(self) -> None: diff --git a/evaluators/contrib/galileo/tests/test_luna_evaluator.py b/evaluators/contrib/galileo/tests/test_luna_evaluator.py index 5cf1fcf8..1b0bcef8 100644 --- a/evaluators/contrib/galileo/tests/test_luna_evaluator.py +++ b/evaluators/contrib/galileo/tests/test_luna_evaluator.py @@ -435,7 +435,7 @@ async def test_evaluator_fail_open_sets_error(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluator from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - # Given: default fail-open behavior + # Given: fixed fail-open behavior for scorer errors evaluator = LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: @@ -448,27 +448,4 @@ async def test_evaluator_fail_open_sets_error(self) -> None: assert result.matched is False assert result.error == "service unavailable" assert result.metadata is not None - assert result.metadata["fallback_action"] == "allow" - - @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) - @pytest.mark.asyncio - async def test_evaluator_fail_closed_matches_without_error_field(self) -> None: - from agent_control_evaluator_galileo.luna import LunaEvaluator - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - - # Given: fail-closed behavior for scorer errors - evaluator = LunaEvaluator.from_dict( - {"scorer_label": "toxicity", "threshold": 0.5, "on_error": "deny"} - ) - - with patch.object(GalileoLunaClient, "invoke", 
new_callable=AsyncMock) as mock_invoke: - mock_invoke.side_effect = RuntimeError("service unavailable") - - # When: the scorer call fails - result = await evaluator.evaluate("hello") - - # Then: the control matches so deny/steer actions can be applied by the engine - assert result.matched is True - assert result.error is None - assert result.metadata is not None - assert result.metadata["fallback_action"] == "deny" + assert "fallback_action" not in result.metadata From f3cf8f72609c599833542c75a0b0c408255e789c Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Thu, 14 May 2026 16:41:04 -0700 Subject: [PATCH 11/11] remove project_id from evaluator config --- .../luna/client.py | 20 +------ .../luna/config.py | 6 -- .../luna/evaluator.py | 2 - .../galileo/tests/test_luna_evaluator.py | 57 ++++++++++--------- 4 files changed, 33 insertions(+), 52 deletions(-) diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py index 86033339..caca997e 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py @@ -9,7 +9,6 @@ from hmac import new as hmac_new from json import dumps from time import time -from uuid import UUID import httpx from agent_control_models import JSONObject, JSONValue @@ -29,7 +28,6 @@ def _b64url(data: bytes) -> str: def _internal_auth_token( api_secret: str, - project_id: str | UUID, ttl_seconds: int = DEFAULT_INTERNAL_TOKEN_TTL_SECS, ) -> str: """Create the internal JWT expected by Galileo API internal routes.""" @@ -37,7 +35,6 @@ def _internal_auth_token( header = {"alg": "HS256", "typ": "JWT"} payload = { "internal": True, - "project_id": str(project_id), "scope": "scorers.invoke", "iat": now, "exp": now + ttl_seconds, @@ -90,13 +87,11 @@ class ScorerInvokeRequest(BaseModel): Attributes: inputs: Selected scorer input values. 
scorer_label: Preset, registered, or fine-tuned scorer label. - project_id: Optional Galileo project UUID for project-scoped scorer resolution. config: Optional scorer-specific configuration. """ scorer_label: str = Field(min_length=1) inputs: ScorerInvokeInputs - project_id: str | UUID | None = None config: JSONObject | None = None @model_validator(mode="after") @@ -222,21 +217,13 @@ async def _get_client(self) -> httpx.AsyncClient: def _endpoint_and_headers( self, - project_id: str | UUID | None, headers: dict[str, str] | None, ) -> tuple[str, dict[str, str]]: request_headers = dict(headers or {}) if self.api_secret is None: return f"{self.api_base}{PUBLIC_SCORER_INVOKE_PATH}", request_headers - if project_id is None: - raise ValueError( - "project_id is required when using GALILEO_API_SECRET_KEY internal auth." - ) - - request_headers["Authorization"] = ( - f"Bearer {_internal_auth_token(self.api_secret, project_id)}" - ) + request_headers["Authorization"] = f"Bearer {_internal_auth_token(self.api_secret)}" return f"{self.api_base}{INTERNAL_SCORER_INVOKE_PATH}", request_headers async def invoke( @@ -245,7 +232,6 @@ async def invoke( scorer_label: str, input: JSONValue = None, output: JSONValue = None, - project_id: str | UUID | None = None, config: JSONObject | None = None, timeout: float = DEFAULT_TIMEOUT_SECS, headers: dict[str, str] | None = None, @@ -256,7 +242,6 @@ async def invoke( scorer_label: Preset, registered, or fine-tuned scorer label. input: Optional user/system prompt text. output: Optional model response text. - project_id: Optional Galileo project UUID for project-scoped scorer resolution. config: Optional scorer-specific configuration. timeout: Request timeout in seconds. headers: Additional request headers. 
@@ -278,10 +263,9 @@ async def invoke( inputs=ScorerInvokeInputs( query="" if input is None else input, response="" if output is None else output ), - project_id=project_id, config=config, ).to_dict() - endpoint, request_headers = self._endpoint_and_headers(project_id, headers) + endpoint, request_headers = self._endpoint_and_headers(headers) logger.debug("[GalileoLunaClient] POST %s", endpoint) logger.debug("[GalileoLunaClient] Request body: %s", request_body) diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py index 7bf5de48..0f0d86d5 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py @@ -3,7 +3,6 @@ from __future__ import annotations from typing import Literal -from uuid import UUID from agent_control_evaluators import EvaluatorConfig from agent_control_models import JSONObject, JSONValue @@ -33,7 +32,6 @@ class LunaEvaluatorConfig(EvaluatorConfig): Attributes: scorer_label: Preset, registered, or fine-tuned scorer label. - project_id: Optional Galileo project UUID for project-scoped scorer resolution. threshold: Local threshold used by the evaluator for comparison. operator: Local comparison operator. Numeric operators use threshold as a number. scorer_config: Optional scorer-specific config sent as ``config``. 
@@ -41,10 +39,6 @@ class LunaEvaluatorConfig(EvaluatorConfig): """ scorer_label: str = Field(..., min_length=1, description="Luna scorer label to invoke") - project_id: UUID | None = Field( - default=None, - description="Optional Galileo project UUID for project-scoped scorer resolution.", - ) threshold: JSONValue = Field( default=0.5, description="Local threshold used to decide whether the control matches.", diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py index f9e0ad0d..15798074 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py @@ -192,7 +192,6 @@ async def evaluate(self, data: Any) -> EvaluatorResult: scorer_label=self.config.scorer_label, input=input_text if _has_text(input_text) else None, output=output_text if _has_text(output_text) else None, - project_id=self.config.project_id, config=self.config.scorer_config, timeout=self.get_timeout_seconds(), ) @@ -222,7 +221,6 @@ async def evaluate(self, data: Any) -> EvaluatorResult: def _metadata(self, response: ScorerInvokeResponse) -> dict[str, Any]: metadata: dict[str, Any] = { "scorer_label": response.scorer_label or self.config.scorer_label, - "project_id": str(self.config.project_id) if self.config.project_id else None, "score": response.score, "threshold": self.config.threshold, "operator": self.config.operator, diff --git a/evaluators/contrib/galileo/tests/test_luna_evaluator.py b/evaluators/contrib/galileo/tests/test_luna_evaluator.py index 1b0bcef8..4e1f45b8 100644 --- a/evaluators/contrib/galileo/tests/test_luna_evaluator.py +++ b/evaluators/contrib/galileo/tests/test_luna_evaluator.py @@ -28,7 +28,6 @@ def test_config_accepts_direct_scorer_fields(self) -> None: # Given: a direct scorer config with local thresholding config = LunaEvaluatorConfig( 
scorer_label="toxicity", - project_id="12345678-1234-5678-1234-567812345678", threshold=0.7, operator="gte", config={"temperature": 0}, @@ -36,7 +35,6 @@ def test_config_accepts_direct_scorer_fields(self) -> None: # Then: config is retained without Protect concepts assert config.scorer_label == "toxicity" - assert str(config.project_id) == "12345678-1234-5678-1234-567812345678" assert config.threshold == 0.7 assert config.operator == "gte" assert config.scorer_config == {"temperature": 0} @@ -55,11 +53,10 @@ class TestGalileoLunaClient: def test_scorer_invoke_request_matches_api_schema_shape(self) -> None: from agent_control_evaluator_galileo.luna import ScorerInvokeInputs, ScorerInvokeRequest - # Given: a scorer request with project context and scorer config + # Given: a scorer request with scorer config request = ScorerInvokeRequest( scorer_label="toxicity", inputs=ScorerInvokeInputs(query={"messages": [{"role": "user", "content": "hello"}]}), - project_id="12345678-1234-5678-1234-567812345678", config={"top_k": 1}, ) @@ -70,7 +67,6 @@ def test_scorer_invoke_request_matches_api_schema_shape(self) -> None: "query": {"messages": [{"role": "user", "content": "hello"}]}, "response": "", }, - "project_id": "12345678-1234-5678-1234-567812345678", "config": {"top_k": 1}, } @@ -185,7 +181,6 @@ def handler(request: httpx.Request) -> httpx.Response: scorer_label="toxicity", input="user prompt", output="model answer", - project_id="12345678-1234-5678-1234-567812345678", config={"top_k": 1}, ) finally: @@ -197,7 +192,6 @@ def handler(request: httpx.Request) -> httpx.Response: assert captured["body"] == { "scorer_label": "toxicity", "inputs": {"query": "user prompt", "response": "model answer"}, - "project_id": "12345678-1234-5678-1234-567812345678", "config": {"top_k": 1}, } assert "stage_name" not in captured["body"] @@ -232,16 +226,12 @@ def handler(request: httpx.Request) -> httpx.Response: client._client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) try: - 
# When: invoking a scorer with project context - response = await client.invoke( - scorer_label="toxicity", - output="model answer", - project_id="12345678-1234-5678-1234-567812345678", - ) + # When: invoking a scorer with internal JWT auth + response = await client.invoke(scorer_label="toxicity", output="model answer") finally: await client.close() - # Then: the internal scorer endpoint is called with a project-bound JWT + # Then: the internal scorer endpoint is called with an internal JWT assert response.score == 0.82 assert ( captured["url"] == "https://api.default.svc.cluster.local:8088/internal/scorers/invoke" @@ -249,7 +239,6 @@ def handler(request: httpx.Request) -> httpx.Response: assert captured["body"] == { "scorer_label": "toxicity", "inputs": {"query": "", "response": "model answer"}, - "project_id": "12345678-1234-5678-1234-567812345678", } headers = captured["headers"] assert isinstance(headers, dict) @@ -259,20 +248,41 @@ def handler(request: httpx.Request) -> httpx.Response: assert auth_header.startswith("Bearer ") token_payload = _decode_jwt_payload(auth_header.removeprefix("Bearer ")) assert token_payload["internal"] is True - assert token_payload["project_id"] == "12345678-1234-5678-1234-567812345678" assert token_payload["scope"] == "scorers.invoke" @pytest.mark.asyncio - async def test_client_requires_project_id_for_internal_jwt(self) -> None: + async def test_client_uses_internal_jwt_without_api_key(self) -> None: from agent_control_evaluator_galileo.luna import GalileoLunaClient # Given: a Luna client configured with internal JWT auth with patch.dict(os.environ, {"GALILEO_API_SECRET_KEY": "test-secret"}, clear=True): client = GalileoLunaClient(api_url="https://api.default.svc.cluster.local:8088") - # When/Then: project_id is required because API uses it as the internal auth context - with pytest.raises(ValueError, match="project_id is required"): - await client.invoke(scorer_label="toxicity", output="model answer") + captured: dict[str, 
object] = {} + + def handler(request: httpx.Request) -> httpx.Response: + captured["headers"] = dict(request.headers) + return httpx.Response( + 200, + json={"scorer_label": "toxicity", "score": 0.82, "status": "success"}, + ) + + client._client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) + try: + # When: invoking without project context + response = await client.invoke(scorer_label="toxicity", output="model answer") + finally: + await client.close() + + # Then: internal JWT auth still works + assert response.score == 0.82 + headers = captured["headers"] + assert isinstance(headers, dict) + auth_header = headers["authorization"] + assert isinstance(auth_header, str) + token_payload = _decode_jwt_payload(auth_header.removeprefix("Bearer ")) + assert token_payload["internal"] is True + assert token_payload["scope"] == "scorers.invoke" @pytest.mark.asyncio @pytest.mark.parametrize("empty_value", ["", " ", {}, []]) @@ -314,12 +324,11 @@ def test_evaluator_init_accepts_api_secret(self) -> None: evaluator = LunaEvaluator.from_dict( { "scorer_label": "toxicity", - "project_id": "12345678-1234-5678-1234-567812345678", "threshold": 0.5, } ) - assert str(evaluator.config.project_id) == "12345678-1234-5678-1234-567812345678" + assert evaluator.config.scorer_label == "toxicity" @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) @pytest.mark.asyncio @@ -331,7 +340,6 @@ async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: evaluator = LunaEvaluator.from_dict( { "scorer_label": "toxicity", - "project_id": "12345678-1234-5678-1234-567812345678", "threshold": 0.7, "operator": "gte", "timeout_ms": 5000, @@ -360,7 +368,6 @@ async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: assert result.confidence == 0.82 assert result.metadata == { "scorer_label": "toxicity", - "project_id": "12345678-1234-5678-1234-567812345678", "score": 0.82, "threshold": 0.7, "operator": "gte", @@ -372,7 +379,6 @@ async def 
test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: scorer_label="toxicity", input="user prompt", output="model answer", - project_id=evaluator.config.project_id, config=None, timeout=5.0, ) @@ -405,7 +411,6 @@ async def test_evaluator_returns_non_match_below_threshold(self) -> None: scorer_label="toxicity", input="hello", output=None, - project_id=None, config=None, timeout=10.0, )