From 5c5ca692b80f334af94ed98bb8f08e0e9302858a Mon Sep 17 00:00:00 2001
From: Yun Kim <yun.kim@datadoghq.com>
Date: Wed, 11 Mar 2026 23:16:32 -0400
Subject: [PATCH 1/2] Update anthropic tests for new Python-only cache metrics

---
 .../llm/anthropic/test_anthropic_llmobs.py    | 76 +++++++------------
 1 file changed, 26 insertions(+), 50 deletions(-)

diff --git a/tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py b/tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py
index c74a58e9bb9..41655e07d5c 100644
--- a/tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py
+++ b/tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py
@@ -1,5 +1,5 @@
 from tests.integration_frameworks.llm.utils import assert_llmobs_span_event
-from utils import features, scenarios
+from utils import context, features, scenarios
 from utils.docker_fixtures import FrameworkTestClientApi, TestAgentAPI
 
 from .utils import BaseAnthropicTest
@@ -8,6 +8,24 @@
 from unittest import mock
 import json
 
+
+def _expected_llmobs_metrics():
+    """Return expected LLMObs token metrics for anthropic spans.
+
+    Ephemeral cache TTL metrics are only emitted by the Python tracer.
+    """
+    metrics = {
+        "input_tokens": mock.ANY,
+        "output_tokens": mock.ANY,
+        "total_tokens": mock.ANY,
+        "cache_read_input_tokens": mock.ANY,
+        "cache_write_input_tokens": mock.ANY,
+    }
+    if context.library == "python":
+        metrics["ephemeral_1h_input_tokens"] = mock.ANY
+        metrics["ephemeral_5m_input_tokens"] = mock.ANY
+    return metrics
+
 TOOLS = [
     {
         "name": "get_weather",
@@ -75,13 +93,7 @@ def test_create(self, test_agent: TestAgentAPI, test_client: FrameworkTestClient
                 "max_tokens": 100,
                 "temperature": 0.5,
             },
-            metrics={
-                "input_tokens": mock.ANY,
-                "output_tokens": mock.ANY,
-                "total_tokens": mock.ANY,
-                "cache_read_input_tokens": mock.ANY,
-                "cache_write_input_tokens": mock.ANY,
-            },
+            metrics=_expected_llmobs_metrics(),
         )
 
     def test_create_stream_method(self, test_agent: TestAgentAPI, test_client: FrameworkTestClientApi):
@@ -118,13 +130,7 @@ def test_create_stream_method(self, test_agent: TestAgentAPI, test_client: Frame
                 "max_tokens": 100,
                 "temperature": 0.5,
             },
-            metrics={
-                "input_tokens": mock.ANY,
-                "output_tokens": mock.ANY,
-                "total_tokens": mock.ANY,
-                "cache_read_input_tokens": mock.ANY,
-                "cache_write_input_tokens": mock.ANY,
-            },
+            metrics=_expected_llmobs_metrics(),
         )
 
     @pytest.mark.parametrize("stream", [True, False])
@@ -175,13 +181,7 @@ def test_create_content_block(self, test_agent: TestAgentAPI, test_client: Frame
                 "max_tokens": 100,
                 "temperature": 0.5,
             },
-            metrics={
-                "input_tokens": mock.ANY,
-                "output_tokens": mock.ANY,
-                "total_tokens": mock.ANY,
-                "cache_read_input_tokens": mock.ANY,
-                "cache_write_input_tokens": mock.ANY,
-            },
+            metrics=_expected_llmobs_metrics(),
         )
 
     @pytest.mark.parametrize("stream", [True, False])
@@ -312,13 +312,7 @@ def test_create_multiple_system_prompts(
                 "max_tokens": 100,
                 "temperature": 0.5,
             },
-            metrics={
-                "input_tokens": mock.ANY,
-                "output_tokens": mock.ANY,
-                "total_tokens": mock.ANY,
-                "cache_read_input_tokens": mock.ANY,
-                "cache_write_input_tokens": mock.ANY,
-            },
+            metrics=_expected_llmobs_metrics(),
         )
 
     @pytest.mark.parametrize("stream", [True, False])
@@ -378,13 +372,7 @@ def test_create_with_tools(self, test_agent: TestAgentAPI, test_client: Framewor
                 "max_tokens": 100,
                 "temperature": 0.5,
             },
-            metrics={
-                "input_tokens": mock.ANY,
-                "output_tokens": mock.ANY,
-                "total_tokens": mock.ANY,
-                "cache_read_input_tokens": mock.ANY,
-                "cache_write_input_tokens": mock.ANY,
-            },
+            metrics=_expected_llmobs_metrics(),
         )
 
     @pytest.mark.parametrize("stream", [True, False])
@@ -495,13 +483,7 @@ def test_create_tool_result(self, test_agent: TestAgentAPI, test_client: Framewo
                 "max_tokens": 100,
                 "temperature": 0.5,
             },
-            metrics={
-                "input_tokens": mock.ANY,
-                "output_tokens": mock.ANY,
-                "total_tokens": mock.ANY,
-                "cache_read_input_tokens": mock.ANY,
-                "cache_write_input_tokens": mock.ANY,
-            },
+            metrics=_expected_llmobs_metrics(),
         )
 
     @pytest.mark.parametrize("stream", [True, False])
@@ -568,13 +550,7 @@ def test_create_redact_image_input(
                 "max_tokens": 100,
                 "temperature": 0.5,
             },
-            metrics={
-                "input_tokens": mock.ANY,
-                "output_tokens": mock.ANY,
-                "total_tokens": mock.ANY,
-                "cache_read_input_tokens": mock.ANY,
-                "cache_write_input_tokens": mock.ANY,
-            },
+            metrics=_expected_llmobs_metrics(),
         )
 
     @pytest.mark.parametrize("stream", [True, False])

From a33f54bd912bbc9ea649e5cd18a339a379c6471b Mon Sep 17 00:00:00 2001
From: Yun Kim <yun.kim@datadoghq.com>
Date: Thu, 12 Mar 2026 15:46:56 -0400
Subject: [PATCH 2/2] Revert missing_feature for python

---
 manifests/python.yml                          |  7 --
 .../llm/anthropic/test_anthropic_llmobs.py    | 91 +++++++++++++------
 2 files changed, 65 insertions(+), 33 deletions(-)

diff --git a/manifests/python.yml b/manifests/python.yml
index 1463b4b7d29..299e7898b7b 100644
--- a/manifests/python.yml
+++ b/manifests/python.yml
@@ -1080,15 +1080,8 @@ manifest:
   tests/ffe/test_exposures.py: v4.2.0-dev
   tests/integration_frameworks/llm/anthropic/test_anthropic_apm.py::TestAnthropicApmMessages: v3.16.0
   tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages: v3.16.0
-  tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create: missing_feature  # ephemeral cache TTL metrics not yet released
-  tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create_content_block: missing_feature  # ephemeral cache TTL metrics not yet released
   tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create_error: bug (MLOB-1234)
-  ? tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create_multiple_system_prompts
-  : missing_feature  # ephemeral cache TTL metrics not yet released
-  tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create_redact_image_input: missing_feature  # ephemeral cache TTL metrics not yet released
-  tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create_stream_method: missing_feature  # ephemeral cache TTL metrics not yet released
   tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create_tool_result: bug (MLOB-1234)
-  tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create_with_tools: missing_feature  # ephemeral cache TTL metrics not yet released
   tests/integration_frameworks/llm/google_genai/test_google_genai_apm.py::TestGoogleGenAiEmbedContent: v3.11.0
   tests/integration_frameworks/llm/google_genai/test_google_genai_apm.py::TestGoogleGenAiGenerateContent: v3.11.0
   tests/integration_frameworks/llm/google_genai/test_google_genai_llmobs.py::TestGoogleGenAiEmbedContent: v3.13.0
diff --git a/tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py b/tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py
index 41655e07d5c..1bf59e370d2 100644
--- a/tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py
+++ b/tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py
@@ -1,5 +1,5 @@
 from tests.integration_frameworks.llm.utils import assert_llmobs_span_event
-from utils import context, features, scenarios
+from utils import features, scenarios
 from utils.docker_fixtures import FrameworkTestClientApi, TestAgentAPI
 
 from .utils import BaseAnthropicTest
@@ -8,24 +8,6 @@
 from unittest import mock
 import json
 
-
-def _expected_llmobs_metrics():
-    """Return expected LLMObs token metrics for anthropic spans.
-
-    Ephemeral cache TTL metrics are only emitted by the Python tracer.
-    """
-    metrics = {
-        "input_tokens": mock.ANY,
-        "output_tokens": mock.ANY,
-        "total_tokens": mock.ANY,
-        "cache_read_input_tokens": mock.ANY,
-        "cache_write_input_tokens": mock.ANY,
-    }
-    if context.library == "python":
-        metrics["ephemeral_1h_input_tokens"] = mock.ANY
-        metrics["ephemeral_5m_input_tokens"] = mock.ANY
-    return metrics
-
 TOOLS = [
     {
         "name": "get_weather",
@@ -93,7 +75,15 @@ def test_create(self, test_agent: TestAgentAPI, test_client: FrameworkTestClient
                 "max_tokens": 100,
                 "temperature": 0.5,
             },
-            metrics=_expected_llmobs_metrics(),
+            metrics={
+                "input_tokens": mock.ANY,
+                "output_tokens": mock.ANY,
+                "total_tokens": mock.ANY,
+                "cache_read_input_tokens": mock.ANY,
+                "cache_write_input_tokens": mock.ANY,
+                "ephemeral_1h_input_tokens": mock.ANY,
+                "ephemeral_5m_input_tokens": mock.ANY,
+            },
         )
 
     def test_create_stream_method(self, test_agent: TestAgentAPI, test_client: FrameworkTestClientApi):
@@ -130,7 +120,15 @@ def test_create_stream_method(self, test_agent: TestAgentAPI, test_client: Frame
                 "max_tokens": 100,
                 "temperature": 0.5,
             },
-            metrics=_expected_llmobs_metrics(),
+            metrics={
+                "input_tokens": mock.ANY,
+                "output_tokens": mock.ANY,
+                "total_tokens": mock.ANY,
+                "cache_read_input_tokens": mock.ANY,
+                "cache_write_input_tokens": mock.ANY,
+                "ephemeral_1h_input_tokens": mock.ANY,
+                "ephemeral_5m_input_tokens": mock.ANY,
+            },
         )
 
     @pytest.mark.parametrize("stream", [True, False])
@@ -181,7 +179,15 @@ def test_create_content_block(self, test_agent: TestAgentAPI, test_client: Frame
                 "max_tokens": 100,
                 "temperature": 0.5,
             },
-            metrics=_expected_llmobs_metrics(),
+            metrics={
+                "input_tokens": mock.ANY,
+                "output_tokens": mock.ANY,
+                "total_tokens": mock.ANY,
+                "cache_read_input_tokens": mock.ANY,
+                "cache_write_input_tokens": mock.ANY,
+                "ephemeral_1h_input_tokens": mock.ANY,
+                "ephemeral_5m_input_tokens": mock.ANY,
+            },
         )
 
     @pytest.mark.parametrize("stream", [True, False])
@@ -312,7 +318,15 @@ def test_create_multiple_system_prompts(
                 "max_tokens": 100,
                 "temperature": 0.5,
             },
-            metrics=_expected_llmobs_metrics(),
+            metrics={
+                "input_tokens": mock.ANY,
+                "output_tokens": mock.ANY,
+                "total_tokens": mock.ANY,
+                "cache_read_input_tokens": mock.ANY,
+                "cache_write_input_tokens": mock.ANY,
+                "ephemeral_1h_input_tokens": mock.ANY,
+                "ephemeral_5m_input_tokens": mock.ANY,
+            },
         )
 
     @pytest.mark.parametrize("stream", [True, False])
@@ -372,7 +386,15 @@ def test_create_with_tools(self, test_agent: TestAgentAPI, test_client: Framewor
                 "max_tokens": 100,
                 "temperature": 0.5,
             },
-            metrics=_expected_llmobs_metrics(),
+            metrics={
+                "input_tokens": mock.ANY,
+                "output_tokens": mock.ANY,
+                "total_tokens": mock.ANY,
+                "cache_read_input_tokens": mock.ANY,
+                "cache_write_input_tokens": mock.ANY,
+                "ephemeral_1h_input_tokens": mock.ANY,
+                "ephemeral_5m_input_tokens": mock.ANY,
+            },
         )
 
     @pytest.mark.parametrize("stream", [True, False])
@@ -483,7 +505,15 @@ def test_create_tool_result(self, test_agent: TestAgentAPI, test_client: Framewo
                 "max_tokens": 100,
                 "temperature": 0.5,
             },
-            metrics=_expected_llmobs_metrics(),
+            metrics={
+                "input_tokens": mock.ANY,
+                "output_tokens": mock.ANY,
+                "total_tokens": mock.ANY,
+                "cache_read_input_tokens": mock.ANY,
+                "cache_write_input_tokens": mock.ANY,
+                "ephemeral_1h_input_tokens": mock.ANY,
+                "ephemeral_5m_input_tokens": mock.ANY,
+            },
         )
 
     @pytest.mark.parametrize("stream", [True, False])
@@ -550,7 +580,15 @@ def test_create_redact_image_input(
                 "max_tokens": 100,
                 "temperature": 0.5,
             },
-            metrics=_expected_llmobs_metrics(),
+            metrics={
+                "input_tokens": mock.ANY,
+                "output_tokens": mock.ANY,
+                "total_tokens": mock.ANY,
+                "cache_read_input_tokens": mock.ANY,
+                "cache_write_input_tokens": mock.ANY,
+                "ephemeral_1h_input_tokens": mock.ANY,
+                "ephemeral_5m_input_tokens": mock.ANY,
+            },
         )
 
     @pytest.mark.parametrize("stream", [True, False])
@@ -606,4 +644,5 @@ def test_create_prompt_caching(
         write_span_event, read_span_event = span_events
 
         assert write_span_event["metrics"]["cache_write_input_tokens"] == 6163
+        assert write_span_event["metrics"]["ephemeral_5m_input_tokens"] == 6163
         assert read_span_event["metrics"]["cache_read_input_tokens"] == 6163