diff --git a/.github/workflows/run-end-to-end.yml b/.github/workflows/run-end-to-end.yml index a6e49081e41..e6ac962b475 100644 --- a/.github/workflows/run-end-to-end.yml +++ b/.github/workflows/run-end-to-end.yml @@ -235,6 +235,11 @@ jobs: env: DD_API_KEY: ${{ secrets.DD_API_KEY }} DD_APP_KEY: ${{ secrets.DD_APPLICATION_KEY }} + - name: Run APM_TRACING_OTLP scenario + if: always() && steps.build.outcome == 'success' && contains(inputs.scenarios, '"APM_TRACING_OTLP"') + run: ./run.sh APM_TRACING_OTLP + env: + DD_API_KEY: ${{ secrets.DD_API_KEY }} - name: Run APM_TRACING_EFFICIENT_PAYLOAD scenario if: always() && steps.build.outcome == 'success' && contains(inputs.scenarios, '"APM_TRACING_EFFICIENT_PAYLOAD"') run: ./run.sh APM_TRACING_EFFICIENT_PAYLOAD diff --git a/manifests/dotnet.yml b/manifests/dotnet.yml index 950a196186e..e1c6a783b3f 100644 --- a/manifests/dotnet.yml +++ b/manifests/dotnet.yml @@ -755,6 +755,7 @@ manifest: tests/k8s_lib_injection/test_k8s_lib_injection_appsec.py::TestK8sLibInjectionAppsecClusterEnabled: v3.36.0 tests/k8s_lib_injection/test_k8s_lib_injection_appsec.py::TestK8sLibInjectionAppsecDisabledByDefault: v3.36.0 tests/otel/test_context_propagation.py::Test_Otel_Context_Propagation_Default_Propagator_Api: v3.9.0 + tests/otel/test_tracing_otlp.py::Test_Otel_Tracing_OTLP: missing_feature tests/otel_tracing_e2e/test_e2e.py::Test_OTelLogE2E: irrelevant tests/otel_tracing_e2e/test_e2e.py::Test_OTelMetricE2E: irrelevant tests/otel_tracing_e2e/test_e2e.py::Test_OTelTracingE2E: irrelevant diff --git a/manifests/golang.yml b/manifests/golang.yml index 8e8fa4dff97..f2cd81fd59e 100644 --- a/manifests/golang.yml +++ b/manifests/golang.yml @@ -981,6 +981,7 @@ manifest: - weblog_declaration: "*": incomplete_test_app (endpoint not implemented) net-http: v1.70.1 + tests/otel/test_tracing_otlp.py::Test_Otel_Tracing_OTLP: missing_feature tests/otel_tracing_e2e/test_e2e.py::Test_OTelLogE2E: irrelevant tests/otel_tracing_e2e/test_e2e.py::Test_OTelMetricE2E: 
irrelevant tests/otel_tracing_e2e/test_e2e.py::Test_OTelTracingE2E: irrelevant diff --git a/manifests/java.yml b/manifests/java.yml index 949f140f3dd..a056538d6bd 100644 --- a/manifests/java.yml +++ b/manifests/java.yml @@ -3465,6 +3465,7 @@ manifest: spring-boot-openliberty: v1.58.2+06122213c8 # Modified by easy win activation script uds-spring-boot: v1.58.2+06122213c8 # Modified by easy win activation script spring-boot-payara: v1.58.2+06122213c8 # Modified by easy win activation script + tests/otel/test_tracing_otlp.py::Test_Otel_Tracing_OTLP: missing_feature tests/otel_tracing_e2e/test_e2e.py::Test_OTelLogE2E: irrelevant tests/otel_tracing_e2e/test_e2e.py::Test_OTelMetricE2E: irrelevant tests/otel_tracing_e2e/test_e2e.py::Test_OTelTracingE2E: irrelevant diff --git a/manifests/nodejs.yml b/manifests/nodejs.yml index b69f959d2e8..8c4ba5740b8 100644 --- a/manifests/nodejs.yml +++ b/manifests/nodejs.yml @@ -1845,6 +1845,7 @@ manifest: fastify: *ref_5_26_0 tests/otel/test_context_propagation.py::Test_Otel_Context_Propagation_Default_Propagator_Api::test_propagation_extract: incomplete_test_app (Node.js extract endpoint doesn't seem to be working.) tests/otel/test_context_propagation.py::Test_Otel_Context_Propagation_Default_Propagator_Api::test_propagation_inject: incomplete_test_app (Node.js inject endpoint doesn't seem to be working.) 
+ tests/otel/test_tracing_otlp.py::Test_Otel_Tracing_OTLP: missing_feature tests/otel_tracing_e2e/test_e2e.py::Test_OTelLogE2E: irrelevant tests/otel_tracing_e2e/test_e2e.py::Test_OTelMetricE2E: irrelevant tests/otel_tracing_e2e/test_e2e.py::Test_OTelTracingE2E: irrelevant diff --git a/manifests/php.yml b/manifests/php.yml index 0ab061c603a..de2447ce4fc 100644 --- a/manifests/php.yml +++ b/manifests/php.yml @@ -615,6 +615,7 @@ manifest: tests/k8s_lib_injection/test_k8s_lib_injection_profiling.py::TestK8sLibInjectioProfilingClusterOverride: v1.9.0 tests/k8s_lib_injection/test_k8s_lib_injection_profiling.py::TestK8sLibInjectioProfilingDisabledByDefault: v1.9.0 tests/otel/test_context_propagation.py::Test_Otel_Context_Propagation_Default_Propagator_Api: incomplete_test_app (endpoint not implemented) + tests/otel/test_tracing_otlp.py::Test_Otel_Tracing_OTLP: missing_feature tests/otel_tracing_e2e/test_e2e.py::Test_OTelLogE2E: irrelevant tests/otel_tracing_e2e/test_e2e.py::Test_OTelMetricE2E: irrelevant tests/otel_tracing_e2e/test_e2e.py::Test_OTelTracingE2E: irrelevant diff --git a/manifests/python.yml b/manifests/python.yml index 775bb6a74de..6119ee85b2f 100644 --- a/manifests/python.yml +++ b/manifests/python.yml @@ -1503,6 +1503,7 @@ manifest: flask-poc: v2.19.0 uds-flask: v4.3.1 # Modified by easy win activation script uwsgi-poc: v4.3.1 # Modified by easy win activation script + tests/otel/test_tracing_otlp.py::Test_Otel_Tracing_OTLP: missing_feature tests/otel_tracing_e2e/test_e2e.py::Test_OTelLogE2E: irrelevant tests/otel_tracing_e2e/test_e2e.py::Test_OTelMetricE2E: irrelevant tests/otel_tracing_e2e/test_e2e.py::Test_OTelTracingE2E: irrelevant diff --git a/manifests/ruby.yml b/manifests/ruby.yml index 3aae1c8766b..707a8aa9cc6 100644 --- a/manifests/ruby.yml +++ b/manifests/ruby.yml @@ -1228,6 +1228,7 @@ manifest: "*": incomplete_test_app (endpoint not implemented) rails72: v2.0.0 
tests/otel/test_context_propagation.py::Test_Otel_Context_Propagation_Default_Propagator_Api::test_propagation_extract: incomplete_test_app (Ruby extract seems to fail even though it should be supported) + tests/otel/test_tracing_otlp.py::Test_Otel_Tracing_OTLP: missing_feature tests/otel_tracing_e2e/test_e2e.py::Test_OTelLogE2E: irrelevant tests/otel_tracing_e2e/test_e2e.py::Test_OTelMetricE2E: irrelevant tests/otel_tracing_e2e/test_e2e.py::Test_OTelTracingE2E: irrelevant diff --git a/manifests/rust.yml b/manifests/rust.yml index cdb320e3558..b098202c802 100644 --- a/manifests/rust.yml +++ b/manifests/rust.yml @@ -47,6 +47,7 @@ manifest: tests/integrations/test_mongo.py::Test_Mongo: missing_feature (Endpoint is not implemented on weblog) tests/integrations/test_service_overrides.py::Test_SqlServiceNameSource: irrelevant (Only implemented for Java) tests/integrations/test_sql.py::Test_Sql: missing_feature (Endpoint is not implemented on weblog) + tests/otel/test_tracing_otlp.py::Test_Otel_Tracing_OTLP: missing_feature tests/otel_tracing_e2e/test_e2e.py::Test_OTelLogE2E: irrelevant tests/otel_tracing_e2e/test_e2e.py::Test_OTelMetricE2E: irrelevant tests/otel_tracing_e2e/test_e2e.py::Test_OTelTracingE2E: irrelevant diff --git a/tests/otel/test_tracing_otlp.py b/tests/otel/test_tracing_otlp.py new file mode 100644 index 00000000000..562949d3e37 --- /dev/null +++ b/tests/otel/test_tracing_otlp.py @@ -0,0 +1,151 @@ +# Unless explicitly stated otherwise all files in this repository are licensed under the the Apache License Version 2.0. +# This product includes software developed at Datadog (https://www.datadoghq.com/). +# Copyright 2024 Datadog, Inc. 
+
+import time
+import re
+from utils import weblog, interfaces, scenarios, features
+from utils.dd_constants import SpanKind, StatusCode
+from typing import Any
+from collections.abc import Iterator
+
+
+def _snake_to_camel(snake_key: str) -> str:
+    parts = snake_key.split("_")
+    return parts[0].lower() + "".join(p.capitalize() for p in parts[1:])
+
+
+def get_otlp_key(d: dict[str, Any] | None, snake_case_key: str, *, is_json: bool, default: Any = None) -> Any:  # noqa: ANN401
+    """Look up a field by its snake_case name when is_json is false, or its camelCase equivalent when is_json is true.
+    Fields must be camelCase for JSON Protobuf encoding. See https://opentelemetry.io/docs/specs/otlp/#json-protobuf-encoding
+    """
+    if d is None:
+        return default
+    key = _snake_to_camel(snake_case_key) if is_json else snake_case_key
+    return d.get(key, default)
+
+
+# Every value field this module recognises on an OTLP attribute value, in lookup order:
+# snake_case (Binary Protobuf) first, then its camelCase (JSON Protobuf) spelling.
+_ANY_VALUE_FIELDS = (
+    "string_value",
+    "stringValue",
+    "bool_value",
+    "boolValue",
+    "int_value",
+    "intValue",
+    "double_value",
+    "doubleValue",
+    "array_value",
+    "arrayValue",
+    "kvlist_value",
+    "kvlistValue",
+    "bytes_value",
+    "bytesValue",
+)
+
+
+def get_keyvalue_generator(attributes: list[dict]) -> Iterator[tuple[str, Any]]:
+    """Yield a `(key, value)` pair for each OTLP key/value entry of `attributes`.
+
+    The value field is detected by presence, not truthiness, so legitimate falsy values
+    (`False`, `0`, `0.0`, `""`) are yielded. Raises ValueError when no known field is present.
+    """
+    for key_value in attributes:
+        value = key_value["value"]
+        field = next((name for name in _ANY_VALUE_FIELDS if name in value), None)
+        if field is None:
+            raise ValueError(f"Unknown attribute value: {value}")
+        yield key_value["key"], value[field]
+
+
+# @scenarios.apm_tracing_e2e_otel
+@features.otel_api
+@scenarios.apm_tracing_otlp
+class Test_Otel_Tracing_OTLP:
+    def setup_single_server_trace(self):
+        self.start_time_ns = time.time_ns()
+        self.req = weblog.get("/")
+        self.end_time_ns = time.time_ns()
+
+    def test_single_server_trace(self):
+        """Validates the required elements of the OTLP payload for a single trace"""
+        data = list(interfaces.open_telemetry.get_otel_spans(self.req))
+
+        # Assert that there is only one OTLP request containing the desired server span
+        assert len(data) == 1
+        request, content, span = data[0]
+
+        # Determine if JSON Protobuf Encoding was used for the OTLP request (rather than Binary Protobuf)
+        # We need to assert that we match the OTLP specification, which has some odd encoding rules when using JSON: https://opentelemetry.io/docs/specs/otlp/#json-protobuf-encoding
+        request_headers = {key.lower(): value for key, value in request.get("headers")}
+        is_json = request_headers.get("content-type", "").split(";")[0].strip().lower() == "application/json"
+
+        # Assert that there is only one resource span (i.e. SDK) in the OTLP request
+        resource_spans = get_otlp_key(content, "resource_spans", is_json=is_json)
+        expected_key = _snake_to_camel("resource_spans") if is_json else "resource_spans"
+        assert resource_spans is not None, f"missing '{expected_key}' on content: {content}"
+        assert len(resource_spans) == 1, f"expected 1 resource span, got {len(resource_spans)}"
+        resource_span = resource_spans[0]
+
+        attributes = {
+            key_value["key"]: get_otlp_key(key_value["value"], "string_value", is_json=is_json)
+            for key_value in resource_span.get("resource").get("attributes")
+        }
+
+        # Assert that the resource attributes contain the service-level attributes and tracer-level attributes we expect
+        # TODO: Assert the following attributes: runtime-id, git.commit.sha, git.repository_url
+        assert attributes.get("service.name") == "weblog"
+        assert attributes.get("service.version") == "1.0.0"
+        assert (
+            attributes.get("deployment.environment.name") == "system-tests"
+            or attributes.get("deployment.environment") == "system-tests"
+        )
+        assert attributes.get("telemetry.sdk.name") == "datadog"
+        assert "telemetry.sdk.language" in attributes
+        assert "telemetry.sdk.version" in attributes
+
+        # Assert that the `traceId` and `spanId` JSON fields are valid case-insensitive hexadecimal strings, not base64-encoded strings as defined in the standard Protobuf JSON Mapping.
+        # See https://opentelemetry.io/docs/specs/otlp/#json-protobuf-encoding
+        # TODO: Assert against trace_id and span_id fields in the protobuf encoding as well
+        if is_json:
+            assert re.match(r"^[0-9a-fA-F]{32}$", span.get("traceId")), (
+                f"traceId is not a valid case-insensitive hexadecimal string, got {span.get('traceId')}"
+            )
+            assert re.match(r"^[0-9a-fA-F]{16}$", span.get("spanId")), (
+                f"spanId is not a valid case-insensitive hexadecimal string, got {span.get('spanId')}"
+            )
+
+        # Assert that the span fields match the expected values
+        span_start_time_ns = int(get_otlp_key(span, "start_time_unix_nano", is_json=is_json))
+        span_end_time_ns = int(get_otlp_key(span, "end_time_unix_nano", is_json=is_json))
+        assert span_start_time_ns >= self.start_time_ns
+        assert span_end_time_ns >= span_start_time_ns
+        assert span_end_time_ns <= self.end_time_ns
+
+        assert get_otlp_key(span, "name", is_json=is_json)
+        assert get_otlp_key(span, "kind", is_json=is_json) == SpanKind.SERVER.value
+        assert get_otlp_key(span, "attributes", is_json=is_json) is not None
+        assert (
+            get_otlp_key(span, "status", is_json=is_json) is None
+            or get_otlp_key(span, "status", is_json=is_json).get("code") == StatusCode.STATUS_CODE_UNSET.value
+        )
+
+        # Assert HTTP tags
+        # Convert the attributes list to a dictionary; get_keyvalue_generator unwraps every known value field
+        span_attributes = dict(get_keyvalue_generator(get_otlp_key(span, "attributes", is_json=is_json)))
+        method = span_attributes.get("http.method") or span_attributes.get("http.request.method")
+        status_code = span_attributes.get("http.status_code") or span_attributes.get("http.response.status_code")
+        assert method == "GET", f"HTTP method is not GET, got {method}"
+        assert status_code is not None
+        assert int(status_code) == 200, f"HTTP status code is not 200, got {int(status_code)}"
+
+    def setup_unsampled_trace(self):
+        self.req = weblog.get("/", headers={"traceparent": "00-11111111111111110000000000000001-0000000000000001-00"})
+
+    def test_unsampled_trace(self):
+        """Validates that the spans from a non-sampled trace are not exported."""
+        data = list(interfaces.open_telemetry.get_otel_spans(self.req))
+
+        # Assert that the span from this test case was not exported
+        assert len(data) == 0, f"Expected no weblog spans in the OTLP trace payload, got {data}"
diff --git a/utils/_context/_scenarios/__init__.py b/utils/_context/_scenarios/__init__.py
index d53cd5fb2dd..099f6e33a47 100644
--- a/utils/_context/_scenarios/__init__.py
+++ b/utils/_context/_scenarios/__init__.py
@@ -578,6 +578,18 @@ class _Scenarios:
         require_api_key=True,
         doc="",
     )
+    apm_tracing_otlp = EndToEndScenario(
+        "APM_TRACING_OTLP",
+        weblog_env={
+            "OTEL_TRACES_EXPORTER": "otlp",
+            "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT": f"http://proxy:{ProxyPorts.open_telemetry_weblog}/v1/traces",
+            "OTEL_EXPORTER_OTLP_TRACES_HEADERS": "dd-protocol=otlp,dd-otlp-path=agent",
+        },
+        backend_interface_timeout=5,
+        require_api_key=True,
+        include_opentelemetry=True,
+        doc="The weblog exports its traces in OTLP format (OTEL_TRACES_EXPORTER=otlp) through the proxy",
+    )
 
     apm_tracing_efficient_payload = EndToEndScenario(
         "APM_TRACING_EFFICIENT_PAYLOAD",
diff --git a/utils/_context/_scenarios/endtoend.py b/utils/_context/_scenarios/endtoend.py
index d8f5efc34f3..92fe0266b93 100644
--- a/utils/_context/_scenarios/endtoend.py
+++ b/utils/_context/_scenarios/endtoend.py
@@ -204,6 +204,7 @@ def __init__(
         runtime_metrics_enabled: bool = False,
         backend_interface_timeout: int = 0,
         include_buddies: bool = False,
+        include_opentelemetry: bool = False,
         require_api_key: bool = False,
         other_weblog_containers: tuple[type[TestedContainer], ...] = (),
     ) -> None:
@@ -283,6 +284,7 @@ def __init__(
         self.agent_interface_timeout = agent_interface_timeout
         self.backend_interface_timeout = backend_interface_timeout
         self._library_interface_timeout = library_interface_timeout
+        self.include_opentelemetry = include_opentelemetry
 
     def configure(self, config: pytest.Config):
         if self._require_api_key and "DD_API_KEY" not in os.environ and not self.replay:
@@ -290,6 +292,7 @@ def configure(self, config: pytest.Config):
 
         self.weblog_infra.configure(config)
         self._set_containers_dependancies()
+        self.weblog_container.environment["DD_API_KEY"] = os.environ.get("DD_API_KEY")
         super().configure(config)
 
         interfaces.agent.configure(self.host_log_folder, replay=self.replay)
@@ -299,6 +302,9 @@ def configure(self, config: pytest.Config):
         interfaces.library_stdout.configure(self.host_log_folder, replay=self.replay)
         interfaces.agent_stdout.configure(self.host_log_folder, replay=self.replay)
 
+        if self.include_opentelemetry:
+            interfaces.open_telemetry.configure(self.host_log_folder, replay=self.replay)
+
         for container in self.buddies:
             container.interface.configure(self.host_log_folder, replay=self.replay)
 
@@ -360,7 +366,11 @@ def _get_weblog_system_info(self):
 
     def _start_interfaces_watchdog(self):
         super().start_interfaces_watchdog(
-            [interfaces.library, interfaces.agent] + [container.interface for container in self.buddies]
+            [interfaces.library, interfaces.agent]
+            + [container.interface for container in self.buddies]
+            # Parenthesized on purpose: an unparenthesized `a + b + c if cond else []` applies the
+            # conditional to the whole sum, so no interface at all is watched when OpenTelemetry is off.
+            + ([interfaces.open_telemetry] if self.include_opentelemetry else [])
         )
 
     def _set_weblog_domain(self):
@@ -420,6 +430,10 @@ def _wait_and_stop_containers(self, *, force_interface_timout_to_zero: bool):
 
             interfaces.backend.load_data_from_logs()
 
+            if self.include_opentelemetry:
+                interfaces.open_telemetry.load_data_from_logs()
+                interfaces.open_telemetry.check_deserialization_errors()
+
         else:
             self._wait_interface(
                 interfaces.library, 0 if force_interface_timout_to_zero else self.library_interface_timeout
@@ -444,6 +458,11 @@ def _wait_and_stop_containers(self, *, force_interface_timout_to_zero: bool):
                 interfaces.backend, 0 if force_interface_timout_to_zero else self.backend_interface_timeout
             )
 
+            if self.include_opentelemetry:
+                self._wait_interface(
+                    interfaces.open_telemetry, 0 if force_interface_timout_to_zero else self.backend_interface_timeout
+                )
+
     def _wait_interface(self, interface: ProxyBasedInterfaceValidator, timeout: int):
         logger.terminal.write_sep("-", f"Wait for {interface} ({timeout}s)")
         logger.terminal.flush()
diff --git a/utils/dd_constants.py b/utils/dd_constants.py
index b2bf0e3b539..507704bee78 100644
--- a/utils/dd_constants.py
+++ b/utils/dd_constants.py
@@ -108,6 +108,7 @@ class SamplingMechanism(IntEnum):
     AI_GUARD = 13
 
 
+# See https://github.com/open-telemetry/opentelemetry-proto/blob/v1.9.0/opentelemetry/proto/trace/v1/trace.proto#L153
 class SpanKind(IntEnum):
     UNSPECIFIED = 0
     INTERNAL = 1
@@ -115,3 +116,10 @@ class SpanKind(IntEnum):
     CLIENT = 3
     PRODUCER = 4
     CONSUMER = 5
+
+
+# See https://github.com/open-telemetry/opentelemetry-proto/blob/v1.9.0/opentelemetry/proto/trace/v1/trace.proto#L316
+class StatusCode(IntEnum):
+    STATUS_CODE_UNSET = 0
+    STATUS_CODE_OK = 1
+    STATUS_CODE_ERROR = 2
diff --git a/utils/interfaces/_open_telemetry.py b/utils/interfaces/_open_telemetry.py
index 54c9381631c..23836c50a99 100644
--- a/utils/interfaces/_open_telemetry.py
+++ b/utils/interfaces/_open_telemetry.py
@@ -35,6 +35,35 @@ def get_otel_trace_id(self, request: HttpResponse):
                     for span in scope_span.get("spans"):
                         for attribute in span.get("attributes", []):
                             attr_key = attribute.get("key")
-                            attr_val = attribute.get("value").get("stringValue")
-                            if attr_key == "http.request.headers.user-agent" and rid in attr_val:
+                            attr_val = attribute.get("value").get("string_value") or attribute.get("value").get(
+                                "stringValue"
+                            )
+                            if (attr_key == "http.request.headers.user-agent" and rid in attr_val) or (
+                                attr_key == "http.useragent" and rid in attr_val
+                            ):
                                 yield span.get("traceId")
+
+    def get_otel_spans(self, request: HttpResponse):
+        """Yield `(request, content, span)` for each span whose user-agent attribute contains the request id."""
+        paths = ["/api/v0.2/traces", "/v1/traces"]
+        rid = request.get_rid()
+        if not rid:
+            return  # nothing to match against; `rid in attr_val` below raises TypeError when rid is None
+
+        logger.debug(f"Try to find traces related to request {rid}")
+        user_agent_keys = ("http.request.headers.user-agent", "http.useragent")
+        for data in self.get_data(path_filters=paths):
+            content = data.get("request").get("content") or {}
+            resource_spans = content.get("resource_spans") or content.get("resourceSpans") or []
+            for resource_span in resource_spans:
+                scope_spans = resource_span.get("scope_spans") or resource_span.get("scopeSpans") or []
+                for scope_span in scope_spans:
+                    for span in scope_span.get("spans") or []:
+                        for attribute in span.get("attributes", []):
+                            if attribute.get("key") not in user_agent_keys:
+                                continue
+                            value = attribute.get("value") or {}
+                            attr_val = value.get("string_value") or value.get("stringValue") or ""
+                            if rid in attr_val:
+                                yield data.get("request"), content, span
+                                break  # Skip to next span
diff --git a/utils/scripts/ci_orchestrators/workflow_data.py b/utils/scripts/ci_orchestrators/workflow_data.py
index ee62da8facf..692755f8c6d 100644
--- a/utils/scripts/ci_orchestrators/workflow_data.py
+++ b/utils/scripts/ci_orchestrators/workflow_data.py
@@ -591,6 +591,7 @@ def _is_supported(library: str, weblog: str, scenario: str, _ci_environment: str
         "endtoend": [
             "AGENT_NOT_SUPPORTING_SPAN_EVENTS",
             "APM_TRACING_E2E_OTEL",
+            "APM_TRACING_OTLP",
             "APM_TRACING_E2E_SINGLE_SPAN",
             "APPSEC_API_SECURITY",
             "APPSEC_API_SECURITY_NO_RESPONSE_BODY",