diff --git a/BUILD.bazel b/BUILD.bazel index 890c00e0..c81bd18f 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -17,6 +17,7 @@ cc_library( "src/datadog/datadog_agent_config.cpp", "src/datadog/datadog_agent.cpp", "src/datadog/default_http_client_null.cpp", + "src/datadog/endpoint_inferral.cpp", "src/datadog/environment.cpp", "src/datadog/error.cpp", "src/datadog/extraction_util.cpp", @@ -60,6 +61,7 @@ cc_library( "src/datadog/datadog_agent.h", "src/datadog/default_http_client.h", "src/datadog/extracted_data.h", + "src/datadog/endpoint_inferral.h", "src/datadog/extraction_util.h", "src/datadog/glob.h", "src/datadog/hex.h", @@ -97,6 +99,7 @@ cc_library( "include/datadog/event_scheduler.h", "include/datadog/expected.h", "include/datadog/http_client.h", + "include/datadog/http_endpoint_calculation_mode.h", "include/datadog/id_generator.h", "include/datadog/injection_options.h", "include/datadog/logger.h", diff --git a/CMakeLists.txt b/CMakeLists.txt index e2100d51..2569b429 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -173,6 +173,7 @@ target_sources(dd-trace-cpp-objects src/datadog/collector_response.cpp src/datadog/datadog_agent_config.cpp src/datadog/datadog_agent.cpp + src/datadog/endpoint_inferral.cpp src/datadog/environment.cpp src/datadog/error.cpp src/datadog/extraction_util.cpp diff --git a/bin/format b/bin/format index 652e1678..6a5d66ac 100755 --- a/bin/format +++ b/bin/format @@ -2,8 +2,12 @@ set -e +# Determine the script directory and repository root +SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) +REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd) + # Go to the repository root directory. -cd "$(dirname "$0")"/.. +cd "$REPO_ROOT" # clang-format's behavior changes between versions, even given the same # configuration. @@ -41,16 +45,16 @@ if [ "$(docker image ls --quiet $image | wc -l)" -eq 0 ]; then esac fi -mount_path=/mnt/host +mount_path=/mnt/repo -# File paths passed to the dockerized clang-format need to be resolved and -# prefixed with the bind mount path. 
+# File paths passed to the dockerized clang-format need to be relative to +# the repository root and prefixed with the bind mount path. # # Non-path arguments (flags) are left alone. process_arg() { case "$1" in -*) printf '%s\0' "$1" ;; - *) printf '%s/%s\0' "$mount_path" "$(realpath "$1")" ;; + *) printf '%s/%s\0' "$mount_path" "$1" ;; esac } @@ -65,7 +69,7 @@ docker_clang_format() { --volume /etc/passwd:/etc/passwd:ro \ --volume /etc/group:/etc/group:ro \ --user "$(id -u):$(id -g)" \ - --mount "type=bind,source=/,destination=$mount_path" \ + --mount "type=bind,source=$REPO_ROOT,destination=$mount_path" \ "$image" \ clang-format $formatter_options } diff --git a/include/datadog/config.h b/include/datadog/config.h index 3a26c78d..bfff6059 100644 --- a/include/datadog/config.h +++ b/include/datadog/config.h @@ -28,6 +28,8 @@ enum class ConfigName : char { TRACE_BAGGAGE_MAX_BYTES, TRACE_BAGGAGE_MAX_ITEMS, APM_TRACING_ENABLED, + TRACE_RESOURCE_RENAMING_ENABLED, + TRACE_RESOURCE_RENAMING_ALWAYS_SIMPLIFIED_ENDPOINT, }; // Represents metadata for configuration parameters diff --git a/include/datadog/environment.h b/include/datadog/environment.h index 680b89af..358d0bcb 100644 --- a/include/datadog/environment.h +++ b/include/datadog/environment.h @@ -23,44 +23,46 @@ namespace environment { // To enforce correspondence between `enum Variable` and `variable_names`, the // preprocessor is used so that the DD_* symbols are listed exactly once. 
-#define LIST_ENVIRONMENT_VARIABLES(MACRO) \ - MACRO(DD_AGENT_HOST) \ - MACRO(DD_ENV) \ - MACRO(DD_INSTRUMENTATION_TELEMETRY_ENABLED) \ - MACRO(DD_PROPAGATION_STYLE_EXTRACT) \ - MACRO(DD_PROPAGATION_STYLE_INJECT) \ - MACRO(DD_REMOTE_CONFIGURATION_ENABLED) \ - MACRO(DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS) \ - MACRO(DD_SERVICE) \ - MACRO(DD_SPAN_SAMPLING_RULES) \ - MACRO(DD_SPAN_SAMPLING_RULES_FILE) \ - MACRO(DD_TRACE_PROPAGATION_STYLE_EXTRACT) \ - MACRO(DD_TRACE_PROPAGATION_STYLE_INJECT) \ - MACRO(DD_TRACE_PROPAGATION_STYLE) \ - MACRO(DD_TAGS) \ - MACRO(DD_TRACE_AGENT_PORT) \ - MACRO(DD_TRACE_AGENT_URL) \ - MACRO(DD_TRACE_DEBUG) \ - MACRO(DD_TRACE_ENABLED) \ - MACRO(DD_TRACE_RATE_LIMIT) \ - MACRO(DD_TRACE_REPORT_HOSTNAME) \ - MACRO(DD_TRACE_SAMPLE_RATE) \ - MACRO(DD_TRACE_SAMPLING_RULES) \ - MACRO(DD_TRACE_STARTUP_LOGS) \ - MACRO(DD_TRACE_TAGS_PROPAGATION_MAX_LENGTH) \ - MACRO(DD_VERSION) \ - MACRO(DD_TRACE_128_BIT_TRACEID_GENERATION_ENABLED) \ - MACRO(DD_TELEMETRY_HEARTBEAT_INTERVAL) \ - MACRO(DD_TELEMETRY_METRICS_ENABLED) \ - MACRO(DD_TELEMETRY_METRICS_INTERVAL_SECONDS) \ - MACRO(DD_TELEMETRY_DEBUG) \ - MACRO(DD_TRACE_BAGGAGE_MAX_ITEMS) \ - MACRO(DD_TRACE_BAGGAGE_MAX_BYTES) \ - MACRO(DD_TELEMETRY_LOG_COLLECTION_ENABLED) \ - MACRO(DD_INSTRUMENTATION_INSTALL_ID) \ - MACRO(DD_INSTRUMENTATION_INSTALL_TYPE) \ - MACRO(DD_INSTRUMENTATION_INSTALL_TIME) \ - MACRO(DD_APM_TRACING_ENABLED) \ +#define LIST_ENVIRONMENT_VARIABLES(MACRO) \ + MACRO(DD_AGENT_HOST) \ + MACRO(DD_ENV) \ + MACRO(DD_INSTRUMENTATION_TELEMETRY_ENABLED) \ + MACRO(DD_PROPAGATION_STYLE_EXTRACT) \ + MACRO(DD_PROPAGATION_STYLE_INJECT) \ + MACRO(DD_REMOTE_CONFIGURATION_ENABLED) \ + MACRO(DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS) \ + MACRO(DD_SERVICE) \ + MACRO(DD_SPAN_SAMPLING_RULES) \ + MACRO(DD_SPAN_SAMPLING_RULES_FILE) \ + MACRO(DD_TRACE_PROPAGATION_STYLE_EXTRACT) \ + MACRO(DD_TRACE_PROPAGATION_STYLE_INJECT) \ + MACRO(DD_TRACE_PROPAGATION_STYLE) \ + MACRO(DD_TAGS) \ + MACRO(DD_TRACE_AGENT_PORT) \ + 
MACRO(DD_TRACE_AGENT_URL) \ + MACRO(DD_TRACE_DEBUG) \ + MACRO(DD_TRACE_ENABLED) \ + MACRO(DD_TRACE_RATE_LIMIT) \ + MACRO(DD_TRACE_REPORT_HOSTNAME) \ + MACRO(DD_TRACE_SAMPLE_RATE) \ + MACRO(DD_TRACE_SAMPLING_RULES) \ + MACRO(DD_TRACE_STARTUP_LOGS) \ + MACRO(DD_TRACE_TAGS_PROPAGATION_MAX_LENGTH) \ + MACRO(DD_VERSION) \ + MACRO(DD_TRACE_128_BIT_TRACEID_GENERATION_ENABLED) \ + MACRO(DD_TELEMETRY_HEARTBEAT_INTERVAL) \ + MACRO(DD_TELEMETRY_METRICS_ENABLED) \ + MACRO(DD_TELEMETRY_METRICS_INTERVAL_SECONDS) \ + MACRO(DD_TELEMETRY_DEBUG) \ + MACRO(DD_TRACE_BAGGAGE_MAX_ITEMS) \ + MACRO(DD_TRACE_BAGGAGE_MAX_BYTES) \ + MACRO(DD_TELEMETRY_LOG_COLLECTION_ENABLED) \ + MACRO(DD_INSTRUMENTATION_INSTALL_ID) \ + MACRO(DD_INSTRUMENTATION_INSTALL_TYPE) \ + MACRO(DD_INSTRUMENTATION_INSTALL_TIME) \ + MACRO(DD_APM_TRACING_ENABLED) \ + MACRO(DD_TRACE_RESOURCE_RENAMING_ENABLED) \ + MACRO(DD_TRACE_RESOURCE_RENAMING_ALWAYS_SIMPLIFIED_ENDPOINT) \ MACRO(DD_EXTERNAL_ENV) #define WITH_COMMA(ARG) ARG, diff --git a/include/datadog/http_client.h b/include/datadog/http_client.h index d1349f5c..7186acbe 100644 --- a/include/datadog/http_client.h +++ b/include/datadog/http_client.h @@ -26,6 +26,7 @@ class HTTPClient { std::string scheme; // http, https, or unix std::string authority; // domain:port or /path/to/socket std::string path; // resource, e.g. /v0.4/traces + std::string query; // query string without '?' static Expected parse(StringView input); }; diff --git a/include/datadog/http_endpoint_calculation_mode.h b/include/datadog/http_endpoint_calculation_mode.h new file mode 100644 index 00000000..61299f07 --- /dev/null +++ b/include/datadog/http_endpoint_calculation_mode.h @@ -0,0 +1,34 @@ +#pragma once + +// This component provides an enumeration that controls how the http.endpoint +// tag is calculated for HTTP spans. 
+ +#include + +namespace datadog { +namespace tracing { + +// `HttpEndpointCalculationMode` determines when and how the http.endpoint tag +// is inferred from http.url for HTTP spans. +// +// The http.endpoint tag provides a normalized, parameterized version of the +// HTTP path (e.g., "/users/{param:int}" instead of "/users/123"). This helps +// aggregate similar requests and reduce cardinality in monitoring systems. +enum class HttpEndpointCalculationMode : std::uint8_t { + // Do not calculate http.endpoint. The tag will not be set unless explicitly + // provided by the user. + DISABLED, + + // Calculate http.endpoint from http.url only when http.route is not present. + // This mode acts as a fallback - if instrumentation provides http.route, + // use that; otherwise, infer http.endpoint from the URL path. + FALLBACK, + + // Always calculate http.endpoint from http.url, even when http.route is + // present. Both tags will be set, allowing for comparison between + // user-provided routes and automatically inferred endpoints. 
+ ALWAYS_CALCULATE, +}; + +} // namespace tracing +} // namespace datadog diff --git a/include/datadog/trace_segment.h b/include/datadog/trace_segment.h index d95b71a4..a9fcf171 100644 --- a/include/datadog/trace_segment.h +++ b/include/datadog/trace_segment.h @@ -37,6 +37,7 @@ #include "runtime_id.h" #include "sampling_decision.h" #include "sampling_priority.h" +#include "tracer_config.h" namespace datadog { namespace telemetry { @@ -79,6 +80,8 @@ class TraceSegment { std::shared_ptr config_manager_; + HttpEndpointCalculationMode resource_renaming_mode_; + bool tracing_enabled_; public: @@ -97,6 +100,7 @@ class TraceSegment { Optional additional_w3c_tracestate, Optional additional_datadog_w3c_tracestate, std::unique_ptr local_root, + HttpEndpointCalculationMode resource_renaming_mode, bool tracing_enabled = true); const SpanDefaults& defaults() const; diff --git a/include/datadog/tracer.h b/include/datadog/tracer.h index dd33a929..1ba99891 100644 --- a/include/datadog/tracer.h +++ b/include/datadog/tracer.h @@ -55,6 +55,7 @@ class Tracer { bool baggage_injection_enabled_; bool baggage_extraction_enabled_; bool tracing_enabled_; + HttpEndpointCalculationMode resource_renaming_mode_; public: // Create a tracer configured using the specified `config`, and optionally: diff --git a/include/datadog/tracer_config.h b/include/datadog/tracer_config.h index a94e9578..21b2f687 100644 --- a/include/datadog/tracer_config.h +++ b/include/datadog/tracer_config.h @@ -15,6 +15,7 @@ #include "clock.h" #include "datadog_agent_config.h" #include "expected.h" +#include "http_endpoint_calculation_mode.h" #include "propagation_style.h" #include "runtime_id.h" #include "span_defaults.h" @@ -178,6 +179,15 @@ struct TracerConfig { /// Overridden by the `DD_APM_TRACING_ENABLED` environment variable. Defaults /// to `true`. Optional tracing_enabled; + + // Whether generation of http.endpoint is enabled. This is disabled by + // default. 
+ Optional resource_renaming_enabled; + + // Whether http.endpoint is always calculated, even when http.route is + // present. This is disabled by default. + // This option is ignored if `resource_renaming_enabled` is not `true`. + Optional resource_renaming_always_simplified_endpoint; }; // `FinalizedTracerConfig` contains `Tracer` implementation details derived from @@ -218,6 +228,7 @@ class FinalizedTracerConfig final { std::shared_ptr event_scheduler; std::shared_ptr http_client; bool tracing_enabled; + HttpEndpointCalculationMode resource_renaming_mode; }; // Return a `FinalizedTracerConfig` from the specified `config` and from any diff --git a/src/datadog/endpoint_inferral.cpp b/src/datadog/endpoint_inferral.cpp new file mode 100644 index 00000000..15c7ac2f --- /dev/null +++ b/src/datadog/endpoint_inferral.cpp @@ -0,0 +1,185 @@ +#include "endpoint_inferral.h" + +#include + +namespace datadog::tracing { + +namespace { + +constexpr size_t MAX_COMPONENTS = 8; + +inline constexpr bool is_digit(char c) { return c >= '0' && c <= '9'; } +inline constexpr bool is_hex_alpha(char c) { + return (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); +} +inline constexpr bool is_delim(char c) { + return c == '.' 
|| c == '_' || c == '-'; +} +inline constexpr bool is_str_special(char c) { + return c == '%' || c == '&' || c == '\'' || c == '(' || c == ')' || + c == '*' || c == '+' || c == ',' || c == ':' || c == '=' || c == '@'; +} + +/* +clang-format off +{param:int} [1-9][0-9]+ len≥2, digits only, first 1–9 +{param:int_id} (?=.*[0-9])[0-9._-]{3,} len≥3, [0-9._-], must contain digit +{param:hex} (?=.*[0-9])[A-Fa-f0-9]{6,} len≥6, hex digits, must contain decimal digit +{param:hex_id} (?=.*[0-9])[A-Fa-f0-9._-]{6,} len≥6, hex+._-, must contain decimal digit +{param:str} .{20,}|.*[%&'()*+,:=@].* any chars, valid if len≥20 or contains special +clang-format on +*/ +enum component_type : std::uint8_t { + none = 0, + is_int = 1 << 0, + is_int_id = 1 << 1, + is_hex = 1 << 2, + is_hex_id = 1 << 3, + is_str = 1 << 4, +}; +static constexpr auto all_components = + is_int | is_int_id | is_hex | is_hex_id | is_str; +static_assert(all_components == (is_str << 1) - 1); + +StringView to_string(component_type type) { + switch (type) { + case component_type::is_int: + return "{param:int}"; + case component_type::is_int_id: + return "{param:int_id}"; + case component_type::is_hex: + return "{param:hex}"; + case component_type::is_hex_id: + return "{param:hex_id}"; + case component_type::is_str: + return "{param:str}"; + case component_type::none: + // should not be reached + return ""; + } + // should never reach here + return ""; +} + +component_type component_replacement(StringView path) noexcept { + // viable_components is a bitset of the component types not yet excluded + std::uint8_t viable_components = all_components; + bool found_special_char = false; + bool found_digit = false; + + if (path.size() < 2) { + viable_components &= ~(component_type::is_int | component_type::is_int_id | + component_type::is_hex | component_type::is_hex_id); + } else if (path.size() < 3) { + viable_components &= ~(component_type::is_int_id | component_type::is_hex | + component_type::is_hex_id); + } else if 
(path.size() < 6) { + viable_components &= ~(component_type::is_hex | component_type::is_hex_id); + } + + // is_int does not allow a leading 0 + if (!path.empty() && path[0] == '0') { + viable_components &= ~component_type::is_int; + } + + for (std::size_t i = 0; i < path.size(); ++i) { + char c = path[i]; + + if (is_str_special(c)) { + found_special_char = true; + viable_components &= + ~(component_type::is_int | component_type::is_int_id | + component_type::is_hex | component_type::is_hex_id); + } else if (is_hex_alpha(c)) { + viable_components &= + ~(component_type::is_int | component_type::is_int_id); + } else if (is_delim(c)) { + viable_components &= ~(component_type::is_int | component_type::is_hex); + } else if (is_digit(c)) { + found_digit = true; + } else { + // other character + viable_components &= + ~(component_type::is_int | component_type::is_int_id | + component_type::is_hex | component_type::is_hex_id); + } + } + + // is_str requires a special char or a size >= 20 + if (!found_special_char && path.size() < 20) { + viable_components &= ~component_type::is_str; + } + + // hex, and hex_id require a digit + if (!found_digit) { + viable_components &= ~(component_type::is_hex | component_type::is_hex_id); + } + + if (viable_components == 0) { + return component_type::none; + } + + // Get least significant set bit to determine component w/ highest precedence + // c++20: use std::countr_zero + std::uint8_t lsb = static_cast( + viable_components & + static_cast(-static_cast(viable_components))); + return static_cast(lsb); +} +} // namespace + +std::string infer_endpoint(StringView path) { + // Expects a clean path without query string (e.g., "/api/users/123") + if (path.empty() || path.front() != '/') { + return "/"; + } + + std::string result{}; + size_t component_count = 0; + bool final_slash = true; + + path.remove_prefix(1); // drop the leading '/' + while (!path.empty()) { + auto slash_pos = path.find('/'); + + StringView component = path.substr(0, 
slash_pos); + + // remove current component from the path (for the next iteration) + if (slash_pos == StringView::npos) { + path = StringView{}; + final_slash = false; + } else { + path.remove_prefix(slash_pos + 1); + } + + if (component.empty()) { + continue; + } + + result.append("/"); + + // replace the literal component with the appropriate placeholder + // (if it matches one of the patterns) + auto type = component_replacement(component); + if (type == component_type::none) { + result.append(component); + } else { + result.append(to_string(type)); + } + if (++component_count >= MAX_COMPONENTS) { + break; + } + } + + if (result.empty()) { + return "/"; + } + + if (final_slash) { + result.append("/"); + } + + return result; +} + +} // namespace datadog::tracing diff --git a/src/datadog/endpoint_inferral.h b/src/datadog/endpoint_inferral.h new file mode 100644 index 00000000..0425b90d --- /dev/null +++ b/src/datadog/endpoint_inferral.h @@ -0,0 +1,17 @@ +#pragma once + +#include + +#include + +namespace datadog::tracing { + +// Infer the endpoint pattern from a URL path by replacing parameters with +// placeholders like {param:int}, {param:hex}, etc. +// +// The input should be a clean path without query string (e.g., +// "/api/users/123"). URL parsing should be handled by the caller using +// HTTPClient::URL::parse(). +std::string infer_endpoint(StringView path); + +} // namespace datadog::tracing diff --git a/src/datadog/http_client.cpp b/src/datadog/http_client.cpp index 25d3cad6..c8b4a95f 100644 --- a/src/datadog/http_client.cpp +++ b/src/datadog/http_client.cpp @@ -61,7 +61,7 @@ Expected HTTPClient::URL::parse(StringView input) { std::move(message)}; } return HTTPClient::URL{std::string(scheme), std::string(authority_and_path), - ""}; + "", ""}; } // The scheme is either "http" or "https". 
This means that the part after @@ -70,12 +70,31 @@ Expected HTTPClient::URL::parse(StringView input) { // the Datadog Agent service, and so they will not have a resource // location. Still, let's parse it properly. const auto after_authority = authority_and_path.find('/'); + + std::string path; + std::string query; + if (after_authority != StringView::npos) { + StringView path_and_query = authority_and_path.substr(after_authority); +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-overread" +#endif + const auto query_pos = path_and_query.find('?'); +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif + if (query_pos != StringView::npos) { + path = std::string(path_and_query.substr(0, query_pos)); + query = std::string(path_and_query.substr(query_pos + 1)); + } else { + path = std::string(path_and_query); + } + } + return HTTPClient::URL{ std::string(scheme), std::string(authority_and_path.substr(0, after_authority)), - (after_authority == StringView::npos) - ? 
"" - : std::string(authority_and_path.substr(after_authority))}; + std::move(path), std::move(query)}; } } // namespace tracing diff --git a/src/datadog/tags.cpp b/src/datadog/tags.cpp index df73dec9..4b2bc997 100644 --- a/src/datadog/tags.cpp +++ b/src/datadog/tags.cpp @@ -10,6 +10,9 @@ const std::string span_type = "span.type"; const std::string operation_name = "operation"; const std::string resource_name = "resource.name"; const std::string version = "version"; +const std::string http_endpoint = "http.endpoint"; +const std::string http_route = "http.route"; +const std::string http_url = "http.url"; namespace internal { diff --git a/src/datadog/tags.h b/src/datadog/tags.h index 9c7ea231..8d9f4d60 100644 --- a/src/datadog/tags.h +++ b/src/datadog/tags.h @@ -19,6 +19,9 @@ extern const std::string span_type; extern const std::string operation_name; extern const std::string resource_name; extern const std::string version; +extern const std::string http_endpoint; +extern const std::string http_route; +extern const std::string http_url; namespace internal { extern const std::string propagation_error; diff --git a/src/datadog/telemetry/telemetry_impl.cpp b/src/datadog/telemetry/telemetry_impl.cpp index 3b484d3a..2089bebd 100644 --- a/src/datadog/telemetry/telemetry_impl.cpp +++ b/src/datadog/telemetry/telemetry_impl.cpp @@ -46,8 +46,8 @@ const telemetry::Distribution bytes_sent{"telemetry_api.bytes", "telemetry", /// The time it takes to send the payload sent to the endpoint in ms, tagged by /// the endpoint (`endpoint:agent`, `endpoint:agentless`). 
-const telemetry::Distribution request_duration{"telemetry_api.ms", "telemetry", - true}; +[[maybe_unused]] const telemetry::Distribution request_duration{ + "telemetry_api.ms", "telemetry", true}; } // namespace internal_metrics @@ -109,6 +109,10 @@ std::string to_string(datadog::tracing::ConfigName name) { return "trace_baggage_max_items"; case ConfigName::APM_TRACING_ENABLED: return "apm_tracing_enabled"; + case ConfigName::TRACE_RESOURCE_RENAMING_ENABLED: + return "trace_resource_renaming_enabled"; + case ConfigName::TRACE_RESOURCE_RENAMING_ALWAYS_SIMPLIFIED_ENDPOINT: + return "trace_resource_renaming_always_simplified_endpoint"; } std::abort(); diff --git a/src/datadog/trace_segment.cpp b/src/datadog/trace_segment.cpp index 3cc9ebc8..61450863 100644 --- a/src/datadog/trace_segment.cpp +++ b/src/datadog/trace_segment.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -18,6 +19,7 @@ #include #include "config_manager.h" +#include "endpoint_inferral.h" #include "hex.h" #include "platform_util.h" #include "span_data.h" @@ -79,6 +81,33 @@ void inject_trace_tags( } } +void maybe_calculate_http_endpoint(HttpEndpointCalculationMode renaming_mode, + SpanData& local_root) { + // calculate http.endpoint if: + // a) the feature is not disabled, and + // b) the tag http.endpoint is not already set, and + // c) http.url is set, and + // d) http.route is not set or resource_renaming_mode is ALWAYS_CALCULATE + if (renaming_mode == HttpEndpointCalculationMode::DISABLED || + local_root.tags.find(tags::http_endpoint) != local_root.tags.end()) { + return; + } + auto http_url_tag = local_root.tags.find(tags::http_url); + const bool should_calculate_endpoint = + http_url_tag != local_root.tags.end() && + (renaming_mode == HttpEndpointCalculationMode::ALWAYS_CALCULATE || + local_root.tags.find(tags::http_route) == local_root.tags.end()); + + if (should_calculate_endpoint) { + Expected url_result = + HTTPClient::URL::parse(http_url_tag->second); + 
if (url_result.has_value()) { + const std::string& path = url_result->path; + local_root.tags[tags::http_endpoint] = + infer_endpoint(path.empty() ? "/" : path); + } + } +} } // namespace TraceSegment::TraceSegment( @@ -96,7 +125,9 @@ TraceSegment::TraceSegment( Optional sampling_decision, Optional additional_w3c_tracestate, Optional additional_datadog_w3c_tracestate, - std::unique_ptr local_root, bool apm_tracing_enabled) + std::unique_ptr local_root, + HttpEndpointCalculationMode resource_renaming_mode, + bool apm_tracing_enabled) : logger_(logger), collector_(collector), trace_sampler_(trace_sampler), @@ -114,6 +145,7 @@ TraceSegment::TraceSegment( additional_datadog_w3c_tracestate_( std::move(additional_datadog_w3c_tracestate)), config_manager_(config_manager), + resource_renaming_mode_(resource_renaming_mode), tracing_enabled_(apm_tracing_enabled) { assert(logger_); assert(collector_); @@ -228,7 +260,7 @@ void TraceSegment::span_finished() { } // RFC seems to only mandate that this be set if the trace is kept. - // However, system-tests expect this to be always be set. + // However, system-tests expect this to always be set. 
// Add it all the time; can't hurt if (!tracing_enabled_) { local_root.numeric_tags[tags::internal::apm_enabled] = 0; @@ -245,6 +277,8 @@ void TraceSegment::span_finished() { span.tags[tags::internal::runtime_id] = runtime_id_.string(); } + maybe_calculate_http_endpoint(resource_renaming_mode_, local_root); + if (config_manager_->report_traces()) { telemetry::distribution::add(metrics::tracer::trace_chunk_size, spans_.size()); diff --git a/src/datadog/tracer.cpp b/src/datadog/tracer.cpp index 8015ed72..55174b4e 100644 --- a/src/datadog/tracer.cpp +++ b/src/datadog/tracer.cpp @@ -59,7 +59,8 @@ Tracer::Tracer(const FinalizedTracerConfig& config, baggage_opts_(config.baggage_opts), baggage_injection_enabled_(false), baggage_extraction_enabled_(false), - tracing_enabled_(config.tracing_enabled) { + tracing_enabled_(config.tracing_enabled), + resource_renaming_mode_(config.resource_renaming_mode) { telemetry::init(config.telemetry, signature_, logger_, config.http_client, config.event_scheduler, config.agent_url); if (config.report_hostname) { @@ -197,7 +198,7 @@ Span Tracer::create_span(const SpanConfig& config) { nullopt /* origin */, tags_header_max_size_, std::move(trace_tags), nullopt /* sampling_decision */, nullopt /* additional_w3c_tracestate */, nullopt /* additional_datadog_w3c_tracestate*/, std::move(span_data), - tracing_enabled_); + resource_renaming_mode_, tracing_enabled_); Span span{span_data_ptr, segment, [generator = generator_]() { return generator->span_id(); }, clock_}; @@ -424,7 +425,7 @@ Expected Tracer::extract_span(const DictReader& reader, std::move(sampling_decision), std::move(merged_context.additional_w3c_tracestate), std::move(merged_context.additional_datadog_w3c_tracestate), - std::move(span_data), tracing_enabled_); + std::move(span_data), resource_renaming_mode_, tracing_enabled_); Span span{span_data_ptr, segment, [generator = generator_]() { return generator->span_id(); }, clock_}; diff --git a/src/datadog/tracer_config.cpp 
b/src/datadog/tracer_config.cpp index 1af94a05..852035a5 100644 --- a/src/datadog/tracer_config.cpp +++ b/src/datadog/tracer_config.cpp @@ -133,6 +133,16 @@ Expected load_tracer_env_config(Logger &logger) { env_cfg.tracing_enabled = !falsy(*apm_enabled_env); } + if (auto resource_renaming_enabled_env = + lookup(environment::DD_TRACE_RESOURCE_RENAMING_ENABLED)) { + env_cfg.resource_renaming_enabled = !falsy(*resource_renaming_enabled_env); + } + if (auto resource_renaming_always_simplified_endpoint_env = lookup( + environment::DD_TRACE_RESOURCE_RENAMING_ALWAYS_SIMPLIFIED_ENDPOINT)) { + env_cfg.resource_renaming_always_simplified_endpoint = + !falsy(*resource_renaming_always_simplified_endpoint_env); + } + // Baggage if (auto baggage_items_env = lookup(environment::DD_TRACE_BAGGAGE_MAX_ITEMS)) { @@ -453,6 +463,40 @@ Expected finalize_config(const TracerConfig &user_config, ConfigMetadata(ConfigName::APM_TRACING_ENABLED, to_string(final_config.tracing_enabled), origin); + { + // Resource Renaming Enabled + bool resource_renaming_enabled; + std::tie(origin, resource_renaming_enabled) = + pick(env_config->resource_renaming_enabled, + user_config.resource_renaming_enabled, false); + + final_config.metadata[ConfigName::TRACE_RESOURCE_RENAMING_ENABLED] = + ConfigMetadata(ConfigName::TRACE_RESOURCE_RENAMING_ENABLED, + to_string(resource_renaming_enabled), origin); + + // Resource Renaming Always Simplified Endpoint + bool resource_renaming_always_simplified_endpoint; + std::tie(origin, resource_renaming_always_simplified_endpoint) = + pick(env_config->resource_renaming_always_simplified_endpoint, + user_config.resource_renaming_always_simplified_endpoint, false); + final_config.metadata + [ConfigName::TRACE_RESOURCE_RENAMING_ALWAYS_SIMPLIFIED_ENDPOINT] = + ConfigMetadata( + ConfigName::TRACE_RESOURCE_RENAMING_ALWAYS_SIMPLIFIED_ENDPOINT, + to_string(resource_renaming_always_simplified_endpoint), origin); + + if (!resource_renaming_enabled) { + 
final_config.resource_renaming_mode = + HttpEndpointCalculationMode::DISABLED; + } else if (resource_renaming_always_simplified_endpoint) { + final_config.resource_renaming_mode = + HttpEndpointCalculationMode::ALWAYS_CALCULATE; + } else { + final_config.resource_renaming_mode = + HttpEndpointCalculationMode::FALLBACK; + } + } + // Whether APM tracing is enabled. This affects whether the // "Datadog-Client-Computed-Stats: yes" header is sent with trace requests. if (!final_config.tracing_enabled) { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c4754be1..7571aa8b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -39,6 +39,7 @@ add_executable(tests test_tracer_config.cpp test_tracer.cpp test_trace_sampler.cpp + test_endpoint_inferral.cpp remote_config/test_remote_config.cpp ) diff --git a/test/test_curl.cpp b/test/test_curl.cpp index 96ba1591..3c88ef88 100644 --- a/test/test_curl.cpp +++ b/test/test_curl.cpp @@ -187,7 +187,7 @@ CURL_TEST("parse response headers and body") { // verify that the received response headers are as expected. 
Optional post_error; std::exception_ptr exception; - const HTTPClient::URL url = {"http", "whatever", ""}; + const HTTPClient::URL url = {"http", "whatever", "", ""}; const auto result = client->post( url, ignore, "whatever", [&](int status, const DictReader &headers, std::string body) { @@ -236,7 +236,7 @@ CURL_TEST("bad multi-handle means error mode") { const auto client = std::make_shared(logger, clock, library); REQUIRE(logger->first_error().code == Error::CURL_HTTP_CLIENT_SETUP_FAILED); - const HTTPClient::URL url = {"http", "whatever", ""}; + const HTTPClient::URL url = {"http", "whatever", "", ""}; const auto dummy_deadline = clock().tick + std::chrono::seconds(10); const auto result = client->post(url, ignore, "dummy body", ignore, ignore, dummy_deadline); @@ -258,7 +258,7 @@ CURL_TEST("bad std::thread means error mode") { REQUIRE(logger->first_error().code == Error::CURL_HTTP_CLIENT_SETUP_FAILED); const auto dummy_deadline = clock().tick + std::chrono::seconds(10); - const HTTPClient::URL url = {"http", "whatever", ""}; + const HTTPClient::URL url = {"http", "whatever", "", ""}; const auto result = client->post(url, ignore, "dummy body", ignore, ignore, dummy_deadline); REQUIRE_FALSE(result); @@ -278,7 +278,7 @@ CURL_TEST("fail to allocate request handle") { MockCurlLibrary library; const auto client = std::make_shared(logger, clock, library); - const HTTPClient::URL url = {"http", "whatever", ""}; + const HTTPClient::URL url = {"http", "whatever", "", ""}; const auto dummy_deadline = clock().tick + std::chrono::seconds(10); const auto result = client->post(url, ignore, "dummy body", ignore, ignore, dummy_deadline); @@ -421,7 +421,7 @@ CURL_TEST("handles are always cleaned up") { SECTION("when the response is delivered") { Optional post_error; std::exception_ptr exception; - const HTTPClient::URL url = {"http", "whatever", ""}; + const HTTPClient::URL url = {"http", "whatever", "", ""}; const auto dummy_deadline = clock().tick + std::chrono::seconds(10); 
const auto result = client->post( url, ignore, "whatever", @@ -446,7 +446,7 @@ CURL_TEST("handles are always cleaned up") { SECTION("when an error occurs") { Optional post_error; - const HTTPClient::URL url = {"http", "whatever", ""}; + const HTTPClient::URL url = {"http", "whatever", "", ""}; const auto dummy_deadline = clock().tick + std::chrono::seconds(10); library.message_result_ = CURLE_COULDNT_CONNECT; // any error would do const auto result = client->post( @@ -459,7 +459,7 @@ CURL_TEST("handles are always cleaned up") { } SECTION("when we shut down while a request is in flight") { - const HTTPClient::URL url = {"http", "whatever", ""}; + const HTTPClient::URL url = {"http", "whatever", "", ""}; const auto dummy_deadline = clock().tick + std::chrono::seconds(10); library.delay_message_ = true; const auto result = @@ -479,7 +479,7 @@ CURL_TEST("post() deadline exceeded before request start") { const auto clock = default_clock; Curl client{std::make_shared(), clock}; - const HTTPClient::URL url = {"http", "whatever", ""}; + const HTTPClient::URL url = {"http", "whatever", "", ""}; const std::string body; const auto deadline = clock().tick - std::chrono::milliseconds(1); Optional error_delivered; diff --git a/test/test_endpoint_inferral.cpp b/test/test_endpoint_inferral.cpp new file mode 100644 index 00000000..1449cd87 --- /dev/null +++ b/test/test_endpoint_inferral.cpp @@ -0,0 +1,73 @@ +#include + +#include + +#include "test.h" + +using namespace datadog::tracing; + +#define TEST_ENDPOINT(x) TEST_CASE(x, "[endpoint_inferral]") + +TEST_ENDPOINT("invalid inputs and root") { + CHECK(infer_endpoint("") == "/"); + CHECK(infer_endpoint("abc") == "/"); + CHECK(infer_endpoint("/") == "/"); + CHECK(infer_endpoint("////") == "/"); +} + +TEST_ENDPOINT("skips empty components") { + CHECK(infer_endpoint("/a//b") == "/a/b"); + CHECK(infer_endpoint("/a/b/") == "/a/b/"); +} + +TEST_ENDPOINT("int and int_id replacement") { + CHECK(infer_endpoint("/users/12") == 
"/users/{param:int}"); + CHECK(infer_endpoint("/v1/0-1_2.3") == "/v1/{param:int_id}"); + CHECK(infer_endpoint("/x/09") == "/x/09"); // leading zero not int + CHECK(infer_endpoint("/1") == "/1"); // single digit not int/int_id +} + +TEST_ENDPOINT("hex and hex_id replacement") { + CHECK(infer_endpoint("/x/abcde9") == "/x/{param:hex}"); + CHECK(infer_endpoint("/x/ab_cd-9") == "/x/{param:hex_id}"); +} + +TEST_ENDPOINT("long sequences of 20 or more chars yield str") { + std::string longseg(20, 'a'); + std::string path = std::string("/x/") + longseg; + CHECK(infer_endpoint(path) == "/x/{param:str}"); +} + +TEST_ENDPOINT("other specials yield str") { + const char specials[] = {'%', '&', '\'', '(', ')', '*', + '+', ',', ':', '=', '@'}; + for (char c : specials) { + std::string s = "/x/a"; + s.push_back(c); + s.push_back('b'); + CHECK(infer_endpoint(s) == "/x/{param:str}"); + } +} + +TEST_ENDPOINT("max components limit") { + const char* expected = + "/{param:int}/{param:int}/{param:int}/{param:int}/{param:int}/" + "{param:int}/{param:int}/{param:int}/"; + CHECK(infer_endpoint("/11/22/33/44/55/66/77/88/99/12") == expected); +} + +TEST_ENDPOINT("minimum length boundaries") { + // int_id requires length ≥ 3 + CHECK(infer_endpoint("/x/0-") == "/x/0-"); + CHECK(infer_endpoint("/x/0__") == "/x/{param:int_id}"); + + // hex requires length ≥ 6 + CHECK(infer_endpoint("/x/abcd9") == "/x/abcd9"); + + // hex_id requires length ≥ 6 + CHECK(infer_endpoint("/x/ab_c9") == "/x/ab_c9"); + CHECK(infer_endpoint("/x/ab_cd9") == "/x/{param:hex_id}"); + + // str requires length ≥ 20 (when no special characters) + CHECK(infer_endpoint("/x/aaaaaaaaaaaaaaaaaaa") == "/x/aaaaaaaaaaaaaaaaaaa"); +} diff --git a/test/test_trace_segment.cpp b/test/test_trace_segment.cpp index bf7e6108..94d167e6 100644 --- a/test/test_trace_segment.cpp +++ b/test/test_trace_segment.cpp @@ -492,3 +492,111 @@ TEST_CASE("independent of Tracer") { tracer.reset(); } + +TEST_CASE("http.endpoint population") { + TracerConfig 
config; + config.service = "testsvc"; + const auto collector = std::make_shared(); + config.collector = collector; + config.logger = std::make_shared(); + + SECTION("DISABLED -> never adds http.endpoint") { + // default is disabled + auto finalized = finalize_config(config); + REQUIRE(finalized); + Tracer tracer{*finalized}; + { + auto span = tracer.create_span(); + span.set_tag(tags::http_url, "http://example.com/users/12?x=y"); + } + REQUIRE(collector->span_count() == 1); + const auto& span = collector->first_span(); + REQUIRE(span.tags.count(tags::http_endpoint) == 0); + } + + SECTION("FALLBACK mode -> adds only when http.route is absent") { + config.resource_renaming_enabled = {true}; + config.resource_renaming_always_simplified_endpoint = {false}; + auto finalized = finalize_config(config); + REQUIRE(finalized); + REQUIRE(finalized->resource_renaming_mode == + HttpEndpointCalculationMode::FALLBACK); + + SECTION("route absent -> endpoint added from url path") { + Tracer tracer{*finalized}; + { + auto span = tracer.create_span(); + span.set_tag(tags::http_url, "http://example.com/users/12?x=y"); + } + REQUIRE(collector->span_count() == 1); + const auto& span = collector->first_span(); + REQUIRE(span.tags.count(tags::http_endpoint) == 1); + CHECK(span.tags.at(tags::http_endpoint) == "/users/{param:int}"); + } + + SECTION("route present -> endpoint not added") { + collector->chunks.clear(); + Tracer tracer{*finalized}; + { + auto span = tracer.create_span(); + span.set_tag(tags::http_url, "http://example.com/users/12"); + span.set_tag(tags::http_route, "/users/:id"); + } + REQUIRE(collector->span_count() == 1); + const auto& span = collector->first_span(); + REQUIRE(span.tags.count(tags::http_endpoint) == 0); + } + } + + SECTION("ALWAYS_CALCULATE -> adds even when http.route is present") { + config.resource_renaming_enabled = {true}; + config.resource_renaming_always_simplified_endpoint = {true}; + auto finalized = finalize_config(config); + REQUIRE(finalized); + 
REQUIRE(finalized->resource_renaming_mode == + HttpEndpointCalculationMode::ALWAYS_CALCULATE); + + Tracer tracer{*finalized}; + { + auto span = tracer.create_span(); + span.set_tag(tags::http_url, "http://example.com/notes/99"); + span.set_tag(tags::http_route, "/notes/:id"); + } + REQUIRE(collector->span_count() == 1); + const auto& span = collector->first_span(); + REQUIRE(span.tags.count(tags::http_endpoint) == 1); + CHECK(span.tags.at(tags::http_endpoint) == "/notes/{param:int}"); + } + + SECTION("http.url absent -> never adds") { + config.resource_renaming_enabled = {true}; + config.resource_renaming_always_simplified_endpoint = {true}; + auto finalized = finalize_config(config); + REQUIRE(finalized); + Tracer tracer{*finalized}; + { + auto span = tracer.create_span(); + // no http.url + } + REQUIRE(collector->span_count() == 1); + const auto& span = collector->first_span(); + REQUIRE(span.tags.count(tags::http_endpoint) == 0); + } + + SECTION("pre-existing http.endpoint is preserved") { + config.resource_renaming_enabled = {true}; + config.resource_renaming_always_simplified_endpoint = {true}; + auto finalized = finalize_config(config); + REQUIRE(finalized); + Tracer tracer{*finalized}; + { + auto span = tracer.create_span(); + span.set_tag(tags::http_url, "http://example.com/widgets/123"); + span.set_tag(tags::http_endpoint, "/pre/set"); + } + REQUIRE(collector->span_count() == 1); + const auto& span = collector->first_span(); + REQUIRE(span.tags.count(tags::http_endpoint) == 1); + CHECK(span.tags.at(tags::http_endpoint) == "/pre/set"); + } +}