diff --git a/manifests/cpp_httpd.yml b/manifests/cpp_httpd.yml
index de031f5373d..6a76f408d13 100644
--- a/manifests/cpp_httpd.yml
+++ b/manifests/cpp_httpd.yml
@@ -33,6 +33,7 @@ manifest:
   tests/debugger/test_debugger_probe_snapshot.py::Test_Debugger_Line_Probe_Snaphots::test_process_tags_snapshot_svc: missing_feature (Not yet implemented)
   tests/ffe/test_dynamic_evaluation.py: missing_feature
   tests/ffe/test_exposures.py: missing_feature
+  tests/ffe/test_flag_eval_metrics.py: missing_feature
   tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create_error: bug (MLOB-1234)
   tests/integrations/crossed_integrations/: missing_feature (Endpoint not implemented)
   tests/integrations/crossed_integrations/test_sqs.py::Test_SQS_PROPAGATION_VIA_AWS_XRAY_HEADERS: irrelevant (Localstack SQS does not support AWS Xray Header parsing)
diff --git a/manifests/cpp_kong.yml b/manifests/cpp_kong.yml
index d3accd80733..07c1d7bdfef 100644
--- a/manifests/cpp_kong.yml
+++ b/manifests/cpp_kong.yml
@@ -8,6 +8,7 @@ manifest:
   tests/appsec/: irrelevant (ASM is not implemented in Kong plugin)
   tests/debugger/: irrelevant
   tests/ffe/: missing_feature
+  tests/ffe/test_flag_eval_metrics.py: missing_feature
   tests/integrations/: missing_feature (Endpoints not implemented)
   tests/otel/: irrelevant (library does not implement OpenTelemetry)
   tests/parametric/: irrelevant (Parametric scenario is not applied on Kong)
diff --git a/manifests/cpp_nginx.yml b/manifests/cpp_nginx.yml
index cfa9595418d..5e7df1d66f7 100644
--- a/manifests/cpp_nginx.yml
+++ b/manifests/cpp_nginx.yml
@@ -234,6 +234,7 @@ manifest:
   tests/docker_ssi/test_docker_ssi_appsec.py::TestDockerSSIAppsecFeatures::test_telemetry_source_ssi: missing_feature
   tests/ffe/test_dynamic_evaluation.py: missing_feature
   tests/ffe/test_exposures.py: missing_feature
+  tests/ffe/test_flag_eval_metrics.py: missing_feature
   tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create_error: bug (MLOB-1234)
   tests/integrations/crossed_integrations/test_kafka.py::Test_Kafka: missing_feature
   tests/integrations/crossed_integrations/test_kinesis.py::Test_Kinesis_PROPAGATION_VIA_MESSAGE_ATTRIBUTES: missing_feature
diff --git a/manifests/dotnet.yml b/manifests/dotnet.yml
index b2ffb0082cf..950a196186e 100644
--- a/manifests/dotnet.yml
+++ b/manifests/dotnet.yml
@@ -690,6 +690,7 @@ manifest:
   tests/docker_ssi/test_docker_ssi_appsec.py::TestDockerSSIAppsecFeatures::test_telemetry_source_ssi: v3.36.0
   tests/ffe/test_dynamic_evaluation.py: v3.36.0
   tests/ffe/test_exposures.py: v3.36.0
+  tests/ffe/test_flag_eval_metrics.py: missing_feature
   tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create_error: bug (MLOB-1234)
   tests/integrations/crossed_integrations/test_kafka.py::Test_Kafka: v2.0.0-prerelease
   tests/integrations/crossed_integrations/test_kinesis.py::Test_Kinesis_PROPAGATION_VIA_MESSAGE_ATTRIBUTES: missing_feature
diff --git a/manifests/golang.yml b/manifests/golang.yml
index bc0e4572f5e..8e8fa4dff97 100644
--- a/manifests/golang.yml
+++ b/manifests/golang.yml
@@ -819,6 +819,7 @@ manifest:
   tests/ffe/test_dynamic_evaluation.py::Test_FFE_RC_Down_From_Start: v2.4.0
   tests/ffe/test_dynamic_evaluation.py::Test_FFE_RC_Unavailable: v2.4.0
   tests/ffe/test_exposures.py: v2.6.0-dev # Easy win for chi, echo, gin, net-http, net-http-orchestrion, uds-echo and version 2.5.0
+  tests/ffe/test_flag_eval_metrics.py: v2.7.0-dev
   tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create_error: bug (MLOB-1234)
   tests/integrations/crossed_integrations/test_kafka.py::Test_Kafka:
     - weblog_declaration:
diff --git a/manifests/java.yml b/manifests/java.yml
index 54717ba2901..3dcaffa9558 100644
--- a/manifests/java.yml
+++ b/manifests/java.yml
@@ -3062,6 +3062,7 @@ manifest:
         "*": irrelevant
         spring-boot: v1.56.0
   tests/ffe/test_exposures.py::Test_FFE_EXP_5_Missing_Targeting_Key: bug (FFL-1729)
+  tests/ffe/test_flag_eval_metrics.py: missing_feature
   tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create_error: bug (MLOB-1234)
   tests/integrations/crossed_integrations/test_kafka.py::Test_Kafka:
     - weblog_declaration:
diff --git a/manifests/nodejs.yml b/manifests/nodejs.yml
index e6fddee1767..b69f959d2e8 100644
--- a/manifests/nodejs.yml
+++ b/manifests/nodejs.yml
@@ -1585,6 +1585,7 @@ manifest:
         "*": incomplete_test_app
         express4: *ref_5_77_0
   tests/ffe/test_exposures.py::Test_FFE_EXP_5_Missing_Targeting_Key: bug (FFL-1730)
+  tests/ffe/test_flag_eval_metrics.py: missing_feature
   tests/integration_frameworks/llm/anthropic/test_anthropic_apm.py::TestAnthropicApmMessages: *ref_5_71_0
   tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages: *ref_5_71_0
   tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create_error: bug (MLOB-1234)
diff --git a/manifests/php.yml b/manifests/php.yml
index 88158b954eb..0ab061c603a 100644
--- a/manifests/php.yml
+++ b/manifests/php.yml
@@ -550,6 +550,7 @@ manifest:
   tests/docker_ssi/test_docker_ssi_crash.py::TestDockerSSICrash::test_crash: missing_feature (No implemented the endpoint /crashme)
   tests/ffe/test_dynamic_evaluation.py: missing_feature
   tests/ffe/test_exposures.py: missing_feature
+  tests/ffe/test_flag_eval_metrics.py: missing_feature
   tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create_error: bug (MLOB-1234)
   tests/integrations/crossed_integrations/test_kafka.py::Test_Kafka: missing_feature
   tests/integrations/crossed_integrations/test_kinesis.py::Test_Kinesis_PROPAGATION_VIA_MESSAGE_ATTRIBUTES: missing_feature
diff --git a/manifests/python.yml b/manifests/python.yml
index 1463b4b7d29..775bb6a74de 100644
--- a/manifests/python.yml
+++ b/manifests/python.yml
@@ -1078,6 +1078,7 @@ manifest:
   tests/ffe/test_dynamic_evaluation.py::Test_FFE_RC_Down_From_Start: v4.0.0
   tests/ffe/test_dynamic_evaluation.py::Test_FFE_RC_Unavailable: flaky (FFL-1622)
   tests/ffe/test_exposures.py: v4.2.0-dev
+  tests/ffe/test_flag_eval_metrics.py: missing_feature
   tests/integration_frameworks/llm/anthropic/test_anthropic_apm.py::TestAnthropicApmMessages: v3.16.0
   tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages: v3.16.0
   tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create: missing_feature # ephemeral cache TTL metrics not yet released
diff --git a/manifests/ruby.yml b/manifests/ruby.yml
index ff685b1d55b..3aae1c8766b 100644
--- a/manifests/ruby.yml
+++ b/manifests/ruby.yml
@@ -1093,6 +1093,7 @@ manifest:
     - weblog_declaration:
         "*": irrelevant
         rails72: v2.23.0-dev
+  tests/ffe/test_flag_eval_metrics.py: missing_feature
   tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create_error: bug (MLOB-1234)
   tests/integrations/crossed_integrations/test_kafka.py::Test_Kafka:
     - weblog_declaration:
diff --git a/manifests/rust.yml b/manifests/rust.yml
index d64f69ad8b9..cdb320e3558 100644
--- a/manifests/rust.yml
+++ b/manifests/rust.yml
@@ -21,6 +21,7 @@ manifest:
   tests/docker_ssi/test_docker_ssi_appsec.py::TestDockerSSIAppsecFeatures::test_telemetry_source_ssi: missing_feature
   tests/ffe/test_dynamic_evaluation.py: missing_feature
   tests/ffe/test_exposures.py: missing_feature
+  tests/ffe/test_flag_eval_metrics.py: missing_feature
   tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create_error: bug (MLOB-1234)
   tests/integrations/crossed_integrations/test_sqs.py::Test_SQS_PROPAGATION_VIA_AWS_XRAY_HEADERS: irrelevant (Localstack SQS does not support AWS Xray Header parsing)
   tests/integrations/test_cassandra.py::Test_Cassandra: missing_feature (Endpoint is not implemented on weblog)
diff --git a/tests/ffe/test_flag_eval_metrics.py b/tests/ffe/test_flag_eval_metrics.py
new file mode 100644
index 00000000000..c0116d8564a
--- /dev/null
+++ b/tests/ffe/test_flag_eval_metrics.py
@@ -0,0 +1,363 @@
+"""Test feature flag evaluation metrics via OTel Metrics API."""
+
+from utils import (
+    weblog,
+    interfaces,
+    scenarios,
+    features,
+    remote_config as rc,
+)
+
+
+RC_PRODUCT = "FFE_FLAGS"
+RC_PATH = f"datadog/2/{RC_PRODUCT}"
+
+
+def make_ufc_fixture(flag_key: str, variant_key: str = "on", variation_type: str = "STRING", *, enabled: bool = True):
+    """Create a UFC fixture with the given flag configuration."""
+    values: dict[str, dict[str, str | bool]] = {
+        "STRING": {"on": "on-value", "off": "off-value"},
+        "BOOLEAN": {"on": True, "off": False},
+    }
+    var_values = values[variation_type]
+
+    return {
+        "createdAt": "2024-04-17T19:40:53.716Z",
+        "format": "SERVER",
+        "environment": {"name": "Test"},
+        "flags": {
+            flag_key: {
+                "key": flag_key,
+                "enabled": enabled,
+                "variationType": variation_type,
+                "variations": {
+                    "on": {"key": "on", "value": var_values["on"]},
+                    "off": {"key": "off", "value": var_values["off"]},
+                },
+                "allocations": [
+                    {
+                        "key": "default-allocation",
+                        "rules": [],
+                        "splits": [{"variationKey": variant_key, "shards": []}],
+                        "doLog": True,
+                    }
+                ],
+            }
+        },
+    }
+
+
+def find_eval_metrics(flag_key: str | None = None):
+    """Find feature_flag.evaluations metrics in agent data.
+
+    Returns a list of metric points matching the metric name, optionally filtered by flag key tag.
+    """
+    results = []
+    for _, point in interfaces.agent.get_metrics():
+        if point.get("metric") != "feature_flag.evaluations":
+            continue
+
+        tags = point.get("tags", [])
+        if flag_key is not None:
+            tag_match = any(t == f"feature_flag.key:{flag_key}" for t in tags)
+            if not tag_match:
+                continue
+
+        results.append(point)
+    return results
+
+
+def get_tag_value(tags: list[str], key: str):
+    """Extract a tag value from a list of 'key:value' strings."""
+    prefix = f"{key}:"
+    for tag in tags:
+        if tag.startswith(prefix):
+            return tag[len(prefix) :]
+    return None
+
+
+@scenarios.feature_flagging_and_experimentation
+@features.feature_flags_exposures
+class Test_FFE_Eval_Metric_Basic:
+    """Test that a flag evaluation produces a feature_flag.evaluations metric."""
+
+    def setup_ffe_eval_metric_basic(self):
+        rc.tracer_rc_state.reset().apply()
+
+        config_id = "ffe-eval-metric-basic"
+        self.flag_key = "eval-metric-basic-flag"
+        rc.tracer_rc_state.set_config(f"{RC_PATH}/{config_id}/config", make_ufc_fixture(self.flag_key)).apply()
+
+        self.r = weblog.post(
+            "/ffe",
+            json={
+                "flag": self.flag_key,
+                "variationType": "STRING",
+                "defaultValue": "default",
+                "targetingKey": "user-1",
+                "attributes": {},
+            },
+        )
+
+    def test_ffe_eval_metric_basic(self):
+        """Test that flag evaluation produces a metric with correct tags."""
+        assert self.r.status_code == 200, f"Flag evaluation failed: {self.r.text}"
+
+        metrics = find_eval_metrics(self.flag_key)
+        assert len(metrics) > 0, (
+            f"Expected at least one feature_flag.evaluations metric for flag '{self.flag_key}', "
+            f"but found none. All eval metrics: {find_eval_metrics()}"
+        )
+
+        # Verify tags on the first matching metric point
+        point = metrics[0]
+        tags = point.get("tags", [])
+
+        assert get_tag_value(tags, "feature_flag.key") == self.flag_key, (
+            f"Expected tag feature_flag.key:{self.flag_key}, got tags: {tags}"
+        )
+        assert get_tag_value(tags, "feature_flag.result.variant") == "on", (
+            f"Expected tag feature_flag.result.variant:on, got tags: {tags}"
+        )
+        assert get_tag_value(tags, "feature_flag.result.reason") == "static", (
+            f"Expected tag feature_flag.result.reason:static, got tags: {tags}"
+        )
+        assert get_tag_value(tags, "feature_flag.result.allocation_key") == "default-allocation", (
+            f"Expected tag feature_flag.result.allocation_key:default-allocation, got tags: {tags}"
+        )
+
+
+@scenarios.feature_flagging_and_experimentation
+@features.feature_flags_exposures
+class Test_FFE_Eval_Metric_Count:
+    """Test that multiple evaluations of the same flag produce correct metric count."""
+
+    def setup_ffe_eval_metric_count(self):
+        rc.tracer_rc_state.reset().apply()
+
+        config_id = "ffe-eval-metric-count"
+        self.flag_key = "eval-metric-count-flag"
+        rc.tracer_rc_state.set_config(f"{RC_PATH}/{config_id}/config", make_ufc_fixture(self.flag_key)).apply()
+
+        self.eval_count = 5
+        self.responses = []
+        for _ in range(self.eval_count):
+            r = weblog.post(
+                "/ffe",
+                json={
+                    "flag": self.flag_key,
+                    "variationType": "STRING",
+                    "defaultValue": "default",
+                    "targetingKey": "user-1",
+                    "attributes": {},
+                },
+            )
+            self.responses.append(r)
+
+    def test_ffe_eval_metric_count(self):
+        """Test that N evaluations produce metric count = N."""
+        for i, r in enumerate(self.responses):
+            assert r.status_code == 200, f"Request {i + 1} failed: {r.text}"
+
+        metrics = find_eval_metrics(self.flag_key)
+        assert len(metrics) > 0, (
+            f"Expected at least one feature_flag.evaluations metric for flag '{self.flag_key}', but found none."
+        )
+
+        # Sum all data points for this flag (agent may split across multiple series entries)
+        total_count = 0
+        for point in metrics:
+            points = point.get("points", [])
+            for p in points:
+                # points format: {"value": N, "timestamp": "..."} (v2 series API)
+                if isinstance(p, dict):
+                    total_count += p.get("value", 0)
+                elif isinstance(p, list) and len(p) >= 2:
+                    total_count += p[1]
+
+        assert total_count >= self.eval_count, f"Expected metric count >= {self.eval_count}, got {total_count}"
+
+
+@scenarios.feature_flagging_and_experimentation
+@features.feature_flags_exposures
+class Test_FFE_Eval_Metric_Different_Flags:
+    """Test that different flags produce separate metric series."""
+
+    def setup_ffe_eval_metric_different_flags(self):
+        rc.tracer_rc_state.reset().apply()
+
+        config_id = "ffe-eval-metric-diff"
+        self.flag_a = "eval-metric-flag-a"
+        self.flag_b = "eval-metric-flag-b"
+
+        # Create config with both flags
+        fixture = {
+            "createdAt": "2024-04-17T19:40:53.716Z",
+            "format": "SERVER",
+            "environment": {"name": "Test"},
+            "flags": {
+                self.flag_a: {
+                    "key": self.flag_a,
+                    "enabled": True,
+                    "variationType": "STRING",
+                    "variations": {
+                        "on": {"key": "on", "value": "on-value"},
+                        "off": {"key": "off", "value": "off-value"},
+                    },
+                    "allocations": [
+                        {
+                            "key": "default-allocation",
+                            "rules": [],
+                            "splits": [{"variationKey": "on", "shards": []}],
+                            "doLog": True,
+                        }
+                    ],
+                },
+                self.flag_b: {
+                    "key": self.flag_b,
+                    "enabled": True,
+                    "variationType": "STRING",
+                    "variations": {
+                        "on": {"key": "on", "value": "on-value"},
+                        "off": {"key": "off", "value": "off-value"},
+                    },
+                    "allocations": [
+                        {
+                            "key": "default-allocation",
+                            "rules": [],
+                            "splits": [{"variationKey": "on", "shards": []}],
+                            "doLog": True,
+                        }
+                    ],
+                },
+            },
+        }
+        rc.tracer_rc_state.set_config(f"{RC_PATH}/{config_id}/config", fixture).apply()
+
+        self.r_a = weblog.post(
+            "/ffe",
+            json={
+                "flag": self.flag_a,
+                "variationType": "STRING",
+                "defaultValue": "default",
+                "targetingKey": "user-1",
+                "attributes": {},
+            },
+        )
+        self.r_b = weblog.post(
+            "/ffe",
+            json={
+                "flag": self.flag_b,
+                "variationType": "STRING",
+                "defaultValue": "default",
+                "targetingKey": "user-1",
+                "attributes": {},
+            },
+        )
+
+    def test_ffe_eval_metric_different_flags(self):
+        """Test that each flag key gets its own metric series."""
+        assert self.r_a.status_code == 200, f"Flag A evaluation failed: {self.r_a.text}"
+        assert self.r_b.status_code == 200, f"Flag B evaluation failed: {self.r_b.text}"
+
+        metrics_a = find_eval_metrics(self.flag_a)
+        metrics_b = find_eval_metrics(self.flag_b)
+
+        assert len(metrics_a) > 0, f"Expected metric for flag '{self.flag_a}', found none. All: {find_eval_metrics()}"
+        assert len(metrics_b) > 0, f"Expected metric for flag '{self.flag_b}', found none. All: {find_eval_metrics()}"
+
+
+@scenarios.feature_flagging_and_experimentation
+@features.feature_flags_exposures
+class Test_FFE_Eval_Metric_Error:
+    """Test that evaluating a non-existent flag produces metric with error tags."""
+
+    def setup_ffe_eval_metric_error(self):
+        rc.tracer_rc_state.reset().apply()
+
+        # Set up config with a different flag than what we'll request
+        config_id = "ffe-eval-metric-error"
+        rc.tracer_rc_state.set_config(f"{RC_PATH}/{config_id}/config", make_ufc_fixture("some-other-flag")).apply()
+
+        self.flag_key = "non-existent-eval-metric-flag"
+        self.r = weblog.post(
+            "/ffe",
+            json={
+                "flag": self.flag_key,
+                "variationType": "STRING",
+                "defaultValue": "default",
+                "targetingKey": "user-1",
+                "attributes": {},
+            },
+        )
+
+    def test_ffe_eval_metric_error(self):
+        """Test that error evaluations produce metric with error.type tag."""
+        assert self.r.status_code == 200, f"Flag evaluation request failed: {self.r.text}"
+
+        metrics = find_eval_metrics(self.flag_key)
+        assert len(metrics) > 0, (
+            f"Expected metric for non-existent flag '{self.flag_key}', found none. All: {find_eval_metrics()}"
+        )
+
+        point = metrics[0]
+        tags = point.get("tags", [])
+
+        assert get_tag_value(tags, "feature_flag.result.reason") == "error", (
+            f"Expected reason 'error', got tags: {tags}"
+        )
+        assert get_tag_value(tags, "error.type") == "flag_not_found", (
+            f"Expected error.type 'flag_not_found', got tags: {tags}"
+        )
+
+
+@scenarios.feature_flagging_and_experimentation
+@features.feature_flags_exposures
+class Test_FFE_Eval_Metric_Type_Mismatch:
+    """Test that requesting the wrong type produces a metric with type_mismatch error.
+
+    This configures a STRING flag but evaluates it as BOOLEAN. The type
+    conversion error happens *after* the core evaluate() returns, inside the
+    type-specific method (BooleanEvaluation). Recording metrics via a
+    Finally hook catches this; the old evaluate()-level defer would have
+    recorded a success (targeting_match) instead.
+    """
+
+    def setup_ffe_eval_metric_type_mismatch(self):
+        rc.tracer_rc_state.reset().apply()
+
+        config_id = "ffe-eval-metric-type-mismatch"
+        self.flag_key = "eval-metric-type-mismatch-flag"
+        # Flag is configured as STRING
+        rc.tracer_rc_state.set_config(
+            f"{RC_PATH}/{config_id}/config", make_ufc_fixture(self.flag_key, variation_type="STRING")
+        ).apply()
+
+        # But we evaluate it as BOOLEAN → type mismatch
+        self.r = weblog.post(
+            "/ffe",
+            json={
+                "flag": self.flag_key,
+                "variationType": "BOOLEAN",
+                "defaultValue": False,
+                "targetingKey": "user-1",
+                "attributes": {},
+            },
+        )
+
+    def test_ffe_eval_metric_type_mismatch(self):
+        """Test that type conversion errors produce metric with error.type:type_mismatch."""
+        assert self.r.status_code == 200, f"Flag evaluation request failed: {self.r.text}"
+
+        metrics = find_eval_metrics(self.flag_key)
+        assert len(metrics) > 0, f"Expected metric for flag '{self.flag_key}', found none. All: {find_eval_metrics()}"
+
+        point = metrics[0]
+        tags = point.get("tags", [])
+
+        assert get_tag_value(tags, "feature_flag.result.reason") == "error", (
+            f"Expected reason 'error' for type mismatch, got tags: {tags}"
+        )
+        assert get_tag_value(tags, "error.type") == "type_mismatch", (
+            f"Expected error.type 'type_mismatch', got tags: {tags}"
+        )
diff --git a/utils/_context/_scenarios/__init__.py b/utils/_context/_scenarios/__init__.py
index 048384bf5d1..d53cd5fb2dd 100644
--- a/utils/_context/_scenarios/__init__.py
+++ b/utils/_context/_scenarios/__init__.py
@@ -542,7 +542,10 @@ class _Scenarios:
         weblog_env={
             "DD_EXPERIMENTAL_FLAGGING_PROVIDER_ENABLED": "true",
             "DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS": "0.2",
+            "DD_METRICS_OTEL_ENABLED": "true",
+            "OTEL_EXPORTER_OTLP_METRICS_ENDPOINT": "http://agent:4318/v1/metrics",
         },
+        agent_interface_timeout=30,
         doc="",
         scenario_groups=[scenario_groups.ffe],
     )
diff --git a/utils/build/docker/golang/app/_shared/common/ffe.go b/utils/build/docker/golang/app/_shared/common/ffe.go
index 07f08a11994..929d0bd7eaa 100644
--- a/utils/build/docker/golang/app/_shared/common/ffe.go
+++ b/utils/build/docker/golang/app/_shared/common/ffe.go
@@ -33,7 +33,26 @@ func FFeEval() func(writer http.ResponseWriter, request *http.Request) {
 			return
 		}
 
-		val := ofClient.Object(request.Context(), body.Flag, body.DefaultValue, of.NewEvaluationContext(body.TargetingKey, body.Attributes))
+		ctx := request.Context()
+		evalCtx := of.NewEvaluationContext(body.TargetingKey, body.Attributes)
+
+		var val any
+		switch body.VariationType {
+		case "BOOLEAN":
+			defBool, _ := body.DefaultValue.(bool)
+			val, _ = ofClient.BooleanValue(ctx, body.Flag, defBool, evalCtx)
+		case "STRING":
+			defStr, _ := body.DefaultValue.(string)
+			val, _ = ofClient.StringValue(ctx, body.Flag, defStr, evalCtx)
+		case "INTEGER":
+			defFloat, _ := body.DefaultValue.(float64)
+			val, _ = ofClient.IntValue(ctx, body.Flag, int64(defFloat), evalCtx)
+		case "NUMERIC":
+			defFloat, _ := body.DefaultValue.(float64)
+			val, _ = ofClient.FloatValue(ctx, body.Flag, defFloat, evalCtx)
+		default:
+			val = ofClient.Object(ctx, body.Flag, body.DefaultValue, evalCtx)
+		}
 
 		writer.WriteHeader(http.StatusOK)