From f29acd144bdee2e2d65f191d12b586c4a0dd0831 Mon Sep 17 00:00:00 2001 From: Leo Romanovsky Date: Wed, 25 Feb 2026 11:43:27 +0100 Subject: [PATCH 1/6] fix(openfeature): block initialize() until RC config arrives DataDogProvider.initialize() now blocks until Remote Config delivers the first FFE configuration or a configurable timeout expires (default 30s). This matches the behavior of Java (CountDownLatch), Go (sync.Cond), and Node.js (Promise) providers. Previously, initialize() returned immediately without config, causing the OpenFeature SDK to emit PROVIDER_READY prematurely. Flag evaluations in this window silently returned defaults. Fixes: FFL-1843 --- ddtrace/internal/openfeature/_provider.py | 62 +++++++++++++++++------ ddtrace/internal/settings/openfeature.py | 10 ++++ 2 files changed, 57 insertions(+), 15 deletions(-) diff --git a/ddtrace/internal/openfeature/_provider.py b/ddtrace/internal/openfeature/_provider.py index 9245efc5dc8..6bc68b2ffda 100644 --- a/ddtrace/internal/openfeature/_provider.py +++ b/ddtrace/internal/openfeature/_provider.py @@ -8,6 +8,7 @@ from collections import OrderedDict from collections.abc import MutableMapping from importlib.metadata import version +import threading import typing from openfeature.evaluation_context import EvaluationContext @@ -87,12 +88,21 @@ class DataDogProvider(AbstractProvider): Feature Flags and Experimentation (FFE) product. 
""" - def __init__(self, *args: typing.Any, **kwargs: typing.Any): + def __init__(self, *args: typing.Any, init_timeout: typing.Optional[float] = None, **kwargs: typing.Any): super().__init__(*args, **kwargs) self._metadata = Metadata(name="Datadog") self._status = ProviderStatus.NOT_READY self._config_received = False + # Init timeout: constructor arg takes priority, then env var (default 30s) + if init_timeout is not None: + self._init_timeout = init_timeout + else: + self._init_timeout = ffe_config.initialization_timeout_ms / 1000.0 + + # Event used to block initialize() until config arrives + self._config_event = threading.Event() + # Cache for reported exposures to prevent duplicates # Stores mapping of (flag_key, subject_id) -> (allocation_key, variant_key) # Using LRU cache with maxsize of 65536 to prevent unbounded memory growth @@ -119,16 +129,17 @@ def initialize(self, evaluation_context: EvaluationContext) -> None: """ Initialize the provider. - Called by the OpenFeature SDK when the provider is set. - Provider Creation → NOT_READY - ↓ - First Remote Config Payload - ↓ - READY (emits PROVIDER_READY event) - ↓ - Shutdown - ↓ - NOT_READY + Blocks until Remote Config delivers the first FFE configuration or + the initialization timeout expires. This matches the behavior of the + Java (CountDownLatch), Go (sync.Cond), and Node.js (Promise) providers. 
+ + The timeout is configurable via: + - Constructor: DataDogProvider(init_timeout=10.0) # seconds + - Env var: DD_EXPERIMENTAL_FLAGGING_PROVIDER_INITIALIZATION_TIMEOUT_MS=10000 + + Provider lifecycle: + NOT_READY → initialize() blocks → config arrives → READY + NOT_READY → initialize() blocks → timeout → raises ProviderNotReadyError """ if not self._enabled: return @@ -139,12 +150,27 @@ def initialize(self, evaluation_context: EvaluationContext) -> None: except ServiceStatusError: logger.debug("Exposure writer is already running", exc_info=True) - # If configuration was already received before initialization, emit ready now + # Fast path: config already available (RC delivered before set_provider) config = _get_ffe_config() - if config is not None and not self._config_received: + if config is not None: self._config_received = True self._status = ProviderStatus.READY - self._emit_ready_event() + return # SDK will dispatch PROVIDER_READY + + # Block until config arrives or timeout expires + logger.debug("Waiting up to %.1fs for initial FFE configuration from Remote Config", self._init_timeout) + if not self._config_event.wait(timeout=self._init_timeout): + # Timeout expired without receiving config + from openfeature.exception import ProviderNotReadyError + + raise ProviderNotReadyError( + f"Provider timed out after {self._init_timeout:.1f}s waiting for " + "initial configuration from Remote Config" + ) + + # Config received during wait + self._config_received = True + self._status = ProviderStatus.READY def shutdown(self) -> None: """ @@ -168,6 +194,7 @@ def shutdown(self) -> None: _unregister_provider(self) self._status = ProviderStatus.NOT_READY self._config_received = False + self._config_event.clear() def resolve_boolean_details( self, @@ -423,12 +450,17 @@ def on_configuration_received(self) -> None: """ Called when a Remote Configuration payload is received and processed. - Emits PROVIDER_READY event on first configuration. 
+ Unblocks initialize() if it's waiting, and emits PROVIDER_READY for + late arrivals (config received after initialize() timed out). """ + # Always signal the event to unblock initialize() if it's waiting + self._config_event.set() + if not self._config_received: self._config_received = True self._status = ProviderStatus.READY logger.debug("First FFE configuration received, provider is now READY") + # Emit READY for late recovery: config arrived after init timed out self._emit_ready_event() def _emit_ready_event(self) -> None: diff --git a/ddtrace/internal/settings/openfeature.py b/ddtrace/internal/settings/openfeature.py index 5149bcee322..ada8dd42fa1 100644 --- a/ddtrace/internal/settings/openfeature.py +++ b/ddtrace/internal/settings/openfeature.py @@ -30,10 +30,20 @@ class OpenFeatureConfig(DDConfig): default=1.0, ) + # Provider initialization timeout in milliseconds. + # Controls how long initialize() blocks waiting for the first Remote Config payload. + # Default is 30000ms (30 seconds), matching Java, Go, and Node.js SDKs. 
+ initialization_timeout_ms = DDConfig.var( + int, + "DD_EXPERIMENTAL_FLAGGING_PROVIDER_INITIALIZATION_TIMEOUT_MS", + default=30000, + ) + _openfeature_config_keys = [ "experimental_flagging_provider_enabled", "ffe_intake_enabled", "ffe_intake_heartbeat_interval", + "initialization_timeout_ms", ] From 60001256d6f128dd2fc58582d1a7496b7ea4d3ba Mon Sep 17 00:00:00 2001 From: Leo Romanovsky Date: Wed, 25 Feb 2026 15:49:12 +0100 Subject: [PATCH 2/6] chore: add release note for openfeature init blocking fix --- ...fix-openfeature-init-blocking-70c8d5a99287cc49.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 releasenotes/notes/fix-openfeature-init-blocking-70c8d5a99287cc49.yaml diff --git a/releasenotes/notes/fix-openfeature-init-blocking-70c8d5a99287cc49.yaml b/releasenotes/notes/fix-openfeature-init-blocking-70c8d5a99287cc49.yaml new file mode 100644 index 00000000000..1036a173a79 --- /dev/null +++ b/releasenotes/notes/fix-openfeature-init-blocking-70c8d5a99287cc49.yaml @@ -0,0 +1,10 @@ +--- +fixes: + - | + openfeature: This fix resolves an issue where ``DataDogProvider.initialize()`` returned immediately + without waiting for Remote Configuration data, causing the OpenFeature SDK to emit ``PROVIDER_READY`` + before flag configuration was available. Flag evaluations in this window silently returned default + values. The provider now blocks in ``initialize()`` until the first configuration arrives or a + configurable timeout expires (default 30s), matching the behavior of the Java, Go, and Node.js + providers. The timeout is configurable via the ``DD_EXPERIMENTAL_FLAGGING_PROVIDER_INITIALIZATION_TIMEOUT_MS`` + environment variable or the ``init_timeout`` constructor parameter. 
From e7306baff0af73498a03f6595287d31d18c58fe8 Mon Sep 17 00:00:00 2001 From: Leo Romanovsky Date: Wed, 25 Feb 2026 17:19:50 +0100 Subject: [PATCH 3/6] fix(openfeature): address review feedback - Rename init_timeout to initialization_timeout (Oleksii #1) - Eliminate _config_received bool, use threading.Event directly (Oleksii #2) - Remove Java/Go/Node.js references from docstring (Oleksii #3, Tyler #1) - Add debug log on fast-path when config already exists (Oleksii #4) - Remove redundant state updates after wait (Oleksii #5) - Reorder on_configuration_received: set status before signaling event (Oleksii #6) --- ddtrace/internal/openfeature/_provider.py | 56 +++++++++++------------ 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/ddtrace/internal/openfeature/_provider.py b/ddtrace/internal/openfeature/_provider.py index 6bc68b2ffda..8754c0f3e45 100644 --- a/ddtrace/internal/openfeature/_provider.py +++ b/ddtrace/internal/openfeature/_provider.py @@ -88,20 +88,20 @@ class DataDogProvider(AbstractProvider): Feature Flags and Experimentation (FFE) product. 
""" - def __init__(self, *args: typing.Any, init_timeout: typing.Optional[float] = None, **kwargs: typing.Any): + def __init__(self, *args: typing.Any, initialization_timeout: typing.Optional[float] = None, **kwargs: typing.Any): super().__init__(*args, **kwargs) self._metadata = Metadata(name="Datadog") self._status = ProviderStatus.NOT_READY - self._config_received = False - # Init timeout: constructor arg takes priority, then env var (default 30s) - if init_timeout is not None: - self._init_timeout = init_timeout + # Initialization timeout: constructor arg takes priority, then env var (default 30s) + if initialization_timeout is not None: + self._initialization_timeout = initialization_timeout else: - self._init_timeout = ffe_config.initialization_timeout_ms / 1000.0 + self._initialization_timeout = ffe_config.initialization_timeout_ms / 1000.0 - # Event used to block initialize() until config arrives - self._config_event = threading.Event() + # Event used to block initialize() until config arrives. + # Also serves as the "config received" flag via is_set(). + self._config_received = threading.Event() # Cache for reported exposures to prevent duplicates # Stores mapping of (flag_key, subject_id) -> (allocation_key, variant_key) @@ -130,16 +130,15 @@ def initialize(self, evaluation_context: EvaluationContext) -> None: Initialize the provider. Blocks until Remote Config delivers the first FFE configuration or - the initialization timeout expires. This matches the behavior of the - Java (CountDownLatch), Go (sync.Cond), and Node.js (Promise) providers. + the initialization timeout expires. 
The timeout is configurable via: - - Constructor: DataDogProvider(init_timeout=10.0) # seconds + - Constructor: DataDogProvider(initialization_timeout=10.0) # seconds - Env var: DD_EXPERIMENTAL_FLAGGING_PROVIDER_INITIALIZATION_TIMEOUT_MS=10000 Provider lifecycle: - NOT_READY → initialize() blocks → config arrives → READY - NOT_READY → initialize() blocks → timeout → raises ProviderNotReadyError + NOT_READY -> initialize() blocks -> config arrives -> READY + NOT_READY -> initialize() blocks -> timeout -> raises ProviderNotReadyError """ if not self._enabled: return @@ -153,24 +152,25 @@ def initialize(self, evaluation_context: EvaluationContext) -> None: # Fast path: config already available (RC delivered before set_provider) config = _get_ffe_config() if config is not None: - self._config_received = True + logger.debug("FFE configuration already available, provider is READY") + self._config_received.set() self._status = ProviderStatus.READY return # SDK will dispatch PROVIDER_READY # Block until config arrives or timeout expires - logger.debug("Waiting up to %.1fs for initial FFE configuration from Remote Config", self._init_timeout) - if not self._config_event.wait(timeout=self._init_timeout): + logger.debug( + "Waiting up to %.1fs for initial FFE configuration from Remote Config", self._initialization_timeout + ) + if not self._config_received.wait(timeout=self._initialization_timeout): # Timeout expired without receiving config from openfeature.exception import ProviderNotReadyError raise ProviderNotReadyError( - f"Provider timed out after {self._init_timeout:.1f}s waiting for " + f"Provider timed out after {self._initialization_timeout:.1f}s waiting for " "initial configuration from Remote Config" ) - # Config received during wait - self._config_received = True - self._status = ProviderStatus.READY + # Config received during wait -- on_configuration_received() already set status def shutdown(self) -> None: """ @@ -193,8 +193,7 @@ def shutdown(self) -> None: # 
Unregister provider _unregister_provider(self) self._status = ProviderStatus.NOT_READY - self._config_received = False - self._config_event.clear() + self._config_received.clear() def resolve_boolean_details( self, @@ -450,19 +449,18 @@ def on_configuration_received(self) -> None: """ Called when a Remote Configuration payload is received and processed. - Unblocks initialize() if it's waiting, and emits PROVIDER_READY for - late arrivals (config received after initialize() timed out). + Updates status first, then signals the event to unblock initialize(). + Emits PROVIDER_READY for late arrivals (config received after initialize() timed out). """ - # Always signal the event to unblock initialize() if it's waiting - self._config_event.set() - - if not self._config_received: - self._config_received = True + if not self._config_received.is_set(): self._status = ProviderStatus.READY logger.debug("First FFE configuration received, provider is now READY") # Emit READY for late recovery: config arrived after init timed out self._emit_ready_event() + # Signal the event last to unblock initialize() after status is updated + self._config_received.set() + def _emit_ready_event(self) -> None: """ Safely emit PROVIDER_READY event. From 39f16011a57c988fb49713bf8f26d60379c61e74 Mon Sep 17 00:00:00 2001 From: Leo Romanovsky Date: Wed, 25 Feb 2026 17:30:26 +0100 Subject: [PATCH 4/6] test(openfeature): add unit tests for blocking initialization Add 4 unit tests for the initialize() blocking behavior: - test_initialize_blocks_until_config_arrives: config mid-wait unblocks - test_initialize_fast_path_when_config_exists: pre-loaded config - test_initialize_timeout_raises: short timeout -> ERROR state - test_late_recovery_after_timeout: config after timeout -> READY Also update existing tests to use _config_received.is_set() since _config_received is now a threading.Event instead of a bool. 
--- tests/openfeature/test_provider_status.py | 111 +++++++++++++++++++++- 1 file changed, 106 insertions(+), 5 deletions(-) diff --git a/tests/openfeature/test_provider_status.py b/tests/openfeature/test_provider_status.py index 5cac8fb49de..3233d3b31c9 100644 --- a/tests/openfeature/test_provider_status.py +++ b/tests/openfeature/test_provider_status.py @@ -5,8 +5,12 @@ - NOT_READY by default - READY when first Remote Config payload is received - Event emission on status change +- Blocking initialization until config arrives or timeout """ +import threading +import time + from openfeature import api from openfeature.provider import ProviderStatus import pytest @@ -43,7 +47,7 @@ def test_provider_starts_not_ready(self): provider = DataDogProvider() assert provider._status == ProviderStatus.NOT_READY - assert provider._config_received is False + assert not provider._config_received.is_set() def test_provider_becomes_ready_after_first_config(self): """Test that provider becomes READY after receiving first configuration.""" @@ -61,7 +65,7 @@ def test_provider_becomes_ready_after_first_config(self): # Verify becomes READY assert provider._status == ProviderStatus.READY - assert provider._config_received is True + assert provider._config_received.is_set() finally: api.clear_providers() @@ -73,14 +77,14 @@ def test_provider_ready_event_emitted(self): try: # Provider should not have received config yet - assert not provider._config_received + assert not provider._config_received.is_set() # Process a configuration config = create_config(create_boolean_flag("test-flag", enabled=True)) process_ffe_configuration(config) # Provider should now have received config and be READY - assert provider._config_received + assert provider._config_received.is_set() assert provider._status == ProviderStatus.READY finally: api.clear_providers() @@ -140,7 +144,7 @@ def test_provider_status_after_shutdown(self): # Verify back to NOT_READY assert provider._status == ProviderStatus.NOT_READY - 
assert provider._config_received is False + assert not provider._config_received.is_set() finally: api.clear_providers() @@ -194,3 +198,100 @@ def on_provider_ready(event_details): finally: api.remove_handler(ProviderEvent.PROVIDER_READY, on_provider_ready) api.clear_providers() + + +class TestProviderInitializationBlocking: + """Test that initialize() blocks until config arrives or timeout expires.""" + + def test_initialize_blocks_until_config_arrives(self): + """initialize() should block and return once config is delivered mid-wait.""" + with override_global_config({"experimental_flagging_provider_enabled": True}): + provider = DataDogProvider(initialization_timeout=5.0) + + # Deliver config from a background thread after 0.5s + def deliver_config(): + time.sleep(0.5) + config = create_config(create_boolean_flag("test-flag", enabled=True)) + process_ffe_configuration(config) + + timer = threading.Thread(target=deliver_config, daemon=True) + timer.start() + + try: + start = time.monotonic() + api.set_provider(provider) + elapsed = time.monotonic() - start + + # Should have blocked for ~0.5s (not instant, not full timeout) + assert elapsed >= 0.3, f"initialize() returned too fast ({elapsed:.2f}s)" + assert elapsed < 4.0, f"initialize() took too long ({elapsed:.2f}s), should have unblocked at ~0.5s" + assert provider._status == ProviderStatus.READY + assert provider._config_received.is_set() + finally: + api.clear_providers() + + def test_initialize_fast_path_when_config_exists(self): + """initialize() should return immediately if config already exists.""" + with override_global_config({"experimental_flagging_provider_enabled": True}): + # Deliver config BEFORE creating provider + config = create_config(create_boolean_flag("test-flag", enabled=True)) + process_ffe_configuration(config) + + provider = DataDogProvider(initialization_timeout=5.0) + + try: + start = time.monotonic() + api.set_provider(provider) + elapsed = time.monotonic() - start + + # Should be 
near-instant (config already available) + assert elapsed < 1.0, f"initialize() took {elapsed:.2f}s, should be instant with pre-loaded config" + assert provider._status == ProviderStatus.READY + finally: + api.clear_providers() + + def test_initialize_timeout_raises(self): + """initialize() should raise ProviderNotReadyError after timeout expires.""" + from openfeature.exception import ProviderNotReadyError + + with override_global_config({"experimental_flagging_provider_enabled": True}): + provider = DataDogProvider(initialization_timeout=0.5) + + try: + start = time.monotonic() + # set_provider catches the exception and dispatches PROVIDER_ERROR + api.set_provider(provider) + elapsed = time.monotonic() - start + + # Should have blocked for ~0.5s (the timeout) + assert elapsed >= 0.3, f"initialize() returned too fast ({elapsed:.2f}s)" + assert elapsed < 2.0, f"initialize() took too long ({elapsed:.2f}s)" + + # Provider should be in ERROR state (SDK caught ProviderNotReadyError) + client = api.get_client() + assert client.get_provider_status() == ProviderStatus.ERROR + finally: + api.clear_providers() + + def test_late_recovery_after_timeout(self): + """Config arriving after timeout should transition provider to READY.""" + with override_global_config({"experimental_flagging_provider_enabled": True}): + provider = DataDogProvider(initialization_timeout=0.5) + + try: + # Let it timeout + api.set_provider(provider) + + # Provider should be in ERROR state + client = api.get_client() + assert client.get_provider_status() == ProviderStatus.ERROR + + # Now deliver config (late recovery) + config = create_config(create_boolean_flag("test-flag", enabled=True)) + process_ffe_configuration(config) + + # Provider should recover to READY + assert provider._status == ProviderStatus.READY + assert provider._config_received.is_set() + finally: + api.clear_providers() From 1c3fc388aed5a405640121fb9e93e331c6886081 Mon Sep 17 00:00:00 2001 From: Leo Romanovsky Date: Wed, 25 Feb 2026 
17:36:16 +0100 Subject: [PATCH 5/6] fix(openfeature): move provider registration from __init__ to initialize Move _register_provider() from __init__() to initialize() so that a provider re-registers for RC config callbacks after shutdown + re-initialization. Previously, shutdown() called _unregister_provider() but __init__() only runs once, so re-initialization would leave the provider unable to receive config updates. Addresses review feedback from dd-oleksii and typotter. --- ddtrace/internal/openfeature/_provider.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ddtrace/internal/openfeature/_provider.py b/ddtrace/internal/openfeature/_provider.py index 8754c0f3e45..c3bd5c7a039 100644 --- a/ddtrace/internal/openfeature/_provider.py +++ b/ddtrace/internal/openfeature/_provider.py @@ -118,9 +118,6 @@ def __init__(self, *args: typing.Any, initialization_timeout: typing.Optional[fl "please set DD_EXPERIMENTAL_FLAGGING_PROVIDER_ENABLED=true to enable it", ) - # Register this provider instance for status updates - _register_provider(self) - def get_metadata(self) -> Metadata: """Returns provider metadata.""" return self._metadata @@ -143,6 +140,10 @@ def initialize(self, evaluation_context: EvaluationContext) -> None: if not self._enabled: return + # Register for RC config callbacks (in initialize, not __init__, so + # re-initialization after shutdown re-registers the provider) + _register_provider(self) + try: # Start the exposure writer for reporting start_exposure_writer() From 8c382da15dca507266d5de17db4dec5e862cde3b Mon Sep 17 00:00:00 2001 From: Oleksii Shmalko Date: Mon, 30 Mar 2026 18:33:37 +0300 Subject: [PATCH 6/6] chore: simplify release note --- ...feature-init-blocking-70c8d5a99287cc49.yaml | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/releasenotes/notes/fix-openfeature-init-blocking-70c8d5a99287cc49.yaml b/releasenotes/notes/fix-openfeature-init-blocking-70c8d5a99287cc49.yaml index 
1036a173a79..3622bad9ca7 100644 --- a/releasenotes/notes/fix-openfeature-init-blocking-70c8d5a99287cc49.yaml +++ b/releasenotes/notes/fix-openfeature-init-blocking-70c8d5a99287cc49.yaml @@ -1,10 +1,14 @@ --- fixes: - | - openfeature: This fix resolves an issue where ``DataDogProvider.initialize()`` returned immediately - without waiting for Remote Configuration data, causing the OpenFeature SDK to emit ``PROVIDER_READY`` - before flag configuration was available. Flag evaluations in this window silently returned default - values. The provider now blocks in ``initialize()`` until the first configuration arrives or a - configurable timeout expires (default 30s), matching the behavior of the Java, Go, and Node.js - providers. The timeout is configurable via the ``DD_EXPERIMENTAL_FLAGGING_PROVIDER_INITIALIZATION_TIMEOUT_MS`` - environment variable or the ``init_timeout`` constructor parameter. + openfeature: This fix resolves an issue where ``DataDogProvider.initialize()`` returned before + configuration was received, causing the OpenFeature SDK to mark the provider as ready to serve + evaluations too early and flag evaluations to silently return default values. The provider now + waits for configuration before returning. +features: + - | + openfeature: This introduces a configurable initialization timeout for ``DataDogProvider``. + The timeout controls how long ``initialize()`` waits for configuration before returning, + and defaults to 30 seconds. Set it via the + ``DD_EXPERIMENTAL_FLAGGING_PROVIDER_INITIALIZATION_TIMEOUT_MS`` environment variable or the + ``initialization_timeout`` constructor parameter.