diff --git a/backend/core/services/completion_runner.py b/backend/core/services/completion_runner.py index 4b720d77..3718ca06 100644 --- a/backend/core/services/completion_runner.py +++ b/backend/core/services/completion_runner.py @@ -111,13 +111,19 @@ async def prepare( use_fallback: FallbackOption, completion_id: UUID, conversation_id: str | None, + stream: bool = False, ): + # Add stream information to metadata (only when True) + metadata_with_stream = {**metadata} + if stream: + metadata_with_stream["stream"] = True + runner = Runner( tenant_slug=self._tenant.slug, custom_configs=self._tenant.providers, agent=agent, version=version, - metadata=metadata, + metadata=metadata_with_stream, metric_tags={}, provider_factory=self._provider_factory, timeout=timeout or 240, @@ -127,7 +133,7 @@ async def prepare( agent_input=input, start_time=start_time, completion_id=completion_id, - metadata=metadata, + metadata=metadata_with_stream, conversation_id=conversation_id, ) return runner, builder diff --git a/backend/core/storage/clickhouse/_models/_ch_completion.py b/backend/core/storage/clickhouse/_models/_ch_completion.py index 0b68844d..9744e52f 100644 --- a/backend/core/storage/clickhouse/_models/_ch_completion.py +++ b/backend/core/storage/clickhouse/_models/_ch_completion.py @@ -129,6 +129,7 @@ class ClickhouseCompletion(BaseModel): # Origin of the run source: Literal["web", "api", "mcp"] = "api" + # Traces as array of strings traces: list[_Trace] = Field(default_factory=list) diff --git a/backend/protocol/api/_services/playground_service.py b/backend/protocol/api/_services/playground_service.py index f14ebac5..b76e5067 100644 --- a/backend/protocol/api/_services/playground_service.py +++ b/backend/protocol/api/_services/playground_service.py @@ -191,6 +191,8 @@ async def _run_version( use_cache: CacheUsage | None, ) -> CompletionOutputTuple: _log.debug("Playground: Running single completion", version_id=version.id, input_id=input.id) + # Playground completions are never streamed, so no need to add stream metadata + cached = await self._completion_runner.check_cache( completion_id=completion_id, agent=Agent(id=agent_id, uid=0), @@ -217,6 +219,7 @@ async def _run_version( use_fallback="never", conversation_id=None, completion_id=completion_id, + stream=False, ) completion = await self._completion_runner.run(runner, builder) diff --git a/backend/protocol/api/_services/run/run_service.py b/backend/protocol/api/_services/run/run_service.py index 8f904efe..34a22eba 100644 --- a/backend/protocol/api/_services/run/run_service.py +++ b/backend/protocol/api/_services/run/run_service.py @@ -127,12 +127,17 @@ async def _cached_response( prepared_run: PreparedRun, deprecated_function: bool, ): + # Add stream metadata for cached completions (only when True) + metadata_with_stream = {**prepared_run.metadata} + if stream: + metadata_with_stream["stream"] = True + cached = await self._completion_runner.check_cache( completion_id=completion_id, agent=Agent(id=prepared_run.agent_id, uid=0), version=prepared_run.version, input=prepared_run.agent_input, - metadata=prepared_run.metadata, + metadata=metadata_with_stream, use_cache=use_cache, ) if not cached: @@ -248,6 +253,7 @@ async def run(self, request: OpenAIProxyChatCompletionRequest, start_time: float use_fallback=use_fallback, conversation_id=request.conversation_id, completion_id=completion_id, + stream=stream, ) if stream: return await self._stream(runner, builder, request) diff --git a/web/src/app/completions/sections/table/cells/CompletionTableVersionCell.tsx b/web/src/app/completions/sections/table/cells/CompletionTableVersionCell.tsx index 0f1ef924..2d2926e8 100644 --- a/web/src/app/completions/sections/table/cells/CompletionTableVersionCell.tsx +++ b/web/src/app/completions/sections/table/cells/CompletionTableVersionCell.tsx @@ -50,11 +50,6 @@ function CompletionTableVersionCell({ value }: CompletionTableVersionCellProps) }); } - // Check stream (default: false) - if (obj.stream !== undefined && obj.stream !== false) { - nonDefaultEntries.push({ key: "stream", value: String(obj.stream) }); - } - // Check include_usage (default: false) if (obj.include_usage !== undefined && obj.include_usage !== false) { nonDefaultEntries.push({ diff --git a/web/src/components/utils/utils.ts b/web/src/components/utils/utils.ts index a902c6c9..573f2dbb 100644 --- a/web/src/components/utils/utils.ts +++ b/web/src/components/utils/utils.ts @@ -451,7 +451,6 @@ export function getVersionWithDefaults(version: Version): ExtendedVersion { tools: version.tools !== undefined ? version.tools : [], use_cache: extendedVersion.use_cache !== undefined ? extendedVersion.use_cache : "auto", max_tokens: extendedVersion.max_tokens !== undefined ? extendedVersion.max_tokens : "unlimited", - stream: extendedVersion.stream !== undefined ? extendedVersion.stream : false, include_usage: extendedVersion.include_usage !== undefined ? extendedVersion.include_usage : false, presence_penalty: extendedVersion.presence_penalty !== undefined ? extendedVersion.presence_penalty : 0, frequency_penalty: extendedVersion.frequency_penalty !== undefined ? extendedVersion.frequency_penalty : 0, diff --git a/web/src/components/version-details/VersionDetailsView.tsx b/web/src/components/version-details/VersionDetailsView.tsx index 6654284d..5b513f14 100644 --- a/web/src/components/version-details/VersionDetailsView.tsx +++ b/web/src/components/version-details/VersionDetailsView.tsx @@ -33,7 +33,6 @@ export function VersionDetailsView({ "tools", "use_cache", "max_tokens", - "stream", "include_usage", "presence_penalty", "frequency_penalty", @@ -122,12 +121,6 @@ export function VersionDetailsView({ {extendedVersion.max_tokens} - {/* Stream */} -
- Stream - {extendedVersion.stream ? "true" : "false"} -
- {/* Include Usage */}
Include Usage diff --git a/web/src/types/models.ts b/web/src/types/models.ts index cbd6698c..2a381d25 100644 --- a/web/src/types/models.ts +++ b/web/src/types/models.ts @@ -122,7 +122,6 @@ export interface Version { export interface ExtendedVersion extends Version { use_cache?: string | boolean; max_tokens?: number | string; - stream?: boolean; include_usage?: boolean; presence_penalty?: number; frequency_penalty?: number;