From 28935a1022bdca4e0d97ccd35a4602c7715cff6e Mon Sep 17 00:00:00 2001 From: mesutoezdil Date: Thu, 23 Apr 2026 13:09:08 +0200 Subject: [PATCH 1/2] feat(bedrock): add top_k and extended thinking support for Claude models Expose top_k and thinking_budget_tokens for Bedrock Claude models so operators can enable extended thinking mode via the ModelConfig CRD. Both fields are optional and only sent to the Converse API when set, using additionalModelRequestFields which is the correct mechanism for parameters outside the standard InferenceConfiguration block. Closes #1721 Signed-off-by: mesutoezdil --- go/adk/pkg/agent/agent.go | 8 ++- go/adk/pkg/models/bedrock.go | 67 +++++++++++++------ go/api/adk/types.go | 7 ++ .../crd/bases/kagent.dev_modelconfigs.yaml | 12 ++++ go/api/v1alpha2/modelconfig_types.go | 12 ++++ go/api/v1alpha2/zz_generated.deepcopy.go | 12 +++- .../translator/agent/adk_api_translator.go | 4 +- .../templates/kagent.dev_modelconfigs.yaml | 12 ++++ .../src/kagent/adk/models/_bedrock.py | 13 ++++ .../kagent-adk/src/kagent/adk/types.py | 9 +++ 10 files changed, 131 insertions(+), 25 deletions(-) diff --git a/go/adk/pkg/agent/agent.go b/go/adk/pkg/agent/agent.go index 3d6159768..2874e6189 100644 --- a/go/adk/pkg/agent/agent.go +++ b/go/adk/pkg/agent/agent.go @@ -285,9 +285,11 @@ func CreateLLM(ctx context.Context, m adk.Model, log logr.Logger) (adkmodel.LLM, } // Use Bedrock Converse API for ALL models (including Anthropic) cfg := &models.BedrockConfig{ - TransportConfig: transportConfigFromBase(m.BaseModel, nil), - Model: modelName, - Region: region, + TransportConfig: transportConfigFromBase(m.BaseModel, nil), + Model: modelName, + Region: region, + TopK: m.TopK, + ThinkingBudgetTokens: m.ThinkingBudgetTokens, } return models.NewBedrockModelWithLogger(ctx, cfg, log) diff --git a/go/adk/pkg/models/bedrock.go b/go/adk/pkg/models/bedrock.go index fbadd7c24..e2b2e619f 100644 --- a/go/adk/pkg/models/bedrock.go +++ b/go/adk/pkg/models/bedrock.go @@ -48,12 +48,13 
@@ func sanitizeBedrockToolID(id string, idMap map[string]string, counter *int) str // BedrockConfig holds Bedrock configuration for the Converse API type BedrockConfig struct { TransportConfig - Model string - Region string - MaxTokens *int - Temperature *float64 - TopP *float64 - TopK *int + Model string + Region string + MaxTokens *int + Temperature *float64 + TopP *float64 + TopK *int + ThinkingBudgetTokens *int } // BedrockModel implements model.LLM for Amazon Bedrock using the Converse API. @@ -158,26 +159,51 @@ func (m *BedrockModel) GenerateContent(ctx context.Context, req *model.LLMReques } } + // Build model-specific additional fields (Claude top_k, thinking, etc.) + additionalFields := m.buildAdditionalModelRequestFields() + // Set telemetry attributes telemetry.SetLLMRequestAttributes(ctx, modelName, req) if stream { - m.generateStreaming(ctx, modelName, messages, systemPrompt, inferenceConfig, toolConfig, yield) + m.generateStreaming(ctx, modelName, messages, systemPrompt, inferenceConfig, toolConfig, additionalFields, yield) } else { - m.generateNonStreaming(ctx, modelName, messages, systemPrompt, inferenceConfig, toolConfig, yield) + m.generateNonStreaming(ctx, modelName, messages, systemPrompt, inferenceConfig, toolConfig, additionalFields, yield) + } + } +} + +// buildAdditionalModelRequestFields returns a document.Interface containing +// model-specific parameters that are not part of InferenceConfiguration. +// For Claude on Bedrock this includes top_k and thinking configuration. +// Returns nil when no extra fields are needed. 
+func (m *BedrockModel) buildAdditionalModelRequestFields() document.Interface { + fields := make(map[string]any) + if m.Config.TopK != nil { + fields["top_k"] = *m.Config.TopK + } + if m.Config.ThinkingBudgetTokens != nil { + fields["thinking"] = map[string]any{ + "type": "enabled", + "budget_tokens": *m.Config.ThinkingBudgetTokens, } } + if len(fields) == 0 { + return nil + } + return document.NewLazyDocument(fields) } // generateStreaming handles streaming responses from Bedrock ConverseStream. // It properly handles both text and tool use content blocks during streaming. -func (m *BedrockModel) generateStreaming(ctx context.Context, modelId string, messages []types.Message, systemPrompt []types.SystemContentBlock, inferenceConfig *types.InferenceConfiguration, toolConfig *types.ToolConfiguration, yield func(*model.LLMResponse, error) bool) { +func (m *BedrockModel) generateStreaming(ctx context.Context, modelId string, messages []types.Message, systemPrompt []types.SystemContentBlock, inferenceConfig *types.InferenceConfiguration, toolConfig *types.ToolConfiguration, additionalFields document.Interface, yield func(*model.LLMResponse, error) bool) { output, err := m.Client.ConverseStream(ctx, &bedrockruntime.ConverseStreamInput{ - ModelId: aws.String(modelId), - Messages: messages, - System: systemPrompt, - InferenceConfig: inferenceConfig, - ToolConfig: toolConfig, + ModelId: aws.String(modelId), + Messages: messages, + System: systemPrompt, + InferenceConfig: inferenceConfig, + ToolConfig: toolConfig, + AdditionalModelRequestFields: additionalFields, }) if err != nil { @@ -323,13 +349,14 @@ func (tc *streamingToolCall) parseArgs() map[string]any { } // generateNonStreaming handles non-streaming responses from Bedrock Converse. 
-func (m *BedrockModel) generateNonStreaming(ctx context.Context, modelId string, messages []types.Message, systemPrompt []types.SystemContentBlock, inferenceConfig *types.InferenceConfiguration, toolConfig *types.ToolConfiguration, yield func(*model.LLMResponse, error) bool) { +func (m *BedrockModel) generateNonStreaming(ctx context.Context, modelId string, messages []types.Message, systemPrompt []types.SystemContentBlock, inferenceConfig *types.InferenceConfiguration, toolConfig *types.ToolConfiguration, additionalFields document.Interface, yield func(*model.LLMResponse, error) bool) { output, err := m.Client.Converse(ctx, &bedrockruntime.ConverseInput{ - ModelId: aws.String(modelId), - Messages: messages, - System: systemPrompt, - InferenceConfig: inferenceConfig, - ToolConfig: toolConfig, + ModelId: aws.String(modelId), + Messages: messages, + System: systemPrompt, + InferenceConfig: inferenceConfig, + ToolConfig: toolConfig, + AdditionalModelRequestFields: additionalFields, }) if err != nil { diff --git a/go/api/adk/types.go b/go/api/adk/types.go index 5274df27d..eda249ac2 100644 --- a/go/api/adk/types.go +++ b/go/api/adk/types.go @@ -247,6 +247,13 @@ type Bedrock struct { BaseModel // Region is the AWS region where the model is available Region string `json:"region,omitempty"` + // TopK limits sampling to the top-k most probable tokens. + // Only supported by Claude models on Bedrock. + TopK *int `json:"top_k,omitempty"` + // ThinkingBudgetTokens enables extended thinking when set and specifies the + // maximum number of tokens Claude may use for internal reasoning. + // Only supported by Claude models that support extended thinking. 
+ ThinkingBudgetTokens *int `json:"thinking_budget_tokens,omitempty"` } func (b *Bedrock) MarshalJSON() ([]byte, error) { diff --git a/go/api/config/crd/bases/kagent.dev_modelconfigs.yaml b/go/api/config/crd/bases/kagent.dev_modelconfigs.yaml index a0a1c0e44..342137305 100644 --- a/go/api/config/crd/bases/kagent.dev_modelconfigs.yaml +++ b/go/api/config/crd/bases/kagent.dev_modelconfigs.yaml @@ -483,6 +483,18 @@ spec: description: AWS region where the Bedrock model is available (e.g., us-east-1, us-west-2) type: string + thinkingBudgetTokens: + description: |- + ThinkingBudgetTokens sets the maximum number of tokens Claude may use for + internal reasoning before producing its response. When set, extended thinking + is enabled. Only supported by Claude models that support extended thinking. + Must be at least 1024 when set. + type: integer + topK: + description: |- + TopK limits the model to the k most probable next tokens at each step. + Only supported by Claude models on Bedrock. + type: integer required: - region type: object diff --git a/go/api/v1alpha2/modelconfig_types.go b/go/api/v1alpha2/modelconfig_types.go index 3e72aa080..e2f80e3ea 100644 --- a/go/api/v1alpha2/modelconfig_types.go +++ b/go/api/v1alpha2/modelconfig_types.go @@ -243,6 +243,18 @@ type BedrockConfig struct { // AWS region where the Bedrock model is available (e.g., us-east-1, us-west-2) // +required Region string `json:"region"` + + // TopK limits the model to the k most probable next tokens at each step. + // Only supported by Claude models on Bedrock. + // +optional + TopK *int `json:"topK,omitempty"` + + // ThinkingBudgetTokens sets the maximum number of tokens Claude may use for + // internal reasoning before producing its response. When set, extended thinking + // is enabled. Only supported by Claude models that support extended thinking. + // Must be at least 1024 when set. 
+ // +optional + ThinkingBudgetTokens *int `json:"thinkingBudgetTokens,omitempty"` } // SAPAICoreConfig contains SAP AI Core-specific configuration options. diff --git a/go/api/v1alpha2/zz_generated.deepcopy.go b/go/api/v1alpha2/zz_generated.deepcopy.go index fdd5df965..16a16754d 100644 --- a/go/api/v1alpha2/zz_generated.deepcopy.go +++ b/go/api/v1alpha2/zz_generated.deepcopy.go @@ -323,6 +323,16 @@ func (in *BaseVertexAIConfig) DeepCopy() *BaseVertexAIConfig { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *BedrockConfig) DeepCopyInto(out *BedrockConfig) { *out = *in + if in.TopK != nil { + in, out := &in.TopK, &out.TopK + *out = new(int) + **out = **in + } + if in.ThinkingBudgetTokens != nil { + in, out := &in.ThinkingBudgetTokens, &out.ThinkingBudgetTokens + *out = new(int) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BedrockConfig. @@ -752,7 +762,7 @@ func (in *ModelConfigSpec) DeepCopyInto(out *ModelConfigSpec) { if in.Bedrock != nil { in, out := &in.Bedrock, &out.Bedrock *out = new(BedrockConfig) - **out = **in + (*in).DeepCopyInto(*out) } if in.SAPAICore != nil { in, out := &in.SAPAICore, &out.SAPAICore diff --git a/go/core/internal/controller/translator/agent/adk_api_translator.go b/go/core/internal/controller/translator/agent/adk_api_translator.go index 19e09add0..f2fc8ffc3 100644 --- a/go/core/internal/controller/translator/agent/adk_api_translator.go +++ b/go/core/internal/controller/translator/agent/adk_api_translator.go @@ -688,7 +688,9 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC Model: model.Spec.Model, Headers: model.Spec.DefaultHeaders, }, - Region: model.Spec.Bedrock.Region, + Region: model.Spec.Bedrock.Region, + TopK: model.Spec.Bedrock.TopK, + ThinkingBudgetTokens: model.Spec.Bedrock.ThinkingBudgetTokens, } // Populate TLS fields in BaseModel diff --git 
a/helm/kagent-crds/templates/kagent.dev_modelconfigs.yaml b/helm/kagent-crds/templates/kagent.dev_modelconfigs.yaml index a0a1c0e44..342137305 100644 --- a/helm/kagent-crds/templates/kagent.dev_modelconfigs.yaml +++ b/helm/kagent-crds/templates/kagent.dev_modelconfigs.yaml @@ -483,6 +483,18 @@ spec: description: AWS region where the Bedrock model is available (e.g., us-east-1, us-west-2) type: string + thinkingBudgetTokens: + description: |- + ThinkingBudgetTokens sets the maximum number of tokens Claude may use for + internal reasoning before producing its response. When set, extended thinking + is enabled. Only supported by Claude models that support extended thinking. + Must be at least 1024 when set. + type: integer + topK: + description: |- + TopK limits the model to the k most probable next tokens at each step. + Only supported by Claude models on Bedrock. + type: integer required: - region type: object diff --git a/python/packages/kagent-adk/src/kagent/adk/models/_bedrock.py b/python/packages/kagent-adk/src/kagent/adk/models/_bedrock.py index 8a89617b4..088518d21 100644 --- a/python/packages/kagent-adk/src/kagent/adk/models/_bedrock.py +++ b/python/packages/kagent-adk/src/kagent/adk/models/_bedrock.py @@ -195,6 +195,8 @@ class KAgentBedrockLlm(BaseLlm): """ extra_headers: Optional[dict[str, str]] = None + top_k: Optional[int] = None + thinking_budget_tokens: Optional[int] = None model_config = {"arbitrary_types_allowed": True} @cached_property @@ -244,6 +246,17 @@ async def generate_content_async( if inference_config: kwargs["inferenceConfig"] = inference_config + additional_fields: dict[str, Any] = {} + if self.top_k is not None: + additional_fields["top_k"] = self.top_k + if self.thinking_budget_tokens is not None: + additional_fields["thinking"] = { + "type": "enabled", + "budget_tokens": self.thinking_budget_tokens, + } + if additional_fields: + kwargs["additionalModelRequestFields"] = additional_fields + def _run_converse_stream(**kw): resp = 
client.converse_stream(**kw) return list(resp.get("stream", [])) diff --git a/python/packages/kagent-adk/src/kagent/adk/types.py b/python/packages/kagent-adk/src/kagent/adk/types.py index 2ebc23adc..31d2c5599 100644 --- a/python/packages/kagent-adk/src/kagent/adk/types.py +++ b/python/packages/kagent-adk/src/kagent/adk/types.py @@ -235,6 +235,13 @@ class Gemini(BaseLLM): class Bedrock(BaseLLM): region: str | None = None + # top_k limits sampling to the top-k most probable tokens. + # Only supported by Claude models on Bedrock. + top_k: int | None = None + # thinking_budget_tokens enables extended thinking and caps the number of + # tokens Claude may spend on internal reasoning. Must be at least 1024 when set. + # Only supported by Claude models that support extended thinking. + thinking_budget_tokens: int | None = None type: Literal["bedrock"] @@ -569,6 +576,8 @@ def _create_llm_from_model_config(model_config: ModelUnion): return KAgentBedrockLlm( model=model_config.model, extra_headers=extra_headers, + top_k=model_config.top_k, + thinking_budget_tokens=model_config.thinking_budget_tokens, ) if model_config.type == "sap_ai_core": from .models._sap_ai_core import KAgentSAPAICoreLlm From ea9cd68b55febf657cd841e8b448f11b2ec052df Mon Sep 17 00:00:00 2001 From: mesutoezdil Date: Fri, 24 Apr 2026 19:15:58 +0200 Subject: [PATCH 2/2] feat(bedrock): replace typed fields with raw additionalModelRequestFields Replace the typed topK and thinkingBudgetTokens fields in BedrockConfig with a single additionalModelRequestFields raw JSON field. This avoids growing the CRD with every new Claude-specific parameter and keeps the config close to how Bedrock models the API itself. CRD type uses *apiextensionsv1.JSON with +kubebuilder:pruning:PreserveUnknownFields so unknown nested keys are not pruned at admission time. The translator unmarshals the raw bytes into map[string]any before passing them to the Go ADK BedrockConfig. 
The Python ADK receives the same field as a plain dict and forwards it directly to the Converse API. Example ModelConfig usage: bedrock: region: us-east-1 additionalModelRequestFields: {top_k: 5, thinking: {type: enabled, budget_tokens: 16000}} CRD manifests and Helm template regenerated via make manifests. Signed-off-by: mesutoezdil --- go/adk/pkg/agent/agent.go | 9 +++--- go/adk/pkg/models/bedrock.go | 31 ++++++------------- go/api/adk/types.go | 11 +++---- .../crd/bases/kagent.dev_modelconfigs.yaml | 20 +++++------- go/api/v1alpha2/modelconfig_types.go | 18 +++++------ go/api/v1alpha2/zz_generated.deepcopy.go | 14 +++------ .../translator/agent/adk_api_translator.go | 12 +++++-- .../templates/kagent.dev_modelconfigs.yaml | 20 +++++------- .../src/kagent/adk/models/_bedrock.py | 15 ++------- .../kagent-adk/src/kagent/adk/types.py | 14 +++------ 10 files changed, 64 insertions(+), 100 deletions(-) diff --git a/go/adk/pkg/agent/agent.go b/go/adk/pkg/agent/agent.go index 2874e6189..1a7953f99 100644 --- a/go/adk/pkg/agent/agent.go +++ b/go/adk/pkg/agent/agent.go @@ -285,11 +285,10 @@ func CreateLLM(ctx context.Context, m adk.Model, log logr.Logger) (adkmodel.LLM, } // Use Bedrock Converse API for ALL models (including Anthropic) cfg := &models.BedrockConfig{ - TransportConfig: transportConfigFromBase(m.BaseModel, nil), - Model: modelName, - Region: region, - TopK: m.TopK, - ThinkingBudgetTokens: m.ThinkingBudgetTokens, + TransportConfig: transportConfigFromBase(m.BaseModel, nil), + Model: modelName, + Region: region, + AdditionalModelRequestFields: m.AdditionalModelRequestFields, } return models.NewBedrockModelWithLogger(ctx, cfg, log) diff --git a/go/adk/pkg/models/bedrock.go b/go/adk/pkg/models/bedrock.go index e2b2e619f..69596e0a1 100644 --- a/go/adk/pkg/models/bedrock.go +++ b/go/adk/pkg/models/bedrock.go @@ -48,13 +48,12 @@ func sanitizeBedrockToolID(id string, idMap map[string]string, counter *int) str // BedrockConfig holds Bedrock configuration for the 
Converse API type BedrockConfig struct { TransportConfig - Model string - Region string - MaxTokens *int - Temperature *float64 - TopP *float64 - TopK *int - ThinkingBudgetTokens *int + Model string + Region string + MaxTokens *int + Temperature *float64 + TopP *float64 + AdditionalModelRequestFields map[string]any } // BedrockModel implements model.LLM for Amazon Bedrock using the Converse API. @@ -175,23 +174,13 @@ func (m *BedrockModel) GenerateContent(ctx context.Context, req *model.LLMReques // buildAdditionalModelRequestFields returns a document.Interface containing // model-specific parameters that are not part of InferenceConfiguration. -// For Claude on Bedrock this includes top_k and thinking configuration. -// Returns nil when no extra fields are needed. +// The raw map is forwarded as-is to the Bedrock Converse API. +// Returns nil when no extra fields are configured. func (m *BedrockModel) buildAdditionalModelRequestFields() document.Interface { - fields := make(map[string]any) - if m.Config.TopK != nil { - fields["top_k"] = *m.Config.TopK - } - if m.Config.ThinkingBudgetTokens != nil { - fields["thinking"] = map[string]any{ - "type": "enabled", - "budget_tokens": *m.Config.ThinkingBudgetTokens, - } - } - if len(fields) == 0 { + if len(m.Config.AdditionalModelRequestFields) == 0 { return nil } - return document.NewLazyDocument(fields) + return document.NewLazyDocument(m.Config.AdditionalModelRequestFields) } // generateStreaming handles streaming responses from Bedrock ConverseStream. diff --git a/go/api/adk/types.go b/go/api/adk/types.go index eda249ac2..602a45798 100644 --- a/go/api/adk/types.go +++ b/go/api/adk/types.go @@ -247,13 +247,10 @@ type Bedrock struct { BaseModel // Region is the AWS region where the model is available Region string `json:"region,omitempty"` - // TopK limits sampling to the top-k most probable tokens. - // Only supported by Claude models on Bedrock. 
- TopK *int `json:"top_k,omitempty"` - // ThinkingBudgetTokens enables extended thinking when set and specifies the - // maximum number of tokens Claude may use for internal reasoning. - // Only supported by Claude models that support extended thinking. - ThinkingBudgetTokens *int `json:"thinking_budget_tokens,omitempty"` + // AdditionalModelRequestFields passes model-specific parameters to Bedrock's + // additionalModelRequestFields in the Converse API. Use this for provider-specific + // options outside the standard InferenceConfiguration block. + AdditionalModelRequestFields map[string]any `json:"additional_model_request_fields,omitempty"` } func (b *Bedrock) MarshalJSON() ([]byte, error) { diff --git a/go/api/config/crd/bases/kagent.dev_modelconfigs.yaml b/go/api/config/crd/bases/kagent.dev_modelconfigs.yaml index 342137305..e31173eb9 100644 --- a/go/api/config/crd/bases/kagent.dev_modelconfigs.yaml +++ b/go/api/config/crd/bases/kagent.dev_modelconfigs.yaml @@ -479,22 +479,18 @@ spec: bedrock: description: AWS Bedrock-specific configuration properties: + additionalModelRequestFields: + description: |- + AdditionalModelRequestFields passes model-specific parameters to Bedrock's + additionalModelRequestFields in the Converse API. Use this for provider-specific + options that are not part of the standard InferenceConfiguration block, such as + Claude extended thinking or top_k. Values are forwarded as-is to the API. + Example: {"top_k": 5, "thinking": {"type": "enabled", "budget_tokens": 16000}} + x-kubernetes-preserve-unknown-fields: true region: description: AWS region where the Bedrock model is available (e.g., us-east-1, us-west-2) type: string - thinkingBudgetTokens: - description: |- - ThinkingBudgetTokens sets the maximum number of tokens Claude may use for - internal reasoning before producing its response. When set, extended thinking - is enabled. Only supported by Claude models that support extended thinking. - Must be at least 1024 when set. 
- type: integer - topK: - description: |- - TopK limits the model to the k most probable next tokens at each step. - Only supported by Claude models on Bedrock. - type: integer required: - region type: object diff --git a/go/api/v1alpha2/modelconfig_types.go b/go/api/v1alpha2/modelconfig_types.go index e2f80e3ea..9c3896986 100644 --- a/go/api/v1alpha2/modelconfig_types.go +++ b/go/api/v1alpha2/modelconfig_types.go @@ -17,6 +17,7 @@ limitations under the License. package v1alpha2 import ( + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -244,17 +245,14 @@ type BedrockConfig struct { // +required Region string `json:"region"` - // TopK limits the model to the k most probable next tokens at each step. - // Only supported by Claude models on Bedrock. + // AdditionalModelRequestFields passes model-specific parameters to Bedrock's + // additionalModelRequestFields in the Converse API. Use this for provider-specific + // options that are not part of the standard InferenceConfiguration block, such as + // Claude extended thinking or top_k. Values are forwarded as-is to the API. + // Example: {"top_k": 5, "thinking": {"type": "enabled", "budget_tokens": 16000}} // +optional - TopK *int `json:"topK,omitempty"` - - // ThinkingBudgetTokens sets the maximum number of tokens Claude may use for - // internal reasoning before producing its response. When set, extended thinking - // is enabled. Only supported by Claude models that support extended thinking. - // Must be at least 1024 when set. - // +optional - ThinkingBudgetTokens *int `json:"thinkingBudgetTokens,omitempty"` + // +kubebuilder:pruning:PreserveUnknownFields + AdditionalModelRequestFields *apiextensionsv1.JSON `json:"additionalModelRequestFields,omitempty"` } // SAPAICoreConfig contains SAP AI Core-specific configuration options. 
diff --git a/go/api/v1alpha2/zz_generated.deepcopy.go b/go/api/v1alpha2/zz_generated.deepcopy.go index 16a16754d..2b0ca618f 100644 --- a/go/api/v1alpha2/zz_generated.deepcopy.go +++ b/go/api/v1alpha2/zz_generated.deepcopy.go @@ -22,6 +22,7 @@ package v1alpha2 import ( "k8s.io/api/core/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" ) @@ -323,15 +324,10 @@ func (in *BaseVertexAIConfig) DeepCopy() *BaseVertexAIConfig { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *BedrockConfig) DeepCopyInto(out *BedrockConfig) { *out = *in - if in.TopK != nil { - in, out := &in.TopK, &out.TopK - *out = new(int) - **out = **in - } - if in.ThinkingBudgetTokens != nil { - in, out := &in.ThinkingBudgetTokens, &out.ThinkingBudgetTokens - *out = new(int) - **out = **in + if in.AdditionalModelRequestFields != nil { + in, out := &in.AdditionalModelRequestFields, &out.AdditionalModelRequestFields + *out = new(apiextensionsv1.JSON) + (*in).DeepCopyInto(*out) } } diff --git a/go/core/internal/controller/translator/agent/adk_api_translator.go b/go/core/internal/controller/translator/agent/adk_api_translator.go index f2fc8ffc3..26fb8e6cb 100644 --- a/go/core/internal/controller/translator/agent/adk_api_translator.go +++ b/go/core/internal/controller/translator/agent/adk_api_translator.go @@ -7,6 +7,7 @@ import ( _ "embed" "encoding/binary" "encoding/hex" + "encoding/json" "errors" "fmt" "maps" @@ -683,14 +684,19 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC } } } + var additionalFields map[string]any + if model.Spec.Bedrock.AdditionalModelRequestFields != nil { + if err := json.Unmarshal(model.Spec.Bedrock.AdditionalModelRequestFields.Raw, &additionalFields); err != nil { + return nil, nil, nil, fmt.Errorf("failed to unmarshal bedrock 
additionalModelRequestFields: %w", err) + } + } bedrock := &adk.Bedrock{ BaseModel: adk.BaseModel{ Model: model.Spec.Model, Headers: model.Spec.DefaultHeaders, }, - Region: model.Spec.Bedrock.Region, - TopK: model.Spec.Bedrock.TopK, - ThinkingBudgetTokens: model.Spec.Bedrock.ThinkingBudgetTokens, + Region: model.Spec.Bedrock.Region, + AdditionalModelRequestFields: additionalFields, } // Populate TLS fields in BaseModel diff --git a/helm/kagent-crds/templates/kagent.dev_modelconfigs.yaml b/helm/kagent-crds/templates/kagent.dev_modelconfigs.yaml index 342137305..e31173eb9 100644 --- a/helm/kagent-crds/templates/kagent.dev_modelconfigs.yaml +++ b/helm/kagent-crds/templates/kagent.dev_modelconfigs.yaml @@ -479,22 +479,18 @@ spec: bedrock: description: AWS Bedrock-specific configuration properties: + additionalModelRequestFields: + description: |- + AdditionalModelRequestFields passes model-specific parameters to Bedrock's + additionalModelRequestFields in the Converse API. Use this for provider-specific + options that are not part of the standard InferenceConfiguration block, such as + Claude extended thinking or top_k. Values are forwarded as-is to the API. + Example: {"top_k": 5, "thinking": {"type": "enabled", "budget_tokens": 16000}} + x-kubernetes-preserve-unknown-fields: true region: description: AWS region where the Bedrock model is available (e.g., us-east-1, us-west-2) type: string - thinkingBudgetTokens: - description: |- - ThinkingBudgetTokens sets the maximum number of tokens Claude may use for - internal reasoning before producing its response. When set, extended thinking - is enabled. Only supported by Claude models that support extended thinking. - Must be at least 1024 when set. - type: integer - topK: - description: |- - TopK limits the model to the k most probable next tokens at each step. - Only supported by Claude models on Bedrock. 
- type: integer required: - region type: object diff --git a/python/packages/kagent-adk/src/kagent/adk/models/_bedrock.py b/python/packages/kagent-adk/src/kagent/adk/models/_bedrock.py index 088518d21..e1ebdbfb6 100644 --- a/python/packages/kagent-adk/src/kagent/adk/models/_bedrock.py +++ b/python/packages/kagent-adk/src/kagent/adk/models/_bedrock.py @@ -195,8 +195,7 @@ class KAgentBedrockLlm(BaseLlm): """ extra_headers: Optional[dict[str, str]] = None - top_k: Optional[int] = None - thinking_budget_tokens: Optional[int] = None + additional_model_request_fields: Optional[dict[str, Any]] = None model_config = {"arbitrary_types_allowed": True} @cached_property @@ -246,16 +245,8 @@ async def generate_content_async( if inference_config: kwargs["inferenceConfig"] = inference_config - additional_fields: dict[str, Any] = {} - if self.top_k is not None: - additional_fields["top_k"] = self.top_k - if self.thinking_budget_tokens is not None: - additional_fields["thinking"] = { - "type": "enabled", - "budget_tokens": self.thinking_budget_tokens, - } - if additional_fields: - kwargs["additionalModelRequestFields"] = additional_fields + if self.additional_model_request_fields: + kwargs["additionalModelRequestFields"] = self.additional_model_request_fields def _run_converse_stream(**kw): resp = client.converse_stream(**kw) diff --git a/python/packages/kagent-adk/src/kagent/adk/types.py b/python/packages/kagent-adk/src/kagent/adk/types.py index 31d2c5599..635fca9c9 100644 --- a/python/packages/kagent-adk/src/kagent/adk/types.py +++ b/python/packages/kagent-adk/src/kagent/adk/types.py @@ -235,13 +235,10 @@ class Gemini(BaseLLM): class Bedrock(BaseLLM): region: str | None = None - # top_k limits sampling to the top-k most probable tokens. - # Only supported by Claude models on Bedrock. - top_k: int | None = None - # thinking_budget_tokens enables extended thinking and caps the number of - # tokens Claude may spend on internal reasoning. Must be at least 1024 when set. 
- # Only supported by Claude models that support extended thinking. - thinking_budget_tokens: int | None = None + # additional_model_request_fields passes model-specific parameters to Bedrock's + # additionalModelRequestFields in the Converse API. Use this for provider-specific + # options outside the standard InferenceConfiguration block. + additional_model_request_fields: dict | None = None type: Literal["bedrock"] @@ -576,8 +573,7 @@ def _create_llm_from_model_config(model_config: ModelUnion): return KAgentBedrockLlm( model=model_config.model, extra_headers=extra_headers, - top_k=model_config.top_k, - thinking_budget_tokens=model_config.thinking_budget_tokens, + additional_model_request_fields=model_config.additional_model_request_fields, ) if model_config.type == "sap_ai_core": from .models._sap_ai_core import KAgentSAPAICoreLlm