-
Notifications
You must be signed in to change notification settings - Fork 1.4k
feat(autoscaling): CPURequestsRemoveLimitsMemoryRequestsAndLimits + cluster burstable default #49314
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
feat(autoscaling): CPURequestsRemoveLimitsMemoryRequestsAndLimits + cluster burstable default #49314
Changes from all commits
43a1e0e
03f2a59
0506295
8d45f21
512e1ab
2a3b22f
be790ac
6750250
bebf5b2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,10 +42,9 @@ const ( | |
|
|
||
| const inPlaceResizeSupportedCacheTTL = 15 * time.Minute | ||
|
|
||
| // removeLimitSentinel is stored in a container Limits map by applyVerticalConstraints (burstable | ||
| // mode) to signal "delete this limit from the pod instead of setting it to this value". | ||
| // Negative quantities are never valid as real Kubernetes resource values, making the intent | ||
| // unambiguous and easy to identify with quantity.Sign() < 0. | ||
| // removeLimitSentinel is a sentinel quantity stored in ContainerResources.Limits to | ||
| // signal that an existing limit must be actively deleted from the live pod. Negative | ||
| // quantities are never valid Kubernetes resource values, making the intent unambiguous. | ||
| var removeLimitSentinel = resource.MustParse("-1") | ||
|
|
||
| // isInPlaceResizeSupported checks whether the API server exposes the pods/resize | ||
|
|
@@ -424,6 +423,20 @@ func applyVerticalConstraints(verticalRecs *model.VerticalScalingValues, constra | |
| } | ||
| } | ||
|
|
||
| // ControlledValues=CPURequestsRemoveLimitsMemoryRequestsAndLimits: stamp the CPU | ||
| // limit with removeLimitSentinel so the pod patcher actively deletes any | ||
| // pre-existing CPU limit from the live pod (an absent entry would mean | ||
| // "leave untouched"). The sentinel survives the clamping and invariant | ||
| // steps below because both skip negative-quantity values. | ||
| if constraint.ControlledValues != nil && | ||
| *constraint.ControlledValues == datadoghqcommon.DatadogPodAutoscalerContainerControlledValuesCPURequestsRemoveLimitsMemoryRequestsAndLimits { | ||
| if cr.Limits == nil { | ||
| cr.Limits = corev1.ResourceList{} | ||
| } | ||
| cr.Limits[corev1.ResourceCPU] = removeLimitSentinel | ||
| modified = true | ||
| } | ||
|
|
||
| // Resolve min/max bounds for clamping. | ||
| // New top-level MinAllowed/MaxAllowed apply to both requests and limits. | ||
| // Deprecated Requests.MinAllowed/MaxAllowed apply to requests only. | ||
|
|
@@ -438,9 +451,10 @@ func applyVerticalConstraints(verticalRecs *model.VerticalScalingValues, constra | |
| modified = true | ||
| } | ||
|
|
||
| // Maintain invariant: limits >= requests for all resources where both exist | ||
| // Maintain invariant: limits >= requests for all resources where both exist. | ||
| // Skip sentinel limits (negative) — they are internal markers, not values. | ||
| for resourceName, reqQty := range cr.Requests { | ||
| if limQty, hasLimit := cr.Limits[resourceName]; hasLimit && limQty.Cmp(reqQty) < 0 { | ||
| if limQty, hasLimit := cr.Limits[resourceName]; hasLimit && limQty.Sign() >= 0 && limQty.Cmp(reqQty) < 0 { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should not be possible really as we won't set unset in request and if |
||
| cr.Limits[resourceName] = reqQty.DeepCopy() | ||
| modified = true | ||
| } | ||
|
|
@@ -507,12 +521,17 @@ func resolveMinMaxBounds(c *datadoghqcommon.DatadogPodAutoscalerContainerConstra | |
|
|
||
| // clampResourceList clamps each resource quantity in the list to [min, max]. | ||
| // Returns true if any values were modified. | ||
| // Negative-quantity entries (sentinels such as removeLimitSentinel) are skipped: | ||
| // they are internal markers for downstream consumers and must survive clamping. | ||
| func clampResourceList(rl corev1.ResourceList, minAllowed, maxAllowed corev1.ResourceList) bool { | ||
| if rl == nil { | ||
| return false | ||
| } | ||
| modified := false | ||
| for name, qty := range rl { | ||
| if qty.Sign() < 0 { // this use case is to support removeLimitSentinel | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't it check for the precise sentinel value |
||
| continue | ||
| } | ||
| clamped := false | ||
| if minQty, ok := minAllowed[name]; ok && qty.Cmp(minQty) < 0 { | ||
| qty = minQty.DeepCopy() | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -388,7 +388,17 @@ func (p *PodAutoscalerInternal) SetActiveScalingValues(currentTime time.Time, ho | |
|
|
||
| // Update scaling values | ||
| p.scalingValues.Horizontal = selectScalingValues(horizontalActiveSource).Horizontal | ||
| p.scalingValues.Vertical = selectScalingValues(verticalActiveSource).Vertical | ||
|
|
||
| // selectScalingValues(nil) returns p.scalingValues — a self-assignment that would | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand this logic. If |
||
| // keep any previously-constrained vertical value (including a burstable sentinel) | ||
| // alive across cycles. When the backend stops emitting a vertical recommendation, | ||
| // reset Vertical to nil so the next sync sees "no recommendation" and clears live | ||
| // state instead of re-applying the stale sentinel. | ||
| if verticalActiveSource == nil { | ||
| p.scalingValues.Vertical = nil | ||
| } else { | ||
| p.scalingValues.Vertical = selectScalingValues(verticalActiveSource).Vertical | ||
| } | ||
|
|
||
| // Update error states based on main product recommendations | ||
| p.scalingValues.HorizontalError = p.mainScalingValues.HorizontalError | ||
|
|
@@ -709,11 +719,14 @@ func (p *PodAutoscalerInternal) IsHorizontalScalingEnabled() bool { | |
| return !(scaleUpDisabled && scaleDownDisabled) | ||
| } | ||
|
|
||
| // IsBurstable returns true if the burstable preview option is enabled for this autoscaler. | ||
| // The value is read directly from the cached previewOptions struct — no JSON decode per call. | ||
| // For profile-managed DPAs previewOptions is populated by UpdateFromProfile; for standalone | ||
| // DPAs it is populated by UpdateFromPodAutoscaler. | ||
| // IsBurstable returns true if burstable mode is enabled for this autoscaler. | ||
| // Burstable mode requires an explicit opt-in via spec.options.burstable or the | ||
| // preview annotation; there is no cluster-level default. | ||
| func (p *PodAutoscalerInternal) IsBurstable() bool { | ||
| spec := p.Spec() | ||
| if spec != nil && spec.Options != nil && spec.Options.Burstable != nil { | ||
| return *spec.Options.Burstable | ||
| } | ||
| return p.previewOptions.Burstable | ||
| } | ||
|
|
||
|
|
@@ -1062,7 +1075,7 @@ func (p *PodAutoscalerInternal) BuildStatus(currentTime metav1.Time, currentStat | |
| Source: p.scalingValues.Vertical.Source, | ||
| GeneratedAt: metav1.NewTime(p.scalingValues.Vertical.Timestamp), | ||
| Version: p.scalingValues.Vertical.ResourcesHash, | ||
| DesiredResources: containerResourcesForStatus(p.scalingValues.Vertical.ContainerResources), | ||
| DesiredResources: p.scalingValues.Vertical.ContainerResourcesForStatus(), | ||
| Scaled: p.scaledReplicas, | ||
| Evicted: p.evictedReplicas, | ||
| PodCPURequest: cpuReqSum, | ||
|
|
@@ -1159,44 +1172,20 @@ func (p *PodAutoscalerInternal) BuildStatus(currentTime metav1.Time, currentStat | |
| } | ||
| status.Conditions = append(status.Conditions, newCondition(rolloutStatus, verticalReason, verticalMessage, currentTime, datadoghqcommon.DatadogPodAutoscalerVerticalAbleToApply, existingConditions)) | ||
|
|
||
| return status | ||
| } | ||
|
|
||
| // containerResourcesForStatus returns a copy of the container resources with internal sentinel | ||
| // values removed from Limits and Requests so they are not surfaced in the DPA status. | ||
| // | ||
| // For Limits: applyVerticalConstraints (burstable mode) stores removeLimitSentinel (-1) in | ||
| // Limits[cpu] to signal "delete this CPU limit from the pod". A negative quantity is never a | ||
| // valid Kubernetes resource value, so only strictly positive quantities are kept in Limits. | ||
| func containerResourcesForStatus(in []datadoghqcommon.DatadogPodAutoscalerContainerResources) []datadoghqcommon.DatadogPodAutoscalerContainerResources { | ||
| if len(in) == 0 { | ||
| return in | ||
| } | ||
| out := make([]datadoghqcommon.DatadogPodAutoscalerContainerResources, len(in)) | ||
| for i, cr := range in { | ||
| out[i].Name = cr.Name | ||
| if len(cr.Limits) > 0 { | ||
| for k, v := range cr.Limits { | ||
| if v.Sign() > 0 { | ||
| if out[i].Limits == nil { | ||
| out[i].Limits = make(corev1.ResourceList, len(cr.Limits)) | ||
| } | ||
| out[i].Limits[k] = v | ||
| } | ||
| } | ||
| // spec.options.burstable is reported when explicitly set; the annotation fallback is | ||
| // reported only when true (it has no explicit false — absence means default). | ||
| spec := p.Spec() | ||
| if spec != nil && spec.Options != nil && spec.Options.Burstable != nil { | ||
| status.Options = &datadoghqcommon.DatadogPodAutoscalerOptionsStatus{ | ||
| Burstable: pointer.Ptr(*spec.Options.Burstable), | ||
| } | ||
| if len(cr.Requests) > 0 { | ||
| for k, v := range cr.Requests { | ||
| if v.Sign() >= 0 { | ||
| if out[i].Requests == nil { | ||
| out[i].Requests = make(corev1.ResourceList, len(cr.Requests)) | ||
| } | ||
| out[i].Requests[k] = v | ||
| } | ||
| } | ||
| } else if p.previewOptions.Burstable { | ||
| status.Options = &datadoghqcommon.DatadogPodAutoscalerOptionsStatus{ | ||
| Burstable: pointer.Ptr(true), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't reflect cluster setting then? Another reason to remove |
||
| } | ||
| } | ||
| return out | ||
|
|
||
| return status | ||
| } | ||
|
|
||
| // Private helpers | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's exactly the same code as https://github.com/DataDog/datadog-agent/pull/49314/changes#diff-09a884e70bc600c169d6ff913c0cf90530fe84f8218f24c38d45ca53cdefae83R466-R480
I think
burstableearlier should be just updated to set from the ControlledValues as well.