diff --git a/.jules/bolt.md b/.jules/bolt.md
index 65fb9e85e3..4f1efa986d 100644
--- a/.jules/bolt.md
+++ b/.jules/bolt.md
@@ -38,3 +38,7 @@
 **Learning:** The `quota.Compute` function, a critical hot path for every request, was performing multiple redundant lookups (`GetCompletionRatioWithThreeLayers`, `ResolveEffectivePricing`, `ResolveModelConfig`) that each performed similar map lookups and expensive deep-cloning of `ModelConfig` objects. These objects contain large nested structures for media pricing (Image, Audio, Video) that are entirely unused during standard token-based quota calculation.
 
 **Action:** Consolidate pricing resolution into a single `ResolveModelConfigRatioOnly` call that performs a shallow clone of the base struct and a targeted clone of the `Tiers` slice, while omitting media metadata. This reduced the `BenchmarkCompute` execution time by ~19% (2313ns -> 1874ns). Always use "RatioOnly" or targeted lookup functions when full configuration metadata is not required in high-throughput paths.
+
+## 2026-04-24 - [Streamline Quota Calculation and Fast ASCII Fold]
+**Learning:** The quota calculation hot path in `relay/quota/quota.go` was iterating over pricing tiers by hand to resolve completion ratios. That iteration is redundant, because `eff.OutputRatio / eff.InputRatio` already yields the effective completion ratio for the matched tier. Separately, case-insensitive string searches such as `containsASCIIFold` (used for Claude detection) can be optimized by pre-calculating the lowercase and uppercase variants of the first search byte, which avoids an expensive normalization call on every byte visited by the search loop.
+**Action:** Consolidate ratio resolution by deriving tiered completion ratios from the effective pricing result instead of re-walking the tiers, and use a first-byte fast path (compare against both cases) in ASCII case-insensitive string search functions.
diff --git a/relay/quota/quota.go b/relay/quota/quota.go
index 9f40625587..e7244cb1bf 100644
--- a/relay/quota/quota.go
+++ b/relay/quota/quota.go
@@ -38,8 +38,6 @@ type ComputeResult struct {
 }
 
 // Compute calculates the quota required for the provided usage snapshot.
-// It mirrors the logic used in controller helper functions so streaming
-// billing and final reconciliation share the same pricing semantics.
 func Compute(input ComputeInput) ComputeResult {
 	usage := input.Usage
 	if usage == nil {
@@ -51,9 +49,19 @@ func Compute(input ComputeInput) ComputeResult {
 
 	pricingAdaptor := input.PricingAdaptor
 	resolvedModelCfg, hasResolvedModelCfg := pricing.ResolveModelConfigRatioOnly(input.ModelName, input.ChannelModelConfigs, pricingAdaptor)
+
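+	// Resolve the completion ratio in priority order to avoid redundant lookups: channel override first, then the resolved model config (when non-zero), then the three-layer lookup as a fallback.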
+	var completionRatioResolved float64
+	if override, ok := input.ChannelCompletionRatio[input.ModelName]; ok {
+		completionRatioResolved = override
+	} else if hasResolvedModelCfg && resolvedModelCfg.CompletionRatio != 0 {
+		completionRatioResolved = resolvedModelCfg.CompletionRatio
+	} else {
+		completionRatioResolved = pricing.GetCompletionRatioWithThreeLayers(input.ModelName, input.ChannelCompletionRatio, pricingAdaptor)
+	}
+
 	hasChannelModelRatioOverride := hasOverrideForModel(input.ModelName, input.ChannelModelRatio)
 	baseRatio := input.ModelRatio
-	completionRatioResolved := resolveCompletionRatio(input.ModelName, resolvedModelCfg, hasResolvedModelCfg, input.ChannelCompletionRatio, pricingAdaptor)
 
 	if hasResolvedModelCfg {
 		// Preserve legacy fallback behavior: when channel config omits base ratio/completion
@@ -81,39 +89,26 @@ func Compute(input ComputeInput) ComputeResult {
 		if !hasChannelModelRatioOverride {
 			usedModelRatio = eff.InputRatio
 		}
-		baseComp := eff.OutputRatio
-		completionBaseRatio := eff.InputRatio
-		if hasChannelModelRatioOverride {
-			completionBaseRatio = usedModelRatio
-			baseComp = usedModelRatio * completionRatioResolved
-			for _, tier := range resolvedModelCfg.Tiers {
-				if promptTokens < tier.InputTokenThreshold {
-					break
-				}
-				if tier.CompletionRatio != 0 {
-					baseComp = usedModelRatio * tier.CompletionRatio
-				}
-			}
-		}
-		if completionBaseRatio != 0 {
-			baseComp = baseComp / completionBaseRatio
+
+		// Optimization: Deriving the tiered completion ratio from eff.OutputRatio / eff.InputRatio
+		// avoids a redundant loop over tiers. Since eff.OutputRatio = eff.InputRatio * tierComp,
+		// the division recovers the effective completion ratio for the current tier.
+		if eff.InputRatio != 0 {
+			usedCompletionRatio = eff.OutputRatio / eff.InputRatio
 		} else {
-			baseComp = 1.0
+			usedCompletionRatio = 1.0
 		}
-		usedCompletionRatio = baseComp
 	} else if pricingAdaptor != nil {
 		// Optimized check: only use effective pricing if the input model ratio matches the adaptor base.
 		// This avoids extra GetDefaultModelPricing() map lookups when not needed.
 		adaptorBase := pricingAdaptor.GetModelRatio(input.ModelName)
 		if math.Abs(baseRatio-adaptorBase) < 1e-12 {
 			usedModelRatio = eff.InputRatio
-			baseComp := eff.OutputRatio
 			if eff.InputRatio != 0 {
-				baseComp = eff.OutputRatio / eff.InputRatio
+				usedCompletionRatio = eff.OutputRatio / eff.InputRatio
 			} else {
-				baseComp = 1.0
+				usedCompletionRatio = 1.0
 			}
-			usedCompletionRatio = baseComp
 		}
 	}
 
@@ -267,9 +262,21 @@ func containsASCIIFold(s string, substr string) bool {
 		return false
 	}
 
+	// The caller (isClaudeModelName) is expected to pass substr already lowercased.
+	// We pre-calculate the uppercase variant of the first byte to allow a fast search
+	// that avoids calling asciiLower on every character in the model name string.
+	firstLower := substr[0]
+	var firstUpper byte
+	if firstLower >= 'a' && firstLower <= 'z' {
+		firstUpper = firstLower - ('a' - 'A')
+	} else {
+		firstUpper = firstLower
+	}
+
 	last := len(s) - len(substr)
 	for i := 0; i <= last; i++ {
-		if asciiLower(s[i]) != substr[0] {
+		// Fast path: match the first byte against both possible cases.
+		if s[i] != firstLower && s[i] != firstUpper {
 			continue
 		}
 
@@ -286,7 +293,6 @@ func containsASCIIFold(s string, substr string) bool {
 	}
 	return false
 }
-
 // asciiLower converts ASCII uppercase bytes to lowercase.
 // Parameter: b is the byte to normalize.
 // Returns: the lowercase byte when b is an ASCII uppercase letter, otherwise b unchanged.