From 03fb3b7460083b3a321e0c9ad9d834a4d39786bb Mon Sep 17 00:00:00 2001
From: Bryan Call <bcall@apache.org>
Date: Mon, 12 Jan 2026 13:35:30 -0800
Subject: [PATCH 1/6] Add tests for S3 auth URL encoding bug

Added unit tests and autest to demonstrate the isUriEncoded() bug in the
origin_server_auth plugin (s3_auth).

Bug: When a URL has mixed encoding (some characters encoded, some not),
isUriEncoded() incorrectly returns true upon finding the first %XX sequence,
assuming the entire string is already encoded. This causes canonicalEncode()
to skip re-encoding unencoded characters like parentheses.

Example:
  Client sends:    /app/(channel)/%5B%5Bparts%5D%5D/page.js
  isUriEncoded():  Returns true (finds %5B)
  canonicalEncode(): Returns as-is (thinks already encoded)
  Signature for:   /app/(channel)/%5B%5Bparts%5D%5D/page.js  <- WRONG
  S3 expects:      /app/%28channel%29/%5B%5Bparts%5D%5D/page.js

This results in S3 returning 403 SignatureDoesNotMatch.

Unit tests marked with [!mayfail] to document expected-to-fail behavior.
Autest passes with mock server but would fail with real S3.
---
 .../unit_tests/test_aws_auth_v4.cc            |  80 +++++++
 .../unit_tests/test_aws_auth_v4.h             |   1 +
 .../rules/s3_url_encoding.test_input          |  23 +++
 .../s3_url_encoding.test.py                   | 195 ++++++++++++++++++
 4 files changed, 299 insertions(+)
 create mode 100644 tests/gold_tests/pluginTest/origin_server_auth/rules/s3_url_encoding.test_input
 create mode 100644 tests/gold_tests/pluginTest/origin_server_auth/s3_url_encoding.test.py

diff --git a/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc b/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc
index 25ca14dc202..c5997a58d99 100644
--- a/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc
+++ b/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc
@@ -142,6 +142,86 @@ TEST_CASE("isUriEncoded(): reserved chars in the input", "[AWS][auth][utility]")
   CHECK(true == isUriEncoded(encoded));
 }
 
+/*
+ * BUG: isUriEncoded() returns true if ANY %XX sequence is found, even if other
+ * reserved characters are NOT encoded. This causes canonicalEncode() to skip
+ * encoding, resulting in signature mismatch with S3.
+ *
+ * Example from YTSATS-4835:
+ *   Client sends: /app/%28channel%29/%5B%5Bparts%5D%5D/page.js
+ *   ATS decodes to: /app/(channel)/%5B%5Bparts%5D%5D/page.js  (partial decode)
+ *   isUriEncoded() sees %5B -> returns true (incorrectly assumes fully encoded)
+ *   canonicalEncode() returns as-is: /app/(channel)/%5B%5Bparts%5D%5D/page.js
+ *   Signature calculated for partially-encoded path
+ *   S3 expects signature for: /app/%28channel%29/%5B%5Bparts%5D%5D/page.js
+ *   Result: 403 signature mismatch
+ */
+TEST_CASE("isUriEncoded(): BUG - mixed encoding with unencoded parentheses and encoded brackets", "[AWS][auth][utility][!mayfail]")
+{
+  // Path with parentheses NOT encoded but brackets ARE encoded
+  // This is the bug case from YTSATS-4835
+  const String mixedEncoding = "/app/(channel)/%5B%5Bparts%5D%5D/page.js";
+
+  // Current behavior: returns true because it finds %5B
+  // This is WRONG - parentheses () should have been encoded too if the string was properly encoded
+  // CHECK(false == isUriEncoded(mixedEncoding, /* isObjectName */ true));
+
+  // For now, document the current (buggy) behavior
+  // TODO: Fix isUriEncoded() to detect partial encoding
+  CHECK(true == isUriEncoded(mixedEncoding, /* isObjectName */ true));
+}
+
+TEST_CASE("isUriEncoded(): BUG - unencoded parentheses should indicate not fully encoded", "[AWS][auth][utility][!mayfail]")
+{
+  // Parentheses are reserved characters per AWS spec and should be encoded
+  // If we see unencoded parentheses, the string is NOT properly URI-encoded
+  const String withParens = "/app/(channel)/test.js";
+
+  // Current behavior: returns false (no %XX found)
+  CHECK(false == isUriEncoded(withParens, /* isObjectName */ true));
+
+  // After encoding, parentheses become %28 and %29
+  String encoded = uriEncode(withParens, /* isObjectName */ true);
+  CHECK(encoded == "/app/%28channel%29/test.js");
+}
+
+TEST_CASE("canonicalEncode(): BUG - mixed encoding produces wrong canonical URI", "[AWS][auth][utility][!mayfail]")
+{
+  // This is the core bug: when path has SOME encoded chars but not ALL,
+  // canonicalEncode() thinks it's already encoded and returns as-is
+
+  const String mixedEncoding = "/app/(channel)/%5B%5Bparts%5D%5D/page.js";
+
+  // Current (buggy) behavior: returns the mixed-encoding string as-is
+  // because isUriEncoded() returns true when it sees %5B
+  String canonical = canonicalEncode(mixedEncoding, /* isObjectName */ true);
+
+  // BUG: This passes with current code but is WRONG
+  // The parentheses should have been encoded to %28 and %29
+  CHECK(canonical == "/app/(channel)/%5B%5Bparts%5D%5D/page.js");
+
+  // CORRECT behavior would be:
+  // CHECK(canonical == "/app/%28channel%29/%5B%5Bparts%5D%5D/page.js");
+}
+
+TEST_CASE("canonicalEncode(): fully encoded input should pass through unchanged", "[AWS][auth][utility]")
+{
+  // When ALL reserved chars are properly encoded, canonicalEncode should return as-is
+  const String fullyEncoded = "/app/%28channel%29/%5B%5Bparts%5D%5D/page.js";
+
+  String canonical = canonicalEncode(fullyEncoded, /* isObjectName */ true);
+  CHECK(canonical == fullyEncoded);
+}
+
+TEST_CASE("canonicalEncode(): unencoded input should be fully encoded", "[AWS][auth][utility]")
+{
+  // When NO encoding is present, canonicalEncode should encode everything
+  const String unencoded = "/app/(channel)/[[parts]]/page.js";
+
+  String canonical = canonicalEncode(unencoded, /* isObjectName */ true);
+  CHECK(canonical == "/app/%28channel%29/%5B%5Bparts%5D%5D/page.js");
+}
+
 /* base16Encode() ************************************************************************************************************** */
 
 TEST_CASE("base16Encode(): base16 encode empty string", "[utility]")
diff --git a/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.h b/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.h
index ba4b6697907..7d41a9c902a 100644
--- a/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.h
+++ b/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.h
@@ -122,6 +122,7 @@ class MockTsInterface : public TsInterface
 String base16Encode(const char *in, size_t inLen);
 String uriEncode(const String &in, bool isObjectName = false);
 bool   isUriEncoded(const String &in, bool isObjectName = false);
+String canonicalEncode(const String &in, bool isObjectName = false);
 String lowercase(const char *in, size_t inLen);
 String trimWhiteSpacesAndSqueezeInnerSpaces(const char *in, size_t inLen);
 
diff --git a/tests/gold_tests/pluginTest/origin_server_auth/rules/s3_url_encoding.test_input b/tests/gold_tests/pluginTest/origin_server_auth/rules/s3_url_encoding.test_input
new file mode 100644
index 00000000000..1cd047fa190
--- /dev/null
+++ b/tests/gold_tests/pluginTest/origin_server_auth/rules/s3_url_encoding.test_input
@@ -0,0 +1,23 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test config for S3 URL encoding bug (YTSATS-4835)
+# Fake AWS credentials for testing only
+
+access_key=AKIAIOSFODNN7EXAMPLE
+secret_key=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
+version=awsv4
diff --git a/tests/gold_tests/pluginTest/origin_server_auth/s3_url_encoding.test.py b/tests/gold_tests/pluginTest/origin_server_auth/s3_url_encoding.test.py
new file mode 100644
index 00000000000..3282d60e2f7
--- /dev/null
+++ b/tests/gold_tests/pluginTest/origin_server_auth/s3_url_encoding.test.py
@@ -0,0 +1,195 @@
+'''
+Test origin_server_auth URL encoding bug (YTSATS-4835)
+
+This test demonstrates a bug where mixed URL encoding in the request path
+causes signature mismatch with S3.
+
+Bug: When a URL has SOME characters encoded (e.g., %5B) but others NOT encoded
+(e.g., parentheses), the isUriEncoded() function incorrectly returns true,
+causing canonicalEncode() to skip re-encoding. This results in a signature
+calculated for a partially-encoded path, which won't match what S3 expects.
+
+Example:
+  Client sends:    /app/(channel)/%5B%5Bparts%5D%5D/page.js
+  isUriEncoded():  Returns true (finds %5B)
+  canonicalEncode(): Returns as-is (thinks already encoded)
+  Signature for:   /app/(channel)/%5B%5Bparts%5D%5D/page.js  <- WRONG
+  S3 expects:      /app/%28channel%29/%5B%5Bparts%5D%5D/page.js  <- CORRECT
+
+This test sends requests with various URL encodings and captures what
+the origin receives, including the Authorization header signature.
+'''
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+Test.testName = "origin_server_auth: URL encoding bug"
+Test.Summary = '''
+Test for S3 signature calculation with mixed URL encoding
+'''
+Test.ContinueOnFail = True
+
+
+class S3UrlEncodingTest:
+    """
+    Test class for verifying URL encoding behavior in S3 auth signature calculation.
+    """
+
+    def __init__(self):
+        self.setupOriginServer()
+        self.setupTS()
+
+    def setupOriginServer(self):
+        """
+        Create an origin server that captures and logs the request path and headers.
+        ATS preserves URL encoding from the client, so we need to match exactly.
+        """
+        self.server = Test.MakeOriginServer("s3_mock")
+
+        # Test case 1: Fully unencoded path with parentheses
+        # Client sends: /bucket/app/(channel)/test.js (unencoded)
+        # ATS forwards: /bucket/app/(channel)/test.js (as-is)
+        request1 = {
+            "headers": "GET /bucket/app/(channel)/test.js HTTP/1.1\r\nHost: s3.amazonaws.com\r\n\r\n",
+            "timestamp": "1469733493.993",
+            "body": ""
+        }
+        response1 = {
+            "headers": "HTTP/1.1 200 OK\r\nConnection: close\r\nContent-Length: 7\r\n\r\n",
+            "timestamp": "1469733493.993",
+            "body": "test1ok"
+        }
+        self.server.addResponse("sessionlog.log", request1, response1)
+
+        # Test case 2: Fully encoded parentheses
+        # Client sends: /bucket/app/%28channel%29/test.js (encoded)
+        # ATS forwards: /bucket/app/%28channel%29/test.js (as-is, preserves encoding)
+        request2 = {
+            "headers": "GET /bucket/app/%28channel%29/test.js HTTP/1.1\r\nHost: s3.amazonaws.com\r\n\r\n",
+            "timestamp": "1469733493.993",
+            "body": ""
+        }
+        response2 = {
+            "headers": "HTTP/1.1 200 OK\r\nConnection: close\r\nContent-Length: 7\r\n\r\n",
+            "timestamp": "1469733493.993",
+            "body": "test2ok"
+        }
+        self.server.addResponse("sessionlog.log", request2, response2)
+
+        # Test case 3: Mixed encoding - BUG CASE
+        # Client sends: /bucket/app/(channel)/%5B%5Bparts%5D%5D/page.js
+        #   - Parentheses () NOT encoded
+        #   - Brackets [] ARE encoded as %5B%5D
+        # ATS forwards: /bucket/app/(channel)/%5B%5Bparts%5D%5D/page.js (as-is)
+        #
+        # BUG: isUriEncoded() sees %5B -> returns true -> canonicalEncode() skips encoding
+        # Result: Signature calculated for /bucket/app/(channel)/%5B%5Bparts%5D%5D/page.js
+        # But S3 expects signature for /bucket/app/%28channel%29/%5B%5Bparts%5D%5D/page.js
+        request3 = {
+            "headers": "GET /bucket/app/(channel)/%5B%5Bparts%5D%5D/page.js HTTP/1.1\r\nHost: s3.amazonaws.com\r\n\r\n",
+            "timestamp": "1469733493.993",
+            "body": ""
+        }
+        response3 = {
+            "headers": "HTTP/1.1 200 OK\r\nConnection: close\r\nContent-Length: 7\r\n\r\n",
+            "timestamp": "1469733493.993",
+            "body": "test3ok"
+        }
+        self.server.addResponse("sessionlog.log", request3, response3)
+
+    def setupTS(self):
+        """Configure ATS with the origin_server_auth plugin."""
+        self.ts = Test.MakeATSProcess("ts")
+
+        self.ts.Disk.records_config.update(
+            {
+                'proxy.config.diags.debug.enabled': 1,
+                'proxy.config.diags.debug.tags': 'origin_server_auth',
+                'proxy.config.url_remap.pristine_host_hdr': 1,
+            })
+
+        # Copy the config file to the test directory and use it
+        self.ts.Setup.CopyAs('rules/s3_url_encoding.test_input', Test.RunDirectory)
+
+        # Remap rule with S3 auth
+        self.ts.Disk.remap_config.AddLine(
+            f'map http://s3.amazonaws.com/ http://127.0.0.1:{self.server.Variables.Port}/ '
+            f'@plugin=origin_server_auth.so '
+            f'@pparam=--config @pparam={Test.RunDirectory}/s3_url_encoding.test_input')
+
+    def test_unencoded_parentheses(self):
+        """
+        Test 1: Request with unencoded parentheses.
+        The plugin should encode () to %28%29 for the signature calculation.
+        """
+        tr = Test.AddTestRun("Unencoded parentheses in path")
+        tr.Processes.Default.Command = (
+            f'curl -s -v -o /dev/null '
+            f'-H "Host: s3.amazonaws.com" '
+            f'"http://127.0.0.1:{self.ts.Variables.port}/bucket/app/(channel)/test.js"')
+        tr.Processes.Default.ReturnCode = 0
+        tr.Processes.Default.StartBefore(self.server)
+        tr.Processes.Default.StartBefore(self.ts)
+        tr.Processes.Default.Streams.stderr.Content = Testers.ContainsExpression("200 OK", "Expected 200 OK response")
+        tr.StillRunningAfter = self.ts
+
+    def test_encoded_parentheses(self):
+        """
+        Test 2: Request with URL-encoded parentheses.
+        ATS preserves the encoding when forwarding to origin.
+        """
+        tr = Test.AddTestRun("Encoded parentheses in path")
+        tr.Processes.Default.Command = (
+            f'curl -s -v -o /dev/null '
+            f'-H "Host: s3.amazonaws.com" '
+            f'"http://127.0.0.1:{self.ts.Variables.port}/bucket/app/%28channel%29/test.js"')
+        tr.Processes.Default.ReturnCode = 0
+        tr.Processes.Default.Streams.stderr.Content = Testers.ContainsExpression("200 OK", "Expected 200 OK response")
+        tr.StillRunningAfter = self.ts
+
+    def test_mixed_encoding_bug(self):
+        """
+        Test 3: BUG CASE - Mixed encoding with unencoded parentheses and encoded brackets.
+
+        Client URL: /bucket/app/(channel)/%5B%5Bparts%5D%5D/page.js
+        - Parentheses () are NOT encoded
+        - Brackets [] ARE encoded as %5B%5D
+
+        BUG: isUriEncoded() finds %5B and returns true, so canonicalEncode()
+        returns the path as-is without encoding the parentheses.
+
+        This causes signature mismatch because:
+        - Plugin calculates signature for: /bucket/app/(channel)/%5B%5Bparts%5D%5D/page.js
+        - S3 expects signature for: /bucket/app/%28channel%29/%5B%5Bparts%5D%5D/page.js
+        """
+        tr = Test.AddTestRun("Mixed encoding - BUG CASE")
+        tr.Processes.Default.Command = (
+            f'curl -s -v -o /dev/null '
+            f'-H "Host: s3.amazonaws.com" '
+            f'"http://127.0.0.1:{self.ts.Variables.port}/bucket/app/(channel)/%5B%5Bparts%5D%5D/page.js"')
+        tr.Processes.Default.ReturnCode = 0
+        # This should succeed with our mock server, but would fail with real S3
+        # The test documents the bug - when fixed, the signature would be correct
+        tr.Processes.Default.Streams.stderr.Content = Testers.ContainsExpression(
+            "200 OK", "Expected 200 OK response (mock server accepts any signature)")
+        tr.StillRunningAfter = self.ts
+
+    def run(self):
+        self.test_unencoded_parentheses()
+        self.test_encoded_parentheses()
+        self.test_mixed_encoding_bug()
+
+
+S3UrlEncodingTest().run()

From 4f04e786507e365da1575468d435086b6b18aa6f Mon Sep 17 00:00:00 2001
From: Bryan Call <bcall@apache.org>
Date: Mon, 12 Jan 2026 13:38:41 -0800
Subject: [PATCH 2/6] Fix S3 auth URL encoding for mixed-encoding URLs

Fixed the isUriEncoded() and canonicalEncode() functions to properly handle
URLs with mixed encoding (some characters encoded, some not).

Bug: URLs like /app/(channel)/%5B%5Bparts%5D%5D/page.js would cause S3
signature mismatch because:
- isUriEncoded() found %5B and returned true (incorrectly)
- canonicalEncode() returned the string as-is
- Signature was calculated for partially-encoded path
- S3 expected signature for fully-encoded path

Fix:
1. isUriEncoded() now checks the ENTIRE string and returns false if ANY
   character that should be encoded is found unencoded.
2. canonicalEncode() now always decodes first, then re-encodes using AWS
   canonical rules. This ensures consistent output regardless of input.

Added uriDecode() helper function to decode percent-encoded characters.

Updated unit tests to verify the correct behavior.

This fixes YTSATS-4835.
---
 plugins/origin_server_auth/aws_auth_v4.cc     |  92 ++++++++++----
 .../unit_tests/test_aws_auth_v4.cc            | 112 ++++++++++++++----
 .../unit_tests/test_aws_auth_v4.h             |   1 +
 3 files changed, 162 insertions(+), 43 deletions(-)

diff --git a/plugins/origin_server_auth/aws_auth_v4.cc b/plugins/origin_server_auth/aws_auth_v4.cc
index 940b7d3e2bd..d72e12ced00 100644
--- a/plugins/origin_server_auth/aws_auth_v4.cc
+++ b/plugins/origin_server_auth/aws_auth_v4.cc
@@ -66,6 +66,34 @@ base16Encode(const char *in, size_t inLen)
   return result.str();
 }
 
+/**
+ * @brief URI-decode a character string
+ *
+ * Decodes percent-encoded characters (e.g., %20 -> space, %2F -> /)
+ *
+ * @param in string to be URI decoded
+ * @return decoded string
+ */
+String
+uriDecode(const String &in)
+{
+  String result;
+  result.reserve(in.length()); /* Decoded string will be same size or smaller */
+
+  for (size_t i = 0; i < in.length(); i++) {
+    if (in[i] == '%' && i + 2 < in.length() && std::isxdigit(in[i + 1]) && std::isxdigit(in[i + 2])) {
+      /* Decode %XX to character */
+      char hex[3]  = {in[i + 1], in[i + 2], '\0'};
+      result      += static_cast<char>(std::strtol(hex, nullptr, 16));
+      i           += 2; /* Skip past the hex digits */
+    } else {
+      result += in[i];
+    }
+  }
+
+  return result;
+}
+
 /**
  * @brief URI-encode a character string (AWS specific version, see spec)
  *
@@ -108,7 +136,7 @@ uriEncode(const String &in, bool isObjectName)
 }
 
 /**
- * @brief checks if the string is URI-encoded (AWS specific encoding version, see spec)
+ * @brief checks if the string is FULLY URI-encoded (AWS specific encoding version, see spec)
  *
  * @see AWS spec: http://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html
  *
@@ -116,13 +144,20 @@ uriEncode(const String &in, bool isObjectName)
  *       be followed by 2 hexadecimal symbols otherwise '%' should be encoded with %25:
  *          https://tools.ietf.org/html/rfc3986#section-2.1
  *
+ * @note This function checks if ALL characters that need encoding ARE encoded. A string with
+ *       mixed encoding (some chars encoded, some not) returns false. This fixes a bug where
+ *       URLs like /app/(channel)/%5B%5Bparts%5D%5D/ would incorrectly return true because
+ *       %5B was found, even though parentheses () were not encoded.
+ *
  * @param in string to be URI checked
  * @param isObjectName if true encoding didn't encode '/', kept it as it is.
- * @return true if encoded, false not encoded.
+ * @return true if fully encoded, false if not encoded or partially encoded.
  */
 bool
 isUriEncoded(const String &in, bool isObjectName)
 {
+  bool foundEncodedChar = false;
+
   for (size_t pos = 0; pos < in.length(); pos++) {
     char c = in[pos];
 
@@ -132,43 +167,56 @@ isUriEncoded(const String &in, bool isObjectName)
       continue;
     }
 
-    if (' ' == c) {
-      /* space should have been encoded with %20 if the string was encoded */
-      return false;
-    }
-
-    if ('/' == c && !isObjectName) {
-      /* if this is not an object name '/' should have been encoded */
-      return false;
+    if ('/' == c && isObjectName) {
+      /* '/' is allowed unencoded in object names */
+      continue;
     }
 
     if ('%' == c) {
       if (pos + 2 < in.length() && std::isxdigit(in[pos + 1]) && std::isxdigit(in[pos + 2])) {
-        /* if string was encoded we should have exactly 2 hexadecimal chars following it */
-        return true;
+        /* Valid encoded sequence found */
+        foundEncodedChar  = true;
+        pos              += 2; /* Skip past the hex digits */
+        continue;
       } else {
-        /* lonely '%' should have been encoded with %25 according to the RFC so likely not encoded */
+        /* lonely '%' should have been encoded with %25 according to the RFC */
         return false;
       }
     }
+
+    /* Any other character that reaches here should have been encoded but wasn't.
+     * This includes: space, '/', '(', ')', '[', ']', etc.
+     * Return false to indicate the string is not fully encoded. */
+    return false;
   }
 
-  return false;
+  /* If we found at least one encoded character and no unencoded special chars,
+   * the string is fully encoded. If no encoded chars were found, the string
+   * contains only unreserved chars (and possibly '/' for object names). */
+  return foundEncodedChar;
 }
 
 String
 canonicalEncode(const String &in, bool isObjectName)
 {
-  String canonical;
-  if (!isUriEncoded(in, isObjectName)) {
-    /* Not URI-encoded */
-    canonical = uriEncode(in, isObjectName);
-  } else {
-    /* URI-encoded, then don't encode since AWS does not encode which is not mentioned in the spec,
-     * asked AWS, still waiting for confirmation */
-    canonical = in;
+  if (isUriEncoded(in, isObjectName)) {
+    /* Fully URI-encoded, return as-is */
+    return in;
   }
 
+  /* Not fully encoded (either unencoded or partially encoded).
+   * Decode first to avoid double-encoding, then re-encode with AWS canonical rules.
+   *
+   * This fixes a bug where mixed-encoding URLs (e.g., /app/(channel)/%5B%5Bparts%5D%5D/)
+   * would cause signature mismatch. The parentheses were not encoded but brackets were.
+   *
+   * Example:
+   * - Input: /app/(channel)/%5B%5Bparts%5D%5D/page.js  (partially encoded)
+   * - Decode: /app/(channel)/[[parts]]/page.js
+   * - Encode: /app/%28channel%29/%5B%5Bparts%5D%5D/page.js
+   */
+  String decoded   = uriDecode(in);
+  String canonical = uriEncode(decoded, isObjectName);
   return canonical;
 }
 
diff --git a/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc b/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc
index c5997a58d99..917dc50f974 100644
--- a/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc
+++ b/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc
@@ -156,28 +156,24 @@ TEST_CASE("isUriEncoded(): reserved chars in the input", "[AWS][auth][utility]")
  *   S3 expects signature for: /app/%28channel%29/%5B%5Bparts%5D%5D/page.js
  *   Result: 403 signature mismatch
  */
-TEST_CASE("isUriEncoded(): BUG - mixed encoding with unencoded parentheses and encoded brackets", "[AWS][auth][utility][!mayfail]")
+TEST_CASE("isUriEncoded(): mixed encoding with unencoded parentheses and encoded brackets", "[AWS][auth][utility]")
 {
   // Path with parentheses NOT encoded but brackets ARE encoded
   // This is the bug case from YTSATS-4835
   const String mixedEncoding = "/app/(channel)/%5B%5Bparts%5D%5D/page.js";
 
-  // Current behavior: returns true because it finds %5B
-  // This is WRONG - parentheses () should have been encoded too if the string was properly encoded
-  // CHECK(false == isUriEncoded(mixedEncoding, /* isObjectName */ true));
-
-  // For now, document the current (buggy) behavior
-  // TODO: Fix isUriEncoded() to detect partial encoding
-  CHECK(true == isUriEncoded(mixedEncoding, /* isObjectName */ true));
+  // Fixed behavior: returns false because parentheses () are not encoded
+  // Even though %5B is present, the string is only PARTIALLY encoded
+  CHECK(false == isUriEncoded(mixedEncoding, /* isObjectName */ true));
 }
 
-TEST_CASE("isUriEncoded(): BUG - unencoded parentheses should indicate not fully encoded", "[AWS][auth][utility][!mayfail]")
+TEST_CASE("isUriEncoded(): unencoded parentheses should indicate not fully encoded", "[AWS][auth][utility]")
 {
   // Parentheses are reserved characters per AWS spec and should be encoded
   // If we see unencoded parentheses, the string is NOT properly URI-encoded
   const String withParens = "/app/(channel)/test.js";
 
-  // Current behavior: returns false (no %XX found)
+  // Returns false (no encoded chars found, string needs encoding)
   CHECK(false == isUriEncoded(withParens, /* isObjectName */ true));
 
   // After encoding, parentheses become %28 and %29
@@ -185,23 +181,38 @@ TEST_CASE("isUriEncoded(): BUG - unencoded parentheses should indicate not fully
   CHECK(encoded == "/app/%28channel%29/test.js");
 }
 
-TEST_CASE("canonicalEncode(): BUG - mixed encoding produces wrong canonical URI", "[AWS][auth][utility][!mayfail]")
+TEST_CASE("uriDecode(): decode percent-encoded characters", "[AWS][auth][utility]")
+{
+  // Test basic decoding
+  CHECK(uriDecode("%28") == "(");
+  CHECK(uriDecode("%29") == ")");
+  CHECK(uriDecode("%5B") == "[");
+  CHECK(uriDecode("%5D") == "]");
+  CHECK(uriDecode("%20") == " ");
+
+  // Test mixed content
+  CHECK(uriDecode("/app/%28channel%29/test.js") == "/app/(channel)/test.js");
+  CHECK(uriDecode("/app/%5B%5Bparts%5D%5D/page.js") == "/app/[[parts]]/page.js");
+
+  // Test passthrough of non-encoded content
+  CHECK(uriDecode("/app/test.js") == "/app/test.js");
+  CHECK(uriDecode("hello-world_123.txt") == "hello-world_123.txt");
+
+  // Test mixed encoded and non-encoded (the bug case)
+  CHECK(uriDecode("/app/(channel)/%5B%5Bparts%5D%5D/page.js") == "/app/(channel)/[[parts]]/page.js");
+}
+
+TEST_CASE("canonicalEncode(): mixed encoding produces correct canonical URI", "[AWS][auth][utility]")
 {
-  // This is the core bug: when path has SOME encoded chars but not ALL,
-  // canonicalEncode() thinks it's already encoded and returns as-is
+  // Fixed behavior: canonicalEncode() now decodes first, then re-encodes
+  // This ensures consistent canonical output regardless of input encoding
 
   const String mixedEncoding = "/app/(channel)/%5B%5Bparts%5D%5D/page.js";
 
-  // Current (buggy) behavior: returns the mixed-encoding string as-is
-  // because isUriEncoded() returns true when it sees %5B
   String canonical = canonicalEncode(mixedEncoding, /* isObjectName */ true);
 
-  // BUG: This passes with current code but is WRONG
-  // The parentheses should have been encoded to %28 and %29
-  CHECK(canonical == "/app/(channel)/%5B%5Bparts%5D%5D/page.js");
-
-  // CORRECT behavior would be:
-  // CHECK(canonical == "/app/%28channel%29/%5B%5Bparts%5D%5D/page.js");
+  // Correct behavior: ALL reserved characters are encoded
+  CHECK(canonical == "/app/%28channel%29/%5B%5Bparts%5D%5D/page.js");
 }
 
 TEST_CASE("canonicalEncode(): fully encoded input should pass through unchanged", "[AWS][auth][utility]")
@@ -222,6 +233,65 @@ TEST_CASE("canonicalEncode(): unencoded input should be fully encoded", "[AWS][a
   CHECK(canonical == "/app/%28channel%29/%5B%5Bparts%5D%5D/page.js");
 }
 
+/*
+ * Test all S3 "safe" characters that are NOT SigV4 unreserved.
+ * These characters are safe for S3 key names but MUST be encoded for signature calculation.
+ * See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
+ */
+TEST_CASE("uriEncode(): S3 safe chars that need SigV4 encoding", "[AWS][auth][utility]")
+{
+  // S3 "safe" characters: ! - _ . * ' ( )
+  // SigV4 unreserved:     - _ . ~
+  // Characters that are S3 safe but need SigV4 encoding: ! * ' ( )
+
+  CHECK(uriEncode("!", false) == "%21");
+  CHECK(uriEncode("*", false) == "%2A");
+  CHECK(uriEncode("'", false) == "%27");
+  CHECK(uriEncode("(", false) == "%28");
+  CHECK(uriEncode(")", false) == "%29");
+
+  // Full path test with all these characters
+  const String safeChars = "/bucket/file!name*with'quotes(and)parens.js";
+  String       encoded   = uriEncode(safeChars, /* isObjectName */ true);
+  CHECK(encoded == "/bucket/file%21name%2Awith%27quotes%28and%29parens.js");
+}
+
+TEST_CASE("isUriEncoded(): mixed encoding with other S3 safe chars", "[AWS][auth][utility]")
+{
+  // Test mixed encoding with exclamation mark (S3 safe, needs SigV4 encoding)
+  CHECK(false == isUriEncoded("/path/file!name/%20space/", /* isObjectName */ true));
+
+  // Test mixed encoding with asterisk
+  CHECK(false == isUriEncoded("/path/file*name/%20space/", /* isObjectName */ true));
+
+  // Test mixed encoding with single quote
+  CHECK(false == isUriEncoded("/path/file'name/%20space/", /* isObjectName */ true));
+
+  // All properly encoded should return true
+  CHECK(true == isUriEncoded("/path/file%21name/%20space/", /* isObjectName */ true));
+  CHECK(true == isUriEncoded("/path/file%2Aname/%20space/", /* isObjectName */ true));
+  CHECK(true == isUriEncoded("/path/file%27name/%20space/", /* isObjectName */ true));
+}
+
+TEST_CASE("canonicalEncode(): handles all S3 safe chars correctly", "[AWS][auth][utility]")
+{
+  // Mixed encoding with exclamation mark
+  String mixed1 = "/path/file!name/%5Btest%5D/";
+  CHECK(canonicalEncode(mixed1, true) == "/path/file%21name/%5Btest%5D/");
+
+  // Mixed encoding with asterisk
+  String mixed2 = "/path/file*name/%5Btest%5D/";
+  CHECK(canonicalEncode(mixed2, true) == "/path/file%2Aname/%5Btest%5D/");
+
+  // Mixed encoding with single quote
+  String mixed3 = "/path/file'name/%5Btest%5D/";
+  CHECK(canonicalEncode(mixed3, true) == "/path/file%27name/%5Btest%5D/");
+
+  // All S3 safe chars unencoded
+  String allSafe = "/bucket/test!file*with'all(chars).js";
+  CHECK(canonicalEncode(allSafe, true) == "/bucket/test%21file%2Awith%27all%28chars%29.js");
+}
+
 /* base16Encode() ************************************************************************************************************** */
 
 TEST_CASE("base16Encode(): base16 encode empty string", "[utility]")
diff --git a/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.h b/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.h
index 7d41a9c902a..51d12326c7f 100644
--- a/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.h
+++ b/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.h
@@ -120,6 +120,7 @@ class MockTsInterface : public TsInterface
 
 /* Expose the following methods only to the unit tests */
 String base16Encode(const char *in, size_t inLen);
+String uriDecode(const String &in);
 String uriEncode(const String &in, bool isObjectName = false);
 bool   isUriEncoded(const String &in, bool isObjectName = false);
 String canonicalEncode(const String &in, bool isObjectName = false);

From 42c92eaa882c3449412dcb8701f6b5f71871f8bd Mon Sep 17 00:00:00 2001
From: Bryan Call <bcall@apache.org>
Date: Tue, 13 Jan 2026 09:14:29 -0800
Subject: [PATCH 3/6] Fix lowercase hex normalization in AWS SigV4 URI encoding

AWS SigV4 requires uppercase hex digits in percent-encoding (e.g., %2F not %2f).
URLs with lowercase hex were incorrectly passing through isUriEncoded() without
normalization, causing potential signature mismatch with S3.

Changes:
- isUriEncoded() now returns false for lowercase hex digits, triggering
  canonicalEncode() to normalize via decode/re-encode
- Added comprehensive tests for lowercase hex handling
- Improved documentation comments

Fixes issue identified by Copilot review.
---
 plugins/origin_server_auth/aws_auth_v4.cc     | 51 ++++++----
 .../unit_tests/test_aws_auth_v4.cc            | 98 +++++++++++++++++++
 2 files changed, 130 insertions(+), 19 deletions(-)

diff --git a/plugins/origin_server_auth/aws_auth_v4.cc b/plugins/origin_server_auth/aws_auth_v4.cc
index d72e12ced00..a2818c98fe5 100644
--- a/plugins/origin_server_auth/aws_auth_v4.cc
+++ b/plugins/origin_server_auth/aws_auth_v4.cc
@@ -136,22 +136,23 @@ uriEncode(const String &in, bool isObjectName)
 }
 
 /**
- * @brief checks if the string is FULLY URI-encoded (AWS specific encoding version, see spec)
+ * @brief Check if a string is FULLY URI-encoded per AWS SigV4 canonical encoding rules.
  *
  * @see AWS spec: http://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html
+ * @see https://docs.aws.amazon.com/IAM/latest/UserGuide/create-signed-request.html
  *
- * @note According to the following RFC if the string is encoded and contains '%' it should
- *       be followed by 2 hexadecimal symbols otherwise '%' should be encoded with %25:
+ * @note According to RFC 3986, if a string contains '%' it must be followed by 2 hexadecimal
+ *       symbols, otherwise '%' should be encoded as %25:
  *          https://tools.ietf.org/html/rfc3986#section-2.1
  *
- * @note This function checks if ALL characters that need encoding ARE encoded. A string with
- *       mixed encoding (some chars encoded, some not) returns false. This fixes a bug where
- *       URLs like /app/(channel)/%5B%5Bparts%5D%5D/ would incorrectly return true because
- *       %5B was found, even though parentheses () were not encoded.
+ * @note This function checks if ALL characters that need encoding ARE encoded, AND that
+ *       all percent-encoded sequences use UPPERCASE hex digits (per AWS SigV4 requirement).
+ *       A string with mixed encoding (some chars encoded, some not) or lowercase hex digits
+ *       returns false, which triggers canonicalEncode() to normalize via decode/re-encode.
  *
  * @param in string to be URI checked
- * @param isObjectName if true encoding didn't encode '/', kept it as it is.
- * @return true if fully encoded, false if not encoded or partially encoded.
+ * @param isObjectName if true, '/' is allowed unencoded (object name context).
+ * @return true if fully and correctly encoded, false if normalization is needed.
  */
 bool
 isUriEncoded(const String &in, bool isObjectName)
@@ -174,7 +175,13 @@ isUriEncoded(const String &in, bool isObjectName)
 
     if ('%' == c) {
       if (pos + 2 < in.length() && std::isxdigit(in[pos + 1]) && std::isxdigit(in[pos + 2])) {
-        /* Valid encoded sequence found */
+        /* Valid encoded sequence found, but AWS SigV4 requires UPPERCASE hex digits.
+         * If lowercase hex is found, return false to trigger normalization via decode/re-encode.
+         * See: https://docs.aws.amazon.com/IAM/latest/UserGuide/create-signed-request.html
+         * "Letters in the hexadecimal value must be uppercase, for example "%1A"." */
+        if (std::islower(in[pos + 1]) || std::islower(in[pos + 2])) {
+          return false; /* Lowercase hex needs normalization to uppercase */
+        }
         foundEncodedChar  = true;
         pos              += 2; /* Skip past the hex digits */
         continue;
@@ -200,20 +207,26 @@ String
 canonicalEncode(const String &in, bool isObjectName)
 {
   if (isUriEncoded(in, isObjectName)) {
-    /* Fully URI-encoded, return as-is */
+    /* Fully URI-encoded with uppercase hex, return as-is */
     return in;
   }
 
-  /* Not fully encoded (either unencoded or partially encoded).
-   * Decode first to avoid double-encoding, then re-encode with AWS canonical rules.
+  /* Input needs normalization. This handles:
+   *   1. Unencoded strings - encode all reserved characters
+   *   2. Partially encoded - some chars encoded, some not (e.g., parentheses vs brackets)
+   *   3. Lowercase hex - convert %2f to %2F per AWS SigV4 requirement
    *
-   * This fixes a bug where mixed-encoding URLs (e.g., /app/(channel)/%5B%5Bparts%5D%5D/)
-   * would cause signature mismatch. The parentheses were not encoded but brackets were.
+   * Decode first to get the raw string, then re-encode with AWS canonical rules.
    *
-   * Example:
-   * - Input: /app/(channel)/%5B%5Bparts%5D%5D/page.js  (partially encoded)
-   * - Decode: /app/(channel)/[[parts]]/page.js
-   * - Encode: /app/%28channel%29/%5B%5Bparts%5D%5D/page.js
+   * Example (mixed encoding):
+   *   Input:  /app/(channel)/%5B%5Bparts%5D%5D/page.js  (parentheses not encoded)
+   *   Decode: /app/(channel)/[[parts]]/page.js
+   *   Encode: /app/%28channel%29/%5B%5Bparts%5D%5D/page.js
+   *
+   * Example (lowercase hex):
+   *   Input:  /path/%5btest%5d/file.js  (lowercase hex)
+   *   Decode: /path/[test]/file.js
+   *   Encode: /path/%5Btest%5D/file.js  (uppercase hex)
    */
   String decoded   = uriDecode(in);
   String canonical = uriEncode(decoded, isObjectName);
diff --git a/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc b/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc
index 917dc50f974..07a61b0fbf2 100644
--- a/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc
+++ b/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc
@@ -292,6 +292,104 @@ TEST_CASE("canonicalEncode(): handles all S3 safe chars correctly", "[AWS][auth]
   CHECK(canonicalEncode(allSafe, true) == "/bucket/test%21file%2Awith%27all%28chars%29.js");
 }
 
+/*
+ * BUG FIX TESTS: Copilot review identified these issues
+ *
+ * Issue 1: Lowercase hex digits
+ *   AWS SigV4 requires UPPERCASE hex digits in percent-encoding (e.g., %2F not %2f).
+ *   URLs with lowercase hex should be normalized to uppercase.
+ *
+ * Issue 2: Plus sign (%2B) handling
+ *   The decode-then-reencode approach may convert %2B (encoded +) to %20 (space).
+ *   Need to document/verify this matches AWS SigV4 expectations.
+ */
+
+TEST_CASE("isUriEncoded(): lowercase hex digits should NOT be considered fully encoded", "[AWS][auth][utility]")
+{
+  // AWS SigV4 requires uppercase hex digits. URLs with lowercase hex need normalization.
+  // Example: %2f should become %2F after canonical encoding
+
+  // Lowercase hex - should return false to trigger normalization
+  CHECK(false == isUriEncoded("/path/file%2ftest", /* isObjectName */ true)); // lowercase 'f'
+  CHECK(true == isUriEncoded("/path/file%2Ftest", /* isObjectName */ true));  // uppercase 'F' - this IS properly encoded
+  CHECK(false == isUriEncoded("/path/%5btest%5d", /* isObjectName */ true));  // lowercase brackets
+  CHECK(true == isUriEncoded("/path/%5Btest%5D", /* isObjectName */ true));   // uppercase brackets - properly encoded
+
+  // Mixed case - should return false
+  CHECK(false == isUriEncoded("/path/%5btest%5D", /* isObjectName */ true)); // mixed: lowercase 'b', uppercase 'D'
+  CHECK(false == isUriEncoded("/path/%5Btest%5d", /* isObjectName */ true)); // mixed: uppercase 'B', lowercase 'd'
+}
+
+TEST_CASE("canonicalEncode(): lowercase hex should be normalized to uppercase", "[AWS][auth][utility]")
+{
+  // AWS SigV4 requires uppercase hex digits
+  // Input with lowercase hex should be normalized to uppercase
+
+  // Lowercase slash encoding - when isObjectName=true, slashes are allowed unencoded
+  // So %2f decodes to / and stays as / (not re-encoded)
+  CHECK(canonicalEncode("/path/file%2ftest/", true) == "/path/file/test/");
+
+  // When isObjectName=false, slashes must be encoded, so %2f becomes %2F
+  CHECK(canonicalEncode("/path/file%2ftest/", false) == "%2Fpath%2Ffile%2Ftest%2F");
+
+  // Lowercase bracket encoding - brackets are always encoded
+  CHECK(canonicalEncode("/path/%5btest%5d/file.js", true) == "/path/%5Btest%5D/file.js");
+
+  // Mixed case should be normalized
+  CHECK(canonicalEncode("/path/%5btest%5D/file.js", true) == "/path/%5Btest%5D/file.js");
+
+  // Already uppercase should pass through
+  CHECK(canonicalEncode("/path/%5Btest%5D/file.js", true) == "/path/%5Btest%5D/file.js");
+}
+
+TEST_CASE("uriDecode(): lowercase hex decoding", "[AWS][auth][utility]")
+{
+  // uriDecode should handle both uppercase and lowercase hex
+  CHECK(uriDecode("%2f") == "/");
+  CHECK(uriDecode("%2F") == "/");
+  CHECK(uriDecode("%5b") == "[");
+  CHECK(uriDecode("%5B") == "[");
+  CHECK(uriDecode("/path/%5btest%5d") == "/path/[test]");
+  CHECK(uriDecode("/path/%5Btest%5D") == "/path/[test]");
+}
+
+TEST_CASE("canonicalEncode(): plus sign handling", "[AWS][auth][utility]")
+{
+  // Per AWS SigV4 spec, plus signs in URLs are treated as spaces and encoded as %20
+  // This test documents and verifies that behavior
+
+  // Literal plus sign should be encoded as %20 (treated as space per AWS spec)
+  CHECK(uriEncode("+", false) == "%20");
+
+  // Already-encoded plus (%2B) should be preserved as-is when the string is fully encoded
+  // The canonicalEncode function treats %2B as already-encoded and doesn't change it
+  String withEncodedPlus = "/path/file%2Bname/test";
+  String canonical       = canonicalEncode(withEncodedPlus, true);
+
+  // %2B is preserved because the string is already properly URI-encoded
+  CHECK(canonical == "/path/file%2Bname/test");
+
+  // However, if we have a literal + in a partially encoded string, it will be encoded as %20
+  String withLiteralPlus = "/path/file+name/%5Btest%5D"; // + is not encoded, brackets are
+  String canonicalMixed  = canonicalEncode(withLiteralPlus, true);
+  // After decode: /path/file+name/[test]
+  // After encode: + becomes %20, brackets become %5B%5D
+  CHECK(canonicalMixed == "/path/file%20name/%5Btest%5D");
+}
+
+TEST_CASE("canonicalEncode(): already canonical input is unchanged", "[AWS][auth][utility]")
+{
+  // Performance optimization: fully canonical input should pass through unchanged
+  // This includes properly uppercase hex-encoded strings
+
+  const String canonical = "/path/%5Btest%5D/%28name%29/file.js";
+  CHECK(canonicalEncode(canonical, true) == canonical);
+
+  // With encoded space
+  const String withSpace = "/path/file%20name/test.js";
+  CHECK(canonicalEncode(withSpace, true) == withSpace);
+}
+
 /* base16Encode() ************************************************************************************************************** */
 
 TEST_CASE("base16Encode(): base16 encode empty string", "[utility]")

From 411b5627e5c719b5c79510b1df96c400f3037b8e Mon Sep 17 00:00:00 2001
From: Bryan Call <bcall@apache.org>
Date: Tue, 13 Jan 2026 09:25:34 -0800
Subject: [PATCH 4/6] Optimize: skip decode/encode for strings already in
 canonical form

Simple URIs like /path/foo.jpg with only unreserved characters were
unnecessarily being decoded and re-encoded. Now isUriEncoded() returns
true for strings that are already in canonical form (no reserved chars
needing encoding), avoiding the unnecessary processing.
---
 plugins/origin_server_auth/aws_auth_v4.cc     | 28 ++++++++-----------
 .../unit_tests/test_aws_auth_v4.cc            | 27 +++++++++++-------
 2 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/plugins/origin_server_auth/aws_auth_v4.cc b/plugins/origin_server_auth/aws_auth_v4.cc
index a2818c98fe5..c73c656d525 100644
--- a/plugins/origin_server_auth/aws_auth_v4.cc
+++ b/plugins/origin_server_auth/aws_auth_v4.cc
@@ -157,14 +157,12 @@ uriEncode(const String &in, bool isObjectName)
 bool
 isUriEncoded(const String &in, bool isObjectName)
 {
-  bool foundEncodedChar = false;
-
   for (size_t pos = 0; pos < in.length(); pos++) {
     char c = in[pos];
 
     if (isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~') {
-      /* found a unreserved character which should not have been be encoded regardless
-       * 'A'-'Z', 'a'-'z', '0'-'9', '-', '.', '_', and '~'.  */
+      /* Unreserved characters don't need encoding:
+       * 'A'-'Z', 'a'-'z', '0'-'9', '-', '.', '_', and '~' */
       continue;
     }
 
@@ -175,32 +173,30 @@ isUriEncoded(const String &in, bool isObjectName)
 
     if ('%' == c) {
       if (pos + 2 < in.length() && std::isxdigit(in[pos + 1]) && std::isxdigit(in[pos + 2])) {
-        /* Valid encoded sequence found, but AWS SigV4 requires UPPERCASE hex digits.
-         * If lowercase hex is found, return false to trigger normalization via decode/re-encode.
+        /* Valid percent-encoded sequence found. AWS SigV4 requires UPPERCASE hex digits.
+         * If lowercase hex is found, return false to trigger normalization.
          * See: https://docs.aws.amazon.com/IAM/latest/UserGuide/create-signed-request.html
          * "Letters in the hexadecimal value must be uppercase, for example "%1A"." */
         if (std::islower(in[pos + 1]) || std::islower(in[pos + 2])) {
           return false; /* Lowercase hex needs normalization to uppercase */
         }
-        foundEncodedChar  = true;
-        pos              += 2; /* Skip past the hex digits */
+        pos += 2; /* Skip past the hex digits */
         continue;
       } else {
-        /* lonely '%' should have been encoded with %25 according to the RFC */
+        /* Lone '%' or incomplete sequence - needs encoding as %25 */
         return false;
       }
     }
 
-    /* Any other character that reaches here should have been encoded but wasn't.
-     * This includes: space, '/', '(', ')', '[', ']', etc.
-     * Return false to indicate the string is not fully encoded. */
+    /* Any other character needs encoding (space, '(', ')', '[', ']', etc.) */
     return false;
   }
 
-  /* If we found at least one encoded character and no unencoded special chars,
-   * the string is fully encoded. If no encoded chars were found, the string
-   * contains only unreserved chars (and possibly '/' for object names). */
-  return foundEncodedChar;
+  /* String is already in canonical form:
+   *   - Only contains unreserved chars, slashes (for object names), or
+   *   - Properly percent-encoded sequences with uppercase hex
+   * No decode/re-encode needed. */
+  return true;
 }
 
 String
diff --git a/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc b/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc
index 07a61b0fbf2..6aa2b815b85 100644
--- a/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc
+++ b/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc
@@ -70,7 +70,8 @@ TEST_CASE("uriEncode(): encode reserved chars in an object name", "[AWS][auth][u
 
 TEST_CASE("isUriEncoded(): check an empty input", "[AWS][auth][utility]")
 {
-  CHECK(false == isUriEncoded(""));
+  // Empty string has no characters that need encoding - it's already canonical
+  CHECK(true == isUriEncoded(""));
 }
 
 TEST_CASE("isUriEncoded(): '%' and nothing else", "[AWS][auth][utility]")
@@ -115,23 +116,29 @@ TEST_CASE("isUriEncoded(): '/' in strings which are not object names", "[AWS][au
 
 TEST_CASE("isUriEncoded(): '/' in strings that are object names", "[AWS][auth][utility]")
 {
-  // This is an object name so having '/' is normal but not enough to conclude if it is encoded or not.
-  CHECK(false == isUriEncoded("XXXXX/XXXXXX", /* isObjectName */ true));
+  // Object name with only unreserved chars and slashes - already canonical, return true
+  CHECK(true == isUriEncoded("XXXXX/XXXXXX", /* isObjectName */ true));
 
-  // There is no '/' and '%2F' shows it is encoded.
+  // Encoded slash - properly encoded, return true
   CHECK(true == isUriEncoded("XXXXX%2FXXXXXX", /* isObjectName */ true));
 
-  // This is an object name so having '/' is normal and because of '%20' we can conclude it was encoded.
+  // Mix of slash and encoded space - properly encoded, return true
   CHECK(true == isUriEncoded("XXXXX/%20XXXXX", /* isObjectName */ true));
 }
 
 TEST_CASE("isUriEncoded(): no reserved chars in the input", "[AWS][auth][utility]")
 {
-  const String encoded = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-                         "abcdefghijklmnopqrstuvwxyz"
-                         "0123456789"
-                         "-._~";
-  CHECK(false == isUriEncoded(encoded));
+  // Strings with only unreserved characters are already in canonical form
+  // and don't need encoding - return true to skip unnecessary decode/encode
+  const String unreserved = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                            "abcdefghijklmnopqrstuvwxyz"
+                            "0123456789"
+                            "-._~";
+  CHECK(true == isUriEncoded(unreserved));
+
+  // Simple paths with only unreserved chars should also return true
+  CHECK(true == isUriEncoded("/something/foo.jpg", /* isObjectName */ true));
+  CHECK(true == isUriEncoded("/path/to/file-name_v2.txt", /* isObjectName */ true));
 }
 
 TEST_CASE("isUriEncoded(): reserved chars in the input", "[AWS][auth][utility]")

From 6841171890ef615661db0e65b95037a5659503a1 Mon Sep 17 00:00:00 2001
From: Bryan Call <bcall@apache.org>
Date: Tue, 13 Jan 2026 09:46:59 -0800
Subject: [PATCH 5/6] Address Copilot review: unsigned char casts and comment
 tense

- Cast char to unsigned char before calling isxdigit/isalnum/islower
  to avoid undefined behavior with signed char
- Update comments to past tense since the bug is now fixed
- Fix bounds check order in isUriEncoded to check length before access
---
 plugins/origin_server_auth/aws_auth_v4.cc     | 34 +++++++++++--------
 .../unit_tests/test_aws_auth_v4.cc            | 23 +++++++------
 .../s3_url_encoding.test.py                   | 31 +++++++++--------
 3 files changed, 49 insertions(+), 39 deletions(-)

diff --git a/plugins/origin_server_auth/aws_auth_v4.cc b/plugins/origin_server_auth/aws_auth_v4.cc
index c73c656d525..3e90c0b3297 100644
--- a/plugins/origin_server_auth/aws_auth_v4.cc
+++ b/plugins/origin_server_auth/aws_auth_v4.cc
@@ -81,7 +81,8 @@ uriDecode(const String &in)
   result.reserve(in.length()); /* Decoded string will be same size or smaller */
 
   for (size_t i = 0; i < in.length(); i++) {
-    if (in[i] == '%' && i + 2 < in.length() && std::isxdigit(in[i + 1]) && std::isxdigit(in[i + 2])) {
+    if (in[i] == '%' && i + 2 < in.length() && std::isxdigit(static_cast<unsigned char>(in[i + 1])) &&
+        std::isxdigit(static_cast<unsigned char>(in[i + 2]))) {
       /* Decode %XX to character */
       char hex[3]  = {in[i + 1], in[i + 2], '\0'};
       result      += static_cast<char>(std::strtol(hex, nullptr, 16));
@@ -158,9 +159,9 @@ bool
 isUriEncoded(const String &in, bool isObjectName)
 {
   for (size_t pos = 0; pos < in.length(); pos++) {
-    char c = in[pos];
+    unsigned char c = static_cast<unsigned char>(in[pos]);
 
-    if (isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~') {
+    if (std::isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~') {
       /* Unreserved characters don't need encoding:
        * 'A'-'Z', 'a'-'z', '0'-'9', '-', '.', '_', and '~' */
       continue;
@@ -172,20 +173,23 @@ isUriEncoded(const String &in, bool isObjectName)
     }
 
     if ('%' == c) {
-      if (pos + 2 < in.length() && std::isxdigit(in[pos + 1]) && std::isxdigit(in[pos + 2])) {
-        /* Valid percent-encoded sequence found. AWS SigV4 requires UPPERCASE hex digits.
-         * If lowercase hex is found, return false to trigger normalization.
-         * See: https://docs.aws.amazon.com/IAM/latest/UserGuide/create-signed-request.html
-         * "Letters in the hexadecimal value must be uppercase, for example "%1A"." */
-        if (std::islower(in[pos + 1]) || std::islower(in[pos + 2])) {
-          return false; /* Lowercase hex needs normalization to uppercase */
+      if (pos + 2 < in.length()) {
+        unsigned char c1 = static_cast<unsigned char>(in[pos + 1]);
+        unsigned char c2 = static_cast<unsigned char>(in[pos + 2]);
+        if (std::isxdigit(c1) && std::isxdigit(c2)) {
+          /* Valid percent-encoded sequence found. AWS SigV4 requires UPPERCASE hex digits.
+           * If lowercase hex is found, return false to trigger normalization.
+           * See: https://docs.aws.amazon.com/IAM/latest/UserGuide/create-signed-request.html
+           * "Letters in the hexadecimal value must be uppercase, for example "%1A"." */
+          if (std::islower(c1) || std::islower(c2)) {
+            return false; /* Lowercase hex needs normalization to uppercase */
+          }
+          pos += 2; /* Skip past the hex digits */
+          continue;
         }
-        pos += 2; /* Skip past the hex digits */
-        continue;
-      } else {
-        /* Lone '%' or incomplete sequence - needs encoding as %25 */
-        return false;
       }
+      /* Lone '%' or incomplete sequence - needs encoding as %25 */
+      return false;
     }
 
     /* Any other character needs encoding (space, '(', ')', '[', ']', etc.) */
diff --git a/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc b/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc
index 6aa2b815b85..dbb345784f2 100644
--- a/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc
+++ b/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc
@@ -150,18 +150,21 @@ TEST_CASE("isUriEncoded(): reserved chars in the input", "[AWS][auth][utility]")
 }
 
 /*
- * BUG: isUriEncoded() returns true if ANY %XX sequence is found, even if other
- * reserved characters are NOT encoded. This causes canonicalEncode() to skip
- * encoding, resulting in signature mismatch with S3.
+ * This test verifies the fix for a bug where isUriEncoded() would return true
+ * if ANY %XX sequence was found, even if other reserved characters were NOT
+ * encoded. That caused canonicalEncode() to skip encoding, resulting in
+ * signature mismatch with S3.
  *
- * Example from YTSATS-4835:
- *   Client sends: /app/%28channel%29/%5B%5Bparts%5D%5D/page.js
- *   ATS decodes to: /app/(channel)/%5B%5Bparts%5D%5D/page.js  (partial decode)
- *   isUriEncoded() sees %5B -> returns true (incorrectly assumes fully encoded)
- *   canonicalEncode() returns as-is: /app/(channel)/%5B%5Bparts%5D%5D/page.js
- *   Signature calculated for partially-encoded path
- *   S3 expects signature for: /app/%28channel%29/%5B%5Bparts%5D%5D/page.js
+ * Historical example (now fixed):
+ *   Client sent: /app/(channel)/%5B%5Bparts%5D%5D/page.js  (mixed encoding)
+ *   Old isUriEncoded() saw %5B -> returned true (incorrectly assumed fully encoded)
+ *   Old canonicalEncode() returned as-is without normalizing
+ *   Signature was calculated for partially-encoded path
+ *   S3 expected signature for: /app/%28channel%29/%5B%5Bparts%5D%5D/page.js
  *   Result: 403 signature mismatch
+ *
+ * With the fix, isUriEncoded() now returns false for mixed-encoding URLs,
+ * triggering decode/re-encode to produce the correct canonical form.
  */
 TEST_CASE("isUriEncoded(): mixed encoding with unencoded parentheses and encoded brackets", "[AWS][auth][utility]")
 {
diff --git a/tests/gold_tests/pluginTest/origin_server_auth/s3_url_encoding.test.py b/tests/gold_tests/pluginTest/origin_server_auth/s3_url_encoding.test.py
index 3282d60e2f7..7c2e4086e72 100644
--- a/tests/gold_tests/pluginTest/origin_server_auth/s3_url_encoding.test.py
+++ b/tests/gold_tests/pluginTest/origin_server_auth/s3_url_encoding.test.py
@@ -1,23 +1,26 @@
 '''
-Test origin_server_auth URL encoding bug (YTSATS-4835)
+Test origin_server_auth URL encoding fix (verifies fix for mixed-encoding bug)
 
-This test demonstrates a bug where mixed URL encoding in the request path
-causes signature mismatch with S3.
+This test verifies the fix for a bug where mixed URL encoding in request paths
+caused signature mismatch with S3.
 
-Bug: When a URL has SOME characters encoded (e.g., %5B) but others NOT encoded
-(e.g., parentheses), the isUriEncoded() function incorrectly returns true,
-causing canonicalEncode() to skip re-encoding. This results in a signature
-calculated for a partially-encoded path, which won't match what S3 expects.
+Fixed bug: When a URL had SOME characters encoded (e.g., %5B) but others NOT
+encoded (e.g., parentheses), the old isUriEncoded() function incorrectly
+returned true, causing canonicalEncode() to skip re-encoding. This resulted
+in a signature calculated for a partially-encoded path, which didn't match
+what S3 expected.
 
-Example:
+With the fix, isUriEncoded() now correctly returns false for mixed-encoding
+URLs, triggering the decode/re-encode path to normalize the URL.
+
+Example (now handled correctly):
   Client sends:    /app/(channel)/%5B%5Bparts%5D%5D/page.js
-  isUriEncoded():  Returns true (finds %5B)
-  canonicalEncode(): Returns as-is (thinks already encoded)
-  Signature for:   /app/(channel)/%5B%5Bparts%5D%5D/page.js  <- WRONG
-  S3 expects:      /app/%28channel%29/%5B%5Bparts%5D%5D/page.js  <- CORRECT
+  isUriEncoded():  Returns false (detects unencoded parentheses)
+  canonicalEncode(): Decodes then re-encodes properly
+  Signature for:   /app/%28channel%29/%5B%5Bparts%5D%5D/page.js  <- CORRECT
 
-This test sends requests with various URL encodings and captures what
-the origin receives, including the Authorization header signature.
+This test sends requests with various URL encodings and verifies that
+the origin receives correctly normalized Authorization headers.
 '''
 #  Licensed to the Apache Software Foundation (ASF) under one
 #  or more contributor license agreements.  See the NOTICE file

From 6e4c062b247f1c20e3f21c3fefbea367b313d208 Mon Sep 17 00:00:00 2001
From: Bryan Call <bcall@apache.org>
Date: Tue, 13 Jan 2026 09:57:53 -0800
Subject: [PATCH 6/6] Rename isUriEncoded() to isCanonical() for clarity

The function now returns true for strings that are already in canonical form,
which includes both:
- Properly percent-encoded strings with uppercase hex
- Strings with only unreserved characters (no encoding needed)

The old name 'isUriEncoded' was misleading since it returned true for
strings that have NO encoding at all.
---
 plugins/origin_server_auth/aws_auth_v4.cc     | 24 ++---
 .../unit_tests/test_aws_auth_v4.cc            | 98 +++++++++----------
 .../unit_tests/test_aws_auth_v4.h             |  2 +-
 3 files changed, 60 insertions(+), 64 deletions(-)

diff --git a/plugins/origin_server_auth/aws_auth_v4.cc b/plugins/origin_server_auth/aws_auth_v4.cc
index 3e90c0b3297..547b30899df 100644
--- a/plugins/origin_server_auth/aws_auth_v4.cc
+++ b/plugins/origin_server_auth/aws_auth_v4.cc
@@ -137,26 +137,22 @@ uriEncode(const String &in, bool isObjectName)
 }
 
 /**
- * @brief Check if a string is FULLY URI-encoded per AWS SigV4 canonical encoding rules.
+ * @brief Check if a string is already in AWS SigV4 canonical form.
+ *
+ * A string is canonical if it either:
+ *   1. Contains only unreserved characters (A-Z, a-z, 0-9, '-', '.', '_', '~')
+ *      and optionally '/' for object names - no encoding needed
+ *   2. Is properly percent-encoded with UPPERCASE hex digits
  *
  * @see AWS spec: http://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html
  * @see https://docs.aws.amazon.com/IAM/latest/UserGuide/create-signed-request.html
  *
- * @note According to RFC 3986, if a string contains '%' it must be followed by 2 hexadecimal
- *       symbols, otherwise '%' should be encoded as %25:
- *          https://tools.ietf.org/html/rfc3986#section-2.1
- *
- * @note This function checks if ALL characters that need encoding ARE encoded, AND that
- *       all percent-encoded sequences use UPPERCASE hex digits (per AWS SigV4 requirement).
- *       A string with mixed encoding (some chars encoded, some not) or lowercase hex digits
- *       returns false, which triggers canonicalEncode() to normalize via decode/re-encode.
- *
- * @param in string to be URI checked
+ * @param in string to check
  * @param isObjectName if true, '/' is allowed unencoded (object name context).
- * @return true if fully and correctly encoded, false if normalization is needed.
+ * @return true if already canonical (no processing needed), false if normalization required.
  */
 bool
-isUriEncoded(const String &in, bool isObjectName)
+isCanonical(const String &in, bool isObjectName)
 {
   for (size_t pos = 0; pos < in.length(); pos++) {
     unsigned char c = static_cast<unsigned char>(in[pos]);
@@ -206,7 +202,7 @@ isUriEncoded(const String &in, bool isObjectName)
 String
 canonicalEncode(const String &in, bool isObjectName)
 {
-  if (isUriEncoded(in, isObjectName)) {
+  if (isCanonical(in, isObjectName)) {
     /* Fully URI-encoded with uppercase hex, return as-is */
     return in;
   }
diff --git a/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc b/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc
index dbb345784f2..8fbdc459100 100644
--- a/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc
+++ b/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.cc
@@ -68,65 +68,65 @@ TEST_CASE("uriEncode(): encode reserved chars in an object name", "[AWS][auth][u
   CHECK_FALSE(encoded.compare("%20/%21%22%23%24%25%26%27%28%29%2A%20%2C%3A%3B%3C%3D%3E%3F%40%5B%5C%5D%5E%60%7B%7C%7D"));
 }
 
-TEST_CASE("isUriEncoded(): check an empty input", "[AWS][auth][utility]")
+TEST_CASE("isCanonical(): check an empty input", "[AWS][auth][utility]")
 {
   // Empty string has no characters that need encoding - it's already canonical
-  CHECK(true == isUriEncoded(""));
+  CHECK(true == isCanonical(""));
 }
 
-TEST_CASE("isUriEncoded(): '%' and nothing else", "[AWS][auth][utility]")
+TEST_CASE("isCanonical(): '%' and nothing else", "[AWS][auth][utility]")
 {
-  CHECK(false == isUriEncoded("%"));
+  CHECK(false == isCanonical("%"));
 }
 
-TEST_CASE("isUriEncoded(): '%' but no hex digits", "[AWS][auth][utility]")
+TEST_CASE("isCanonical(): '%' but no hex digits", "[AWS][auth][utility]")
 {
-  CHECK(false == isUriEncoded("XXX%XXX"));
+  CHECK(false == isCanonical("XXX%XXX"));
 }
 
-TEST_CASE("isUriEncoded(): '%' but only one hex digit", "[AWS][auth][utility]")
+TEST_CASE("isCanonical(): '%' but only one hex digit", "[AWS][auth][utility]")
 {
-  CHECK(false == isUriEncoded("XXXXX%1XXXXXX"));
-  CHECK(false == isUriEncoded("XXX%1")); // test end of string case
+  CHECK(false == isCanonical("XXXXX%1XXXXXX"));
+  CHECK(false == isCanonical("XXX%1")); // test end of string case
 }
 
-TEST_CASE("isUriEncoded(): '%' and 2 hex digit", "[AWS][auth][utility]")
+TEST_CASE("isCanonical(): '%' and 2 hex digit", "[AWS][auth][utility]")
 {
-  CHECK(true == isUriEncoded("XXX%12XXX"));
-  CHECK(true == isUriEncoded("XXX%12")); // test end of string case
+  CHECK(true == isCanonical("XXX%12XXX"));
+  CHECK(true == isCanonical("XXX%12")); // test end of string case
 }
 
-TEST_CASE("isUriEncoded(): space not encoded", "[AWS][auth][utility]")
+TEST_CASE("isCanonical(): space not encoded", "[AWS][auth][utility]")
 {
   // Having a space always means it was not encoded.
-  CHECK(false == isUriEncoded("XXXXX XXXXXX"));
+  CHECK(false == isCanonical("XXXXX XXXXXX"));
 }
 
-TEST_CASE("isUriEncoded(): '/' in strings which are not object names", "[AWS][auth][utility]")
+TEST_CASE("isCanonical(): '/' in strings which are not object names", "[AWS][auth][utility]")
 {
   // This is not an object name so if we have '/' => the string was not encoded.
-  CHECK(false == isUriEncoded("XXXXX/XXXXXX", /* isObjectName */ false));
+  CHECK(false == isCanonical("XXXXX/XXXXXX", /* isObjectName */ false));
 
   // There is no '/' and '%2F' shows that it was encoded.
-  CHECK(true == isUriEncoded("XXXXX%2FXXXXXX", /* isObjectName */ false));
+  CHECK(true == isCanonical("XXXXX%2FXXXXXX", /* isObjectName */ false));
 
   // This is not an object name so if we have '/' => the string was not encoded despite '%20' in it.
-  CHECK(false == isUriEncoded("XXXXX/%20XXXXX", /* isObjectName */ false));
+  CHECK(false == isCanonical("XXXXX/%20XXXXX", /* isObjectName */ false));
 }
 
-TEST_CASE("isUriEncoded(): '/' in strings that are object names", "[AWS][auth][utility]")
+TEST_CASE("isCanonical(): '/' in strings that are object names", "[AWS][auth][utility]")
 {
   // Object name with only unreserved chars and slashes - already canonical, return true
-  CHECK(true == isUriEncoded("XXXXX/XXXXXX", /* isObjectName */ true));
+  CHECK(true == isCanonical("XXXXX/XXXXXX", /* isObjectName */ true));
 
   // Encoded slash - properly encoded, return true
-  CHECK(true == isUriEncoded("XXXXX%2FXXXXXX", /* isObjectName */ true));
+  CHECK(true == isCanonical("XXXXX%2FXXXXXX", /* isObjectName */ true));
 
   // Mix of slash and encoded space - properly encoded, return true
-  CHECK(true == isUriEncoded("XXXXX/%20XXXXX", /* isObjectName */ true));
+  CHECK(true == isCanonical("XXXXX/%20XXXXX", /* isObjectName */ true));
 }
 
-TEST_CASE("isUriEncoded(): no reserved chars in the input", "[AWS][auth][utility]")
+TEST_CASE("isCanonical(): no reserved chars in the input", "[AWS][auth][utility]")
 {
   // Strings with only unreserved characters are already in canonical form
   // and don't need encoding - return true to skip unnecessary decode/encode
@@ -134,39 +134,39 @@ TEST_CASE("isUriEncoded(): no reserved chars in the input", "[AWS][auth][utility
                             "abcdefghijklmnopqrstuvwxyz"
                             "0123456789"
                             "-._~";
-  CHECK(true == isUriEncoded(unreserved));
+  CHECK(true == isCanonical(unreserved));
 
   // Simple paths with only unreserved chars should also return true
-  CHECK(true == isUriEncoded("/something/foo.jpg", /* isObjectName */ true));
-  CHECK(true == isUriEncoded("/path/to/file-name_v2.txt", /* isObjectName */ true));
+  CHECK(true == isCanonical("/something/foo.jpg", /* isObjectName */ true));
+  CHECK(true == isCanonical("/path/to/file-name_v2.txt", /* isObjectName */ true));
 }
 
-TEST_CASE("isUriEncoded(): reserved chars in the input", "[AWS][auth][utility]")
+TEST_CASE("isCanonical(): reserved chars in the input", "[AWS][auth][utility]")
 {
   // some printable but reserved chars " /!\"#$%&'()*+,:;<=>?@[\\]^`{|}"
   const String encoded = "%20%2F%21%22%23%24%25%26%27%28%29%2A%2B%2C%3A%3B%3C%3D%3E%3F%40%5B%5C%5D%5E%60%7B%7C%7D";
 
-  CHECK(true == isUriEncoded(encoded));
+  CHECK(true == isCanonical(encoded));
 }
 
 /*
- * This test verifies the fix for a bug where isUriEncoded() would return true
+ * This test verifies the fix for a bug where isCanonical() would return true
  * if ANY %XX sequence was found, even if other reserved characters were NOT
  * encoded. That caused canonicalEncode() to skip encoding, resulting in
  * signature mismatch with S3.
  *
  * Historical example (now fixed):
  *   Client sent: /app/(channel)/%5B%5Bparts%5D%5D/page.js  (mixed encoding)
- *   Old isUriEncoded() saw %5B -> returned true (incorrectly assumed fully encoded)
+ *   Old isCanonical() saw %5B -> returned true (incorrectly assumed fully encoded)
  *   Old canonicalEncode() returned as-is without normalizing
  *   Signature was calculated for partially-encoded path
  *   S3 expected signature for: /app/%28channel%29/%5B%5Bparts%5D%5D/page.js
  *   Result: 403 signature mismatch
  *
- * With the fix, isUriEncoded() now returns false for mixed-encoding URLs,
+ * With the fix, isCanonical() now returns false for mixed-encoding URLs,
  * triggering decode/re-encode to produce the correct canonical form.
  */
-TEST_CASE("isUriEncoded(): mixed encoding with unencoded parentheses and encoded brackets", "[AWS][auth][utility]")
+TEST_CASE("isCanonical(): mixed encoding with unencoded parentheses and encoded brackets", "[AWS][auth][utility]")
 {
   // Path with parentheses NOT encoded but brackets ARE encoded
   // This is the bug case from YTSATS-4835
@@ -174,17 +174,17 @@ TEST_CASE("isUriEncoded(): mixed encoding with unencoded parentheses and encoded
 
   // Fixed behavior: returns false because parentheses () are not encoded
   // Even though %5B is present, the string is only PARTIALLY encoded
-  CHECK(false == isUriEncoded(mixedEncoding, /* isObjectName */ true));
+  CHECK(false == isCanonical(mixedEncoding, /* isObjectName */ true));
 }
 
-TEST_CASE("isUriEncoded(): unencoded parentheses should indicate not fully encoded", "[AWS][auth][utility]")
+TEST_CASE("isCanonical(): unencoded parentheses should indicate not fully encoded", "[AWS][auth][utility]")
 {
   // Parentheses are reserved characters per AWS spec and should be encoded
   // If we see unencoded parentheses, the string is NOT properly URI-encoded
   const String withParens = "/app/(channel)/test.js";
 
   // Returns false (no encoded chars found, string needs encoding)
-  CHECK(false == isUriEncoded(withParens, /* isObjectName */ true));
+  CHECK(false == isCanonical(withParens, /* isObjectName */ true));
 
   // After encoding, parentheses become %28 and %29
   String encoded = uriEncode(withParens, /* isObjectName */ true);
@@ -266,21 +266,21 @@ TEST_CASE("uriEncode(): S3 safe chars that need SigV4 encoding", "[AWS][auth][ut
   CHECK(encoded == "/bucket/file%21name%2Awith%27quotes%28and%29parens.js");
 }
 
-TEST_CASE("isUriEncoded(): mixed encoding with other S3 safe chars", "[AWS][auth][utility]")
+TEST_CASE("isCanonical(): mixed encoding with other S3 safe chars", "[AWS][auth][utility]")
 {
   // Test mixed encoding with exclamation mark (S3 safe, needs SigV4 encoding)
-  CHECK(false == isUriEncoded("/path/file!name/%20space/", /* isObjectName */ true));
+  CHECK(false == isCanonical("/path/file!name/%20space/", /* isObjectName */ true));
 
   // Test mixed encoding with asterisk
-  CHECK(false == isUriEncoded("/path/file*name/%20space/", /* isObjectName */ true));
+  CHECK(false == isCanonical("/path/file*name/%20space/", /* isObjectName */ true));
 
   // Test mixed encoding with single quote
-  CHECK(false == isUriEncoded("/path/file'name/%20space/", /* isObjectName */ true));
+  CHECK(false == isCanonical("/path/file'name/%20space/", /* isObjectName */ true));
 
   // All properly encoded should return true
-  CHECK(true == isUriEncoded("/path/file%21name/%20space/", /* isObjectName */ true));
-  CHECK(true == isUriEncoded("/path/file%2Aname/%20space/", /* isObjectName */ true));
-  CHECK(true == isUriEncoded("/path/file%27name/%20space/", /* isObjectName */ true));
+  CHECK(true == isCanonical("/path/file%21name/%20space/", /* isObjectName */ true));
+  CHECK(true == isCanonical("/path/file%2Aname/%20space/", /* isObjectName */ true));
+  CHECK(true == isCanonical("/path/file%27name/%20space/", /* isObjectName */ true));
 }
 
 TEST_CASE("canonicalEncode(): handles all S3 safe chars correctly", "[AWS][auth][utility]")
@@ -314,20 +314,20 @@ TEST_CASE("canonicalEncode(): handles all S3 safe chars correctly", "[AWS][auth]
  *   Need to document/verify this matches AWS SigV4 expectations.
  */
 
-TEST_CASE("isUriEncoded(): lowercase hex digits should NOT be considered fully encoded", "[AWS][auth][utility]")
+TEST_CASE("isCanonical(): lowercase hex digits should NOT be considered fully encoded", "[AWS][auth][utility]")
 {
   // AWS SigV4 requires uppercase hex digits. URLs with lowercase hex need normalization.
   // Example: %2f should become %2F after canonical encoding
 
   // Lowercase hex - should return false to trigger normalization
-  CHECK(false == isUriEncoded("/path/file%2ftest", /* isObjectName */ true)); // lowercase 'f'
-  CHECK(true == isUriEncoded("/path/file%2Ftest", /* isObjectName */ true));  // uppercase 'F' - this IS properly encoded
-  CHECK(false == isUriEncoded("/path/%5btest%5d", /* isObjectName */ true));  // lowercase brackets
-  CHECK(true == isUriEncoded("/path/%5Btest%5D", /* isObjectName */ true));   // uppercase brackets - properly encoded
+  CHECK(false == isCanonical("/path/file%2ftest", /* isObjectName */ true)); // lowercase 'f'
+  CHECK(true == isCanonical("/path/file%2Ftest", /* isObjectName */ true));  // uppercase 'F' - this IS properly encoded
+  CHECK(false == isCanonical("/path/%5btest%5d", /* isObjectName */ true));  // lowercase brackets
+  CHECK(true == isCanonical("/path/%5Btest%5D", /* isObjectName */ true));   // uppercase brackets - properly encoded
 
   // Mixed case - should return false
-  CHECK(false == isUriEncoded("/path/%5btest%5D", /* isObjectName */ true)); // mixed: lowercase 'b', uppercase 'D'
-  CHECK(false == isUriEncoded("/path/%5Btest%5d", /* isObjectName */ true)); // mixed: uppercase 'B', lowercase 'd'
+  CHECK(false == isCanonical("/path/%5btest%5D", /* isObjectName */ true)); // mixed: lowercase 'b', uppercase 'D'
+  CHECK(false == isCanonical("/path/%5Btest%5d", /* isObjectName */ true)); // mixed: uppercase 'B', lowercase 'd'
 }
 
 TEST_CASE("canonicalEncode(): lowercase hex should be normalized to uppercase", "[AWS][auth][utility]")
diff --git a/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.h b/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.h
index 51d12326c7f..02507c49a05 100644
--- a/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.h
+++ b/plugins/origin_server_auth/unit_tests/test_aws_auth_v4.h
@@ -122,7 +122,7 @@ class MockTsInterface : public TsInterface
 String base16Encode(const char *in, size_t inLen);
 String uriDecode(const String &in);
 String uriEncode(const String &in, bool isObjectName = false);
-bool   isUriEncoded(const String &in, bool isObjectName = false);
+bool   isCanonical(const String &in, bool isObjectName = false);
 String canonicalEncode(const String &in, bool isObjectName = false);
 String lowercase(const char *in, size_t inLen);
 String trimWhiteSpacesAndSqueezeInnerSpaces(const char *in, size_t inLen);