From 87cdaff04eefb84800e019346820b45263e706ca Mon Sep 17 00:00:00 2001
From: greymoth <246701683+greymoth-jp@users.noreply.github.com>
Date: Tue, 30 Jun 2026 05:51:31 +0900
Subject: [PATCH] fix(text): avoid splitting surrogate pairs when truncating
 text

---
 src/graphic/helper/parseText.ts               | 13 +++-
 .../graphic/truncateTextSurrogate.test.ts     | 73 +++++++++++++++++++
 2 files changed, 85 insertions(+), 1 deletion(-)
 create mode 100644 test/ut/spec/graphic/truncateTextSurrogate.test.ts

diff --git a/src/graphic/helper/parseText.ts b/src/graphic/helper/parseText.ts
index 69c63a694..875b3a51b 100644
--- a/src/graphic/helper/parseText.ts
+++ b/src/graphic/helper/parseText.ts
@@ -153,12 +153,23 @@ function truncateSingleLine(
             break;
         }
 
-        const subLength = j === 0
+        let subLength = j === 0
             ? estimateLength(textLine, contentWidth, fontMeasureInfo)
             : lineWidth > 0
             ? Math.floor(textLine.length * contentWidth / lineWidth)
             : 0;
 
+        // `subLength` is a UTF-16 code unit count, so it can fall between the
+        // two halves of a surrogate pair (for example CJK Extension B characters
+        // such as U+20BB7). Slicing there would leave an orphaned lead surrogate
+        // and corrupt the character, so step back to the pair boundary.
+        if (subLength > 0 && subLength < textLine.length) {
+            const lastCharCode = textLine.charCodeAt(subLength - 1);
+            if (lastCharCode >= 0xD800 && lastCharCode <= 0xDBFF) {
+                subLength -= 1;
+            }
+        }
+
         textLine = textLine.substr(0, subLength);
         lineWidth = measureWidth(fontMeasureInfo, textLine);
     }
diff --git a/test/ut/spec/graphic/truncateTextSurrogate.test.ts b/test/ut/spec/graphic/truncateTextSurrogate.test.ts
new file mode 100644
index 000000000..bab13656e
--- /dev/null
+++ b/test/ut/spec/graphic/truncateTextSurrogate.test.ts
@@ -0,0 +1,73 @@
+import { truncateText } from '../../../../src/graphic/helper/parseText';
+import { platformApi, setPlatformAPI } from '../../../../src/core/platform';
+
+// '𠮷' is U+20BB7, encoded as the surrogate pair 0xD842 0xDFB7.
+const KANJI = '𠮷';
+
+// A deterministic width model so the test does not depend on a real canvas:
+// ASCII code points have width 1, everything else (e.g. fullwidth CJK) width 2.
+// A surrogate pair is measured as a single width-2 glyph, the same way a real
+// `measureText` would treat it.
+function fakeMeasureText(text: string): { width: number } { // test stand-in for measureText
+    let width = 0;
+    for (let i = 0; i < text.length; i++) {
+        const code = text.charCodeAt(i);
+        if (code >= 0xD800 && code <= 0xDBFF && i + 1 < text.length) {
+            width += 2; // lead surrogate plus the code unit after it: one width-2 glyph
+            i++; // consume that following code unit as well
+        }
+        else {
+            width += code < 0x80 ? 1 : 2; // ASCII: 1; all else (incl. a lead surrogate at the end): 2
+        }
+    }
+    return { width };
+}
+
+function hasLoneSurrogate(str: string): boolean { // true if any surrogate is unpaired
+    for (let i = 0; i < str.length; i++) {
+        const code = str.charCodeAt(i);
+        if (code >= 0xD800 && code <= 0xDBFF) { // lead (high) surrogate
+            const next = str.charCodeAt(i + 1); // NaN when i is the last index
+            if (!(next >= 0xDC00 && next <= 0xDFFF)) { // not followed by a trail surrogate
+                return true;
+            }
+            i++; // valid pair: skip its trail surrogate
+        }
+        else if (code >= 0xDC00 && code <= 0xDFFF) { // trail surrogate with no lead before it
+            return true;
+        }
+    }
+    return false;
+}
+
+describe('truncateText surrogate pairs', function () {
+    // A unique font so `ensureFontMeasureInfo` does not reuse another test's cache.
+    const font = '12px ZRTruncateSurrogateTestFont';
+    let originalMeasureText: typeof platformApi.measureText;
+
+    beforeAll(function () { // swap in the deterministic width model for this suite
+        originalMeasureText = platformApi.measureText;
+        setPlatformAPI({ measureText: fakeMeasureText });
+    });
+
+    afterAll(function () { // put back the measureText saved in beforeAll
+        setPlatformAPI({ measureText: originalMeasureText });
+    });
+
+    it('should not split a surrogate pair (CJK Extension B) when truncating', function () {
+        const result = truncateText(KANJI + KANJI + KANJI + KANJI, 6, font, ''); // fake width 8 vs. 6
+        expect(hasLoneSurrogate(result)).toBe(false);
+        expect(result).toBe(KANJI);
+    });
+
+    it('should keep complete characters when the text fits', function () {
+        const text = KANJI + KANJI; // fake width 4 vs. 20 passed below
+        expect(truncateText(text, 20, font, '')).toBe(text);
+    });
+
+    it('should keep truncating ASCII text by character as before', function () {
+        const result = truncateText('aaaaaaaa', 4, font, ''); // fake width 8 vs. 4
+        expect(hasLoneSurrogate(result)).toBe(false);
+        expect(result).toBe('aaa');
+    });
+});