Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion src/graphic/helper/parseText.ts
Original file line number Diff line number Diff line change
Expand Up @@ -153,12 +153,23 @@ function truncateSingleLine(
break;
}

const subLength = j === 0
let subLength = j === 0
? estimateLength(textLine, contentWidth, fontMeasureInfo)
: lineWidth > 0
? Math.floor(textLine.length * contentWidth / lineWidth)
: 0;

// `subLength` is a UTF-16 code unit count, so it can fall between the
// two halves of a surrogate pair (for example CJK Extension B characters
// such as U+20BB7). Slicing there would leave an orphaned lead surrogate
// and corrupt the character, so step back to the pair boundary.
if (subLength > 0 && subLength < textLine.length) {
const lastCharCode = textLine.charCodeAt(subLength - 1);
if (lastCharCode >= 0xD800 && lastCharCode <= 0xDBFF) {
subLength -= 1;
}
}

textLine = textLine.substr(0, subLength);
lineWidth = measureWidth(fontMeasureInfo, textLine);
}
Expand Down
73 changes: 73 additions & 0 deletions test/ut/spec/graphic/truncateTextSurrogate.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import { truncateText } from '../../../../src/graphic/helper/parseText';
import { platformApi, setPlatformAPI } from '../../../../src/core/platform';

// '𠮷' is U+20BB7 (a CJK Extension B character), which lies outside the Basic
// Multilingual Plane. In UTF-16 it is encoded as the surrogate pair
// 0xD842 (lead) 0xDFB7 (trail), so this one character has `length === 2`.
const KANJI = '𠮷';

/**
 * Canvas-free stand-in for `measureText` that yields reproducible widths.
 *
 * Width model:
 * - a code unit below 0x80 (ASCII) counts as 1;
 * - any other single code unit counts as 2;
 * - a lead surrogate (0xD800-0xDBFF) that has a following code unit is
 *   consumed together with that unit and counts as one glyph of width 2.
 *
 * @param text String to measure.
 * @returns An object whose `width` is the summed width of `text`.
 */
function fakeMeasureText(text: string): { width: number } {
    const total = text.length;
    let width = 0;
    let pos = 0;

    while (pos < total) {
        const unit = text.charCodeAt(pos);
        const isLeadSurrogate = unit >= 0xD800 && unit <= 0xDBFF;

        // A lead surrogate swallows the unit after it: two code units, one glyph.
        if (isLeadSurrogate && pos + 1 < total) {
            width += 2;
            pos += 2;
            continue;
        }

        width += unit < 0x80 ? 1 : 2;
        pos += 1;
    }

    return { width };
}

/**
 * Reports whether `str` contains an unpaired UTF-16 surrogate.
 *
 * A lead surrogate (0xD800-0xDBFF) must be immediately followed by a trail
 * surrogate (0xDC00-0xDFFF); a trail surrogate is only valid in that position.
 *
 * @param str String to inspect.
 * @returns `true` if any lead or trail surrogate is unpaired, otherwise `false`.
 */
function hasLoneSurrogate(str: string): boolean {
    const isLead = (unit: number): boolean => unit >= 0xD800 && unit <= 0xDBFF;
    const isTrail = (unit: number): boolean => unit >= 0xDC00 && unit <= 0xDFFF;

    let index = 0;
    while (index < str.length) {
        const unit = str.charCodeAt(index);

        // A trail surrogate reached here was not consumed by a preceding lead.
        if (isTrail(unit)) {
            return true;
        }

        if (isLead(unit)) {
            // Past the end `charCodeAt` yields NaN, which fails `isTrail`, so a
            // lead surrogate at the very end is reported as lone.
            if (!isTrail(str.charCodeAt(index + 1))) {
                return true;
            }
            // Well-formed pair: skip both halves.
            index += 2;
            continue;
        }

        index += 1;
    }

    return false;
}

// Suite for `truncateText` on text containing UTF-16 surrogate pairs.
// `platformApi.measureText` is replaced by `fakeMeasureText` for the whole suite
// and put back afterwards.
describe('truncateText surrogate pairs', () => {
    // This font string is meant to be unique to this file so that
    // `ensureFontMeasureInfo` does not reuse another test's cache.
    const font = '12px ZRTruncateSurrogateTestFont';
    let savedMeasureText: typeof platformApi.measureText;

    beforeAll(() => {
        // Keep the real implementation so `afterAll` can restore it.
        savedMeasureText = platformApi.measureText;
        setPlatformAPI({ measureText: fakeMeasureText });
    });

    afterAll(() => {
        setPlatformAPI({ measureText: savedMeasureText });
    });

    it('should not split a surrogate pair (CJK Extension B) when truncating', () => {
        // Four width-2 glyphs (8 in total under the fake model) against a limit of 6.
        const fourKanji = KANJI + KANJI + KANJI + KANJI;
        const truncated = truncateText(fourKanji, 6, font, '');

        expect(hasLoneSurrogate(truncated)).toBe(false);
        expect(truncated).toBe(KANJI);
    });

    it('should keep complete characters when the text fits', () => {
        // Two width-2 glyphs (4 in total) against a limit of 20.
        const text = KANJI + KANJI;
        const truncated = truncateText(text, 20, font, '');

        expect(truncated).toBe(text);
    });

    it('should keep truncating ASCII text by character as before', () => {
        // Eight width-1 characters against a limit of 4.
        const truncated = truncateText('aaaaaaaa', 4, font, '');

        expect(hasLoneSurrogate(truncated)).toBe(false);
        expect(truncated).toBe('aaa');
    });
});