diff --git a/CHANGELOG.md b/CHANGELOG.md index 900f2f3..c69999d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Unreleased ### Added +- add extra Unicode data from supplementary planes [#12](https://github.com/ghostflyby/PinIn/pull/12) ### Changed diff --git a/build.gradle.kts b/build.gradle.kts index 5265f09..da9c58f 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -7,7 +7,7 @@ plugins { } group = "dev.ghostflyby" -version = "1.7.1" +version = "1.7.2" repositories { mavenCentral() diff --git a/src/main/java/me/towdium/pinin/DictLoader.java b/src/main/java/me/towdium/pinin/DictLoader.java index 203878d..4234ffc 100644 --- a/src/main/java/me/towdium/pinin/DictLoader.java +++ b/src/main/java/me/towdium/pinin/DictLoader.java @@ -22,17 +22,6 @@ interface CodePointConsumer { class Default implements DictLoader { @Override public void load(BiConsumer feed) { - if (feed == null) return; - loadInternal(feed, null); - } - - @Override - public void loadCodePoints(CodePointConsumer feed) { - if (feed == null) return; - loadInternal(null, feed); - } - - private void loadInternal(BiConsumer charFeed, CodePointConsumer codePointFeed) { InputStream is = PinIn.class.getResourceAsStream("data.txt"); if (is == null) return; try (BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) { @@ -43,23 +32,38 @@ private void loadInternal(BiConsumer charFeed, CodePointCon if (separator <= 0 || separator + 2 > line.length()) continue; String[] records = line.substring(separator + 2).split(", "); + char first = line.charAt(0); - if (Character.isHighSurrogate(first) && line.length() > 1) { - char second = line.charAt(1); - if (Character.isLowSurrogate(second)) { - if (codePointFeed != null) { - int codePoint = Character.toCodePoint(first, second); - codePointFeed.accept(codePoint, records); - } - continue; - } - } + feed.accept(first, records); + + } + } catch (IOException e) { + e.printStackTrace(); + } + + } - if (charFeed != null) 
charFeed.accept(first, records); + @Override + public void loadCodePoints(CodePointConsumer feed) { + InputStream is = PinIn.class.getResourceAsStream("extra.txt"); + if (is == null) return; + try (BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) { + String line; + while ((line = br.readLine()) != null) { + if (line.isEmpty()) continue; + int separator = line.indexOf(':'); + if (separator <= 0 || separator + 2 > line.length()) continue; + + String[] code2records = line.split(": "); + int code = code2records[0].codePointAt(0); + String[] records = code2records[1].split(", "); + feed.accept(code, records); } } catch (IOException e) { e.printStackTrace(); } + } + } } diff --git a/src/main/resources/me/towdium/pinin/extra.txt b/src/main/resources/me/towdium/pinin/extra.txt new file mode 100644 index 0000000..1007c3e --- /dev/null +++ b/src/main/resources/me/towdium/pinin/extra.txt @@ -0,0 +1,9 @@ +𬬻: lu2 +𬭊: du4 +𬭳: xi3 +𬭛: bo1 +𬭶: hei1 +𫟼: da2 +𬬭: lun2 +𫓧: fu1 +𫟷: li4 diff --git a/src/test/java/me/towdium/pinin/PinInTest.java b/src/test/java/me/towdium/pinin/PinInTest.java index f13922a..d46a661 100644 --- a/src/test/java/me/towdium/pinin/PinInTest.java +++ b/src/test/java/me/towdium/pinin/PinInTest.java @@ -214,18 +214,20 @@ public void dictUnicodeExtended() { PinIn p = new PinIn(); TreeSearcher searcher = new TreeSearcher<>(CONTAIN, p); searcher.put("𫟼锭", 0); - assert !searcher.search("da2d").contains(0); - assert !p.contains("𫟼", "da2"); + assert searcher.search("da2d").contains(0); + assert p.contains("𫟼", "da2"); + assert !searcher.search("ta2d").contains(0); + assert !p.contains("𫟼", "ta2"); p = new PinIn(new DictLoader.Default() { @Override public void loadCodePoints(CodePointConsumer feed) { - feed.accept("𫟼".codePointAt(0), new String[]{"da2"}); + feed.accept("𫟼".codePointAt(0), new String[]{"ta2"}); } }); searcher = new TreeSearcher<>(CONTAIN, p); searcher.put("𫟼锭", 0); - assert searcher.search("da2d").contains(0); - 
assert p.contains("𫟼", "da2"); + assert searcher.search("ta2d").contains(0); + assert p.contains("𫟼", "ta2"); } }