Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Unreleased

### Added
- add extra Unicode data from supplementary planes [#12](https://github.com/ghostflyby/PinIn/pull/12)

### Changed

Expand Down
2 changes: 1 addition & 1 deletion build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ plugins {
}

group = "dev.ghostflyby"
version = "1.7.1"
version = "1.7.2"

repositories {
mavenCentral()
Expand Down
48 changes: 26 additions & 22 deletions src/main/java/me/towdium/pinin/DictLoader.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,6 @@ interface CodePointConsumer {
class Default implements DictLoader {
@Override
public void load(BiConsumer<Character, String[]> feed) {
if (feed == null) return;
loadInternal(feed, null);
}

@Override
public void loadCodePoints(CodePointConsumer feed) {
if (feed == null) return;
loadInternal(null, feed);
}

private void loadInternal(BiConsumer<Character, String[]> charFeed, CodePointConsumer codePointFeed) {
InputStream is = PinIn.class.getResourceAsStream("data.txt");
if (is == null) return;
try (BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
Expand All @@ -43,23 +32,38 @@ private void loadInternal(BiConsumer<Character, String[]> charFeed, CodePointCon
if (separator <= 0 || separator + 2 > line.length()) continue;

String[] records = line.substring(separator + 2).split(", ");

char first = line.charAt(0);
if (Character.isHighSurrogate(first) && line.length() > 1) {
char second = line.charAt(1);
if (Character.isLowSurrogate(second)) {
if (codePointFeed != null) {
int codePoint = Character.toCodePoint(first, second);
codePointFeed.accept(codePoint, records);
}
continue;
}
}
feed.accept(first, records);

}
} catch (IOException e) {
e.printStackTrace();
}

}

if (charFeed != null) charFeed.accept(first, records);
/**
 * Reads the bundled {@code extra.txt} resource and feeds each entry to the
 * given consumer as a code point plus its records.
 *
 * <p>Each usable line has the form {@code <char>: rec1, rec2, ...}. The key is
 * the first code point of the line (so a surrogate pair is passed as a single
 * {@code int}), and the records are the {@code ", "}-separated tokens after
 * the first {@code ": "}. Lines that are empty, have no {@code ": "}
 * separator, start with the separator, or have nothing after it are skipped.
 *
 * <p>Nothing happens when {@code feed} is {@code null} or the resource cannot
 * be found. An {@link IOException} while reading is printed and otherwise
 * ignored, so loading is best-effort.
 *
 * @param feed receiver for each {@code (codePoint, records)} pair; may be {@code null}
 */
@Override
public void loadCodePoints(CodePointConsumer feed) {
    if (feed == null) return;
    InputStream is = PinIn.class.getResourceAsStream("extra.txt");
    if (is == null) return;
    try (BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
        String line;
        while ((line = br.readLine()) != null) {
            // Search for the full ": " delimiter rather than a bare ':' so the
            // validity check and the parsing below agree on what a separator is.
            int separator = line.indexOf(": ");
            // Require a non-empty key before the separator and at least one
            // character after it; anything else is a malformed line.
            if (separator <= 0 || separator + 2 >= line.length()) continue;

            // codePointAt combines a leading surrogate pair into one code point.
            int code = line.codePointAt(0);
            String[] records = line.substring(separator + 2).split(", ");
            feed.accept(code, records);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

}
}
9 changes: 9 additions & 0 deletions src/main/resources/me/towdium/pinin/extra.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
𬬻: lu2
𬭊: du4
𬭳: xi3
𬭛: bo1
𬭶: hei1
𫟼: da2
𬬭: lun2
𫓧: fu1
𫟷: li4
12 changes: 7 additions & 5 deletions src/test/java/me/towdium/pinin/PinInTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -214,18 +214,20 @@ public void dictUnicodeExtended() {
PinIn p = new PinIn();
TreeSearcher<Integer> searcher = new TreeSearcher<>(CONTAIN, p);
searcher.put("𫟼锭", 0);
assert !searcher.search("da2d").contains(0);
assert !p.contains("𫟼", "da2");
assert searcher.search("da2d").contains(0);
assert p.contains("𫟼", "da2");
assert !searcher.search("ta2d").contains(0);
assert !p.contains("𫟼", "ta2");

p = new PinIn(new DictLoader.Default() {
@Override
public void loadCodePoints(CodePointConsumer feed) {
feed.accept("𫟼".codePointAt(0), new String[]{"da2"});
feed.accept("𫟼".codePointAt(0), new String[]{"ta2"});
}
});
searcher = new TreeSearcher<>(CONTAIN, p);
searcher.put("𫟼锭", 0);
assert searcher.search("da2d").contains(0);
assert p.contains("𫟼", "da2");
assert searcher.search("ta2d").contains(0);
assert p.contains("𫟼", "ta2");
}
}