-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathscelParser.py
More file actions
40 lines (32 loc) · 1.1 KB
/
scelParser.py
File metadata and controls
40 lines (32 loc) · 1.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import struct
class ScelParser():
    """Extract the word list from the binary contents of a ``.scel`` file.

    Layout as read by this parser:

    * Byte 4 of the header selects the offset at which the word table
      starts (``0x44`` -> ``0x2628``, ``0x45`` -> ``0x26c4``).
    * The word table is a sequence of records running to the end of the
      data.  Each record starts with two little-endian 16-bit fields: the
      number of words in the record and the byte size of a table that is
      skipped (called "pinyin" in the original code).
    * Each word is a little-endian 16-bit byte length, that many bytes of
      UTF-16LE text, and 12 trailing bytes that are skipped.
    """

    # Header byte 4 -> start offset of the word table.
    _WORD_TABLE_OFFSETS = {0x44: 0x2628, 0x45: 0x26c4}
    # The header is probed as 128 unsigned bytes so that inputs shorter
    # than 128 bytes are rejected with struct.error.
    _HEADER_PROBE = struct.Struct('128B')
    _RECORD_HEAD = struct.Struct('<HH')  # word count, skipped-table size
    _UINT16 = struct.Struct('<H')
    # Bytes skipped after every word.  NOTE(review): presumably per-word
    # extension data of fixed size -- confirm against the format.
    _WORD_TRAILER_SIZE = 12

    def __init__(self):
        pass

    def parse(self, content):
        """Return the distinct words stored in ``content``.

        Args:
            content: The complete file contents as ``bytes``.

        Returns:
            A list of the distinct words, ordered by first appearance in
            the data.

        Raises:
            struct.error: If ``content`` is shorter than 128 bytes, or a
                length field is cut off by the end of the data.
            ValueError: If byte 4 of the header is not a known value, so
                the start of the word table cannot be determined.
            UnicodeDecodeError: If a word's bytes are not valid UTF-16LE.
        """
        mask = self._HEADER_PROBE.unpack_from(content)[4]
        index = self._WORD_TABLE_OFFSETS.get(mask)
        if index is None:
            # Without a known offset the header itself would be parsed as
            # word records, yielding garbage.
            raise ValueError(
                'unsupported scel header byte 0x%02x at offset 4' % mask)

        read_record_head = self._RECORD_HEAD.unpack_from
        read_u16 = self._UINT16.unpack_from
        trailer = self._WORD_TRAILER_SIZE
        end = len(content)

        # A dict is used as an insertion-ordered set so the result keeps
        # file order instead of depending on string hashing.
        words = {}
        while index < end:
            word_count, table_size = read_record_head(content, index)
            # Skip the two header fields plus the table, rounded down to
            # a whole number of 16-bit entries.
            index += 4 + (table_size // 2) * 2
            for _ in range(word_count):
                (word_len,) = read_u16(content, index)
                index += 2
                word = content[index:index + word_len].decode('UTF-16LE')
                index += word_len + trailer
                words[word] = None
        return list(words)

    def parse_file(self, filepath):
        """Read ``filepath`` in binary mode and return ``parse`` of its bytes.

        Args:
            filepath: Path of the file to read.

        Returns:
            The list of distinct words produced by :meth:`parse`.
        """
        with open(filepath, 'rb') as f:
            return self.parse(f.read())
# Module-level ScelParser instance, created when this module is imported.
scel_parser = ScelParser()