-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpyMap.py
More file actions
118 lines (85 loc) · 3.27 KB
/
pyMap.py
File metadata and controls
118 lines (85 loc) · 3.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import os
import shlex
import subprocess
import unicodedata
import string
class MetaMap:
    '''
    Thin wrapper around the MetaMap command-line tools.

    It can launch and stop the two auxiliary server processes (skrmedpost
    and word sense disambiguation) and submit text to a MetaMap process
    through its standard input.

    Attributes:
        skrmedpost_server: The subprocess handle of the skrmedpost server
            started by this instance, or None when none is running.
        wsd_server: The subprocess handle of the WSD server started by this
            instance, or None when none is running.
    '''

    # Built once so that sanitize_text does an O(1) membership test per
    # character instead of a linear scan of string.printable.
    _PRINTABLE = frozenset(string.printable)

    # Seconds to wait for a server to exit after terminate() before it is
    # killed outright.
    _STOP_TIMEOUT = 5.0

    def __init__(self):
        # Both handles are initialised here so that the stop_* methods can
        # be called safely even if the matching start_* was never called.
        self.skrmedpost_server = None
        self.wsd_server = None

    def start_server(self, server):
        '''
        Create a MetaMap subprocess based on the supplied server command.

        Arguments:
            server (str): The MetaMap start file location. It must be the
                          absolute path if the directory is not in $PATH.

        Returns:
            proc: A subprocess corresponding to the supplied server location.
        '''
        # NOTE(review): stdout is piped but nothing in this class reads it;
        # a server that writes a lot of output could stall once the pipe
        # buffer fills. Kept as-is because callers may read proc.stdout.
        return subprocess.Popen(server, stdout=subprocess.PIPE)

    @staticmethod
    def _stop_server(proc, timeout=None):
        '''
        Terminate a server subprocess and release its resources.

        Arguments:
            proc: The subprocess handle to stop, or None (in which case
                  nothing happens).
            timeout (float): Seconds to wait for the process to exit after
                             terminate() before killing it. Defaults to
                             MetaMap._STOP_TIMEOUT.
        '''
        if proc is None:
            return
        if timeout is None:
            timeout = MetaMap._STOP_TIMEOUT
        proc.terminate()
        try:
            # Reap the child so it does not linger as a zombie.
            proc.wait(timeout=timeout)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
        finally:
            if proc.stdout is not None:
                proc.stdout.close()

    def start_skrmedpost_server(self, server_loc=None):
        '''
        Start the MetaMap skrmedpost server.

        Arguments:
            server_loc (str): The MetaMap start file location. It must be the
                              absolute path if the directory is not in $PATH.
                              When omitted, 'skrmedpostctl_start.bat' is used.
        '''
        server = server_loc if server_loc else 'skrmedpostctl_start.bat'
        self.skrmedpost_server = self.start_server(server)

    def stop_skrmedpost_server(self):
        '''
        Terminate the skrmedpost server process.

        Does nothing when no skrmedpost server was started by this instance.
        '''
        self._stop_server(self.skrmedpost_server)
        self.skrmedpost_server = None

    def start_wsd_server(self, server_loc=None):
        '''
        Start the MetaMap word sense disambiguation server.

        Arguments:
            server_loc (str): The MetaMap start file location. It must be the
                              absolute path if the directory is not in $PATH.
                              When omitted, 'wsdserverctl_start.bat' is used.
        '''
        server = server_loc if server_loc else 'wsdserverctl_start.bat'
        self.wsd_server = self.start_server(server)

    def stop_wsd_server(self, server_loc=None):
        '''
        Terminate the wsd server process.

        Does nothing when no wsd server was started by this instance.

        Arguments:
            server_loc: Unused; retained so existing callers that pass it
                        keep working.
        '''
        self._stop_server(self.wsd_server)
        self.wsd_server = None

    def process_text(self, text, MM_options='metamap14.bat --XMLf'):
        '''
        Submit a block of text to MetaMap for processing. This requires
        that the skrmedpost server is running.

        Arguments:
            text (str): The text to be processed.
            MM_options (str): The same command you would run from the command
                              line to start the MetaMap process.

        Returns:
            out (str): The results of processing the text as returned by the
                       MetaMap process.
        '''
        text = self.sanitize_text(text)
        # The context manager closes both pipes and waits for the child on
        # exit, including when communicate() raises.
        with subprocess.Popen(shlex.split(MM_options),
                              stdout=subprocess.PIPE,
                              stdin=subprocess.PIPE,
                              encoding='utf-8') as proc:
            # communicate() sends the input, closes stdin, reads stdout to
            # EOF and waits for exit, so no explicit terminate() is needed.
            out = proc.communicate(input=text + '\n')[0]
        return out

    def sanitize_text(self, text):
        '''
        MetaMap only accepts ASCII printable characters. This function will
        ensure that extraneous characters are replaced or removed before
        submitting to the MetaMap process.

        The text is first NFKD-normalized, so characters with a
        decomposition (for example accented letters or ligatures) are
        reduced to their base characters; anything that is still not in
        string.printable is then dropped.

        Arguments:
            text (str): The raw, unprocessed text.

        Returns:
            text (str): A pure-ASCII, sanitized text.
        '''
        decomposed = unicodedata.normalize('NFKD', text)
        printable = MetaMap._PRINTABLE
        return ''.join([ch for ch in decomposed if ch in printable])