-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathetd_string_utils.py
More file actions
25 lines (24 loc) · 942 Bytes
/
etd_string_utils.py
File metadata and controls
25 lines (24 loc) · 942 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
def detokenizeString(tokenList):
    """Join tokens into one lowercase, space-separated string.

    Args:
        tokenList: Iterable of strings. Any iterable is accepted, not only
            sized sequences such as lists.

    Returns:
        Every token lowercased, with a single space between consecutive
        tokens. An empty iterable yields ``''``.
    """
    # str.join assembles the result in one pass and needs no special case
    # for the last token (the manual ``+=`` loop did).
    return ' '.join(token.lower() for token in tokenList)
def getEditDistance(word1, word2):
    """Return the Levenshtein edit distance between two sequences.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions (each costing 1) needed to turn ``word1``
    into ``word2``. Elements are compared with ``==``, so string
    comparison is case-sensitive.

    Args:
        word1: First sequence (typically a string).
        word2: Second sequence; must support ``len()``.

    Returns:
        The edit distance as a non-negative integer.
    """
    # previousRow[j] holds the distance between the already-processed prefix
    # of word1 and word2[:j]. Only the previous row is ever consulted, so the
    # full matrix does not need to be kept in memory.
    previousRow = list(range(len(word2) + 1))
    for i, element1 in enumerate(word1, start=1):
        # Column 0: turning word1[:i] into the empty prefix takes i deletions.
        currentRow = [i]
        for j, element2 in enumerate(word2, start=1):
            cost = 0 if element1 == element2 else 1
            currentRow.append(min(
                previousRow[j] + 1,         # deletion
                currentRow[j - 1] + 1,      # insertion
                previousRow[j - 1] + cost,  # match / substitution
            ))
        previousRow = currentRow
    return previousRow[-1]