-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathword_counts.py
More file actions
63 lines (51 loc) · 1.78 KB
/
Copy pathword_counts.py
File metadata and controls
63 lines (51 loc) · 1.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 26 23:24:51 2016
@author: Isaac
"""
from collections import Counter

import numpy as np
# Two sample sentences, lower-cased and split on whitespace into word lists;
# they are the inputs for the demo calls at the bottom of the script.
a, b = (
    sentence.lower().split()
    for sentence in (
        'The quick brown fox jumps over the lazy dog',
        'A quick brown dog outpaces a quick fox',
    )
)
# Given a string, return how many times it occurs in a list of strings.
def find_count(string, strings):
    """Return how many items of *strings* compare equal to *string*.

    Args:
        string: The value to look for.
        strings: An iterable of values to scan (e.g. a list of words).

    Returns:
        The number of items ``s`` in *strings* for which ``s == string``;
        0 when *strings* is empty.
    """
    # sum() over a generator replaces the manual counter loop and, unlike
    # list.count(), still accepts any iterable, not just lists.
    return sum(1 for s in strings if s == string)
# Given 2 lists of strings, compute their Cosine distance. Cosine distance is equal to 1 - Cosine Similarity
def cosine_distance(string1, string2):
    """Compute the cosine distance between two word lists.

    Each input is turned into a vector of word counts over the combined
    vocabulary (the unique words of ``string1 + string2`` in order of first
    appearance). The distance is ``1 - cosine similarity`` of the two count
    vectors.

    Args:
        string1: First list of words. Items must be hashable.
        string2: Second list of words. Items must be hashable.

    Returns:
        A 3-tuple ``(vocab1, vocab2, distance)``. ``vocab1`` and ``vocab2``
        are two separate lists holding the same vocabulary. ``distance`` is
        the cosine distance, or ``nan`` when either input has no words
        (the cosine of a zero vector is undefined).
    """
    counts1 = Counter(string1)
    counts2 = Counter(string2)

    # Collect the vocabulary in first-seen order. The set makes the
    # "already seen?" check O(1) instead of rescanning a list.
    vocab = []
    seen = set()
    for word in string1 + string2:
        if word not in seen:
            seen.add(word)
            vocab.append(word)

    # A Counter yields 0 for words it never saw, so both vectors line up
    # position-by-position with the vocabulary.
    vec1 = [counts1[word] for word in vocab]
    vec2 = [counts2[word] for word in vocab]

    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if norm_product == 0:
        # A zero-length vector has no direction; report nan explicitly
        # rather than triggering a divide-by-zero warning.
        return vocab, list(vocab), float('nan')

    # float() keeps the division a true division on Python 2 as well.
    return vocab, list(vocab), 1 - float(np.dot(vec1, vec2)) / norm_product
# Given 2 lists of strings, compute their Euclidean distance
def euclidean_distance(string1, string2):
    """Compute the Euclidean distance between two word lists.

    Each input is turned into a vector of word counts over the combined
    vocabulary (the unique words of ``string1 + string2`` in order of first
    appearance). The distance is the L2 norm of the difference between the
    two count vectors.

    Args:
        string1: First list of words. Items must be hashable.
        string2: Second list of words. Items must be hashable.

    Returns:
        A 3-tuple ``(vocab1, vocab2, distance)``. ``vocab1`` and ``vocab2``
        are two separate lists holding the same vocabulary. ``distance`` is
        the Euclidean distance between the count vectors (0.0 when both
        inputs are empty).
    """
    counts1 = Counter(string1)
    counts2 = Counter(string2)

    # Collect the vocabulary in first-seen order. The set makes the
    # "already seen?" check O(1) instead of rescanning a list.
    vocab = []
    seen = set()
    for word in string1 + string2:
        if word not in seen:
            seen.add(word)
            vocab.append(word)

    # A Counter yields 0 for words it never saw, so both vectors line up
    # position-by-position with the vocabulary.
    vec1 = np.array([counts1[word] for word in vocab])
    vec2 = np.array([counts2[word] for word in vocab])

    return vocab, list(vocab), np.linalg.norm(vec1 - vec2)
# Demo: print the (vocabulary, vocabulary, distance) tuple for each metric on
# the sample sentences. A parenthesised single-argument print behaves the same
# as the print statement on Python 2 and is a valid call on Python 3.
print(cosine_distance(a, b))
print(euclidean_distance(a, b))