Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 86 additions & 49 deletions KNN.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,24 @@
from RNNPred import *

import scipy.spatial as sp
import tslearn.metrics as tsmetrics


def distance(a,b):
    '''
    Squared Euclidean (Frobenius) distance between ``a`` and ``b``.

    Both arguments are converted with ``np.asarray`` first, so plain lists
    or tuples of numbers are accepted as well as arrays.

    Returns the square of ``np.linalg.norm(a - b)``.
    '''
    diff = np.asarray(a) - np.asarray(b)
    return np.linalg.norm(diff) ** 2


def shuffle_in_unison(a, b):
    '''
    Shuffle ``a`` and ``b`` with one shared random permutation.

    The same index permutation is applied to the first axis of both
    arrays, so element ``j`` of each result comes from the same original
    position. The inputs are not modified; new arrays are returned.

    Raises ``ValueError`` when the two inputs do not have the same number
    of elements, since a shared permutation is only meaningful then.
    '''
    n_elem = a.shape[0]
    if len(b) != n_elem:
        raise ValueError(
            "shuffle_in_unison needs inputs of equal length, got {} and {}".format(n_elem, len(b))
        )
    # permutation() also copes with n_elem == 0
    indices = np.random.permutation(n_elem)
    return a[indices], b[indices]


def knn(k,data,lb,x):
return tsmetrics.dtw(a,b)

@jit
def tab_dist(a,b,dist):
    '''
    Table of pairwise values between the elements of ``a`` and ``b``.

    Entry ``[i, j]`` of the returned ``len(a) x len(b)`` array holds
    ``dist(a[i], b[j])``.

    NOTE(review): ``jit`` is not defined in this span (presumably it comes
    in through the star import) -- confirm it accepts a plain Python
    callable as the ``dist`` argument.
    '''
    n_rows = len(a)
    n_cols = len(b)
    table = np.zeros((n_rows, n_cols))
    for row in range(n_rows):
        left = a[row]
        for col in range(n_cols):
            table[row, col] = dist(left, b[col])
    return table

def knn(k,data,lb,x,tab):
'''
Supposed to do KNN but is slow (recomputing distances) AND not correct (as it uses mean and not buckets)
'''
res = np.array([distance(x,y) for y in data])
x1, y1 = zip(*sorted(zip(res, lb)))
x1, y1 = np.array(x1),np.array(y1)
Expand All @@ -34,61 +39,93 @@ def transform(x):
'''
return(torch.tensor(x.reshape((1,80,1))).float())

# NOTE(review): this span is a flattened diff. Lines from two revisions are
# interleaved and the indentation is lost. The first header below
# (data, lb, x, model, shape) matches the body that reads `data` and `x`;
# the `@jit` header matches the loop over `tab` at the end of the span.
# TODO confirm against the repository before changing any behaviour here.
def FSL(data,lb,x,model,shape):

@jit
def FSL(tab,csum,true_lb,lb,model,shape):
'''
FEW SHOT LEARNING -PEW PEW, You're learned-

We will consider data is transformed (aka we can make it go through our model directly)
'''
# Body that reads `data` / `x`: accumulates, per label, the `cosdis` value
# between the model output for `x` and the model output for every element
# of `data`, then returns the index of the largest normalised score.
# Labels are used directly as indices here (s[lb[i]]).
n = len(data)
s,tot = np.zeros(shape),0
t = (model.forward(transform(x), True)[1]).numpy()
for i in range(n):
calc = cosdis((model.forward(transform(data[i]), True)[1]).numpy(), t)
s[lb[i]] += calc
tot += calc

return(np.argmax(s/tot))


# Counts the elements of data_test whose FSL prediction differs from the
# matching entry of true_lb. It calls FSL with the five-argument signature.
def test(data_test,true_lb,data,lb,model,shape):
loss = 0
for e,x in enumerate(data_test):
loss += FSL(data,lb,x,model,shape) != true_lb[e]
return(loss)

# Loop over the rows of `tab`: each row's entries are summed into 5 bins
# chosen by lb[i]-1 (labels shifted by one, unlike the body above), the bin
# with the largest total is compared (plus one) with true_lb[e], and the
# mismatch count is divided by a fixed 1000.
# NOTE(review): if csum[e] is a positive scalar the division below cannot
# change the argmax -- confirm whether it is still needed.
# NOTE(review): `model` and `shape` are not read by this loop.
for e,ligne in enumerate(tab):
a = np.zeros(5)
for i,x in enumerate(ligne):
a[lb[i]-1] += x
a /= csum[e]
res = np.argmax(a)
loss += (res+1 != true_lb[e])
return(loss/1000)




def main(maxl):
def testknn(tab,lab,i,k,true_lab):
loss = 0
for e,ligne in enumerate(tab):
a = np.zeros(5)
idx = np.argpartition(ligne,min(k,i))
labels = np.take(lab,idx)
for x in labels:
a[x-1] +=1
res = np.argmax(a)
loss += (res +1 != true_lab[e])
return(loss/1000)

@jit
def hidden(ax):
    '''
    Pass every element of ``ax`` through the model and collect the results.

    Each element goes through ``transform`` and then
    ``model.forward(..., True)``; the ``.numpy()`` of that result is
    appended to the returned list, in input order.

    NOTE(review): ``model``, ``transform`` and ``jit`` are not defined in
    this span -- verify where they come from.
    '''
    outputs = []
    for sample in ax:
        forwarded = model.forward(transform(sample), True)
        outputs.append(forwarded.numpy())
    return outputs

# NOTE(review): flattened diff -- lines from two revisions of `main` are
# interleaved below and the indentation is lost. Confirm which lines are
# live before changing behaviour.
# Visible flow: build a table of exp(1 - cosine distance) between the hidden
# outputs for xte and xtr, plus a Euclidean distance table on the raw
# inputs; then, for growing numbers of columns, record the FSL and testknn
# losses and plot both curves.
def main(maxl,xtr,xte,ytr,yte):
'''
More random way to choose data/lb
step a bit through the number of x_train elements
shuffle X_train, Y_train
'''
'''
# RNN initialisation + training:
code()

st = time()
shape = 5
n = maxl
l=[0]*(n-1)
print(X_test.shape,y_test.shape)
l = np.zeros(n-1)
ldtw = np.zeros(n-1)
print(xte.shape,xtr.shape,yte.shape,ytr.shape)
with torch.no_grad():
# `start` and `end` are not defined in this span -- presumably module-level
# slice bounds; verify.
hiddens_train = hidden(xtr[:, start:end, :])
hiddens_test = hidden(xte[:, start:end, :])
# tabc[r, c] = exp(1 - cosine distance) between test item r and train item c
tabc = np.exp(1 - tab_dist(hiddens_test,hiddens_train,sp.distance.cosine))
# running sum along each row of tabc
csum = np.cumsum(tabc,axis=1)
# NOTE(review): the plot below labels this curve "KNN with TW" but the
# distance used here is Euclidean -- confirm.
tab = tab_dist(xte, xtr, sp.distance.euclidean)

print("entrée de la boucle")
for j in range(1,n):
i = 5*j
st = time()
print(i)
data = X_train[:i,start:end,:]
lb = y_train[:i]
l[j-1] = test(X_test[:700,start:end,:], y_test[:700], data, lb, model, shape)
ft = time() - st
print("Fin en {} min et {} sec".format(ft // 60, ft % 60))
i = 3*j
# NOTE(review): tabc[:,:i] keeps i columns, but csum[:,i] is the running sum
# through column i inclusive (i+1 columns) and is out of range when i equals
# the number of columns -- looks off by one; confirm.
l[j-1] = FSL(tabc[:,:i],csum[:,i], yte, ytr, model, shape)
# NOTE(review): this passes y_train / y_test rather than the ytr / yte
# parameters, while `tab` was built from xtr / xte -- labels may not line up
# with the table if the caller reordered its data; confirm.
ldtw[j-1] = testknn(tab[:,:i],y_train,i-1,5,y_test)

ft = time() - st
print("Fin en {} min et {} sec".format(ft // 60, ft % 60))

plt.figure()
plt.plot(range(n-1),l,label = "Attention kernel method")
# NOTE(review): the plotted curve is ldtw shifted down by 0.05; nothing in
# this span explains the offset -- confirm it is intended.
plt.plot(range(n-1),ldtw-0.05, label = "KNN with TW")
plt.legend(loc = 'best')
plt.xlabel("Number of data")
plt.ylabel("Percentage of wrong classifications")
plt.title("0-1 Loss in percentage for Classification")
plt.show()




# Script entry: shuffle the (xsp, ysp) pair together, then run `main` with
# the shuffled pair as xtr / ytr and X_test / y_test as xte / yte.
# xsp, ysp, X_test and y_test are not defined in this span -- presumably
# they come in through the star import; verify.
xsp,ysp = shuffle_in_unison(xsp,ysp)
main(99,xsp,X_test,ysp,y_test)


'''
Visualisation in the hidden space
Compute the cosdis values beforehand
add the exponentials and remove the 1-
average over several draws

plt.plot(range(1,n),l)

Take the time to reorganise the useful functions into modules

X_train, y_train = shuffle_in_unison(X_train,y_train)
X_test, y_test = shuffle_in_unison(X_test,y_test)

main(10)
'''