# Softmax.py
# Softmax regression on Fashion-MNIST with MXNet: a from-scratch variant
# (method0) and a Gluon variant (method1).
import d2lzh as d2l
from mxnet import autograd, nd, gluon, init
from mxnet.gluon import loss as gloss, nn
# Module-level setup shared by method0() and method1().
# NOTE: this runs at import time, including the dataset load below.

# Mini-batch size used for both the data iterators and the SGD step.
batch_size = 256
# Iterators over the Fashion-MNIST training and test splits.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
# Feature count per flattened sample (28 * 28 = 784); net() reshapes to this.
num_inputs = 28 * 28
# Number of columns of the weight matrix / entries of the bias.
num_outputs = 10
# Parameters of the from-scratch model: small random weights, zero bias.
w = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs))
b = nd.zeros(num_outputs)
# Allocate gradient buffers so autograd can differentiate w.r.t. w and b.
w.attach_grad()
b.attach_grad()
def net(x, w, b, num_inputs):
    """Single-layer softmax classifier: flatten, affine map, softmax.

    Args:
        x: Input batch; reshaped to ``(-1, num_inputs)`` before the product.
        w: Weight matrix, right-multiplied onto the flattened input.
        b: Bias added to the matrix product.
        num_inputs: Number of features per flattened sample.

    Returns:
        The result of ``softmax`` applied to ``dot(flat_x, w) + b``.
    """
    flat = x.reshape((-1, num_inputs))
    logits = nd.dot(flat, w) + b
    return softmax(logits)
def accuracy(y_hat, y):
    """Return the mean of ``argmax(y_hat, axis=1) == y`` as a Python scalar.

    Args:
        y_hat: Per-sample scores; the prediction is the argmax along axis 1.
        y: Labels; cast to ``float32`` before the comparison.

    Returns:
        Fraction of samples whose predicted index equals the label.
    """
    predicted = y_hat.argmax(axis=1)
    labels = y.astype('float32')
    hits = predicted == labels
    return hits.mean().asscalar()
def evaluate_accuracy(data_iter, netFunc, w, b, num_inputs):
    """Compute the accuracy of ``netFunc`` over every batch in ``data_iter``.

    Args:
        data_iter: Iterable yielding ``(x, y)`` batches.
        netFunc: Callable invoked as ``netFunc(x, w, b, num_inputs)``; its
            output is arg-maxed along axis 1 to obtain predictions.
        w: Weights forwarded to ``netFunc``.
        b: Bias forwarded to ``netFunc``.
        num_inputs: Flattened feature count forwarded to ``netFunc``.

    Returns:
        Number of correct predictions divided by the number of labels seen.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no batches.
    """
    acc_sum, n = 0.0, 0
    for x, y in data_iter:
        # Cast labels so the equality test compares like with like.
        y = y.astype('float32')
        y_hat = netFunc(x, w, b, num_inputs)
        acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
        n += y.size
    return acc_sum / n
def softmax(x):
    """Row-wise softmax of a 2-D array, computed in a numerically stable way.

    Args:
        x: Array of scores; normalisation is performed along axis 1.

    Returns:
        Array of the same shape as ``x`` whose rows sum to 1.
    """
    # Softmax is invariant to subtracting a per-row constant. Shifting by the
    # row maximum keeps every exponent <= 0, so exp() cannot overflow to inf
    # (which would otherwise yield inf / inf = NaN for large scores).
    shifted = x - x.max(axis=1, keepdims=True)
    x_exp = shifted.exp()
    partition = x_exp.sum(axis=1, keepdims=True)
    return x_exp / partition
def train_ch3(net, w, b, num_inputs, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, trainer=None):
    """Train ``net`` for ``num_epochs`` epochs and print per-epoch statistics.

    Args:
        net: Model callable invoked as ``net(x, w, b, num_inputs)``.
        w: Weights forwarded to ``net``.
        b: Bias forwarded to ``net``.
        num_inputs: Flattened feature count forwarded to ``net``.
        train_iter: Iterable of ``(x, y)`` training batches.
        test_iter: Iterable of ``(x, y)`` batches passed to
            ``evaluate_accuracy`` after each epoch.
        loss: Callable ``loss(y_hat, y)``; its result is summed per batch.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Passed to the update step (``d2l.sgd`` or ``trainer.step``).
        params: Parameters handed to ``d2l.sgd`` when ``trainer`` is None.
        lr: Learning rate handed to ``d2l.sgd`` when ``trainer`` is None.
        trainer: Optional object with a ``step(batch_size)`` method; when
            given it replaces the ``d2l.sgd`` update.

    Returns:
        None. One summary line is printed per epoch; the epoch number shown
        is the zero-based loop index.
    """
    for epoch in range(num_epochs):
        loss_total = 0.0
        correct_total = 0.0
        seen = 0
        for x, y in train_iter:
            # Only the forward pass and the loss are recorded for autograd.
            with autograd.record():
                y_hat = net(x, w, b, num_inputs)
                batch_loss = loss(y_hat, y).sum()
            batch_loss.backward()
            if trainer is not None:
                trainer.step(batch_size)
            else:
                d2l.sgd(params, lr, batch_size)
            labels = y.astype('float32')
            loss_total += batch_loss.asscalar()
            correct_total += (y_hat.argmax(axis=1) == labels).sum().asscalar()
            seen += labels.size
        test_acc = evaluate_accuracy(test_iter, net, w, b, num_inputs)
        summary = (epoch, loss_total / seen, correct_total / seen, test_acc)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' % summary)
def cross_entropy(y_hat, y):
    """Return the negated log of the ``y_hat`` entries selected by ``y``.

    Args:
        y_hat: Predicted values from which one entry per sample is picked.
        y: Indices passed to ``nd.pick`` to choose those entries.

    Returns:
        ``-log`` of the picked entries, one value per sample.
    """
    picked = nd.pick(y_hat, y)
    return -picked.log()
def method0():
    """Train the from-scratch softmax model and show sample predictions.

    Uses the module-level ``w``, ``b``, ``num_inputs``, ``batch_size`` and the
    data iterators. Trains for 5 epochs with learning rate 0.1 via the local
    ``train_ch3``, then displays the first nine test images titled with their
    true label on the first line and the predicted label on the second.
    """
    # The previous pre-training pass over train_iter is dropped: its result
    # was never used, so it only cost a full sweep of the training set.
    num_epochs, lr = 5, 0.1
    train_ch3(net, w, b, num_inputs, train_iter, test_iter, cross_entropy,
              num_epochs, batch_size, [w, b], lr)

    # Only the first test batch is needed for the preview.
    for x, y in test_iter:
        break
    true_labels = d2l.get_fashion_mnist_labels(y.asnumpy())
    pre_labels = d2l.get_fashion_mnist_labels(
        net(x, w, b, num_inputs).argmax(axis=1).asnumpy())
    titles = [true + '\n' + pred for true, pred in zip(true_labels, pre_labels)]
    d2l.show_fashion_mnist(x[0:9], titles[0:9])
def method1():
    """Train a Gluon softmax-regression model and show sample predictions.

    Builds a ``Sequential`` network with one 10-unit ``Dense`` layer, trains
    it for 5 epochs with the library routine ``d2l.train_ch3`` (not the local
    ``train_ch3``) using SGD at learning rate 0.1, then displays the first
    nine test images titled with the true and predicted labels.
    """
    model = nn.Sequential()
    model.add(nn.Dense(10))
    model.initialize(init.Normal(sigma=0.01))

    criterion = gloss.SoftmaxCrossEntropyLoss()
    optimizer = gluon.Trainer(model.collect_params(), 'sgd', {'learning_rate': 0.1})
    epochs = 5
    d2l.train_ch3(model, train_iter, test_iter, criterion, epochs, batch_size,
                  None, None, optimizer)

    # Only the first test batch is needed for the preview.
    for x, y in test_iter:
        break
    actual = d2l.get_fashion_mnist_labels(y.asnumpy())
    predicted = d2l.get_fashion_mnist_labels(model(x).argmax(axis=1).asnumpy())
    captions = [want + '\n' + got for want, got in zip(actual, predicted)]
    d2l.show_fashion_mnist(x[0:9], captions[0:9])
def main():
    """Script entry point: print a start banner, then run ``method1``."""
    print('main start')
    method1()
# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()