Commit a59cd7c

hujianwu authored and jianwu.hu committed on Sep 6, 2017
add dropout and fc layers and use moment optimizer
1 parent 4076abd commit a59cd7c

File tree

4 files changed: +61 -39 lines


conv.py

+10 -5

@@ -18,12 +18,12 @@ def __init__(self,keep):
     def forward(self,x):
         linear = x.reshape(-1)
         self.outs = np.random.choice(len(linear),size=int(len(linear)*(1-self.keep)))
-        linear[outs] = 0
+        linear[self.outs] = 0
         return x

     def backward(self,dy):
         linear = dy.reshape(-1)
-        linear[outs] = 0
+        linear[self.outs] = 0
         return dy

     def apply_gradients(self,learning_rate):
@@ -35,6 +35,8 @@ def __init__(self,filter,stride,padding):
         self.bias = np.zeros(shape=[filter.shape[-1]],dtype=float)
         self.stride = stride
         self.padding = padding
+        self.m_filter = np.zeros_like(self.filter)
+        self.m_bias = np.zeros_like(self.bias)
     def forward(self,x):
         padding = self.padding
         stride = self.stride
@@ -72,9 +74,12 @@ def backward(self,dy):
                 dexpand[i*stride:i*stride+fh,j*stride:j*stride+fw,:] += dgarden[i,j,:,:,:]
         return dexpand[padding:-padding,padding:-padding,:]
     def apply_gradients(self,learning_rate):
-        self.filter -= self.dfilter*learning_rate
-        self.bias -= self.dbias*learning_rate
-
+        self.m_bias = self.m_bias*0.9 + self.dbias*0.1
+        self.m_filter = self.m_filter*0.9 + self.dfilter*0.1
+        self.filter -= self.m_filter*learning_rate
+        self.bias -= self.m_bias*learning_rate
+        # self.filter -= self.dfilter*learning_rate
+        # self.bias -= self.dbias*learning_rate
 if __name__ == '__main__':
     image = np.ones(shape=[48,64,3])
     filter = np.ones(shape=[5,5,3,7])
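For reference, the dropout layer patched above picks a random index set on the forward pass and must reuse the same set on the backward pass, which is what the outs -> self.outs fix ensures. A minimal standalone sketch of that behaviour (the class and variable names below are illustrative, not necessarily the repo's exact API):

import numpy as np

class dropout_sketch(object):
    def __init__(self, keep):
        self.keep = keep          # probability of keeping a unit
        self.outs = None          # indices zeroed in the last forward pass

    def forward(self, x):
        flat = x.reshape(-1)      # a view on x for contiguous arrays, so writes hit x
        n_drop = int(len(flat) * (1 - self.keep))
        self.outs = np.random.choice(len(flat), size=n_drop)
        flat[self.outs] = 0       # zero a random subset of activations in place
        return x

    def backward(self, dy):
        flat = dy.reshape(-1)
        flat[self.outs] = 0       # block the gradient at the same units
        return dy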

mnist_conv.py

+35 -28

@@ -14,21 +14,24 @@
 idx = range(batch_size)


-filter0 = np.random.standard_normal(size=[3,3,1,5])*0.1
-filter = np.random.standard_normal(size=[3,3,5,5])*0.1
-conv_layer0 = conv.conv2d(filter0,2,1)
+filter0 = np.random.standard_normal(size=[3,3,1,3])*0.1
+filter = np.random.standard_normal(size=[3,3,3,5])*0.1
+conv_layer0 = conv.conv2d(filter0,1,1)
 relu0 = conv.relu()
+dropout0 = conv.dropout(0.6)
 conv_layer = conv.conv2d(filter,2,1)
 relu = conv.relu()

-l1 = neural.layer(28*28/4/4*5,10,"sigmoid",weight_decay=0.001)
+hidden = 100
+l1 = neural.layer(28*28/1/4*5,hidden,"linear",weight_decay=0.0)
+l2 = neural.layer(hidden,10,"softmax",weight_decay=0.0)

 loop = size//batch_size

-epoch = 3
-learning_rate = 0.02
-for _ in range(epoch):
-    for _ in range(loop):
+epoch = 1
+learning_rate = 0.0015
+for k in range(epoch):
+    for j in range(loop):
         images,labels = train.next_batch(batch_size)
         l = np.zeros([batch_size,label_num])
         l[idx,[labels]] = 1
@@ -37,40 +40,44 @@
             label = l[i].reshape(-1,1)
             conv_out0 = conv_layer0.forward(data)
             reludata0 = relu0.forward(conv_out0)
-            conv_out = conv_layer.forward(reludata0)
+            dropoutdata0 = dropout0.forward(reludata0)
+            conv_out = conv_layer.forward(dropoutdata0)
             reludata = relu.forward(conv_out)
             linear = reludata.reshape(-1,1)
             l1_out = l1.forward(linear)
-            loss = neural.sigmoid_loss(l1_out,label)
+            l2_out = l2.forward(l1_out)
+            loss = neural.softmax_loss(l2_out,label)
             print 'loss is ' + str(loss)
-            dloss = neural.prime_sigmoid_loss(l1_out,label)
-            dlinear = l1.backward(dloss)
+            dloss = neural.prime_softmax_loss(l2_out,label)
+            dl2_out = l2.backward(dloss)
+            dlinear = l1.backward(dl2_out)
             dreluout = dlinear.reshape(reludata.shape)
             dconv_out = relu.backward(dreluout)
             ddata = conv_layer.backward(dconv_out)
-            drelu0 = relu0.backward(ddata)
+            ddropout0 = dropout0.backward(ddata)
+            drelu0 = relu0.backward(ddropout0)
             dconv_out0 = conv_layer0.backward(drelu0)
             conv_layer.apply_gradients(learning_rate)
             conv_layer0.apply_gradients(learning_rate)
             l1.apply_gradients(learning_rate)
-    learning_rate *= 0.3
-
+        if (j != 0 and j % 400 == 0):
+            learning_rate *= 0.4

 correct = 0
 test_size = test.num_examples
 for i in range(test_size):
-  images,labels = test.next_batch(1)
-  data = images[0]
-  label = labels[0]
-  conv_out0 = conv_layer0.forward(data)
-  reludata0 = relu0.forward(conv_out0)
-  conv_out = conv_layer.forward(reludata0)
-  reludata = relu.forward(conv_out)
-  linear = conv_out.reshape(-1,1)
-  l1_out = l1.forward(linear)
-  # l2_out = l2.forward(l1_out)
-  n = np.argmax(l1_out,axis=0)
-  if(n[0] == label ):
-    correct += 1
+    images,labels = test.next_batch(1)
+    data = images[0]
+    label = labels[0]
+    conv_out0 = conv_layer0.forward(data)
+    reludata0 = relu0.forward(conv_out0)
+    conv_out = conv_layer.forward(reludata0)
+    reludata = relu.forward(conv_out)
+    linear = conv_out.reshape(-1,1)
+    l1_out = l1.forward(linear)
+    l2_out = l2.forward(l1_out)
+    n = np.argmax(l2_out,axis=0)
+    if(n[0] == label ):
+        correct += 1
 print "{} correct, rate is {}".format(correct,correct*1.0/test_size)
 # IPython.embed()
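Restating the updated training step in one place: conv0 -> relu -> dropout -> conv -> relu -> fc(linear, 100) -> fc(softmax, 10) -> softmax loss. The train_step wrapper below is illustrative only; the layer objects and neural helpers are the ones defined above, and, as in the diff, l2 is not stepped in apply_gradients:

def train_step(data, label, learning_rate):
    # forward: conv0 -> relu0 -> dropout0 -> conv -> relu -> fc(linear) -> fc(softmax)
    out = conv_layer0.forward(data)
    out = relu0.forward(out)
    out = dropout0.forward(out)
    out = conv_layer.forward(out)
    out = relu.forward(out)
    l1_out = l1.forward(out.reshape(-1, 1))
    l2_out = l2.forward(l1_out)
    loss = neural.softmax_loss(l2_out, label)

    # backward in reverse order
    d = neural.prime_softmax_loss(l2_out, label)
    d = l1.backward(l2.backward(d)).reshape(out.shape)
    d = conv_layer.backward(relu.backward(d))
    conv_layer0.backward(relu0.backward(dropout0.backward(d)))

    # momentum step on the layers the script updates (l2 is left out, as in the diff)
    conv_layer.apply_gradients(learning_rate)
    conv_layer0.apply_gradients(learning_rate)
    l1.apply_gradients(learning_rate)
    return loss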

mnist_reader.py

+4

@@ -135,6 +135,10 @@ def __init__(self,
         images = numpy.multiply(images, 1.0 / 255.0)
         self._images = images
         self._labels = labels
+        perm = numpy.arange(self._num_examples)
+        numpy.random.shuffle(perm)
+        self._images = self._images[perm]
+        self._labels = self._labels[perm]
         self._epochs_completed = 0
         self._index_in_epoch = 0

neural.py

+12 -6

@@ -32,18 +32,20 @@ def __init__(self,active,prime_active):

 activations = dict()
 activations["sigmoid"] = activation(sigmoid,prime_sigmoid)
-activations["liner"] = activation(nop,prime_nop)
+activations["linear"] = activation(nop,prime_nop)
 activations["softmax"] = activation(softmax,prime_softmax)
 activations["tanh"] = activation(tanh,prime_tanh)
 class layer:
-    def __init__(self,input_size,output_size,active,bias_rate=1.0,weight_decay=0.001):
+    def __init__(self,input_size,output_size,active,bias_rate=1.0,weight_decay=0.0001):
         a = activations[active]
         self.fn_activate = a.active
         self.fn_prime_activate = a.prime_active
         self.weights = np.random.randn(output_size,input_size)/10.0
         self.d_weights = np.zeros((output_size,input_size))
         self.bias = np.random.randn(output_size,1)
         self.d_bias = np.zeros((output_size,1))
+        self.m_weights = np.zeros_like(self.weights)
+        self.m_bias = np.zeros_like(self.bias)
         self.bias_rate = bias_rate
         self.weight_decay = weight_decay
     def forward(self,x):
@@ -57,14 +59,18 @@ def backward(self,y):
             self.dy = y
         else:
             self.dy = self.fn_prime_activate(self.out) * y
-        self.d_weights = np.matmul(self.dy,self.x.T)/self.batch_size
-        self.d_bias = np.sum(self.dy,axis=1,keepdims=1)/self.batch_size
+        self.d_weights = np.matmul(self.dy,self.x.T)
+        self.d_bias = np.sum(self.dy,axis=1,keepdims=1)
        self.dx = np.matmul(self.weights.T,self.dy)

        return self.dx
     def apply_gradients(self,learning_rate):
-        self.weights = self.weights - self.d_weights*learning_rate - self.weights*self.weight_decay
-        self.bias -= self.d_bias
+        self.m_weights = self.m_weights*0.9 + self.d_weights*0.1
+        self.m_bias = self.m_bias*0.9 + self.d_bias*0.1
+        self.weights = self.weights - self.m_weights*learning_rate - self.weights*self.weight_decay
+        self.bias -= self.m_bias*learning_rate
+        # self.weights = self.weights - self.d_weights*learning_rate - self.weights*self.weight_decay
+        # self.bias -= self.d_bias*learning_rate

 def square_loss(a,y):
     l = np.sum((a-y)*(a-y))/2
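Both apply_gradients changes (conv.py and neural.py) implement the same rule: keep a running average of the raw gradient with decay 0.9 and step along that average instead of the gradient itself, with optional weight decay on the fully connected weights. A minimal sketch of the update, with illustrative names:

import numpy as np

def momentum_update(param, grad, m, learning_rate, weight_decay=0.0):
    # m <- 0.9*m + 0.1*grad : exponential moving average of the gradient
    m = m * 0.9 + grad * 0.1
    # descend along the averaged gradient, plus an L2-style shrink on param
    param = param - m * learning_rate - param * weight_decay
    return param, m

# example: a few noisy steps on a 2x2 weight matrix
w = np.random.randn(2, 2) * 0.1
m = np.zeros_like(w)
for _ in range(3):
    g = np.random.randn(2, 2)          # stand-in for a computed gradient
    w, m = momentum_update(w, g, m, learning_rate=0.0015)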
