 """
 network.py
 ~~~~~~~~~~
 
 A module to implement the stochastic gradient descent learning
 algorithm for a feedforward neural network.  Gradients are calculated
 using backpropagation.  Note that I have focused on making the code
 simple, easily readable, and easily modifiable.  It is not optimized,
 and omits many desirable features.
 """
 
 
 
 import random
 
 
 import numpy as np
 
class Network(object):

    def __init__(self, sizes):
        """The list ``sizes`` contains the number of neurons in the
        respective layers of the network.  For example, if the list
        was [2, 3, 1] then it would be a three-layer network, with the
        first layer containing 2 neurons, the second layer 3 neurons,
        and the third layer 1 neuron.  The biases and weights for the
        network are initialized randomly, using a Gaussian
        distribution with mean 0, and variance 1.  Note that the first
        layer is assumed to be an input layer, and by convention we
        won't set any biases for those neurons, since biases are only
        ever used in computing the outputs from later layers."""
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]
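        # Illustration: for sizes = [2, 3, 1] the biases have shapes
        # (3, 1) and (1, 1), and the weights have shapes (3, 2) and (1, 3),
        # so weights[l][j, k] connects the k-th neuron in layer l to the
        # j-th neuron in layer l+1.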
 
    def feedforward(self, a):
        """Return the output of the network if ``a`` is input."""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a
 
    def SGD(self, training_data, epochs, mini_batch_size, eta,
            test_data=None):
        """Train the neural network using mini-batch stochastic
        gradient descent.  The ``training_data`` is a list of tuples
        ``(x, y)`` representing the training inputs and the desired
        outputs.  The other non-optional parameters are
        self-explanatory.  If ``test_data`` is provided then the
        network will be evaluated against the test data after each
        epoch, and partial progress printed out.  This is useful for
        tracking progress, but slows things down substantially."""
        if test_data: n_test = len(test_data)
        n = len(training_data)
        # Integer division: any leftover examples that do not fill a
        # complete mini-batch are skipped in each epoch.
        num_batches = n/mini_batch_size
        for j in xrange(epochs):
            random.shuffle(training_data)
            for k in xrange(0, num_batches):
                mini_batch = training_data[k*mini_batch_size : (k+1)*mini_batch_size]
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print "Epoch {0}: {1} / {2}".format(j, self.evaluate(test_data), n_test)
            else:
                print "Epoch {0} complete".format(j)
 
    def calculate_sum_derivatives_of_mini_batch(self, mini_batch):
        """Return the gradients summed over the m samples in
        ``mini_batch``.  Backpropagation is used to compute the
        gradient contributed by each individual sample ``(x, y)``."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        return nabla_b, nabla_w
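
    # In symbols, the method above accumulates, layer by layer,
    #     nabla_b = sum_x dC_x/db    and    nabla_w = sum_x dC_x/dw
    # over the samples x in the mini-batch; the division by m happens in
    # update_mini_batch.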
 
    def update_mini_batch(self, mini_batch, eta):
        """Update the network's weights and biases by applying
        gradient descent using backpropagation to a single mini batch.
        The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
        is the learning rate."""
        m = len(mini_batch)
        nabla_b, nabla_w = self.calculate_sum_derivatives_of_mini_batch(mini_batch)
        self.weights = [w-(eta/m)*nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/m)*nb for b, nb in zip(self.biases, nabla_b)]
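        # The two updates above move each parameter against the gradient
        # averaged over the mini-batch:
        #     w -> w - (eta/m) * sum_x dC_x/dw
        #     b -> b - (eta/m) * sum_x dC_x/db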
 
    def backprop(self, x, y):
        """Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient for the cost function C_x.  ``nabla_b`` and
        ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
        to ``self.biases`` and ``self.weights``."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Feedforward pass: store all the activations and all the
        # weighted inputs z, layer by layer.
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # Backward pass.  Negative indices count layers from the back,
        # so l = -1 is the output layer, l = -2 the layer before it, etc.
        # Error of the output layer:
        l = -1
        delta = self.cost_derivative_of_a_L(activations[l], y) * sigmoid_prime(zs[l])
        deltas = [delta]

        # Propagate the error backwards through the hidden layers.
        for i in xrange(2, self.num_layers):
            l = -i
            delta = np.dot(self.weights[l+1].transpose(), deltas[l+1]) * sigmoid_prime(zs[l])
            deltas.insert(0, delta)

        # Convert the per-layer errors into gradients.
        for i in xrange(1, self.num_layers):
            l = -i
            nabla_b[l] = deltas[l]
            nabla_w[l] = np.dot(deltas[l], activations[l-1].transpose())
        return (nabla_b, nabla_w)
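
    # The backward pass above implements the standard backpropagation
    # equations for sigmoid activations, with the cost derivative supplied
    # by cost_derivative_of_a_L:
    #     delta^L   = (a^L - y) * sigmoid'(z^L)
    #     delta^l   = ((w^{l+1})^T delta^{l+1}) * sigmoid'(z^l)
    #     dC/db^l   = delta^l
    #     dC/dw^l   = delta^l (a^{l-1})^T
    # where * denotes elementwise multiplication.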
 
    def evaluate(self, test_data):
        """Return the number of test inputs for which the neural
        network outputs the correct result. Note that the neural
        network's output is assumed to be the index of whichever
        neuron in the final layer has the highest activation."""
        # For example:
        #     l = [0,1,0,0,0,0,0,0,0,0]
        #     a = np.array(l).reshape(10,1)
        #     np.argmax(a)  # -> 1, the digit this output vector encodes
        #
        #     test_results = [(1,1),(2,2),(3,3),(1,9)]
        #     [int(x == y) for (x, y) in test_results]       # -> [1, 1, 1, 0]
        #     sum([int(x == y) for (x, y) in test_results])  # -> 3
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)
 
    def cost_derivative_of_a_L(self, output_activations, y):
        """Return the vector of partial derivatives \partial C_x /
        \partial a for the output activations."""
        return (output_activations-y)
 
 
def sigmoid(z):
    """The sigmoid function."""
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))
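
# A minimal, self-contained smoke test (not part of the original module):
# it trains on random noise purely to exercise the code paths above, so the
# reported "progress" is meaningless.  The layer sizes and hyperparameters
# are illustrative assumptions, not recommendations.
if __name__ == "__main__":
    toy_data = [(np.random.randn(2, 1), np.random.randn(1, 1))
                for _ in xrange(100)]
    net = Network([2, 3, 1])
    net.SGD(toy_data, epochs=5, mini_batch_size=10, eta=3.0)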
 