运行环境为Mac+python3.7
神经网络为3层,神经元个数分别为784,30,10,激活函数为sigmoid,损失函数为二次方,优化器为梯度下降算法,学习率为3.0,达到的准确率接近96%
处理输入,这里的数据集为mnist.pkl.gz
import pickle
import gzip
import numpy as np
def load_data():
    """Load the raw MNIST dataset from 'mnist.pkl.gz'.

    Returns:
        (training_data, validation_data, test_data) as stored in the
        pickle: each is a (images, labels) pair, with images given as
        flattened 784-element pixel vectors.
    """
    # Context manager guarantees the file is closed even if unpickling
    # raises; encoding='bytes' is needed to read this Python 2 pickle
    # under Python 3.
    with gzip.open('mnist.pkl.gz', 'rb') as file:
        training_data, validation_data, test_data = pickle.load(file, encoding='bytes')
    return training_data, validation_data, test_data
def load_data_wrapper():
    """Reshape the raw MNIST arrays into the formats the network uses.

    Training labels are converted to 10x1 one-hot vectors; validation
    and test labels stay as plain digits. Each returned dataset is a
    zip of (image_column_vector, label) pairs.
    """
    tr, va, te = load_data()
    # Every image becomes a 784x1 column vector (28*28 pixels flattened).
    tr_inputs = [np.reshape(img, (784, 1)) for img in tr[0]]
    tr_targets = [vectorized_label(lbl) for lbl in tr[1]]
    va_inputs = [np.reshape(img, (784, 1)) for img in va[0]]
    te_inputs = [np.reshape(img, (784, 1)) for img in te[0]]
    return (
        zip(tr_inputs, tr_targets),
        zip(va_inputs, va[1]),
        zip(te_inputs, te[1]),
    )
def vectorized_label(x):
    """Return a 10x1 one-hot column vector with 1.0 at digit index x."""
    onehot = np.zeros((10, 1))
    onehot[x] = 1.0
    return onehot
def make_data():
    """Load MNIST and materialize each dataset as a list of (x, y) pairs.

    The zip objects produced by load_data_wrapper are single-use
    iterators; converting them to lists lets the SGD loop reshuffle and
    re-iterate the data across epochs.
    """
    datasets = load_data_wrapper()
    training_data, validation_data, test_data = (list(d) for d in datasets)
    return training_data, validation_data, test_data
神经网络
import numpy as np
from load_mnist import make_data
import matplotlib.pyplot as plt
def sigmoid(z):
    """Elementwise logistic sigmoid 1 / (1 + e^-z).

    Computed in a numerically stable form: np.exp is only ever called
    on -|z|, so it cannot overflow for large-magnitude inputs (the
    naive 1/(1+exp(-z)) raises overflow warnings for very negative z).
    For z >= 0 this is 1/(1+e^-z); for z < 0 the algebraically equal
    e^z/(1+e^z) is used.
    """
    e = np.exp(-np.abs(z))
    return np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
def sigmoid_prime(z):
    """Derivative of the sigmoid: s(z) * (1 - s(z))."""
    s = 1.0 / (1.0 + np.exp(-z))
    return s * (1.0 - s)
# 定义神经网络参数
# 3层神经网络:输入层,隐藏层,输出层
class Network(object):
    """Fully-connected feedforward network trained with mini-batch SGD.

    Sigmoid activations on every layer, quadratic (MSE) cost. Weights
    and biases are initialized from a standard normal distribution.
    """

    def __init__(self, sizes):
        """:param sizes: neurons per layer, e.g. [784, 30, 10]."""
        self.sizes = sizes
        self.num_layers = len(sizes)
        # weights[i] has shape (sizes[i+1], sizes[i]) so that
        # np.dot(weights[i], activation) maps layer i to layer i+1
        # (e.g. the first matrix here is 30x784, not 784x30).
        self.weights = [np.random.randn(x, y) for x, y in zip(sizes[1:], sizes[:-1])]
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]

    def feedforward(self, x):
        """Propagate a (784, 1) input column through all layers.

        Returns the (10, 1) output activation vector — one score per
        digit class.
        """
        for w, b in zip(self.weights, self.biases):
            x = sigmoid(np.dot(w, x) + b)
        return x

    def cost_dev(self, a, y):
        """Derivative of the quadratic cost 0.5*(y-a)^2 w.r.t. a."""
        return a - y

    def backprop(self, x, y):
        """Compute per-layer gradients for one example.

        :param x: (784, 1) input column vector
        :param y: (10, 1) one-hot target vector
        :return: (nabla_w, nabla_b), lists matching the shapes of
            self.weights and self.biases.
        """
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        # Forward pass: record every weighted input z and activation,
        # both are needed for the backward pass.
        act = x
        acts = [x]
        zs = []
        for w, b in zip(self.weights, self.biases):
            z = np.dot(w, act) + b
            zs.append(z)
            act = sigmoid(z)
            acts.append(act)
        # Backward pass: output-layer delta first
        # ('*' is elementwise multiplication).
        delta = self.cost_dev(acts[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, acts[-2].transpose())
        # Propagate delta back through the hidden layers.
        for l in range(2, self.num_layers):
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sigmoid_prime(zs[-l])
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, acts[-l - 1].transpose())
        return nabla_w, nabla_b

    def SGD(self, training_data, epochs, mini_batch_size, learning_rate, test_data=None):
        """Train with mini-batch stochastic gradient descent.

        :param training_data: list of (x, y) pairs; must be a list,
            since it is shuffled in place each epoch
        :param epochs: number of passes over the training set
        :param mini_batch_size: examples per gradient update
        :param learning_rate: gradient-descent step size
        :param test_data: optional list of (x, digit) pairs; when given,
            accuracy is printed after each epoch and plotted at the end
        """
        if test_data:
            n_test = len(test_data)
        n = len(training_data)
        epo = []
        accu = []
        for i in range(epochs):
            np.random.shuffle(training_data)
            # Split the shuffled data into consecutive mini-batches.
            mini_batchs = [training_data[k:k + mini_batch_size]
                           for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batchs:
                self.update(mini_batch, learning_rate)
            if test_data:
                n_correct = self.evaluate(test_data)
                print("Epoch {0}: accuracy: {1}/{2}".format(i, n_correct, n_test))
                epo.append(i)
                accu.append(self.eva(n_correct, n_test))
        # Only plot when accuracy history exists; previously a blank
        # figure was shown when test_data was omitted.
        if test_data:
            plt.plot(epo, accu, 'b')
            plt.xlabel("epoch")
            plt.ylabel("accuracy")
            plt.title('Training accuracy')
            plt.show()

    def update(self, training_data, learning_rate):
        """Apply one gradient-descent step for a mini-batch.

        :param training_data: the mini-batch, a list of (x, y) pairs
        :param learning_rate: step size
        """
        # Accumulate the gradients of every example in the batch.
        nabla_b_accu = [np.zeros(b.shape) for b in self.biases]
        nabla_w_accu = [np.zeros(w.shape) for w in self.weights]
        for x, y in training_data:
            nabla_w, nabla_b = self.backprop(x, y)
            nabla_b_accu = [nb + dnb for nb, dnb in zip(nabla_b_accu, nabla_b)]
            nabla_w_accu = [nw + dnw for nw, dnw in zip(nabla_w_accu, nabla_w)]
        # Step with the batch-averaged gradient: w <- w - (lr/m) * dw.
        m = len(training_data)
        self.weights = [w - (learning_rate / m) * dw
                        for w, dw in zip(self.weights, nabla_w_accu)]
        self.biases = [b - (learning_rate / m) * db
                       for b, db in zip(self.biases, nabla_b_accu)]

    def evaluate(self, test_data):
        """Count test examples whose argmax prediction equals the label
        (test labels are plain digits, not one-hot vectors)."""
        test_results = [(np.argmax(self.feedforward(x)), y) for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def eva(self, n_correct, n_test):
        """Accuracy as a fraction of correct predictions."""
        return n_correct / n_test
# Network architecture: 784 input pixels -> 30 hidden neurons -> 10 classes.
Input_size = 784
Output_size = 10
# Load the MNIST datasets as lists of (image, label) pairs.
training_data, validation_data, test_data = make_data()
bp = Network([Input_size, 30, Output_size])
# Train for 30 epochs with mini-batch size 10 and learning rate 3.0,
# reporting test-set accuracy after every epoch.
bp.SGD(training_data, 30, 10, 3.0, test_data=test_data)
版权声明:本文为mimeidongyu原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。