1. The chain rule

[Figure: chain-rule derivation]
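
A short reconstruction of the derivation the figure presumably shows, in the notation of the code below (my reconstruction, assuming the squared-error loss $L = \tfrac{1}{2}\lVert a_N - y\rVert^2$, with $z_l = a_{l-1} W_l + b_l$ and $a_l = \sigma(z_l)$):

\[
\delta_N = (a_N - y) \odot \sigma'(z_N), \qquad
\delta_l = \left( W_{l+1}\, \delta_{l+1} \right) \odot \sigma'(z_l)
\]
\[
\frac{\partial L}{\partial W_l} = a_{l-1}^{\top}\, \delta_l, \qquad
W_l \leftarrow W_l - \eta\, a_{l-1}^{\top}\, \delta_l
\]

Each layer's delta comes from the next layer's by one application of the chain rule, which is why the backpropagation code walks the layers in reverse.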

2. A NumPy layer and backpropagation

The full script is available on the author's GitHub: numpynn.py


import numpy as np

class npLayer():
    def __init__(self, n_input, n_out, activation=None, weights=None,
                 bias=None):
        # Weights and bias start as scaled Gaussian noise unless given
        self.weights = weights if weights is not None else np.random.randn(n_input, n_out) * np.sqrt(1 / n_out)
        self.bias = bias if bias is not None else np.random.randn(n_out) * 0.1
        self.activation = activation
        self.last_activation = None  # cached output of the forward pass
        self.error = None            # error handed back by the next layer
        self.delta = None            # error * activation derivative

    def activate(self, x):
        # Forward pass: affine transform, then the activation function
        r = np.dot(x, self.weights) + self.bias
        self.last_activation = self.apply_activation(r)
        return self.last_activation

    def apply_activation(self, r):
        # Apply the configured activation function
        if self.activation is None:
            return r
        elif self.activation == 'relu':
            return np.maximum(r, 0)
        elif self.activation == 'tanh':
            return np.tanh(r)
        elif self.activation == 'sigmoid':
            return 1 / (1 + np.exp(-r))
        return r

    def apply_activation_derivative(self, act_r):
        # Derivative of the activation, written in terms of the
        # activation's output act_r rather than its input
        if self.activation is None:
            return np.ones_like(act_r)
        elif self.activation == 'relu':
            return (act_r > 0) * 1
        elif self.activation == 'tanh':
            return 1 - act_r ** 2
        elif self.activation == 'sigmoid':
            return act_r * (1 - act_r)
        return act_r

    def __call__(self, x):
        return self.activate(x)
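
Note that apply_activation_derivative expects the activation's output, not its input: for sigmoid, σ'(z) = σ(z)(1 − σ(z)) = a(1 − a), and for tanh, σ'(z) = 1 − a², so caching last_activation is all the backward pass needs. A quick smoke test of the forward pass (the sizes here are arbitrary, chosen just for illustration):

import numpy as np

layer = npLayer(n_input=4, n_out=3, activation='relu')
x = np.random.randn(4)                # one sample with 4 features
out = layer(x)                        # __call__ forwards to activate()
print(out.shape)                      # (3,)
print(out is layer.last_activation)   # True: the output is cached for backprop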

Backpropagation (the method below belongs to the NeuralNetwork class in the full script; its feed_forward and _layers members are sketched after the method):

    def backpropagation(self, x, y, learning_rate):
        # Backward pass: walk the layers from output to input,
        # computing each layer's delta via the chain rule
        output = self.feed_forward(x)  # output of the last layer
        layer_len = len(self._layers)
        for i in reversed(range(layer_len)):
            layer = self._layers[i]
            if i == layer_len - 1:
                # Output layer: error is the residual against the target
                layer.error = output - y
                layer.delta = layer.error * layer.apply_activation_derivative(output)
            else:
                # Hidden layer: error flows back through the next layer's weights
                next_layer = self._layers[i + 1]
                layer.error = np.dot(next_layer.weights, next_layer.delta)
                layer.delta = layer.error * layer.apply_activation_derivative(layer.last_activation)

        # Gradient descent: each weight update is the outer product of the
        # layer's input with its delta, scaled by the learning rate
        for i in range(layer_len):
            layer = self._layers[i]
            o_i = np.atleast_2d(x if i == 0 else self._layers[i - 1].last_activation)
            layer.weights -= layer.delta * o_i.T * learning_rate
            layer.bias -= layer.delta * learning_rate  # the bias gets the bare delta
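
backpropagation refers to self._layers and self.feed_forward, which belong to the NeuralNetwork class defined in the full script. A minimal sketch of those members, inferred from how they are used above (the real class also carries the train method that drives the epoch loop):

class NeuralNetwork:
    def __init__(self):
        self._layers = []  # npLayer objects, in forward order

    def add_layer(self, layer):
        self._layers.append(layer)

    def feed_forward(self, x):
        # Push x through every layer; each layer caches its own
        # last_activation, which backpropagation reads later
        for layer in self._layers:
            x = layer.activate(x)
        return x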

3. MNIST training and testing
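
The driver script below calls a helper get_ministdata() that lives only in the full script. A plausible sketch, assuming MNIST is fetched via scikit-learn's fetch_openml, pixels are scaled to [0, 1], and the digit label sits in the last column (the column name 'target' and the use of scikit-learn are my assumptions); the time import is included because the driver uses time.perf_counter:

import time
import pandas as pd
from sklearn.datasets import fetch_openml

def get_ministdata():
    # 70,000 flattened 28x28 images plus their integer labels
    mnist = fetch_openml('mnist_784', version=1, as_frame=False)
    df = pd.DataFrame(mnist.data / 255.0)       # pixels normalized to [0, 1]
    df['target'] = mnist.target.astype(int)     # label as the last column
    return df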


if __name__ == '__main__':
    mnistdf = get_ministdata()
    # Hold out 80% of the rows for testing; train on the remaining 20%
    te_index = mnistdf.sample(frac=0.8).index.tolist()
    mnist_te = mnistdf.loc[te_index, :]
    mnist_tr = mnistdf.loc[~mnistdf.index.isin(te_index), :]
    x_tr, y_tr = mnist_tr.iloc[:, :-1].values, mnist_tr.iloc[:, -1].values
    x_te, y_te = mnist_te.iloc[:, :-1].values, mnist_te.iloc[:, -1].values
    print(x_te.shape)

    nn = NeuralNetwork()
    nn.add_layer(npLayer(784, 128, 'relu'))      # hidden layer
    nn.add_layer(npLayer(128, 10, 'sigmoid'))    # output layer, one unit per digit

    st = time.perf_counter()
    mses, accs = nn.train(x_tr, x_te, y_tr, y_te, 0.01, 150)  # lr 0.01, 150 epochs
    cost_ = time.perf_counter() - st
    print(f'cost: {cost_:.2f}s', accs)

Training log (printed every 5 epochs; earlier epochs omitted here):
 ================================================================================
Epoch: # 85, MSE: 0.00713
Accuracy: 93.93 % 
 ================================================================================
Epoch: # 90, MSE: 0.00654
Accuracy: 94.09 % 
 ================================================================================
Epoch: # 95, MSE: 0.00600
Accuracy: 94.27 % 
 ================================================================================
Epoch: # 100, MSE: 0.00558
Accuracy: 94.41 % 
 ================================================================================
Epoch: # 105, MSE: 0.00514
Accuracy: 94.53 % 
 ================================================================================
Epoch: # 110, MSE: 0.00479
Accuracy: 94.65 % 
 ================================================================================
Epoch: # 115, MSE: 0.00447
Accuracy: 94.75 % 
 ================================================================================
Epoch: # 120, MSE: 0.00417
Accuracy: 94.84 % 
 ================================================================================
Epoch: # 125, MSE: 0.00393
Accuracy: 94.93 % 
 ================================================================================
Epoch: # 130, MSE: 0.00370
Accuracy: 94.98 % 
 ================================================================================
Epoch: # 135, MSE: 0.00350
Accuracy: 95.03 %
 ================================================================================
Epoch: # 140, MSE: 0.00332
Accuracy: 95.08 %
 ================================================================================
Epoch: # 145, MSE: 0.00316
Accuracy: 95.12 %
 ================================================================================
Epoch: # 150, MSE: 0.00303
Accuracy: 95.14 %
cost: 1104.11s [0.2034285714285714, 0.5135714285714286, 0.5907142857142857, 0.6798928571428572, 0.74375, 0.7954285714285715,
0.8364821428571428, 0.863125, 0.8833571428571428, 0.8975178571428571, 0.9077857142857142, 0.9149285714285714, 0.9213214285714286,
0.9264821428571427, 0.9302142857142858, 0.9336071428571429, 0.9372678571428571, 0.9392857142857143, 0.9408928571428572, 0.9427321428571429,
0.9440535714285714, 0.94525, 0.9465178571428572, 0.9475178571428572, 0.9483571428571429, 0.9493035714285715, 0.9498214285714286,
0.9502857142857143, 0.95075, 0.9511607142857144, 0.9513571428571429]

The returned accs list tracks test accuracy as training progresses; its final entry, 0.9514, matches the Epoch 150 line above.
