# 代码实现

## 激活函数实现

#各个激活函数实现
def no_activate(x):
    """Identity activation: return the input unchanged."""
    return x

def relu(x):
    """ReLU activation: element-wise max(0, x).

    Fix: the original called np.max(0, x), which interprets `x` as the
    `axis` argument (np.max reduces an array); np.maximum is the
    element-wise binary maximum that ReLU requires.
    """
    return np.maximum(0, x)

def sigmoid(x):
    """Logistic sigmoid activation: 1 / (1 + e^(-x))."""
    exp_neg = np.exp(-x)
    return 1.0 / (1.0 + exp_neg)

def tan(x):
    """Hyperbolic tangent activation (name kept for existing callers)."""
    return np.tanh(x)

#各个激活函数的求偏导实现
def no_activate_derive(x):
    """Derivative of the identity activation: constant 1 (broadcasts over arrays)."""
    return 1

def relu_derive(x):
    """Element-wise ReLU derivative: 1 where x > 0, 0 elsewhere.

    The result keeps x's dtype, matching np.zeros_like-based construction.
    """
    positive = x > 0
    return positive.astype(x.dtype)

def sigmoid_derive(x):
    """Derivative of the sigmoid w.r.t. its pre-activation input x.

    Uses the identity s'(x) = s(x) * (1 - s(x)); evaluates sigmoid once
    instead of twice as in the original.
    """
    s = sigmoid(x)
    return s * (1 - s)

def tan_derive(x):
    """Derivative of tanh w.r.t. its pre-activation input x: 1 - tanh(x)^2.

    Fix: Layer.backward calls activate_derive_func with the cached
    pre-activation z, so tanh must be applied here; the original
    `1 - x**2` is only valid when x is the activation *output*, which is
    inconsistent with how sigmoid_derive is written and used.
    """
    return 1 - np.tanh(x) ** 2


## 单层网络实现

#单层模型
class Layer:
    """A single fully-connected layer: z = x @ W + b, a = activate(z).

    Caches its input and pre-activation during the forward pass so the
    backward pass can compute gradients for W and b.
    """

    def __init__(self, input_num, output_num, activate_func,
                 activate_derive_func, weight=None, bias=None):
        # Fix: the original accepted `weight`/`bias` but silently ignored
        # them; honor caller-supplied parameters, otherwise random init.
        if weight is not None:
            self.weights = weight
        else:
            # Scaled normal init, shape (input_num, output_num).
            self.weights = np.random.randn(input_num, output_num) * np.sqrt(1 / output_num)
        if bias is not None:
            self.bias = bias
        else:
            self.bias = np.random.randn(1, output_num) * 0.1
        # Gradient of the loss w.r.t. the weights (set by backward()).
        self.dw = None
        # Gradient of the loss w.r.t. the bias (set by backward()).
        self.db = None
        self.activate_func = activate_func
        self.activate_derive_func = activate_derive_func
        # Forward-pass caches used by backward().
        self.x = None       # layer input
        self.z = None       # pre-activation value
        self.cache = None   # gradient propagated to the previous layer

    # Forward propagation. (Name `foward` kept as-is: external code calls it.)
    def foward(self, input):
        # Cache the input; needed to compute dw in backward().
        self.x = input
        # z = x @ W + b
        z = np.dot(input, self.weights) + self.bias
        # Cache z; needed by the activation derivative in backward().
        self.z = z
        return self.activate_func(z)

    # Backward propagation: `input` is dLoss/da coming from the next layer.
    def backward(self, input):
        # dLoss/dz = dLoss/da * da/dz, shape (batch, output_num).
        # (Fix: removed stray debug print of dz.shape.)
        dz = input * self.activate_derive_func(self.z)
        # dLoss/dW, shape (input_num, output_num).
        self.dw = np.dot(self.x.T, dz)
        # dLoss/db: sum over the batch axis so batched input also works;
        # identical to the original `dz` for a single (1, output_num) sample.
        self.db = np.sum(dz, axis=0, keepdims=True)
        # Gradient w.r.t. this layer's input, passed to the previous layer.
        self.cache = np.dot(dz, self.weights.T)
        return self.cache

    # Plain SGD step on W and b.
    def update(self, learning_rate):
        self.weights -= learning_rate * self.dw
        self.bias -= learning_rate * self.db


## 全连接神经网络模型实现

#神经网络模型
class NeuralNetwork:
    """Fully-connected feed-forward network trained with per-sample SGD."""

    def __init__(self, learning_rate):
        self.layers = []
        self.learning_rate = learning_rate

    # Register one layer. (Fix: the original __init__ contained
    # `self.layers.append(layer)` with `layer` undefined -> NameError;
    # adding layers is now an explicit method.)
    def add_layer(self, layer):
        self.layers.append(layer)

    # Forward propagation through every layer in order.
    # (Name `forwar_calcuate` kept as-is: external code may call it.)
    def forwar_calcuate(self, x):
        for layer in self.layers:
            x = layer.foward(x)
        return x

    # One backward pass + parameter update for a single (x, y) pair.
    def backward_calculate(self, x, y):
        # Forward pass first to populate each layer's caches.
        output = self.forwar_calcuate(x)
        # dLoss/da for the MSE loss (up to a constant factor): prediction - target.
        output = output - y
        # Propagate the gradient from the last layer back to the first.
        for layer in reversed(self.layers):
            output = layer.backward(output)
        # Apply the SGD update on every layer.
        for layer in self.layers:
            layer.update(self.learning_rate)

    # Train for `epochs` iterations over the whole training set.
    def train(self, x_train, y_train, epochs):
        # One-hot encode the labels. (Fix: infer the class count from the
        # labels instead of hard-coding 2 classes.)
        num_classes = int(np.max(y_train)) + 1
        y_one_hot = np.zeros((y_train.shape[0], num_classes))
        y_one_hot[np.arange(y_train.shape[0]), y_train] = 1
        mses = []

        for i in range(epochs):
            y_predict = self.forwar_calcuate(x_train)
            # Record the mean squared error after each epoch.
            mse = np.mean(np.square(y_one_hot - y_predict))
            mses.append(mse)
            # Log progress every 10 epochs. (Fix: "epcho" typo.)
            if i % 10 == 0:
                print('epoch:{}, mse:{}'.format(i, mse))
            # Per-sample training (could be optimized into mini-batches).
            # (Fix: reshape to (1, -1) instead of hard-coded (1, 2).)
            for j in range(len(x_train)):
                self.backward_calculate(x_train[j].reshape(1, -1),
                                        y_one_hot[j].reshape(1, -1))

        plt.plot(mses)

    # Print the model's accuracy on a test set.
    # (Name `accurancy` kept as-is: external code calls it.)
    def accurancy(self, x_test, y_test):
        num_classes = int(np.max(y_test)) + 1
        y_test_one_hot = np.zeros((y_test.shape[0], num_classes))
        y_test_one_hot[np.arange(y_test.shape[0]), y_test] = 1
        y_predict_one_hot = np.zeros_like(y_test_one_hot)

        # Predict each sample and one-hot encode the argmax class.
        for i in range(len(x_test)):
            y_predict = self.forwar_calcuate(x_test[i])
            y_predict_one_hot[i][np.argmax(y_predict)] = 1

        right_count = 0
        for i in range(len(x_test)):
            if (y_predict_one_hot[i] == y_test_one_hot[i]).all():
                right_count += 1

        # (Fix: "auccurency" typo in the log message.)
        print("network accuracy:{}".format(right_count / len(x_test)))


## 数据集加载

import numpy as np
import os
import matplotlib.pyplot as plt
#这里使用make_moons数据集
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
%matplotlib inline

# Total number of points to generate for the two-moons dataset.
SAMPLE_COUNT = 3000

# Generate the dataset: x are 2-D coordinates, y are 0/1 class labels;
# noise=0.2 adds jitter to the points.
x, y = make_moons(n_samples=SAMPLE_COUNT, noise=0.2)
# Split into training and test sets; the 20% test split is used only for validation.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
# Notebook cell expression: displays the shapes of the four arrays when run.
x_train.shape, y_train.shape, x_test.shape, y_test.shape

#利用matplotlib绘制图形
def make_plot(x, y, plot_name, file_name=None):
    """Scatter-plot 2-D samples colored by their class label.

    Args:
        x: array of shape (n, 2); columns are the two coordinates.
        y: class labels, used to color the points.
        plot_name: title shown above the plot.
        file_name: optional path; when given, the figure is also saved.
    """
    plt.style.use('dark_background')
    plt.figure(figsize=(16, 12))
    plt.title(plot_name, fontsize=30)
    plt.scatter(x[:, 0], x[:, 1], c=y.ravel(), s=40)
    # Fix: `file_name` was accepted but ignored in the original; save when given.
    if file_name is not None:
        plt.savefig(file_name)
    plt.show()


训练目标：对于样本点 (x1(横轴), x2(纵轴))，能够通过神经网络实例准确预测出类别(黄色or紫色)。

## 进行训练与预测

# Build each layer of the 2 -> 25 -> 50 -> 25 -> 2 network (sigmoid activations).
layer1 = Layer(2, 25, sigmoid, sigmoid_derive)
layer2 = Layer(25, 50, sigmoid, sigmoid_derive)
layer3 = Layer(50, 25, sigmoid, sigmoid_derive)
layer4 = Layer(25, 2, sigmoid, sigmoid_derive)

# Learning rate 0.01.
network = NeuralNetwork(0.01)

# Assemble the network. (Fix: the original created the layers but never
# attached them, so the network stayed empty and forward propagation
# returned the input unchanged.)
for layer in (layer1, layer2, layer3, layer4):
    network.layers.append(layer)

# Plot the training data.
make_plot(x_train, y_train, "moon plot")

# Train for 500 epochs.
network.train(x_train, y_train, 500)
# Evaluate accuracy on the held-out test set.
network.accurancy(x_test, y_test)


# 数学推导

## 梯度下降

$$Loss = \frac{(y - t)^2}{2}$$

$$w = w - \eta \frac{\partial Loss}{\partial w}$$ （$\eta$ 为步长，也称作学习率）

$$b = b - \eta \frac{\partial Loss}{\partial b}$$ （$\eta$ 为步长，也称作学习率）

    #更新w参数与b参数
def update(self, learning_rate):
self.weights -= learning_rate * self.dw
self.bias -= learning_rate * self.db


## 链式求导法则

$$\frac{\partial y}{\partial x_1} = \frac{\partial y}{\partial x_n} \cdot \frac{\partial x_n}{\partial x_{n-1}} \cdots \frac{\partial x_2}{\partial x_1}$$

$$z_1 = w_1 x + b_1$$

$$a_1 = activate(z_1)$$

$$z_2 = w_2 a_1 + b_2$$

$$a_2 = activate(z_2)$$

$$z_3 = w_3 a_2 + b_3$$

$$a_3 = activate(z_3)$$

$$Loss = \frac{(a_3 - t)^2}{2}$$

（1）$$\frac{\partial Loss}{\partial w_3} = \frac{\partial Loss}{\partial a_3} \cdot \frac{\partial a_3}{\partial z_3} \cdot \frac{\partial z_3}{\partial w_3}$$
其中右边的每一项我们都是可求的。

（2）$$\frac{\partial Loss}{\partial w_2} = \frac{\partial Loss}{\partial a_3} \cdot \frac{\partial a_3}{\partial z_3} \cdot \frac{\partial z_3}{\partial a_2} \cdot \frac{\partial a_2}{\partial z_2} \cdot \frac{\partial z_2}{\partial w_2}$$

    #反向传播算法
def backward(self, input):
#对z求导 (dz矩阵的shape为 1 * output_num)
dz = input * self.activate_derive_func(self.z)
print(dz.shape)
#对w进行求导(dw矩阵的shape为input_num * output_num)
self.dw = np.dot(self.x.T, dz)
#对b进行求导(db的shape为1 * output_num)
self.db = dz
#缓存本层的求导中间值，作为反向传播中下一层的输入值
self.cache = np.dot(dz, self.weights.T)

return self.cache


THE END