neural_network_vec.py

import numpy as np
from model import SigmoidActivator


class FullConnectedLayer(object):
    """Fully connected layer."""

    def __init__(self, input_size, output_size, activator):
        """Initialize the layer.

        :param input_size: dimension of this layer's input vector
        :param output_size: dimension of this layer's output vector
        :param activator: activation function
        """
        self.input_size = input_size
        self.output_size = output_size
        self.activator = activator
        self.W = np.random.uniform(-.1, .1, (output_size, input_size))  # weight matrix W
        self.b = np.zeros((output_size, 1))  # bias vector b
        self.output = np.zeros((output_size, 1))  # output vector

    def forward(self, input_array):
        """Forward pass.

        Output vector a = sigmoid(W . x + b); the bias b can be viewed as a
        weight whose input is fixed to 1.

        :param input_array: input vector; its dimension must equal input_size
        """
        self.input = input_array
        self.output = self.activator.forward(np.dot(self.W, input_array) + self.b)

    def backward(self, delta_array):
        """Backward pass: compute the gradients of W and b.

        Error term of layer l = a_l * (1 - a_l) * W^T * (error term of layer l+1).
        Note that self.delta computed here is the error term of the *previous*
        layer, because self.input is the previous layer's output.

        :param delta_array: this layer's error term, passed back from the layer
            closer to the output
        """
        self.delta = self.activator.backward(self.input) * np.dot(self.W.T, delta_array)
        self.W_grad = np.dot(delta_array, self.input.T)  # gradient of W
        self.b_grad = delta_array  # gradient of b

    def update(self, rate):
        """Update the weights with gradient descent.

        :param rate: learning rate
        """
        # W_grad and b_grad already carry the (t - y) factor, i.e. they point
        # in the descent direction of the loss, so they are added here.
        self.W += rate * self.W_grad
        self.b += rate * self.b_grad
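
# The SigmoidActivator imported from `model` above is not shown in this file.
# The class below is only an illustrative sketch of the interface the layer
# code assumes (forward/backward on numpy arrays); the name
# SigmoidActivatorSketch is hypothetical and the class is not used elsewhere.
class SigmoidActivatorSketch(object):
    """Minimal sketch of the assumed activator interface."""

    def forward(self, weighted_input):
        # sigmoid(z) = 1 / (1 + exp(-z))
        return 1.0 / (1.0 + np.exp(-weighted_input))

    def backward(self, output):
        # derivative written in terms of the sigmoid output a: a * (1 - a)
        return output * (1.0 - output)
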

class Network(object):
    """Neural network built from fully connected layers."""

    def __init__(self, layers):
        """Initialize.

        :param layers: list of layer sizes, one entry per layer
        """
        self.layers = []
        for i in range(len(layers) - 1):
            self.layers.append(FullConnectedLayer(layers[i], layers[i + 1], SigmoidActivator()))

    def predict(self, x):
        """Predict the output for one sample.

        :param x: sample (input column vector)
        :return: output vector of the last layer
        """
        output = x
        # each layer's output is the next layer's input
        for layer in self.layers:
            layer.forward(output)
            output = layer.output
        return output

    def fix(self, x, y, rate, epoch):
        """Train the model.

        :param x: samples
        :param y: sample labels
        :param rate: learning rate
        :param epoch: number of training epochs
        """
        for _ in range(epoch):
            for i in range(len(x)):
                self.__train_one_sample(x[i], y[i], rate)

    def __train_one_sample(self, x, y, rate):
        """Train the network on a single sample.

        :param x: sample
        :param y: sample label
        :param rate: learning rate
        """
        self.predict(x)
        self.__calc_delta(y)
        self.__update_W(rate)

    def __calc_delta(self, y):
        """Compute the error term (delta) of every layer.

        Output-layer error term: a * (1 - a) * (t - a), where a is the network
        output and t is the label.

        :param y: sample label
        """
        delta = self.layers[-1].activator.backward(self.layers[-1].output) * (y - self.layers[-1].output)
        for layer in self.layers[::-1]:
            layer.backward(delta)
            delta = layer.delta

    def __update_W(self, rate):
        """Update the weight matrices of all layers.

        :param rate: learning rate
        """
        for layer in self.layers:
            layer.update(rate)

    def gradient_check(self, x, y):
        """Gradient check.

        :param x: sample features
        :param y: sample label
        """
        # get the gradient of every connection for the current sample
        self.predict(x)
        self.__calc_delta(y)
        # check the gradients
        epsilon = 10e-4
        for layer in self.layers:
            for i in range(layer.W.shape[0]):
                for j in range(layer.W.shape[1]):
                    # layer.W[i, j] is the weight being checked
                    # add a small value and compute the network loss
                    layer.W[i, j] += epsilon
                    output = self.predict(x)
                    err1 = self.__loss(output, y)
                    # subtract a small value and compute the network loss
                    layer.W[i, j] -= 2 * epsilon  # epsilon was added once above, so subtract twice
                    output = self.predict(x)
                    err2 = self.__loss(output, y)
                    # expected gradient; W_grad stores the descent direction
                    # (it carries the (t - y) factor and is applied with +=),
                    # so it should match -(dE/dW) = (err2 - err1) / (2 * epsilon)
                    expect_grad = (err2 - err1) / (2 * epsilon)
                    # epsilon was just subtracted, add it back to restore the weight
                    layer.W[i, j] += epsilon
                    print('W(%d,%d): expected: %.4e , actual: %.4e' % (i, j, expect_grad, layer.W_grad[i, j]))

    def __loss(self, output, label):
        """Loss: half the sum of squared differences.

        :param output: predicted value
        :param label: actual value
        :return: loss value
        """
        return 0.5 * ((label - output) * (label - output)).sum()
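

# The block below is an illustrative usage sketch, not part of the original
# module. It assumes the SigmoidActivator imported from `model` behaves like
# the sketch above and that samples and labels are numpy column vectors whose
# shapes match the layer sizes; the dataset and hyperparameters are made up.
if __name__ == '__main__':
    samples = [np.array([[0.0], [0.0]]),
               np.array([[0.0], [1.0]]),
               np.array([[1.0], [0.0]]),
               np.array([[1.0], [1.0]])]
    labels = [np.array([[0.0]]),
              np.array([[1.0]]),
              np.array([[1.0]]),
              np.array([[0.0]])]
    net = Network([2, 4, 1])  # 2 inputs, one hidden layer of 4 units, 1 output
    net.fix(samples, labels, rate=0.5, epoch=1000)
    for s in samples:
        print(net.predict(s))
    # compare analytic and numeric gradients on one sample
    net.gradient_check(samples[0], labels[0])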