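"""Numerical gradient check for LstmLayer.

Perturbs each entry of the weight matrix Wfh and compares a
central-difference estimate of the gradient against the analytic
gradient computed by lstm.backward().
"""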
import numpy as np
from model.lstm import LstmLayer
from model import IdentityActivator

def data_set():
    x = [np.array([[1], [2], [3]]),
         np.array([[2], [3], [4]])]
    d = np.array([[1], [2]])
    return x, d

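
# Gradient checking validates backpropagation: for each weight w, the
# derivative of the error E is approximated by the central difference
#     dE/dw ~= (E(w + eps) - E(w - eps)) / (2 * eps),
# which should closely match the analytic gradient from backward().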
def gradient_check():
    """Gradient check."""
    # Use a simple error function: the sum of all output elements
    error_function = lambda o: o.sum()

    lstm = LstmLayer(3, 2, 1e-3)

    # Run the forward pass over the two-step input sequence
    x, d = data_set()
    lstm.forward(x[0])
    lstm.forward(x[1])

    # Sensitivity map: all ones, since dE/do = 1 when E = o.sum()
    sensitivity_array = np.ones(lstm.h_list[-1].shape, dtype=np.float64)
    # Compute the analytic gradients
    lstm.backward(x[1], sensitivity_array, IdentityActivator())

    # Check the gradients numerically
    epsilon = 10e-4
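    # Perturb each entry of Wfh in both directions; reset_state() must be
    # called before each forward pass so every evaluation starts from the
    # same initial hidden and cell state.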
    for i in range(lstm.Wfh.shape[0]):
        for j in range(lstm.Wfh.shape[1]):
            lstm.Wfh[i, j] += epsilon
            lstm.reset_state()
            lstm.forward(x[0])
            lstm.forward(x[1])
            err1 = error_function(lstm.h_list[-1])
            lstm.Wfh[i, j] -= 2 * epsilon
            lstm.reset_state()
            lstm.forward(x[0])
            lstm.forward(x[1])
            err2 = error_function(lstm.h_list[-1])
            expect_grad = (err1 - err2) / (2 * epsilon)
            lstm.Wfh[i, j] += epsilon  # restore the original weight
            print('weights(%d,%d): expected - actual %.4e - %.4e' % (
                i, j, expect_grad, lstm.Wfh_grad[i, j]))
    return lstm

def test():
    l = LstmLayer(3, 2, 1e-3)
    x, d = data_set()
    l.forward(x[0])
    l.forward(x[1])
    l.backward(x[1], d, IdentityActivator())
    return l

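
# A minimal sketch of a reusable checker (check_weight is a hypothetical
# helper, not defined in the original module), assuming LstmLayer exposes
# matching weight/gradient attribute pairs such as Wfh and Wfh_grad.
def check_weight(lstm, x, weights, grads, epsilon=10e-4):
    # Hypothetical generalization of the loop in gradient_check() to any
    # weight matrix and its stored gradient.
    error_function = lambda o: o.sum()
    for i in range(weights.shape[0]):
        for j in range(weights.shape[1]):
            weights[i, j] += epsilon
            lstm.reset_state()
            lstm.forward(x[0])
            lstm.forward(x[1])
            err1 = error_function(lstm.h_list[-1])
            weights[i, j] -= 2 * epsilon
            lstm.reset_state()
            lstm.forward(x[0])
            lstm.forward(x[1])
            err2 = error_function(lstm.h_list[-1])
            expect_grad = (err1 - err2) / (2 * epsilon)
            weights[i, j] += epsilon  # restore the original weight
            print('weights(%d,%d): expected - actual %.4e - %.4e' % (
                i, j, expect_grad, grads[i, j]))
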
if __name__ == '__main__':
    gradient_check()
    # test()