lstm_demo.py

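"""Gradient-check demo for the LstmLayer in model/lstm.py.

Runs two forward steps over a toy sequence, backpropagates an all-ones
sensitivity map, and compares the analytic gradient of Wfh against a
central-difference estimate.
"""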
import numpy as np
from model.lstm import LstmLayer
from model import IdentityActivator


def data_set():
    x = [np.array([[1], [2], [3]]),
         np.array([[2], [3], [4]])]
    d = np.array([[1], [2]])
    return x, d


def gradient_check():
    """Gradient check."""
    # Design an error function: take the sum of all the output components
    error_function = lambda o: o.sum()
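    # With E(h) = sum(h), dE/dh is a vector of ones, which is exactly the
    # sensitivity map passed to backward() below. The LstmLayer arguments
    # below are presumably (input_width, state_width, learning_rate),
    # matching the 3x1 inputs and 2x1 outputs in data_set().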
    lstm = LstmLayer(3, 2, 1e-3)
    # Run the forward pass over the two-step input sequence
    x, d = data_set()
    lstm.forward(x[0])
    lstm.forward(x[1])
    # Build the sensitivity map
    sensitivity_array = np.ones(lstm.h_list[-1].shape, dtype=np.float64)
    # Compute the gradients by backpropagation
    lstm.backward(x[1], sensitivity_array, IdentityActivator())
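    # backward() is expected to leave the analytic per-weight gradients on
    # the layer (lstm.Wfh_grad is read in the check below).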
    # Check the gradient of each element of Wfh numerically
    epsilon = 10e-4
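    # Central difference: grad[i, j] ~ (E(w + eps) - E(w - eps)) / (2 * eps).
    # Note that 10e-4 == 1e-3; write 1e-4 if that value was intended.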
    for i in range(lstm.Wfh.shape[0]):
        for j in range(lstm.Wfh.shape[1]):
            lstm.Wfh[i, j] += epsilon
            lstm.reset_state()
            lstm.forward(x[0])
            lstm.forward(x[1])
            err1 = error_function(lstm.h_list[-1])
            lstm.Wfh[i, j] -= 2 * epsilon
            lstm.reset_state()
            lstm.forward(x[0])
            lstm.forward(x[1])
            err2 = error_function(lstm.h_list[-1])
            expect_grad = (err1 - err2) / (2 * epsilon)
            # Restore the original weight
            lstm.Wfh[i, j] += epsilon
            print('weights(%d,%d): expected - actual %.4e - %.4e' % (
                i, j, expect_grad, lstm.Wfh_grad[i, j]))
    return lstm


def test():
    l = LstmLayer(3, 2, 1e-3)
    x, d = data_set()
    l.forward(x[0])
    l.forward(x[1])
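    # Note: the target d is passed here in place of a sensitivity array,
    # unlike gradient_check() above.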
    l.backward(x[1], d, IdentityActivator())
    return l


if __name__ == '__main__':
    gradient_check()
    # test()