纯手撸一个rnn
原文
import copy

import numpy as np

np.random.seed(0)  # fixed seed so every run draws the same weights and samples


def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*, element-wise."""
    return 1 / (1 + np.exp(-x))


def sigmoid_output_to_derivative(output):
    """Return the sigmoid's derivative expressed through its own *output*.

    For ``s = sigmoid(x)`` the derivative with respect to ``x`` is
    ``s * (1 - s)``, so the already-computed activation can be reused.
    """
    return output * (1 - output)


# Training data: lookup table from an integer in [0, largest_number) to its
# binary_dim-bit representation (np.unpackbits emits the most significant
# bit first by default).
int2binary = {}
binary_dim = 8
largest_number = pow(2, binary_dim)
binary = np.unpackbits(
    np.array([range(largest_number)], dtype=np.uint8).T, axis=1
)
for i in range(largest_number):
    int2binary[i] = binary[i]

# Hyperparameters.
alpha = 0.1     # learning rate
input_dim = 2   # one bit from each addend per time step
hidden_dim = 8  # size of the recurrent hidden state
output_dim = 1  # one predicted sum bit per time step

# Weights, initialised uniformly in [-1, 1).
synapse_0 = 2 * np.random.random((hidden_dim, input_dim)) - 1   # (8, 2) input  -> hidden
synapse_1 = 2 * np.random.random((output_dim, hidden_dim)) - 1  # (1, 8) hidden -> output
synapse_h = 2 * np.random.random((hidden_dim, hidden_dim)) - 1  # (8, 8) hidden -> hidden

# Gradient accumulators; zeroed again at the end of every training step.
synapse_0_update = np.zeros_like(synapse_0)  # (8, 2)
synapse_1_update = np.zeros_like(synapse_1)  # (1, 8)
synapse_h_update = np.zeros_like(synapse_h)  # (8, 8)


def train_step(a_int, b_int):
    """Run one forward/backward pass on ``a_int + b_int`` and update the weights.

    The module-level weight matrices (``synapse_0``, ``synapse_1``,
    ``synapse_h``) are modified in place; the module-level ``*_update``
    buffers are used as gradient accumulators and left zeroed on return.

    Args:
        a_int: First addend.
        b_int: Second addend. ``a_int + b_int`` must be a key of
            ``int2binary`` (i.e. smaller than ``largest_number``).

    Returns:
        A tuple ``(overall_error, d, c)``: the summed absolute output error
        over all bit positions, the predicted bit array and the true bit
        array of the sum.

    Raises:
        KeyError: If an addend or the sum is not a key of ``int2binary``.
    """
    # Local aliases: the augmented assignments below mutate the shared
    # module-level arrays in place without needing ``global`` declarations.
    w_in, w_out, w_hid = synapse_0, synapse_1, synapse_h
    grad_in, grad_out, grad_hid = (
        synapse_0_update, synapse_1_update, synapse_h_update
    )

    a = int2binary[a_int]
    b = int2binary[b_int]
    c = int2binary[a_int + b_int]  # ground-truth bits of the sum
    d = np.zeros_like(c)           # predicted bits

    overall_error = 0.0
    layer_2_deltas = []  # output-layer deltas, one per time step
    # Hidden states; the first step needs an all-zero "previous" state.
    layer_1_values = [np.zeros((hidden_dim, 1))]

    # Forward pass: walk the bits from least to most significant, which is
    # the direction in which carries propagate.
    for step in range(binary_dim):
        bit = binary_dim - step - 1
        X = np.array([[a[bit], b[bit]]]).T  # (2, 1)
        y = np.array([[c[bit]]]).T          # (1, 1)

        layer_1 = sigmoid(
            np.dot(w_hid, layer_1_values[-1]) + np.dot(w_in, X)
        )  # (8, 1)
        layer_1_values.append(copy.deepcopy(layer_1))

        layer_2 = sigmoid(np.dot(w_out, layer_1))  # (1, 1)

        # Gradient of the squared-error loss 0.5 * (y - layer_2) ** 2 with
        # respect to the output activation.
        error = layer_2 - y
        overall_error += float(np.abs(error[0, 0]))
        layer_2_deltas.append(
            copy.deepcopy(error * sigmoid_output_to_derivative(layer_2))
        )
        d[bit] = np.round(layer_2[0][0])

    # Backward pass (backpropagation through time): iteration ``step``
    # revisits the forward step that consumed bit index ``step``, so the
    # stored values are read from the end of the lists.
    future_layer_1_delta = np.zeros((hidden_dim, 1))
    for step in range(binary_dim):
        X = np.array([[a[step], b[step]]]).T
        prev_layer_1 = layer_1_values[-step - 2]
        layer_1 = layer_1_values[-step - 1]
        layer_delta2 = layer_2_deltas[-step - 1]

        # The hidden delta combines the error flowing back from the next
        # time step with the error from this step's output.
        layer_delta1 = (
            np.dot(w_hid.T, future_layer_1_delta)
            + np.dot(w_out.T, layer_delta2)
        ) * sigmoid_output_to_derivative(layer_1)

        grad_in += np.dot(layer_delta1, X.T)
        grad_hid += np.dot(layer_delta1, prev_layer_1.T)
        grad_out += np.dot(layer_delta2, layer_1.T)
        future_layer_1_delta = layer_delta1

    # Gradient-descent update, then reset the accumulators.
    w_in -= alpha * grad_in
    w_hid -= alpha * grad_hid
    w_out -= alpha * grad_out
    grad_in *= 0
    grad_out *= 0
    grad_hid *= 0

    return overall_error, d, c


def bits_to_int(bits):
    """Convert a most-significant-bit-first sequence of 0/1 values to an int.

    Each bit is converted to a Python ``int`` first so the arithmetic cannot
    overflow a fixed-width NumPy scalar type.
    """
    return sum(int(bit) * pow(2, index)
               for index, bit in enumerate(reversed(bits)))


def train(iterations=100000, report_every=100):
    """Train the RNN on random binary additions, printing progress.

    Args:
        iterations: Number of training samples to run.
        report_every: Print a progress report whenever the iteration index
            is a multiple of this value.
    """
    for j in range(iterations):
        # Each addend is below largest_number // 2 so the sum still fits
        # in binary_dim bits.
        a_int = np.random.randint(largest_number // 2)
        b_int = np.random.randint(largest_number // 2)

        overall_error, d, c = train_step(a_int, b_int)

        if j % report_every == 0:
            print("Error:" + str(overall_error))
            print("Pred:" + str(d))
            print("True:" + str(c))
            out = bits_to_int(d)
            print(str(a_int) + " + " + str(b_int) + " = " + str(out))
            print("------------")


if __name__ == "__main__":
    train()

# Summary (original page heading: 總結)
- 上一篇: python enumerate
- 下一篇: 用 pytorch 实现 一个rnn