A Python Implementation of a BP Network Trained with the Levenberg-Marquardt Algorithm
After more than a month of work, the BP network is finally finished. It is based on the following references:
1. Training feed-forward networks with the Marquardt algorithm
2. The Levenberg-Marquardt method for nonlinear least squares curve-fitting problems
3. Neural Network Design
4. The neural-network sections of the UFLDL tutorial at http://deeplearning.stanford.edu/wiki/index.php/UFLDL%E6%95%99%E7%A8%8B
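To orient the reader, the update rule implemented by the lm() method in the script below is the standard Levenberg-Marquardt step derived in references 1 and 2. With residual vector $\mathbf{e} = \mathbf{y} - \hat{\mathbf{y}}$ and $\mathbf{J} = \partial\mathbf{e}/\partial\mathbf{w}$ its Jacobian with respect to all weights and biases,

$$\Delta\mathbf{w} = -\left(\mathbf{J}^{\top}\mathbf{J} + \mu\mathbf{I}\right)^{-1}\mathbf{J}^{\top}\mathbf{e}.$$

A step is accepted and $\mu$ divided by $\beta$ when it lowers the error; otherwise the step is rolled back and $\mu$ multiplied by $\beta$. In the script, jjje() assembles $\mathbf{J}^{\top}\mathbf{J}$ and $-\mathbf{J}^{\top}\mathbf{e}$, and the inner while loop of lm() performs this damping adjustment.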
The full Python script follows:
import numpy as np
from math import exp, pow
from mpl_toolkits.mplot3d import Axes3D  # registers the '3d' projection
import matplotlib.pyplot as plt
import sys
import copy
from scipy.linalg import norm, pinv


class Layer:
    """A single network layer: weights, biases and bookkeeping."""
    def __init__(self, w, b, neure_number, transfer_function, layer_index):
        self.transfer_function = transfer_function
        self.neure_number = neure_number
        self.layer_index = layer_index
        self.w = w
        self.b = b


class NetStruct:
    """Network definition: training data plus the list of layers."""
    def __init__(self, x, y, hidden_layers, activ_fun_list, performance_function='mse'):
        # If no activation was given for the output layer, default it to linear.
        if len(hidden_layers) == len(activ_fun_list):
            activ_fun_list.append('line')
        self.active_fun_list = activ_fun_list
        self.performance_function = performance_function
        x = np.array(x)
        y = np.array(y)
        if x.shape[1] != y.shape[1]:
            print('The dimensions of x and y do not match.')
            sys.exit()
        self.x = x
        self.y = y
        input_eles = self.x.shape[0]
        output_eles = self.y.shape[0]
        # Layer sizes including the input and output layers, e.g. [2, 10, 10, 1].
        tmp = [input_eles]
        tmp.extend(hidden_layers)
        tmp.append(output_eles)
        self.hidden_layers = np.array(tmp)
        self.layer_num = len(self.hidden_layers)
        self.layers = []
        for i in range(0, len(self.hidden_layers)):
            if i == 0:
                # The input layer carries no weights.
                self.layers.append(Layer([], [], self.hidden_layers[i], 'none', i))
                continue
            f = self.hidden_layers[i - 1]  # fan-in
            s = self.hidden_layers[i]      # layer size
            self.layers.append(Layer(np.random.randn(s, f), np.random.randn(s, 1),
                                     self.hidden_layers[i], activ_fun_list[i - 1], i))


class Train:
    """Levenberg-Marquardt trainer for a NetStruct."""
    def __init__(self, net_struct, mu=1e-3, beta=10, iteration=100, tol=0.1):
        self.net_struct = net_struct
        self.mu = mu                # LM damping factor
        self.beta = beta            # factor by which mu is raised/lowered
        self.iteration = iteration  # maximum number of LM iterations
        self.tol = tol              # target error

    def train(self, method='lm'):
        if method == 'lm':
            self.lm()

    def sim(self, x):
        """Run the trained network on new inputs x (one sample per column)."""
        self.net_struct.x = x
        self.forward()
        layer_num = len(self.net_struct.layers)
        predict = self.net_struct.layers[layer_num - 1].output_val
        return predict

    def actFun(self, z, activ_type='sigm'):
        if activ_type == 'sigm':
            f = 1.0 / (1.0 + np.exp(-z))
        elif activ_type == 'tanh':
            # Fixed: the numerator of the original read np.exp(z) + np.exp(-z).
            f = (np.exp(z) - np.exp(-z)) / (np.exp(z) + np.exp(-z))
        elif activ_type == 'radb':
            f = np.exp(-z * z)
        elif activ_type == 'line':
            f = z
        return f

    def actFunGrad(self, z, activ_type='sigm'):
        if activ_type == 'sigm':
            grad = self.actFun(z, activ_type) * (1.0 - self.actFun(z, activ_type))
        elif activ_type == 'tanh':
            grad = 1.0 - self.actFun(z, activ_type) * self.actFun(z, activ_type)
        elif activ_type == 'radb':
            grad = -2.0 * z * self.actFun(z, activ_type)
        elif activ_type == 'line':
            grad = np.ones(z.shape)
        return grad

    def forward(self):
        """Forward pass; stores input_val/output_val on every layer."""
        layer_num = len(self.net_struct.layers)
        for i in range(0, layer_num):
            if i == 0:
                curr_layer = self.net_struct.layers[i]
                curr_layer.input_val = self.net_struct.x
                curr_layer.output_val = self.net_struct.x
                continue
            before_layer = self.net_struct.layers[i - 1]
            curr_layer = self.net_struct.layers[i]
            curr_layer.input_val = curr_layer.w.dot(before_layer.output_val) + curr_layer.b
            curr_layer.output_val = self.actFun(curr_layer.input_val,
                                                self.net_struct.active_fun_list[i - 1])

    def backward(self):
        """Backpropagate the Marquardt sensitivities (stored as layer.error)."""
        layer_num = len(self.net_struct.layers)
        last_layer = self.net_struct.layers[layer_num - 1]
        last_layer.error = -self.actFunGrad(last_layer.input_val,
                                            self.net_struct.active_fun_list[layer_num - 2])
        for i in reversed(range(1, layer_num - 1)):
            curr_layer = self.net_struct.layers[i]
            curr_layer.error = (last_layer.w.transpose().dot(last_layer.error)) \
                * self.actFunGrad(curr_layer.input_val,
                                  self.net_struct.active_fun_list[i - 1])
            last_layer = curr_layer

    def parDeriv(self):
        """Per-sample partial derivatives of the output w.r.t. weights and biases."""
        layer_num = len(self.net_struct.layers)
        for i in range(1, layer_num):
            befor_layer = self.net_struct.layers[i - 1]
            befor_input_val = befor_layer.output_val.transpose()
            curr_layer = self.net_struct.layers[i]
            curr_error = curr_layer.error
            curr_error = curr_error.reshape(curr_error.shape[0] * curr_error.shape[1], 1, order='F')
            row = curr_error.shape[0]
            col = befor_input_val.shape[1]
            a = np.zeros((row, col))
            num = befor_input_val.shape[0]
            neure_number = curr_layer.neure_number
            for k in range(0, num):  # renamed from 'i' to avoid shadowing the layer index
                a[neure_number * k:neure_number * k + neure_number, :] = \
                    np.repeat([befor_input_val[k, :]], neure_number, axis=0)
            tmp_w_par_deriv = curr_error * a
            curr_layer.w_par_deriv = np.zeros((num, befor_layer.neure_number * curr_layer.neure_number))
            for k in range(0, num):
                tmp = tmp_w_par_deriv[neure_number * k:neure_number * k + neure_number, :]
                curr_layer.w_par_deriv[k, :] = tmp.reshape(tmp.shape[0] * tmp.shape[1], order='C')
            curr_layer.b_par_deriv = curr_layer.error.transpose()

    def jacobian(self):
        """Assemble the full Jacobian, one row per training sample."""
        layers = self.net_struct.hidden_layers
        row = self.net_struct.x.shape[1]
        col = 0
        for i in range(0, len(layers) - 1):
            col = col + layers[i] * layers[i + 1] + layers[i + 1]
        j = np.zeros((row, col))
        layer_num = len(self.net_struct.layers)
        index = 0
        for i in range(1, layer_num):
            curr_layer = self.net_struct.layers[i]
            w_col = curr_layer.w_par_deriv.shape[1]
            b_col = curr_layer.b_par_deriv.shape[1]
            j[:, index:index + w_col] = curr_layer.w_par_deriv
            index = index + w_col
            j[:, index:index + b_col] = curr_layer.b_par_deriv
            index = index + b_col
        return j

    def gradCheck(self):
        """Finite-difference check of the Jacobian. Only valid for a 2-input /
        one-hidden-layer / 1-output network; note it applies the default 'sigm'
        activation to both layers."""
        W1 = self.net_struct.layers[1].w
        b1 = self.net_struct.layers[1].b
        n = self.net_struct.layers[1].neure_number
        W2 = self.net_struct.layers[2].w
        b2 = self.net_struct.layers[2].b
        x = self.net_struct.x
        p = []
        p.extend(W1.reshape(1, W1.shape[0] * W1.shape[1], order='C')[0])
        p.extend(b1.reshape(1, b1.shape[0] * b1.shape[1], order='C')[0])
        p.extend(W2.reshape(1, W2.shape[0] * W2.shape[1], order='C')[0])
        p.extend(b2.reshape(1, b2.shape[0] * b2.shape[1], order='C')[0])
        old_p = list(p)  # fixed: the original aliased p, so it was never restored
        jac = []
        for i in range(0, x.shape[1]):
            xi = np.array([x[:, i]]).transpose()
            ji = []
            for j in range(0, len(p)):
                W1 = np.array(p[0:2 * n]).reshape(n, 2, order='C')
                b1 = np.array(p[2 * n:2 * n + n]).reshape(n, 1, order='C')
                W2 = np.array(p[3 * n:4 * n]).reshape(1, n, order='C')
                b2 = np.array(p[4 * n:4 * n + 1]).reshape(1, 1, order='C')
                z2 = W1.dot(xi) + b1
                a2 = self.actFun(z2)
                z3 = W2.dot(a2) + b2
                h1 = self.actFun(z3)
                p[j] = p[j] + 0.00001  # perturb one parameter
                W1 = np.array(p[0:2 * n]).reshape(n, 2, order='C')
                b1 = np.array(p[2 * n:2 * n + n]).reshape(n, 1, order='C')
                W2 = np.array(p[3 * n:4 * n]).reshape(1, n, order='C')
                b2 = np.array(p[4 * n:4 * n + 1]).reshape(1, 1, order='C')
                z2 = W1.dot(xi) + b1
                a2 = self.actFun(z2)
                z3 = W2.dot(a2) + b2
                h = self.actFun(z3)
                g = (h[0][0] - h1[0][0]) / 0.00001
                ji.append(g)
            jac.append(ji)
            p = list(old_p)
        return jac

    def jjje(self):
        """Return J^T J and -J^T e for the current residuals e = y - prediction."""
        layer_number = self.net_struct.layer_num
        e = self.net_struct.y - self.net_struct.layers[layer_number - 1].output_val
        e = e.transpose()
        j = self.jacobian()
        # Gradient check (compare the analytic Jacobian with finite differences):
        # j1 = -np.array(self.gradCheck())
        # jk = j.reshape(1, j.shape[0] * j.shape[1])
        # jk1 = j1.reshape(1, j1.shape[0] * j1.shape[1])
        # plt.plot(jk[0])
        # plt.plot(jk1[0], '.')
        # plt.show()
        jj = j.transpose().dot(j)
        je = -j.transpose().dot(e)
        return jj, je

    def lm(self):
        """Levenberg-Marquardt training loop."""
        mu = self.mu
        beta = self.beta
        iteration = self.iteration
        tol = self.tol
        y = self.net_struct.y
        self.forward()
        pred = self.net_struct.layers[self.net_struct.layer_num - 1].output_val
        pref = self.perfermance(y, pred)
        for i in range(0, iteration):
            print('iter:', i, 'error:', pref)
            # 1) stop once the error is small enough
            if pref < tol:
                break
            # 2) compute J^T J and -J^T e for the current weights
            self.backward()
            self.parDeriv()
            jj, je = self.jjje()
            while True:
                # 3) solve the damped normal equations for the step
                A = jj + mu * np.diag(np.ones(jj.shape[0]))
                delta_w_b = pinv(A).dot(je)
                # 4) try the step; keep it only if the error decreases
                old_net_struct = copy.deepcopy(self.net_struct)
                self.updataNetStruct(delta_w_b)
                self.forward()
                pred1 = self.net_struct.layers[self.net_struct.layer_num - 1].output_val
                pref1 = self.perfermance(y, pred1)
                if pref1 < pref:
                    mu = mu / beta
                    pref = pref1
                    break
                # otherwise increase the damping and roll the step back
                mu = mu * beta
                self.net_struct = copy.deepcopy(old_net_struct)

    def updataNetStruct(self, delta_w_b):
        """Apply the flat update vector to every layer's weights and biases."""
        layer_number = self.net_struct.layer_num
        index = 0
        for i in range(1, layer_number):
            before_layer = self.net_struct.layers[i - 1]
            curr_layer = self.net_struct.layers[i]
            w_num = before_layer.neure_number * curr_layer.neure_number
            b_num = curr_layer.neure_number
            w = delta_w_b[index:index + w_num]
            w = w.reshape(curr_layer.neure_number, before_layer.neure_number, order='C')
            index = index + w_num
            b = delta_w_b[index:index + b_num]
            index = index + b_num
            curr_layer.w += w
            curr_layer.b += b

    def perfermance(self, y, pred):
        error = y - pred
        return norm(error) / len(y)

    def plotSamples(self, n=40):
        x = np.array([np.linspace(0, 3, n)])
        x = x.repeat(n, axis=0)
        y = x.transpose()
        z = np.zeros((n, n))
        for i in range(0, x.shape[0]):
            for j in range(0, x.shape[1]):
                z[i][j] = sampleFun(x[i][j], y[i][j])  # fixed: sampleFun is module-level, not a method
        fig = plt.figure()
        ax = fig.add_subplot(projection='3d')  # fig.gca(projection=...) is removed in newer matplotlib
        ax.plot_surface(x, y, z, cmap='autumn', cstride=2, rstride=2)
        ax.set_xlabel("X-Label")
        ax.set_ylabel("Y-Label")
        ax.set_zlabel("Z-Label")
        plt.show()


def sinSamples(n):
    """n samples of sin(x * y) along a line (not used in the demo below)."""
    x = np.array([np.linspace(-0.5, 0.5, n)])
    y = x + 0.2
    z = np.zeros((n, 1))
    for i in range(0, x.shape[1]):
        z[i] = np.sin(x[0][i] * y[0][i])
    X = np.zeros((n, 2))
    k = 0
    for xi, yi in zip(x.transpose(), y.transpose()):
        X[k][0] = xi[0]
        X[k][1] = yi[0]
        k = k + 1
    return X, z


def peaksSamples(n):
    """An n-by-n grid sampled from the peaks surface; returns (samples, targets)."""
    x = np.array([np.linspace(-3, 3, n)])
    x = x.repeat(n, axis=0)
    y = x.transpose()
    z = np.zeros((n, n))
    for i in range(0, x.shape[0]):
        for j in range(0, x.shape[1]):
            z[i][j] = sampleFun(x[i][j], y[i][j])
    X = np.zeros((n * n, 2))
    x_list = x.reshape(n * n, 1)
    y_list = y.reshape(n * n, 1)
    z_list = z.reshape(n * n, 1)
    k = 0
    for xi, yi in zip(x_list, y_list):
        X[k][0] = xi[0]
        X[k][1] = yi[0]
        k = k + 1
    return X, z_list.transpose()


def sampleFun(x, y):
    """MATLAB's peaks function."""
    z = 3 * pow((1 - x), 2) * exp(-(pow(x, 2)) - pow((y + 1), 2)) \
        - 10 * (x / 5 - pow(x, 3) - pow(y, 5)) * exp(-pow(x, 2) - pow(y, 2)) \
        - 1.0 / 3 * exp(-pow((x + 1), 2) - pow(y, 2))  # 1.0/3 avoids the old integer division
    return z


if __name__ == '__main__':
    hidden_layers = [10, 10]           # two hidden layers, 10 neurons each
    activ_fun_list = ['sigm', 'sigm']  # hidden activations: 'sigm', 'tanh', 'radb' or 'line';
                                       # the output layer defaults to 'line' if not given
    X, z = peaksSamples(20)            # training data
    X = X.transpose()
    bp = NetStruct(X, z, hidden_layers, activ_fun_list)  # build the network
    tr = Train(bp)                     # set up the trainer
    tr.train()                         # train with Levenberg-Marquardt
    XX, z0 = peaksSamples(40)          # test data
    XX = XX.transpose()
    z1 = tr.sim(XX)                    # predictions of the trained network
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(z0[0])                     # ground truth
    ax.plot(z1[0], 'r.')               # predictions
    plt.legend((r'real data', r'predict data'))
    plt.show()
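For reference, the training surface produced by sampleFun is MATLAB's well-known peaks function:

$$z = 3(1-x)^2 e^{-x^2-(y+1)^2} - 10\left(\frac{x}{5} - x^3 - y^5\right) e^{-x^2-y^2} - \frac{1}{3} e^{-(x+1)^2-y^2}$$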
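As an independent sanity check, the same kind of network can be fitted with SciPy's own Levenberg-Marquardt driver, scipy.optimize.least_squares with method='lm' (available since SciPy 0.17), and its residual compared with the script's error curve. This is a minimal sketch, not part of the original script: the unpack and residuals helpers are names introduced here for illustration, and for simplicity it uses a single hidden layer (2-10-1) rather than the two-layer demo above.

import numpy as np
from scipy.optimize import least_squares

def unpack(p, n):
    # hypothetical helper: split the flat parameter vector into W1, b1, W2, b2
    W1 = p[:2 * n].reshape(n, 2)
    b1 = p[2 * n:3 * n].reshape(n, 1)
    W2 = p[3 * n:4 * n].reshape(1, n)
    b2 = p[4 * n:4 * n + 1].reshape(1, 1)
    return W1, b1, W2, b2

def residuals(p, X, z, n):
    # forward pass of a 2-n-1 network: sigmoid hidden layer, linear output
    W1, b1, W2, b2 = unpack(p, n)
    a = 1.0 / (1.0 + np.exp(-(W1.dot(X) + b1)))
    pred = W2.dot(a) + b2
    return (pred - z).ravel()

X, z = peaksSamples(20)  # same training grid as the demo above
X = X.transpose()
n = 10
sol = least_squares(residuals, np.random.randn(4 * n + 1), method='lm', args=(X, z, n))
print('final residual norm:', np.linalg.norm(sol.fun))

MINPACK's LM implementation requires at least as many residuals as parameters; the 400 grid samples comfortably cover the 41 weights and biases here.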
The result computed by the code above is shown in the figure below. Because of the random initial weights (among other factors), convergence occasionally degrades, but most runs reach the result shown. Improvements are planned; corrections are welcome.
[Figure: the peaks test targets (line) and the network predictions (red dots)]
Reposted from: https://www.cnblogs.com/jiangu66/p/3194175.html
總結
以上是生活随笔為你收集整理的基于Levenberg-Marquardt训练算法的BP网络Python实现的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 黑客与画家 第十四章
- 下一篇: ADS的go to命令