1.3 Program Example: Gradient Descent - Machine Learning Notes - Stanford, Andrew Ng
Regression Module
The regression module provides two learning strategies for training the model: batch gradient descent and stochastic gradient descent:
```python
# coding: utf-8
# linear_regression/regression.py
import numpy as np
import time


def exeTime(func):
    """Decorator that measures execution time."""
    def newFunc(*args, **kwargs):
        t0 = time.time()
        back = func(*args, **kwargs)
        return back, time.time() - t0
    return newFunc


def loadDataSet(filename):
    """Load data from a file.

    Uses the format from *Machine Learning in Action*:
    "feature1 TAB feature2 TAB feature3 TAB label"

    Args:
        filename: name of the data file
    Returns:
        X: training sample matrix
        y: label matrix
    """
    numFeat = len(open(filename).readline().split('\t')) - 1
    X = []
    y = []
    file = open(filename)
    for line in file.readlines():
        lineArr = []
        curLine = line.strip().split('\t')
        for i in range(numFeat):
            lineArr.append(float(curLine[i]))
        X.append(lineArr)
        y.append(float(curLine[-1]))
    return np.mat(X), np.mat(y).T


def h(theta, x):
    """Hypothesis (prediction) function.

    Args:
        theta: parameter matrix
        x: feature vector
    Returns:
        predicted value
    """
    return (theta.T * x)[0, 0]


def J(theta, X, y):
    """Cost function.

    Args:
        theta: parameter matrix
        X: sample matrix
        y: label matrix
    Returns:
        prediction error (cost)
    """
    m = len(X)
    return (X * theta - y).T * (X * theta - y) / (2 * m)


@exeTime
def bgd(rate, maxLoop, epsilon, X, y):
    """Batch gradient descent.

    Args:
        rate: learning rate
        maxLoop: maximum number of iterations
        epsilon: convergence precision
        X: sample matrix
        y: label matrix
    Returns:
        (theta, errors, thetas), timeConsumed
    """
    m, n = X.shape
    # Initialize theta
    theta = np.zeros((n, 1))
    count = 0
    converged = False
    error = float('inf')
    errors = []
    thetas = {}
    for j in range(n):
        thetas[j] = [theta[j, 0]]
    while count <= maxLoop:
        if converged:
            break
        count = count + 1
        for j in range(n):
            # Partial derivative over all m samples (a 1x1 matrix; take [0,0])
            deriv = (y - X * theta).T * X[:, j] / m
            theta[j, 0] = theta[j, 0] + rate * deriv[0, 0]
            thetas[j].append(theta[j, 0])
        error = J(theta, X, y)
        errors.append(error[0, 0])
        # Stop once converged
        if error < epsilon:
            converged = True
    return theta, errors, thetas


@exeTime
def sgd(rate, maxLoop, epsilon, X, y):
    """Stochastic gradient descent.

    Args:
        rate: learning rate
        maxLoop: maximum number of iterations
        epsilon: convergence precision
        X: sample matrix
        y: label matrix
    Returns:
        (theta, errors, thetas), timeConsumed
    """
    m, n = X.shape
    # Initialize theta
    theta = np.zeros((n, 1))
    count = 0
    converged = False
    error = float('inf')
    errors = []
    thetas = {}
    for j in range(n):
        thetas[j] = [theta[j, 0]]
    while count <= maxLoop:
        if converged:
            break
        count = count + 1
        errors.append(float('inf'))
        for i in range(m):
            if converged:
                break
            # Update theta using a single sample
            diff = y[i, 0] - h(theta, X[i].T)
            for j in range(n):
                theta[j, 0] = theta[j, 0] + rate * diff * X[i, j]
                thetas[j].append(theta[j, 0])
            error = J(theta, X, y)
            errors[-1] = error[0, 0]
            # Stop once converged
            if error < epsilon:
                converged = True
    return theta, errors, thetas
```

With the help of the comments, the code should be easy to follow; it simply reproduces the formulas from the lecture using NumPy.
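Concretely, the formulas in question are the following (in the course's notation, with the hypothesis $h_\theta(x) = \theta^T x$):

```latex
% Cost function implemented by J():
J(\theta) = \frac{1}{2m} (X\theta - y)^T (X\theta - y)

% Batch update implemented by bgd(): each step uses all m samples
\theta_j := \theta_j + \alpha \cdot \frac{1}{m} \sum_{i=1}^{m}
            \bigl(y^{(i)} - h_\theta(x^{(i)})\bigr)\, x_j^{(i)}

% Stochastic update implemented by sgd(): each step uses one sample i
\theta_j := \theta_j + \alpha \,\bigl(y^{(i)} - h_\theta(x^{(i)})\bigr)\, x_j^{(i)}
```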
Test Programs
bgd test program
```python
# coding: utf-8
# linear_regression/test_bgd.py
import regression
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np

if __name__ == "__main__":
    X, y = regression.loadDataSet('data/ex1.txt')
    m, n = X.shape
    X = np.concatenate((np.ones((m, 1)), X), axis=1)

    rate = 0.01
    maxLoop = 1500
    epsilon = 0.01

    result, timeConsumed = regression.bgd(rate, maxLoop, epsilon, X, y)
    theta, errors, thetas = result

    # Plot the fitted line
    fittingFig = plt.figure()
    title = 'bgd: rate=%.2f, maxLoop=%d, epsilon=%.3f \n time: %ds' % (rate, maxLoop, epsilon, timeConsumed)
    ax = fittingFig.add_subplot(111, title=title)
    trainingSet = ax.scatter(X[:, 1].flatten().A[0], y[:, 0].flatten().A[0])

    xCopy = X.copy()
    xCopy.sort(0)
    yHat = xCopy * theta
    fittingLine, = ax.plot(xCopy[:, 1], yHat, color='g')

    ax.set_xlabel('Population of City in 10,000s')
    ax.set_ylabel('Profit in $10,000s')

    plt.legend([trainingSet, fittingLine], ['Training Set', 'Linear Regression'])
    plt.show()

    # Plot the error curve
    errorsFig = plt.figure()
    ax = errorsFig.add_subplot(111)
    ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.4f'))

    ax.plot(range(len(errors)), errors)
    ax.set_xlabel('Number of iterations')
    ax.set_ylabel('Cost J')
    plt.show()

    # Plot the descent surface of the cost function
    size = 100
    theta0Vals = np.linspace(-10, 10, size)
    theta1Vals = np.linspace(-2, 4, size)
    JVals = np.zeros((size, size))
    for i in range(size):
        for j in range(size):
            col = np.matrix([[theta0Vals[i]], [theta1Vals[j]]])
            JVals[i, j] = regression.J(col, X, y)[0, 0]

    theta0Vals, theta1Vals = np.meshgrid(theta0Vals, theta1Vals)
    JVals = JVals.T
    contourSurf = plt.figure()
    # Older matplotlib used contourSurf.gca(projection='3d') here
    ax = contourSurf.add_subplot(projection='3d')

    ax.plot_surface(theta0Vals, theta1Vals, JVals, rstride=2, cstride=2, alpha=0.3,
                    cmap=cm.rainbow, linewidth=0, antialiased=False)
    ax.plot(thetas[0], thetas[1], 'rx')
    ax.set_xlabel(r'$\theta_0$')
    ax.set_ylabel(r'$\theta_1$')
    ax.set_zlabel(r'$J(\theta)$')

    plt.show()

    # Plot the cost contours
    contourFig = plt.figure()
    ax = contourFig.add_subplot(111)
    ax.set_xlabel(r'$\theta_0$')
    ax.set_ylabel(r'$\theta_1$')

    CS = ax.contour(theta0Vals, theta1Vals, JVals, np.logspace(-2, 3, 20))
    plt.clabel(CS, inline=1, fontsize=10)

    # Mark the optimum
    ax.plot(theta[0, 0], theta[1, 0], 'rx', markersize=10, linewidth=2)
    # Plot the gradient descent trajectory
    ax.plot(thetas[0], thetas[1], 'rx', markersize=3, linewidth=1)
    ax.plot(thetas[0], thetas[1], 'r-')
    plt.show()
```

Fit result:
As you can see, bgd does not run slowly at all. This is because the regression module computes $\theta$ in vectorized form, which lets the computer speed up the work through parallel computation.
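For illustration, the per-component loop in `bgd` can be collapsed into a single matrix expression. A minimal sketch, assuming `np.matrix` inputs as produced by `regression.loadDataSet` (note that `bgd` above updates the components one at a time, so the two trajectories differ slightly):

```python
import numpy as np

def bgd_step(theta, X, y, rate):
    """One fully vectorized batch update: theta := theta + (rate/m) X^T (y - X theta).

    Unlike bgd above, all components of theta are updated simultaneously.
    """
    m = X.shape[0]
    return theta + rate * X.T * (y - X * theta) / m
```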
Error versus number of iterations:
Descent surface of the cost function:
Gradient descent trajectory:
sgd test program
```python
# coding: utf-8
# linear_regression/test_sgd.py
import regression
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np

if __name__ == "__main__":
    X, y = regression.loadDataSet('data/ex1.txt')
    m, n = X.shape
    X = np.concatenate((np.ones((m, 1)), X), axis=1)

    rate = 0.01
    maxLoop = 100
    epsilon = 0.01

    result, timeConsumed = regression.sgd(rate, maxLoop, epsilon, X, y)
    theta, errors, thetas = result

    # Plot the fitted line
    fittingFig = plt.figure()
    title = 'sgd: rate=%.2f, maxLoop=%d, epsilon=%.3f \n time: %ds' % (rate, maxLoop, epsilon, timeConsumed)
    ax = fittingFig.add_subplot(111, title=title)
    trainingSet = ax.scatter(X[:, 1].flatten().A[0], y[:, 0].flatten().A[0])

    xCopy = X.copy()
    xCopy.sort(0)
    yHat = xCopy * theta
    fittingLine, = ax.plot(xCopy[:, 1], yHat, color='g')

    ax.set_xlabel('Population of City in 10,000s')
    ax.set_ylabel('Profit in $10,000s')

    plt.legend([trainingSet, fittingLine], ['Training Set', 'Linear Regression'])
    plt.show()

    # Plot the error curve
    errorsFig = plt.figure()
    ax = errorsFig.add_subplot(111)
    ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.4f'))

    ax.plot(range(len(errors)), errors)
    ax.set_xlabel('Number of iterations')
    ax.set_ylabel('Cost J')
    plt.show()

    # Plot the descent surface of the cost function
    size = 100
    theta0Vals = np.linspace(-10, 10, size)
    theta1Vals = np.linspace(-2, 4, size)
    JVals = np.zeros((size, size))
    for i in range(size):
        for j in range(size):
            col = np.matrix([[theta0Vals[i]], [theta1Vals[j]]])
            JVals[i, j] = regression.J(col, X, y)[0, 0]

    theta0Vals, theta1Vals = np.meshgrid(theta0Vals, theta1Vals)
    JVals = JVals.T
    contourSurf = plt.figure()
    # Older matplotlib used contourSurf.gca(projection='3d') here
    ax = contourSurf.add_subplot(projection='3d')

    ax.plot_surface(theta0Vals, theta1Vals, JVals, rstride=8, cstride=8, alpha=0.3,
                    cmap=cm.rainbow, linewidth=0, antialiased=False)
    ax.plot(thetas[0], thetas[1], 'rx')
    ax.set_xlabel(r'$\theta_0$')
    ax.set_ylabel(r'$\theta_1$')
    ax.set_zlabel(r'$J(\theta)$')

    plt.show()

    # Plot the cost contours
    contourFig = plt.figure()
    ax = contourFig.add_subplot(111)
    ax.set_xlabel(r'$\theta_0$')
    ax.set_ylabel(r'$\theta_1$')

    CS = ax.contour(theta0Vals, theta1Vals, JVals, np.logspace(-2, 3, 20))
    plt.clabel(CS, inline=1, fontsize=10)

    # Mark the optimum
    ax.plot(theta[0, 0], theta[1, 0], 'rx', markersize=10, linewidth=2)
    # Plot the gradient descent trajectory
    ax.plot(thetas[0], thetas[1], 'r', linewidth=1)
    plt.show()
```

Fit result:
Error versus number of iterations:
Gradient descent trajectory:
At a learning rate of $\alpha = 0.01$, stochastic gradient descent shows very pronounced oscillation. Its speed advantage also fails to show here, for two reasons: the sample size is small, and sgd itself is hard to accelerate through parallel computation.
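One common way to damp this oscillation, not implemented in the module above, is to shrink the learning rate as training progresses. The sketch below is only illustrative; `rate0`, `decay`, and `step` are hypothetical parameters, and apart from the decaying rate the update matches `sgd`:

```python
def sgd_epoch_with_decay(theta, X, y, rate0, decay, step):
    """One pass over the samples with a decaying learning rate.

    rate0, decay, and step are hypothetical parameters, not part of
    regression.py; assumes np.matrix inputs as in the module above.
    """
    m, n = X.shape
    for i in range(m):
        rate = rate0 / (1.0 + decay * step)  # step size shrinks over time
        diff = y[i, 0] - (theta.T * X[i].T)[0, 0]  # prediction error on sample i
        for j in range(n):
            theta[j, 0] = theta[j, 0] + rate * diff * X[i, j]
        step += 1
    return theta, step
```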