Machine Learning with Sklearn in Practice: Extra Trees and Gradient Boosting Trees
Extra Trees (Extremely Randomized Trees)
```python
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt

# A decision tree splits on the condition with the largest information gain.
# Extra Trees adds two sources of randomness: 1. random samples, 2. random split
# conditions (not necessarily the best possible split).
# As in a random forest, a random subset of candidate features is used, but instead
# of searching for the most discriminative threshold, a threshold is drawn at random
# for each candidate feature, and the best of these randomly generated thresholds
# is picked as the splitting rule.

X, y = datasets.load_wine(return_X_y=True)

clf = DecisionTreeClassifier()
cross_val_score(clf, X, y, cv=6, scoring="accuracy").mean()

forest = RandomForestClassifier(n_estimators=100)
cross_val_score(forest, X, y, cv=6, scoring="accuracy").mean()

extra = ExtraTreesClassifier(n_estimators=100)
cross_val_score(extra, X, y, cv=6, scoring="accuracy").mean()
```

Result:
```
0.8653256704980842
0.9777777777777779
0.9833333333333334
```
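To make the "random threshold" idea concrete, here is a minimal sketch of how an extremely randomized split could be chosen at one node. This is not sklearn's actual implementation: the helper name `random_split` and the use of variance reduction as the impurity score are illustrative assumptions.

```python
import numpy as np

def random_split(X, y, rng=np.random.default_rng(0)):
    """Sketch: draw one random threshold per feature, keep the
    (feature, threshold) pair with the lowest weighted variance."""
    best = None
    for j in range(X.shape[1]):
        lo, hi = X[:, j].min(), X[:, j].max()
        t = rng.uniform(lo, hi)  # a random threshold, not the optimal one
        left, right = y[X[:, j] <= t], y[X[:, j] > t]
        if len(left) == 0 or len(right) == 0:
            continue
        score = (len(left) * left.var() + len(right) * right.var()) / len(y)
        if best is None or score < best[0]:
            best = (score, j, t)
    return best  # (impurity, feature index, threshold)
```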
Using Gradient Boosting Trees

```python
import numpy as np
from sklearn import datasets
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

X, y = datasets.load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

gbdt = GradientBoostingClassifier()
gbdt.fit(X_train, y_train)
gbdt.score(X_test, y_test)
```

Result:
0.9666666666666667

```python
import numpy as np
import matplotlib.pyplot as plt
# Regression as the limit of classification: when the number of classes
# grows large enough, classification becomes regression.
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import tree

# X features: spending amount and time spent online
# y target: age 14 (first year of high school), 16 (final year of high school),
#           24 (college graduate), 26 (two years into a job)
X = np.array([[800, 3], [1200, 1], [1800, 4], [2500, 2]])
y = np.array([14, 16, 24, 26])

gbdt = GradientBoostingRegressor(n_estimators=10)
gbdt.fit(X, y)
gbdt.predict(X)
```

Result:
array([16.09207064, 17.39471376, 22.60528624, 23.90792936])

The first tree of the ensemble (index [0, 0]) can be drawn directly:

```python
plt.figure(figsize=(9, 6))
_ = tree.plot_tree(gbdt[0, 0], filled=True, feature_names=["spending", "online"])
```

The node impurity shown for the pair of youngest samples checks out: friedman_mse = ((y[:2] - y[:2].mean())**2).mean() = 1.
The value in each leaf is the difference between the targets 14, 16, 24, 26 and their mean 20, i.e. the residual. The smaller the residual, the better the fit and the more accurate the prediction.
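A quick numeric check of those residuals and of the friedman_mse value above (a sketch; the leaf grouping is read off the tree plot rather than recomputed with sklearn):

```python
import numpy as np

y = np.array([14, 16, 24, 26])
residuals = y - y.mean()        # the initial prediction is the mean, 20
print(residuals)                # [-6. -4.  4.  6.]

# impurity of the leaf holding the two youngest samples
print(((y[:2] - y[:2].mean()) ** 2).mean())  # 1.0
```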
The second tree fits the residuals left over after the first step:

```python
plt.figure(figsize=(9, 6))
_ = tree.plot_tree(gbdt[1, 0], filled=True, feature_names=["spending", "online"])

# learning_rate = 0.1
gbdt1 = np.array([-6, -4, 4, 6])
# one gradient boosting step with learning rate 0.1 shrinks each residual by 10%
gbdt1 - gbdt1 * 0.1
```

Result:
array([-5.4, -3.6,  3.6,  5.4])

```python
# learning_rate = 0.1
gbdt2 = np.array([-5.4, -3.6, 3.6, 5.4])
# the next boosting step shrinks the residuals by another 10%
gbdt2 - gbdt2 * 0.1
```

Result:
array([-4.86, -3.24,  3.24,  4.86])

```python
plt.figure(figsize=(9, 6))
_ = tree.plot_tree(gbdt[2, 0], filled=True, feature_names=["spending", "online"])
```

The last tree:
```python
plt.figure(figsize=(9, 6))
_ = tree.plot_tree(gbdt[-1, 0], filled=True, feature_names=["spending", "online"])

# learning_rate = 0.1
gbdt3 = np.array([-2.325, -1.55, 1.55, 2.325])  # residuals after nine steps
# one more boosting step with learning rate 0.1
gbdt3 - gbdt3 * 0.1
```

Result:
array([-2.0925, -1.395 ,  1.395 ,  2.0925])

Subtracting these remaining residuals from the targets 14, 16, 24, 26 gives the final predictions:
16.0925, 17.395, 22.605, 23.9075
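The hand calculation is just the geometric shrinkage residual_t = residual_0 * (1 - learning_rate)^t. A short sketch reproducing it end to end, assuming (as the tree plots suggest) that each tree fits the residuals exactly on this tiny dataset:

```python
import numpy as np

y = np.array([14, 16, 24, 26])
residual = y - y.mean()                   # start from the mean prediction, 20
for _ in range(10):                       # n_estimators = 10
    residual = residual - 0.1 * residual  # each tree predicts the residual; lr = 0.1
print(y - residual)  # [16.0925 17.395  22.605  23.9075], approximately gbdt.predict(X)
```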
Compare with the model's own predictions:

```python
gbdt.predict(X)
```

Result:
array([16.09207064, 17.39471376, 22.60528624, 23.90792936])
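The same numbers can be recovered from the fitted model itself by accumulating each tree's contribution. A quick verification sketch, continuing the session above; it relies on the squared-error default, where the initial prediction is the training mean:

```python
import numpy as np

# initial prediction is the training mean, then each tree adds lr * its output
pred = np.full(len(y), y.mean())
for stage in gbdt.estimators_[:, 0]:  # the 10 fitted regression trees
    pred = pred + gbdt.learning_rate * stage.predict(X)
print(pred)  # reproduces gbdt.predict(X)
```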
Gradient Ascent and Gradient Descent

Descent subtracts the gradient to find a minimum: x ← x − learning_rate · f′(x). Ascent adds the gradient to find a maximum: x ← x + learning_rate · f′(x).
```python
import numpy as np
import matplotlib.pyplot as plt

f = lambda x: (x - 3)**2 + 2.5*x - 7.5
# derivative = gradient: 2(x - 3) + 2.5 = 0  =>  x = 1.75

x = np.linspace(-2, 5, 100)
y = f(x)
plt.plot(x, y)

learning_rate = 0.1

# derivative function
d = lambda x: 2*(x - 3) + 2.5

min_value = np.random.randint(-3, 5, size=1)[0]
print("---------------", min_value)

# keep the previous value around so we can test the exit condition
min_value_last = min_value + 0.1
tol = 0.0001
count = 0
while True:
    if np.abs(min_value - min_value_last) < tol:
        break
    # gradient descent
    min_value_last = min_value
    # update step: move against the gradient
    min_value = min_value - learning_rate * d(min_value)
    print("+++++++++++++++++%d" % (count), min_value)
    count = count + 1
print("****************", min_value)
```

Result:
```
--------------- 4
+++++++++++++++++0 3.55
+++++++++++++++++1 3.19
+++++++++++++++++2 2.902
+++++++++++++++++3 2.6716
+++++++++++++++++4 2.48728
+++++++++++++++++5 2.339824
+++++++++++++++++6 2.2218592
+++++++++++++++++7 2.12748736
+++++++++++++++++8 2.051989888
+++++++++++++++++9 1.9915919104
+++++++++++++++++10 1.94327352832
+++++++++++++++++11 1.904618822656
+++++++++++++++++12 1.8736950581248
+++++++++++++++++13 1.84895604649984
+++++++++++++++++14 1.829164837199872
+++++++++++++++++15 1.8133318697598977
+++++++++++++++++16 1.8006654958079182
+++++++++++++++++17 1.7905323966463347
+++++++++++++++++18 1.7824259173170678
+++++++++++++++++19 1.7759407338536541
+++++++++++++++++20 1.7707525870829233
+++++++++++++++++21 1.7666020696663387
+++++++++++++++++22 1.763281655733071
+++++++++++++++++23 1.760625324586457
+++++++++++++++++24 1.7585002596691655
+++++++++++++++++25 1.7568002077353324
+++++++++++++++++26 1.755440166188266
+++++++++++++++++27 1.7543521329506127
+++++++++++++++++28 1.7534817063604902
+++++++++++++++++29 1.7527853650883922
+++++++++++++++++30 1.7522282920707137
+++++++++++++++++31 1.751782633656571
+++++++++++++++++32 1.7514261069252568
+++++++++++++++++33 1.7511408855402055
+++++++++++++++++34 1.7509127084321645
+++++++++++++++++35 1.7507301667457316
+++++++++++++++++36 1.7505841333965853
+++++++++++++++++37 1.7504673067172682
+++++++++++++++++38 1.7503738453738147
**************** 1.7503738453738147
```

Gradient ascent works the same way on a concave function, adding the gradient instead of subtracting it:

```python
import numpy as np
import matplotlib.pyplot as plt

f2 = lambda x: -(x - 3)**2 + 2.5*x - 7.5

# gradient ascent: derivative function
result = []
d2 = lambda x: -2*(x - 3) + 2.5
learning_rate = 0.1
# max_value starts from a random guess; the goal is to reach the optimum
# as fast as possible. Vanishing / exploding gradients can occur when the
# learning rate is chosen badly (exploding when it is too large).
max_value = np.random.randint(2, 8, size=1)[0]
# max_value = 1000

result.append(max_value)
print('-------------------', max_value)

# keep the previous value around so we can test the exit condition
max_value_last = max_value + 0.001
# tolerance / precision: stop once the change is below one ten-thousandth
precision = 0.0001
count = 0
while True:
    if np.abs(max_value - max_value_last) < precision:
        break
    # gradient ascent
    max_value_last = max_value
    # update step: move along the gradient
    max_value = max_value + learning_rate * d2(max_value)
    result.append(max_value)
    count += 1
    print('+++++++++++++++++++++%d' % (count), max_value)
print('**********************', max_value)

# visualize the trajectory
plt.figure(figsize=(12, 9))
x = np.linspace(4, 8, 100)
y = f2(x)
plt.plot(x, y)
result = np.asarray(result)
plt.plot(result, f2(result), '*')
```

Result:
```
------------------- 5
+++++++++++++++++++++1 4.85
+++++++++++++++++++++2 4.7299999999999995
+++++++++++++++++++++3 4.6339999999999995
+++++++++++++++++++++4 4.5572
+++++++++++++++++++++5 4.49576
+++++++++++++++++++++6 4.4466079999999994
+++++++++++++++++++++7 4.407286399999999
+++++++++++++++++++++8 4.37582912
+++++++++++++++++++++9 4.350663296
+++++++++++++++++++++10 4.3305306368
+++++++++++++++++++++11 4.31442450944
+++++++++++++++++++++12 4.301539607552
+++++++++++++++++++++13 4.2912316860416
+++++++++++++++++++++14 4.2829853488332805
+++++++++++++++++++++15 4.276388279066625
+++++++++++++++++++++16 4.2711106232533
+++++++++++++++++++++17 4.26688849860264
+++++++++++++++++++++18 4.263510798882112
+++++++++++++++++++++19 4.260808639105689
+++++++++++++++++++++20 4.2586469112845515
+++++++++++++++++++++21 4.256917529027641
+++++++++++++++++++++22 4.255534023222113
+++++++++++++++++++++23 4.254427218577691
+++++++++++++++++++++24 4.2535417748621525
+++++++++++++++++++++25 4.252833419889722
+++++++++++++++++++++26 4.252266735911777
+++++++++++++++++++++27 4.251813388729422
+++++++++++++++++++++28 4.251450710983538
+++++++++++++++++++++29 4.251160568786831
+++++++++++++++++++++30 4.250928455029465
+++++++++++++++++++++31 4.250742764023572
+++++++++++++++++++++32 4.250594211218858
+++++++++++++++++++++33 4.250475368975087
+++++++++++++++++++++34 4.2503802951800695
********************** 4.2503802951800695
```
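Both runs converge where the derivative vanishes, which is easy to confirm analytically (a small sanity-check sketch; the closed forms follow from setting each derivative to zero):

```python
# descent: f'(x)  =  2(x - 3) + 2.5 = 0  =>  x = 3 - 1.25 = 1.75
# ascent:  f2'(x) = -2(x - 3) + 2.5 = 0  =>  x = 3 + 1.25 = 4.25
# the loops above stop at ~1.7504 and ~4.2504, within tolerance of the true optima
print(3 - 2.5 / 2, 3 + 2.5 / 2)  # 1.75 4.25
```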