Machine Learning Knowledge Points (16): Ensemble Learning, AdaBoost Algorithm Implemented in Java
To understand the AdaBoost algorithm described in http://blog.csdn.net/fjssharpsword/article/details/61913092, I found a simple set of code online and worked through it.
1. Base classifier: implement a simple classification
1) The Instance (data object) class
package sk.adaboost;

public class Instance {
    public double[] dim; // feature value in each dimension
    public int label;    // class label (+1 or -1)

    public Instance(double[] dim, int label) {
        this.dim = dim;
        this.label = label;
    }
}

2) The Classifier interface (an abstract base class)

package sk.adaboost;

public abstract class Classifier {
    public double errorRate;  // weighted error rate of this classifier
    public int errorNumber;   // number of misclassified samples

    public abstract int classify(Instance instance);
}
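The original post's step 3), the concrete base learner SimpleClassifier, is not reproduced here, but the AdaBoost driver below relies on it: it calls new SimpleClassifier(), sc.train(instances, W, i), reads sc.errorRate, and calls classify(). What follows is only a hypothetical sketch of such a single-dimension decision stump that is consistent with those calls; the threshold and polarity fields and the threshold search are my assumptions, not the original author's code.

```java
package sk.adaboost;

// Hypothetical sketch of the missing "3)" base learner: a one-dimensional decision stump.
// Field names (dimIndex, threshold, polarity) are assumptions inferred from how the
// Adaboost class uses SimpleClassifier; the original implementation may differ.
public class SimpleClassifier extends Classifier {
    private int dimIndex;     // which feature dimension the stump splits on
    private double threshold; // split point on that dimension
    private int polarity = 1; // +1: predict +1 below the threshold; -1: the opposite

    // Train the stump on one dimension: try each sample value as a candidate threshold
    // and keep the setting with the smallest weighted error under the distribution W.
    public void train(Instance[] instances, double[] W, int dimIndex) {
        this.dimIndex = dimIndex;
        double bestError = Double.MAX_VALUE;
        for (Instance candidate : instances) {
            double t = candidate.dim[dimIndex];
            for (int p : new int[]{1, -1}) {
                double weightedError = 0;
                int errors = 0;
                for (int i = 0; i < instances.length; i++) {
                    int predicted = stump(instances[i].dim[dimIndex], t, p);
                    if (predicted != instances[i].label) {
                        weightedError += W[i];
                        errors++;
                    }
                }
                if (weightedError < bestError) {
                    bestError = weightedError;
                    this.threshold = t;
                    this.polarity = p;
                    this.errorRate = weightedError;
                    this.errorNumber = errors;
                }
            }
        }
    }

    @Override
    public int classify(Instance instance) {
        return stump(instance.dim[dimIndex], threshold, polarity);
    }

    private static int stump(double value, double threshold, int polarity) {
        return value <= threshold ? polarity : -polarity;
    }
}
```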
4) The AdaBoost ensemble learning algorithm

package sk.adaboost;

import java.util.ArrayList;
import java.util.List;

public class Adaboost {
    Instance[] instances;
    List<Classifier> classifierList = null; // the individual weak classifiers
    List<Double> alphaList = null;          // the weight of each weak classifier

    public Adaboost(Instance[] instances) {
        this.instances = instances;
    }

    public List<String> adaboost(int T) { // T base learners
        int len = this.instances.length;
        double[] W = new double[len]; // initial sample weights
        for (int i = 0; i < len; i++) {
            W[i] = 1.0 / len;
        }
        classifierList = new ArrayList<Classifier>();
        alphaList = new ArrayList<Double>();
        List<String> iHP = new ArrayList<String>();
        for (int t = 0; t < T; t++) { // T rounds
            Classifier cf = getMinErrorRateClassifier(W);
            classifierList.add(cf);
            double errorRate = cf.errorRate;
            // compute the weight of this weak classifier
            double alpha = 0.5 * Math.log((1 - errorRate) / errorRate);
            alphaList.add(alpha);
            // update the sample weights
            double z = 0;
            for (int i = 0; i < W.length; i++) {
                W[i] = W[i] * Math.exp(-alpha * instances[i].label * cf.classify(instances[i]));
                z += W[i];
            }
            for (int i = 0; i < W.length; i++) { // normalize by the factor z
                W[i] /= z;
            }
            iHP.add(String.valueOf(getErrorCount())); // record this round's training error count
        }
        return iHP;
    }

    private int getErrorCount() {
        int count = 0;
        for (Instance instance : instances) {
            if (predict(instance) != instance.label)
                count++;
        }
        return count;
    }

    /**
     * Predict the label of an instance by the alpha-weighted vote of all weak classifiers.
     */
    public int predict(Instance instance) {
        double p = 0;
        for (int i = 0; i < classifierList.size(); i++) {
            p += classifierList.get(i).classify(instance) * alphaList.get(i);
        }
        if (p > 0) return 1;
        return -1;
    }

    /**
     * Return the classifier with the lowest weighted error rate over all feature dimensions.
     */
    private Classifier getMinErrorRateClassifier(double[] W) {
        double errorRate = Double.MAX_VALUE;
        SimpleClassifier minErrorRateClassifier = null;
        int dimLength = instances[0].dim.length;
        for (int i = 0; i < dimLength; i++) {
            SimpleClassifier sc = new SimpleClassifier();
            sc.train(instances, W, i); // train a base learner on dimension i
            if (errorRate > sc.errorRate) {
                errorRate = sc.errorRate;
                minErrorRateClassifier = sc;
            }
        }
        return minErrorRateClassifier;
    }
}
A few key steps should be kept clear. In public List<String> adaboost(int T), the sample weights are first initialized uniformly, and then T rounds of base-learner training and weight updating are executed. private Classifier getMinErrorRateClassifier(double[] W) trains one base learner per feature dimension and returns the one with the lowest weighted error rate; that error rate is then used to compute the classifier weight alpha and to update the sample distribution for the next round.
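For reference, these steps correspond to the standard AdaBoost update rules, which is exactly what the code above computes:

\[
\alpha_t = \frac{1}{2}\ln\frac{1-\epsilon_t}{\epsilon_t},\qquad
W_{t+1}(i) = \frac{W_t(i)\,e^{-\alpha_t y_i h_t(x_i)}}{Z_t},\qquad
H(x) = \operatorname{sign}\!\Big(\sum_{t=1}^{T}\alpha_t h_t(x)\Big)
\]

where \(\epsilon_t\) is the weighted error rate of the t-th weak classifier \(h_t\), \(Z_t\) is the normalization factor (the variable z in the code), and \(y_i \in \{+1,-1\}\) is the label of sample i.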
5) Test the AdaBoost algorithm, combining the recorded outputs with a voting strategy and printing the ensemble result
package sk.adaboost;

import java.util.List;

public class AdaboostTest {
    public static void main(String[] args) {
        // mock data
        double[] ins1 = {0, 3};
        double[] ins2 = {1, 3};
        double[] ins3 = {2, 3};
        double[] ins4 = {3, 1};
        double[] ins5 = {4, 1};
        double[] ins6 = {5, 1};
        double[] ins7 = {6, 3};
        double[] ins8 = {7, 3};
        double[] ins9 = {8, 0};
        double[] ins10 = {9, 1};
        Instance instance1 = new Instance(ins1, 1);
        Instance instance2 = new Instance(ins2, 1);
        Instance instance3 = new Instance(ins3, 1);
        Instance instance4 = new Instance(ins4, -1);
        Instance instance5 = new Instance(ins5, -1);
        Instance instance6 = new Instance(ins6, -1);
        Instance instance7 = new Instance(ins7, 1);
        Instance instance8 = new Instance(ins8, 1);
        Instance instance9 = new Instance(ins9, 1);
        Instance instance10 = new Instance(ins10, -1);
        Instance[] instances = {instance1, instance2, instance3, instance4, instance5,
                instance6, instance7, instance8, instance9, instance10};
        // Ensemble learning: trained sequentially, with strong dependence between base learners
        Adaboost ab = new Adaboost(instances);
        List<String> iHP = ab.adaboost(10); // 10 boosting rounds; iHP records each round's error count
        // Output a result by the majority-voting combination strategy over the recorded values
        int pcount = 0, ncount = 0;
        for (String hp : iHP) {
            if (hp.equals("1")) pcount++; // rounds recorded as "1"
            if (hp.equals("0")) ncount++; // rounds recorded as "0"
        }
        if (pcount >= ncount) System.out.println("1");
        else System.out.println("0");
    }
}

6) This example is meant to aid understanding of the algorithm itself; in real applications the base learner can be replaced with other algorithms.
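As a side note, the trained ensemble can also be queried directly for a new sample through Adaboost.predict(), which returns the sign of the alpha-weighted vote of the weak classifiers. A minimal, hypothetical usage sketch that could be appended at the end of main() above (the feature values and placeholder label are made up purely for illustration):

```java
// Hypothetical usage sketch: classify a new, unseen sample with the trained ensemble.
// The feature values {2.5, 3} and the placeholder label 0 are made up for illustration.
Instance newSample = new Instance(new double[]{2.5, 3}, 0);
int predicted = ab.predict(newSample); // sign of the alpha-weighted vote, +1 or -1
System.out.println("Predicted label: " + predicted);
```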
總結(jié)
以上是生活随笔為你收集整理的机器学习知识点(十六)集成学习AdaBoost算法Java实现的全部內(nèi)容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 机器学习笔记(八)集成学习
- 下一篇: (转载)机器学习知识点(十七)Baggi