离线轻量级大数据平台Spark之MLib机器学习库SVM实例
生活随笔
收集整理的這篇文章主要介紹了
离线轻量级大数据平台Spark之MLib机器学习库SVM实例
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
支持向量機,因其英文名為support vector machine,故一般簡稱SVM,通俗來講,它是一種二類分類模型,其基本模型定義為特征空間上的間隔最大的線性分類器,其學習策略便是間隔最大化,最終可轉化為一個凸二次規劃問題的求解。
http://www.dataguru.cn/thread-371987-1-1.html
參考該網站理解SVM基礎數學原理。
執行結果和樣本集有關系。
http://www.dataguru.cn/thread-371987-1-1.html
參考該網站理解SVM基礎數學原理。
依據距離計算分類的思想,適用于各維。
具體代碼如下:
package sk.mlib;import org.apache.spark.SparkConf; import org.apache.spark.SparkContext;import scala.Tuple2; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.function.Function; import org.apache.spark.mllib.classification.SVMModel; import org.apache.spark.mllib.classification.SVMWithSGD; import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics; import org.apache.spark.mllib.linalg.Vectors; import org.apache.spark.mllib.regression.LabeledPoint; import org.apache.spark.mllib.util.MLUtils;public class SVMWithSGDDemo {public static void main(String[] args) {SparkConf conf = new SparkConf().setAppName("JavaSVMWithSGDExample");SparkContext sc = new SparkContext(conf);// $example on$String path = "/tmp/svmdata.txt";JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(sc, path).toJavaRDD();// Split initial RDD into two... [60% training data, 40% testing data].JavaRDD<LabeledPoint> training = data.sample(false, 0.6, 11L);training.cache();JavaRDD<LabeledPoint> test = data.subtract(training);// Run training algorithm to build the model.int numIterations = 100;final SVMModel model = SVMWithSGD.train(training.rdd(), numIterations);// Clear the default threshold.model.clearThreshold();// Compute raw scores on the test set.JavaRDD<Tuple2<Object, Object>> scoreAndLabels = test.map(new Function<LabeledPoint, Tuple2<Object, Object>>() {public Tuple2<Object, Object> call(LabeledPoint p) {Double score = model.predict(p.features());return new Tuple2<Object, Object>(score, p.label());}});// Get evaluation metrics.BinaryClassificationMetrics metrics =new BinaryClassificationMetrics(JavaRDD.toRDD(scoreAndLabels));double auROC = metrics.areaUnderROC();System.out.println("Area under ROC = " + auROC);// Save and load modelmodel.save(sc, "/tmp/javaSVMWithSGDModel");SVMModel sameModel = SVMModel.load(sc, "/tmp/javaSVMWithSGDModel");//應(yīng)用模型分類System.out.println("Prediction of (-0.857554,0.555556,1,1,0.555556,0.333333,1,0.777778,0.333333,-1 
):"+sameModel.predict(Vectors.dense(-0.857554,0.555556,1,1,0.555556,0.333333,1,0.777778,0.333333,-1 )));sc.stop();} } /*執(zhí)行結(jié)果:Area under ROC = 0.9017094017094017Prediction of (-0.857554,0.555556,1,1,0.555556,0.333333,1,0.777778,0.333333,-1 ):3.238535993736797*/ 輸入的數(shù)據(jù)集:標(biāo)簽 特征向量1:特征向量值1?特征向量2:特征向量值2 ...?特征向量n:特征向量值n 0 1:-0.860107 2:-0.111111 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1 0 1:-0.859671 2:-0.111111 3:-0.333333 4:-0.333333 5:-0.111111 6:0.333333 7:1 8:-0.555556 9:-0.777778 10:-1 0 1:-0.857807 2:-0.555556 3:-1 4:-1 5:-1 6:-0.777778 7:-0.777778 8:-0.555556 9:-1 10:-1 0 1:-0.85768 2:0.111111 3:0.555556 4:0.555556 5:-1 6:-0.555556 7:-0.333333 8:-0.555556 9:0.333333 10:-1 0 1:-0.857569 2:-0.333333 3:-1 4:-1 5:-0.555556 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1 1 1:-0.857554 2:0.555556 3:1 4:1 5:0.555556 6:0.333333 7:1 8:0.777778 9:0.333333 10:-1 0 1:-0.857408 2:-1 3:-1 4:-1 5:-1 6:-0.777778 7:1 8:-0.555556 9:-1 10:-1 0 1:-0.857339 2:-0.777778 3:-1 4:-0.777778 5:-1 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1 1 1:-0.855171 2:-0.777778 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-1 9:-1 10:-0.111111 0 1:-0.855171 2:-0.333333 3:-0.777778 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 0 1:-0.854841 2:-1 3:-1 4:-1 5:-1 6:-1 7:-1 8:-0.555556 9:-1 10:-1 0 1:-0.854709 2:-0.777778 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 1 1:-0.853868 2:-0.111111 3:-0.555556 4:-0.555556 5:-0.555556 6:-0.777778 7:-0.555556 8:-0.333333 9:-0.333333 10:-1 0 1:-0.85354 2:-1 3:-1 4:-1 5:-1 6:-0.777778 7:-0.555556 8:-0.555556 9:-1 10:-1 1 1:-0.853454 2:0.555556 3:0.333333 4:-0.111111 5:1 6:0.333333 7:0.777778 8:-0.111111 9:-0.111111 10:-0.333333 1 1:-0.852997 2:0.333333 3:-0.333333 4:0.111111 5:-0.333333 6:0.111111 7:-1 8:-0.333333 9:-0.555556 10:-1 0 1:-0.852842 2:-0.333333 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 1 1:-0.852671 2:-0.333333 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1 0 1:-0.852543 2:1 3:0.333333 4:0.333333 5:0.111111 
6:-0.333333 7:1 8:-0.333333 9:-1 10:-0.777778 0 1:-0.852536 2:0.111111 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1 1 1:-0.851958 2:0.333333 3:-0.555556 4:-0.777778 5:1 6:-0.111111 7:1 8:-0.111111 9:-0.333333 10:-0.333333 1 1:-0.851957 2:1 3:-0.111111 4:-0.111111 5:-0.555556 6:0.111111 7:0.333333 8:0.333333 9:1 10:-1 0 1:-0.85163 2:-0.555556 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 0 1:-0.851217 2:-1 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1 1 1:-0.850295 2:-0.111111 3:-0.777778 4:-0.555556 5:-0.333333 6:-0.777778 7:0.333333 8:-0.555556 9:0.111111 10:-1 0 1:-0.850198 2:-0.555556 3:-0.777778 4:-1 5:-1 6:-1 7:-1 8:-0.777778 9:-1 10:-1 0 1:-0.850107 2:-0.111111 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 0 1:-0.850038 2:-0.777778 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 0 1:-0.849517 2:-1 3:-1 4:-0.555556 5:-1 6:-0.777778 7:-1 8:-1 9:-1 10:-1 0 1:-0.849517 2:-0.555556 3:-1 4:-1 5:-1 6:-1 7:-1 8:-0.777778 9:-1 10:-1 0 1:-0.849393 2:-0.777778 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1 1 1:-0.849331 2:1 3:0.333333 4:0.333333 5:-0.555556 6:0.555556 7:-0.111111 8:0.333333 9:-0.333333 10:-0.555556 0 1:-0.848968 2:-0.777778 3:-1 4:-1 5:-0.777778 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1 0 1:-0.848891 2:-0.555556 3:-1 4:-0.777778 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 0 1:-0.848267 2:-0.777778 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 1 1:-0.848135 2:1 3:1 4:1 5:0.555556 6:0.111111 7:-1 8:0.555556 9:0.777778 10:-1 0 1:-0.847895 2:0.111111 3:-0.777778 4:-1 5:-1 6:-1 7:-1 8:0.333333 9:-1 10:-1 1 1:-0.847478 2:-0.111111 3:-0.333333 4:-0.333333 5:0.777778 6:-0.777778 7:1 8:-0.111111 9:0.111111 10:-1 1 1:-0.846481 2:-0.777778 3:-0.111111 4:-0.555556 5:-0.555556 6:0.111111 7:0.333333 8:0.333333 9:-0.111111 10:-1 1 1:-0.845249 2:1 3:-0.333333 4:-0.555556 5:-1 6:-0.555556 7:-0.555556 8:0.111111 9:-0.111111 10:-0.777778 1 1:-0.845097 2:0.111111 3:1 4:1 5:-0.777778 6:0.555556 7:1 8:0.333333 
9:-0.555556 10:-0.555556 1 1:-0.844791 2:-0.111111 3:0.111111 4:-0.111111 5:0.111111 6:1 7:-1 8:-0.555556 9:-1 10:-1 1 1:-0.844637 2:1 3:1 4:1 5:-0.333333 6:0.555556 7:-1 8:0.555556 9:1 10:-1 0 1:-0.84462 2:-1 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-0.777778 1 1:-0.84439 2:-0.555556 3:0.333333 4:0.333333 5:-0.333333 6:-0.333333 7:0.777778 8:-0.333333 9:0.555556 10:-1 0 1:-0.844351 2:-1 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 0 1:-0.844265 2:-0.333333 3:-1 4:-1 5:-0.555556 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1 1 1:-0.844156 2:0.333333 3:0.555556 4:0.333333 5:-0.777778 6:-0.333333 7:0.555556 8:-0.555556 9:0.555556 10:-0.777778 1 1:-0.843926 2:0.777778 3:-0.111111 4:0.555556 5:-1 6:-0.777778 7:-0.555556 8:-0.777778 9:-1 10:-0.111111 1 1:-0.843914 2:-0.111111 3:-0.555556 4:-0.555556 5:-0.333333 6:-0.777778 7:-0.333333 8:-0.555556 9:-0.333333 10:-1 1 1:-0.843667 2:1 3:-0.555556 4:0.111111 5:-0.777778 6:-0.555556 7:-0.111111 8:-0.333333 9:1 10:-0.777778 1 1:-0.843607 2:-0.111111 3:-0.111111 4:-0.111111 5:0.555556 6:1 7:0.555556 8:0.333333 9:-0.555556 10:0.333333 1 1:-0.843604 2:1 3:-0.111111 4:-0.111111 5:0.111111 6:0.555556 7:0.555556 8:0.333333 9:-1 10:-1 1 1:-0.843496 2:1 3:0.111111 4:0.111111 5:-0.555556 6:-0.333333 7:-0.111111 8:-0.555556 9:0.111111 10:-1 1 1:-0.843352 2:0.555556 3:1 4:1 5:-1 6:-0.555556 7:0.111111 8:-0.555556 9:0.777778 10:-1 1 1:-0.843228 2:0.555556 3:-0.777778 4:-0.333333 5:-1 6:-0.111111 7:-1 8:-0.111111 9:-0.333333 10:-0.333333 1 1:-0.843162 2:-0.111111 3:-0.777778 4:-0.555556 5:-1 6:0.111111 7:1 8:-0.111111 9:-1 10:-1 1 1:-0.843099 2:0.777778 3:-0.111111 4:-0.111111 5:-0.777778 6:-0.777778 7:-0.777778 8:-0.111111 9:-1 10:-1 1 1:-0.842893 2:-0.111111 3:-0.555556 4:-0.111111 5:-0.111111 6:-0.555556 7:-0.555556 8:-0.333333 9:1 10:-1 0 1:-0.842892 2:-1 3:-1 4:-1 5:-1 6:-0.777778 7:-0.777778 8:-0.777778 9:-1 10:-1 1 1:-0.842769 2:0.777778 3:1 4:1 5:-1 6:1 7:0.555556 8:-0.555556 9:-0.555556 10:-1 1 1:-0.842766 
2:0.111111 3:-0.555556 4:-0.333333 5:-1 6:-0.111111 7:-0.777778 8:-0.555556 9:0.777778 10:-1 0 1:-0.842757 2:-1 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 1 1:-0.842637 2:1 3:-0.333333 4:-0.777778 5:-1 6:-0.555556 7:-0.777778 8:-0.333333 9:-0.555556 10:1 0 1:-0.842614 2:-0.333333 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1
執行結果和樣本集有關系。
總結
以上是生活随笔為你收集整理的离线轻量级大数据平台Spark之MLib机器学习库SVM实例的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 离线轻量级大数据平台Spark之MLib
- 下一篇: 离线轻量级大数据平台Spark之MLib