机器学习知识点(十八)密度聚类DBSCAN算法Java实现
生活随笔
收集整理的這篇文章主要介紹了
机器学习知识点(十八)密度聚类DBSCAN算法Java实现
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
為更好理解聚類算法,從網上找現成代碼來理解,發現了一個Java自身的ML庫,鏈接:http://java-ml.sourceforge.net/
有興趣可以下載來看看源碼,理解基礎ML算法。對于DBSCAN算法,從網上找到一個Java實現的,主要是用來理解其算法過程。參考代碼如下:
1、Point類,數據對象
package sk.cluster;public class Point {private double x;//坐標x軸private double y;//坐標y軸private boolean isVisit;//是佛訪問標記private int cluster;//所屬簇類private boolean isNoised;//是否是噪音數(shù)據(jù)public Point(double x,double y) {this.x = x;this.y = y;this.isVisit = false;this.cluster = 0;this.isNoised = false;}public double getDistance(Point point) {//計算兩點間距離return Math.sqrt((x-point.x)*(x-point.x)+(y-point.y)*(y-point.y));}public void setX(double x) {this.x = x;}public double getX() {return x;}public void setY(double y) {this.y = y;}public double getY() {return y;}public void setVisit(boolean isVisit) {this.isVisit = isVisit;}public boolean getVisit() {return isVisit;}public int getCluster() {return cluster;}public void setNoised(boolean isNoised) {this.isNoised = isNoised;}public void setCluster(int cluster) {this.cluster = cluster;}public boolean getNoised() {return this.isNoised;}@Overridepublic String toString() {return x+" "+y+" "+cluster+" "+(isNoised?1:0);}}2、Data類,數(shù)據(jù)集 package sk.cluster;import java.io.*; import java.text.DecimalFormat; import java.text.NumberFormat; import java.util.ArrayList; import java.util.Random;public class Data {private static DecimalFormat df=(DecimalFormat) NumberFormat.getInstance();//隨機生成數(shù)據(jù)public static ArrayList<Point> generateSinData(int size) {ArrayList<Point> points = new ArrayList<Point>(size);Random rd = new Random(size);for (int i=0;i<size/2;i++) {double x = format(Math.PI / (size / 2) * (i + 1));double y = format(Math.sin(x)) ;points.add(new Point(x,y));}for (int i=0;i<size/2;i++) {double x = format(1.5 + Math.PI / (size/2) * (i+1));double y = format(Math.cos(x));points.add(new Point(x,y));}return points;}//輸入指定數(shù)據(jù)public static ArrayList<Point> generateSpecialData() {ArrayList<Point> points = new ArrayList<Point>();points.add(new Point(2,2));points.add(new Point(3,1));points.add(new Point(3,4));points.add(new Point(3,14));points.add(new Point(5,3));points.add(new Point(8,3));points.add(new 
Point(8,6));points.add(new Point(9,8));points.add(new Point(10,4));points.add(new Point(10,7));points.add(new Point(10,10));points.add(new Point(10,14));points.add(new Point(11,13));points.add(new Point(12,7));points.add(new Point(12,15));points.add(new Point(14,7));points.add(new Point(14,9));points.add(new Point(14,15));points.add(new Point(15,8));return points;}//獲取文件數(shù)據(jù)public static ArrayList<Point> getData(String sourcePath) {ArrayList<Point> points = new ArrayList<Point>();File fileIn = new File(sourcePath);try {BufferedReader br = new BufferedReader(new FileReader(fileIn));String line = null;line = br.readLine();while (line != null) {Double x = Double.parseDouble(line.split(",")[3]);Double y = Double.parseDouble(line.split(",")[4]);points.add(new Point(x, y));line = br.readLine();}br.close();} catch (IOException e) {e.printStackTrace();}return points;}//輸出到文件public static void writeData(ArrayList<Point> points,String path) {try {BufferedWriter bw = new BufferedWriter(new FileWriter(path));for (Point point:points) {bw.write(point.toString()+"\n");}bw.close();} catch (IOException e) {e.printStackTrace();}}private static double format(double x) {return Double.valueOf(df.format(x));}}
3、DBSCAN類,實現(xiàn)DBSCAN算法 package sk.cluster;import java.util.ArrayList;public class DBScan {private double radius;private int minPts;public DBScan(double radius,int minPts) {this.radius = radius;//領域半徑參數(shù)this.minPts = minPts;//領域密度值,該領域內(nèi)有多少個樣本}public void process(ArrayList<Point> points) {int size = points.size();int idx = 0;int cluster = 1;while (idx<size) {//樣本總數(shù)Point p = points.get(idx++);//choose an unvisited pointif (!p.getVisit()) {p.setVisit(true);//set visitedArrayList<Point> adjacentPoints = getAdjacentPoints(p, points);//計算兩點距離,看是否在領域內(nèi)//set the point which adjacent points less than minPts noisedif (adjacentPoints != null && adjacentPoints.size() < minPts) {p.setNoised(true);//噪音數(shù)據(jù)} else {//建立該點作為領域核心對象p.setCluster(cluster);for (int i = 0; i < adjacentPoints.size(); i++) {Point adjacentPoint = adjacentPoints.get(i);//領域內(nèi)的樣本//only check unvisited point, cause only unvisited have the chance to add new adjacent pointsif (!adjacentPoint.getVisit()) {adjacentPoint.setVisit(true);ArrayList<Point> adjacentAdjacentPoints = getAdjacentPoints(adjacentPoint, points);//add point which adjacent points not less than minPts noisedif (adjacentAdjacentPoints != null && adjacentAdjacentPoints.size() >= minPts) {//adjacentPoints.addAll(adjacentAdjacentPoints);for (Point pp : adjacentAdjacentPoints){if (!adjacentPoints.contains(pp)){adjacentPoints.add(pp);}}}}//add point which doest not belong to any clusterif (adjacentPoint.getCluster() == 0) {adjacentPoint.setCluster(cluster);//set point which marked noised before non-noisedif (adjacentPoint.getNoised()) {adjacentPoint.setNoised(false);}}}cluster++;}}if (idx%1000==0) {System.out.println(idx);}}}private ArrayList<Point> getAdjacentPoints(Point centerPoint,ArrayList<Point> points) {ArrayList<Point> adjacentPoints = new ArrayList<Point>();for (Point p:points) {//include centerPoint itselfdouble distance = centerPoint.getDistance(p);if (distance<=radius) {adjacentPoints.add(p);}}return 
adjacentPoints;}} /* ##DBScan算法流程圖算法:DBScan,基于密度的聚類算法 輸入:D:一個包含n個數(shù)據(jù)的數(shù)據(jù)集r:半徑參數(shù)minPts:領域密度閾值 輸出:基于密度的聚類集合 標記D中所有的點為unvisted for each p in Dif p.visit = unvisted找出與點p距離不大于r的所有點集合NIf N.size() < minPts標記點p為噪聲點Elsefor each p' in NIf p'.visit == unvisted找出與點p距離不大于r的所有點集合N'If N'.size()>=minPts將集合N'加入集合N中去End ifElseIf p'未被聚到某個簇將p'聚到當前簇If p'被標記為噪聲點將p'取消標記為噪聲點End IfEnd IfEnd IfEnd forEnd ifEnd if End for */
4、client測試類 package sk.cluster;import java.util.ArrayList;public class Client {public static void main(String[] args) {ArrayList<Point> points = Data.generateSinData(200);//隨機生成200個pointDBScan dbScan = new DBScan(0.6,4);//r:領域半徑參數(shù) ,minPts領域密度閾值,密度值//ArrayList<Point> points = Data.generateSpecialData();//ArrayList<Point> points = Data.getData("D:\\tmp\\testData.txt");//DBScan dbScan = new DBScan(0.1,1000);dbScan.process(points);for (Point p:points) {System.out.println(p);}Data.writeData(points,"D:\\tmp\\data.txt");}}
總結
以上是生活随笔為你收集整理的机器学习知识点(十八)密度聚类DBSCAN算法Java实现的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 机器学习笔记(九)聚类
- 下一篇: Java运行时动态加载类之URLClas