K-Means聚类算法Java实现
生活随笔
收集整理的這篇文章主要介紹了
K-Means聚类算法Java实现
小編覺得挺不錯的,現(xiàn)在分享給大家,幫大家做個參考.
K-Means聚類算法
目的:將數(shù)據(jù)分為K組
基本思路
初始聚類中心的選擇會對最終求出的分類結(jié)果有一定的影響,所以初始點的選取盡量離散,間隔大
K-Means算法對大數(shù)據(jù)挖掘有很高的效率,它的時間復(fù)雜度為O(NKT),其中N表示數(shù)據(jù)集中的對象個數(shù),K表示聚類個數(shù),T表示迭代次數(shù)
例題
? 將以下數(shù)據(jù)分為三類 consumption.csv
Id,R,F,M 1,27,6,232.61 2,3,5,1507.11 3,4,16,817.62 4,3,11,232.81 5,14,7,1913.05 6,19,6,220.07 7,5,2,615.83 8,26,2,1059.66 9,21,9,304.82 10,2,21,1227.96 11,15,2,521.02 12,26,3,438.22 13,17,11,1744.55 14,30,16,1957.44 15,5,7,1713.79 16,4,21,1768.11 17,93,2,1016.34 18,16,3,950.36 19,4,1,754.93 20,27,1,294.23 21,5,1,195.3 22,17,3,1845.34 23,12,13,1434.29 24,21,3,275.85 25,18,5,449.76 26,30,21,1628.68 27,4,2,1795.41 28,7,12,1786.24 29,18,1,679.44 30,60,7,5318.81 31,4,22,873.68 32,16,1,654.69 33,3,2,230.37 34,14,11,1165.68 35,13,21,1276.31 36,10,16,334.21 37,5,5,759.19 38,1,1,1383.39 39,24,8,3280.77 40,19,4,154.65 41,9,1,501.38 42,1,24,1721.93 43,14,1,107.18 44,10,1,973.36 45,10,17,764.55 46,7,6,1251.4 47,23,11,923.28 48,15,1,1011.18 49,1,15,1847.61 50,3,21,1669.46 51,10,3,1758.05 52,30,8,1865.99 53,28,8,1791.44 54,4,15,874.6 55,24,5,557.17 56,16,2,1025.35 57,7,2,1261.47 58,66,4,2920.81 59,4,2,1266.02 60,21,11,626.37 61,6,4,1105.63 62,26,21,1465.58 63,8,21,630.74 64,26,2,1546.45 65,14,11,1577.91 66,17,6,170.16 67,20,5,1558.75 68,5,5,1272.06 69,26,3,111.02 70,15,7,1578.37 71,26,24,720.26 72,25,16,873.22 73,4,7,935.19 74,23,11,723.67 75,15,9,1833.01 76,6,3,681.26 77,78,11,1461.63 78,15,17,560.57 79,9,18,1761.19 80,8,7,1707.25 81,28,2,227.14 82,22,3,223.57 83,8,6,940.46 84,23,6,256.3 85,5,1,312.44 86,15,14,929.52 87,27,15,1296.66 88,22,11,591.62 89,2,2,755.72 90,18,17,1424.07 91,61,8,940.93 92,3,7,414.24 93,1,14,576.56 94,12,22,1037.14 95,26,5,1200.17 96,1,3,1727.36 97,13,16,503.71 98,19,7,703.36 99,12,17,1583.05 100,3,18,602.9 101,5,1,798.41 102,25,7,1202.09 103,85,4,1605.36 104,28,21,1222.34 105,25,19,593.17 106,8,6,94.75 107,14,1,89.7 108,21,15,1061.56 109,29,15,978.85 110,14,3,155.9 111,20,5,938.15 112,3,24,1477.97 113,10,6,1976.23 114,8,5,181.17 115,17,4,499.65 116,49,1,76.22 117,13,11,267.1 118,23,1,137.62 119,65,5,1383.47 120,20,22,1311.2 121,22,13,496.61 122,21,6,1921.8 123,14,11,304.1 124,26,1,468.09 125,27,9,432.67 126,30,1,368.35 127,11,4,759.69 128,26,3,1110.81 129,28,1,53 130,39,11,1314.21 131,11,6,1895.95 132,23,1,417.23 133,3,2,679.58 134,5,1,533.97 135,24,8,1134.64 136,25,6,825.39 137,10,6,165.39 138,29,9,1234.64 139,80,11,1829.32 140,23,1,89 141,4,2,1557.88 142,3,8,1328.01 143,15,7,304.65 144,17,23,1505.55 145,16,7,711.1 146,16,1,539.76 147,5,1,65.83 148,16,3,776.21 149,22,18,1820.61 150,19,4,1997 151,4,22,1846.69 152,23,7,1252.41 153,7,13,987.17 154,3,6,1130.03 155,18,1,148.32 156,28,1,135.57 157,6,2,1641.79 158,7,2,242.83 159,21,8,1803.02 160,12,12,1557.95 161,25,4,1494.81 162,26,13,1280.06 163,28,1,160 164,22,9,440.12 165,14,1,746.95 166,12,2,351.09 167,6,2,556.91 168,7,3,957.83 169,16,16,1212.37 170,11,2,946.65 171,16,13,1442.68 172,5,12,1612.7 173,0,21,1281.68 174,9,13,1928.8 175,24,7,335.35 176,3,8,1589.35 177,20,11,797.72 178,17,1,793.47 179,13,16,569.47 180,10,3,149.5 181,17,21,515.38 182,8,4,187.76 183,20,7,1441.83 184,27,1,121.61 185,25,11,934.58 186,16,15,591.06 187,15,4,951.31 188,12,11,914 189,3,22,1058 190,9,2,1111.51 191,17,9,458.52 192,27,18,927.59 193,73,1,1370.25 194,17,1,946.53 195,10,1,1474.41 196,16,3,1661.03 197,0,9,1465.18 198,17,3,1813.45 199,5,7,772.54 200,4,1,172.82 201,14,4,552.37 202,12,8,946.28 203,26,2,651.99 204,6,9,857.79 205,7,4,1016.55 206,5,6,1766.44 207,25,3,908.53 208,28,2,403.75 209,25,4,1270.75 210,13,3,1157.92 211,13,1,497.09 212,2,1,216.78 213,23,16,1454.58 214,2,17,1027.58 215,24,12,722.09 216,15,7,282.19 217,11,4,106.96 218,18,1,999.75 219,24,14,1139.33 220,24,5,836.72 221,3,2,1678.54 222,3,17,1337.34 223,1,4,1335.77 224,11,2,810.2 225,29,11,943.9 226,51,12,5135.77 227,6,9,984.12 228,6,5,1413.55 229,1,6,381.95 230,6,14,788.22 231,29,1,80.8 232,21,1,611.13 233,24,4,1766.35 234,0,2,1516.76 235,9,6,1925.2 236,17,1,344.23 237,49,1,204.1 238,5,2,1257.59 239,7,3,1095.09 240,2,1,123.76 241,3,2,696.82 242,26,2,1487.35 243,19,3,1278.43 244,28,14,627.97 245,12,1,95 246,14,4,1827.01 247,10,6,754.05 248,19,2,922.93 249,12,12,257.4 250,1,14,676.34 251,3,19,984.32 252,27,32,1914.06 253,13,4,1953.81 254,1,4,768.02 255,61,13,1379.86 256,42,1,1054.24 257,21,11,298.34 258,17,5,841.04 259,8,9,1757.87 260,22,11,1010.7算法分析
? 采用map存儲數(shù)據(jù),key存id,value使用List存儲R、F、M,中心點可以使用三個List存儲,最大迭代次數(shù)m由命令行輸入
算法代碼實現(xiàn)
import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.*;public class Test {static String filePath = System.getProperty("user.dir")+"\\src\\sources\\consumption.csv";static Map<Integer,List<Float>> map = new HashMap<>();//總數(shù)據(jù)static Map<Integer,List<Float>> map1 = new HashMap<>();//第一類數(shù)據(jù)static Map<Integer,List<Float>> map2 = new HashMap<>();//第二類數(shù)據(jù)static Map<Integer,List<Float>> map3 = new HashMap<>();//第三類數(shù)據(jù)static List<Float> list1 = new ArrayList();//第一個中心static List<Float> list2 = new ArrayList();//第二個中心static List<Float> list3 = new ArrayList();//第三個中心//判斷是否是數(shù)字public static boolean isNumeric(String str){for (int i = str.length();--i>=0;){if (!Character.isDigit(str.charAt(i))){return false;}}return true;}//讀取數(shù)據(jù),存入mappublic static void ReadFile(){BufferedReader br = null;String line = "";String csvSplitBy = ",";try {br = new BufferedReader(new FileReader(filePath));while ((line = br.readLine()) != null) {// 分割點為List<String> post = Arrays.asList(line.split(csvSplitBy));if (isNumeric(post.get(0))) {int x = Integer.parseInt(post.get(0));List<Float> list = new ArrayList<>();list.add(Float.valueOf(post.get(1)));list.add(Float.valueOf(post.get(2)));list.add(Float.valueOf(post.get(3)));map.put(x,list);}}} catch (FileNotFoundException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();} finally {if (br != null) {try {br.close();} catch (IOException e) {e.printStackTrace();}}}}//第一次,產(chǎn)生三個隨機點public static void RandPoint(){Random r = new Random();list1 = map.get((r.nextInt(942)));list2 = map.get((r.nextInt(942)));list3 = map.get((r.nextInt(942)));System.out.print(list1.toString());System.out.print(list2.toString());System.out.println(list3.toString());}//給定一個map的value,判斷他是哪個類,給數(shù)據(jù)分類public static void IsKM(List<Float> list,int index){float x1 = Math.abs(list1.get(0)-list.get(0))+Math.abs(list1.get(1)-list.get(1))+Math.abs(list1.get(2)-list.get(2));float x2 = Math.abs(list2.get(0)-list.get(0))+Math.abs(list2.get(1)-list.get(1))+Math.abs(list2.get(2)-list.get(2));float x3 = Math.abs(list3.get(0)-list.get(0))+Math.abs(list3.get(1)-list.get(1))+Math.abs(list3.get(2)-list.get(2));float min = (x1<x2)?x1:x2;min = (min<x3)?min:x3;if (min == x1){map1.put(index,list);//System.out.println(index + "屬于第1類,中心點為"+list1.toString());}if(min == x2){map2.put(index,list);//System.out.println(index + "屬于第2類,中心點為"+list2.toString());}if(min == x3){map3.put(index,list);//System.out.println(index + "屬于第3類,中心點為"+list3.toString());}}//計算map中數(shù)據(jù)與中心點的距離public static void KMeans(int m) {for (int i = 0;i<m;i++){map1.clear();map2.clear();map3.clear();for (Map.Entry<Integer,List<Float>> entry : map.entrySet()) {IsKM(entry.getValue(),entry.getKey());}NewPoint();System.out.print(list1.toString());System.out.print(list2.toString());System.out.println(list3.toString());}System.out.println("第一個中心點"+map1);System.out.println("第二個中心點"+map2);System.out.println("第三個中心點"+map3);}//計算三個類的新中心public static void NewPoint(){//重置中心點list1.clear();list2.clear();list3.clear();//一列數(shù)據(jù)的和float sum1 = 0;float sum2 = 0;float sum3 = 0;for (Map.Entry<Integer,List<Float>> entry : map1.entrySet()) {//System.out.println(entry.getValue());//map最后一個value為空,要進(jìn)行一波判斷if (entry.getValue().size()>0) {sum1 = sum1 + entry.getValue().get(0);sum2 = sum2 + entry.getValue().get(1);sum3 = sum3 + entry.getValue().get(2);}}list1.add(sum1/map1.size());list1.add(sum2/map1.size());list1.add(sum3/map1.size());sum1=0;sum2=0;sum3=0;for (Map.Entry<Integer,List<Float>> entry : map2.entrySet()) {if (entry.getValue().size()>0){sum1 = sum1 + entry.getValue().get(0);sum2 = sum2 + entry.getValue().get(1);sum3 = sum3 + entry.getValue().get(2);}}list2.add(sum1/map2.size());list2.add(sum2/map2.size());list2.add(sum3/map2.size());sum1=0;sum2=0;sum3=0;for (Map.Entry<Integer,List<Float>> entry : map3.entrySet()) {if (entry.getValue().size()>0){sum1 = sum1 + entry.getValue().get(0);sum2 = sum2 + entry.getValue().get(1);sum3 = sum3 + entry.getValue().get(2);}}list3.add(sum1/map3.size());list3.add(sum2/map3.size());list3.add(sum3/map3.size());}public static void main(String[] args) {System.out.print("請輸入迭代次數(shù):");Scanner input = new Scanner(System.in);int m = input.nextInt();//讀取數(shù)據(jù)ReadFile();//生成第一次的中心點System.out.print("第一次隨機生成中心點:");RandPoint();//分類,求中心,再分類KMeans(m);} }總結(jié)
以上是生活随笔為你收集整理的K-Means聚类算法Java实现的全部內(nèi)容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: NASA 机智号火星直升机完成第 64
- 下一篇: 跨域问题的简单解决办法