當前位置：首頁 > 编程资源 > 编程问答 >内容正文

编程问答

【web搜索】学习笔记-层次汇合聚类HAC算法

發布時間：2024/3/12 编程问答 33 豆豆

生活随笔收集整理的這篇文章主要介紹了【web搜索】学习笔记-层次汇合聚类HAC算法小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

層次匯合聚類（Hierarchical Agglomerative Clustering,HAC)
算法如下：

相關鏈接：
層次聚類算法原理及實現

學習筆記：
先附上上一段鏈接中的代碼：

// HAC_learning.cpp: 定義控制臺應用程序的入口點。 //#include "stdafx.h" #include <iostream> #include <vector> #include <algorithm> #include <fstream> using namespace std; const int iniClusNum = 12; const int stopNum = 3;class Point { public:double x;double y;int NumPBelong;Point(){x = 0;y = 0;NumPBelong = -1;}Point(double x1, double y1, int f = -1) :x(x1), y(y1), NumPBelong(f) {}const Point& operator=(const Point& p){x = p.x;y = p.y;NumPBelong = p.NumPBelong;return *this;} };class ManagerP { public:double getDistance(const Point& p1, const Point& p2){return sqrt(pow((p1.x - p2.x), 2) + pow((p1.y - p2.y), 2));}Point getMean(const Point& p1, const Point& p2){Point p;p.x = (p1.x + p2.x) / 2;p.y = (p1.y + p2.y) / 2;return p;} };class ManagerC { public:Point Cluster[iniClusNum];vector<int> ClusterLast[iniClusNum];bool isIndexClose[iniClusNum];bool isIndexClose2[iniClusNum];void initCluster()//use txt to init, import txt{ifstream myfile("point.txt");if (!myfile){cout << "cannot open file."; return;}Point p;int x, y;int i = 0;while (!myfile.eof()){myfile >> x >> y;p.x = x;p.y = y;Cluster[i] = p;i++;}myfile.close();}void initIndexClose(){for (int i = 0; i<iniClusNum; i++){isIndexClose[i] = false;isIndexClose2[i] = false;}}void print(){for (int i = 0; i<iniClusNum; i++){if (ClusterLast[i].empty()){continue;}cout << "cluster " << i + 1 << endl;vector<int>::iterator ite = ClusterLast[i].begin();for (; ite != ClusterLast[i].end(); ite++){cout << *ite << "\t";}cout << endl;}cout << endl;}void ClusterAlgo()//use minheap to realize, to optimize{int ClustNum = iniClusNum;int clus_index = 0;while (ClustNum>stopNum){double min = INT_MAX;int x = -1, y = -1;ManagerP mp;for (int i = 0; i<iniClusNum; i++){if (isIndexClose[i]){continue;}for (int j = i + 1; j<iniClusNum; j++){if (isIndexClose[j]){continue;}double new_d = mp.getDistance(Cluster[i], Cluster[j]);if (new_d < min){min = new_d;x = i; y = j;}}}if (x == -1 || y == -1){break;}Point p = mp.getMean(Cluster[x], Cluster[y]);//x<y store the resultif (Cluster[x].NumPBelong == -1 && Cluster[y].NumPBelong == -1){cout << "a0" << endl;ClusterLast[clus_index].push_back(x);//xchange to p, y closeClusterLast[clus_index].push_back(y);p.NumPBelong = clus_index;isIndexClose[y] = true;//y is closedCluster[x] = p;//new p is openisIndexClose[x] = false;isIndexClose2[x] = true;isIndexClose2[y] = true;clus_index++;}else if (Cluster[x].NumPBelong == -1 && Cluster[y].NumPBelong != -1)//already exists one cluster{cout << "a1" << endl;ClusterLast[Cluster[y].NumPBelong].push_back(x);isIndexClose[x] = true;//x is closedp.NumPBelong = Cluster[y].NumPBelong;Cluster[y] = p;//new p is openisIndexClose2[x] = true;}else if (Cluster[x].NumPBelong != -1 && Cluster[y].NumPBelong == -1){cout << "a2" << endl;ClusterLast[Cluster[x].NumPBelong].push_back(y);isIndexClose[y] = true;//y is closedp.NumPBelong = Cluster[x].NumPBelong;Cluster[x] = p;//new p is openisIndexClose2[y] = true;}else if (Cluster[x].NumPBelong != -1 && Cluster[y].NumPBelong != -1)//both are clusteroid{cout << "a3" << endl;vector<int>::iterator ite = ClusterLast[Cluster[y].NumPBelong].begin();//put y's node in xfor (; ite != ClusterLast[Cluster[y].NumPBelong].end(); ite++){ClusterLast[Cluster[x].NumPBelong].push_back(*ite);}ClusterLast[Cluster[y].NumPBelong].clear();isIndexClose[y] = true;//y is closedp.NumPBelong = Cluster[x].NumPBelong;Cluster[x] = p;//new p is open}ClustNum--;}int total_size = 0;for (int i = 0; i<stopNum; i++){total_size += ClusterLast[i].size();}if (total_size<iniClusNum){int j = 0;for (int i = 0; i<iniClusNum; i++){if (isIndexClose2[i] == false){ClusterLast[stopNum - 1 - j].push_back(i);j++;}}}}};int main() {ManagerC M;M.initCluster();M.initIndexClose();M.ClusterAlgo();M.print();system("pause"); }

其中，
point.txt:

4 10
4 8
7 10
6 8
3 4
2 2
5 2
9 3
10 5
11 4
12 3
12 6

運行結果：

草圖：

代碼理解：
定義類point，含有x,y（坐標）及NumPBelong（類別）
M.initCluster();輸入數據
M.initIndexClose();開啟所有點（設為false）
M.ClusterAlgo();HAC聚類
M.print();打印聚類結果

詳細理解M.ClusterAlgo()；

設定預期分類數stopNum，令實際分類數初始值為輸入數據數（每個數據自成一類）當（實際分類數>預期分類數stopNum）時 {clus_index=0；在x,y的isIndexClose值都不為true（關）的條件下（不為中心點/沒有改變過）找出距離最小的兩個點x,y并計算他們的距離min如果所有的點的isIndexClose值都為true（關）跳出循環//計算x,y的中點pa0:如果x，y都不是某類的中心點將x,y分入第clus_index類關閉y（isIndexClose=true）（y不再是某類中心點）把x,y的中點設為x，并令這個新的x為此類的中心點打開x（isIndexClose=true）（x為此類中心點）標記x,y（isIndexClose2= true;）a1：如果x不是某類中心點，y是某類中心點將x放入y所在的類別中關閉x把x,y的中點設為y標記xa2：如果x是某類中心點，y不是某類中心點將y放入x所在的類別中關閉y把x,y的中點設為x標記ya3：如果x，y都是中心點把y所在類別的所有點移入x所在類別中關閉y把x,y的中點設為x 實際分類數-1 } 計算所有已分類類別中數據的總數total_size 如果已分類數據總數total_size<原始輸入數據總數iniClusNum（有數據未分類） {對每一個數進行檢查，將所有未標記數（isIndexClose2 == false）平均分給各個類 }

總結

以上是生活随笔為你收集整理的【web搜索】学习笔记-层次汇合聚类HAC算法的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： Firefly-RK3288 Ubunt
下一篇： java enum枚举型使用