统计学习方法第十四章作业:聚类—层次聚类聚合/分裂算法、K_means聚类算法 代码实现
生活随笔
收集整理的這篇文章主要介紹了
统计学习方法第十四章作业:聚类—层次聚类聚合/分裂算法、K_means聚类算法 代码实现
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
層次聚類聚合/分裂算法
import numpy as np import copy import matplotlib.pyplot as pltclass Hierarchical_cluster:def __init__(self,k=None,p=2,dis_way='min',c_way='agg'):self.k = kself.p = pself.dis_way = dis_wayself.c_way = c_waydef cauclate_dis(self, x1, x2):return np.sum(abs(x1 - x2) ** self.p) ** (1 / self.p)def create_D_matrix(self):self.D = np.zeros((self.n,self.n))for i in range(self.n):for j in range(self.n):if i==j:self.D[i][j] = 0if i>j:self.D[i][j] = self.cauclate_dis(self.x[i],self.x[j])self.D[j][i] = self.cauclate_dis(self.x[i],self.x[j])def cauclate_cluster_dis(self,c1,c2):if self.dis_way == 'min':min_ = 999999999for i in c1:for j in c2:min_ = min(min_,self.D[i][j])return min_if self.dis_way == 'mean':mean1 = np.mean([self.x[i] for i in c1],axis=0)mean2 = np.mean([self.x[i] for i in c2],axis=0)return self.cauclate_dis(mean1,mean2)def split_C(self,C):C1 = []C2 = []max_ = 0for i in C:for j in C:if j >= i:if self.D[i][j] >= max_:max_ = self.D[i][j]max_i = imax_j = jC1.append(max_i)C2.append(max_j)for c in C:if c == max_i or c == max_j:continueelif self.D[max_i][c] >= self.D[max_j][c]:C2.append(c)else:C1.append(c)return C1, C2def fit(self,x):self.x = np.array(x)self.n = len(x)self.create_D_matrix()if self.c_way == 'agg':C_way = []start_c = []for i in range(self.n):start_c.append([i])C_way.append(start_c)iter = 0while len(C_way[iter]) > 1:num_c = len(C_way[iter])c_temp = []dis_temp = []for c1 in range(num_c):for c2 in range(num_c):if c1 > c2:c_temp.append([c1,c2])dis_temp.append(self.cauclate_cluster_dis(C_way[iter][c1],C_way[iter][c2]))min_dis = min(dis_temp)min_index = dis_temp.index(min_dis)c1 = c_temp[min_index][0]c2 = c_temp[min_index][1]new_c = copy.deepcopy(C_way[iter])new_c.append(new_c[c1]+new_c[c2])del new_c[c1]del new_c[c2]C_way.append(new_c)iter += 1self.result = C_wayif self.c_way == 'div':C_way = []new_c = list(range(self.n))C_way.append([new_c])iter = 0while len(C_way[iter]) < self.n:print(C_way)new_c = []for C in C_way[iter]:if len(C) == 1 :new_c.append(C)else:C1,C2 = self.split_C(C)new_c.append(C1)new_c.append(C2)C_way.append(new_c)iter+=1self.result = C_waydef predict(self):final = np.zeros(len(self.x))if self.c_way == 'div':result = self.result[self.k//2+1]if self.c_way == 'agg':result = self.result[-self.k]print(result)for i in range(len(result)):for j in result[i]:final[j] = ireturn list(final)def main():x = [[0,2],[0,0],[1,0],[5,0],[5,2],[1,5],[2,4],[3,5],[1,1],[1,4],[5,4],[5,3],[0,1],[-1,0],[1,1],[4,4],[3,4]]k = 3HC = Hierarchical_cluster(k,dis_way='mean',c_way='agg')HC.fit(x)result = HC.predict()print(result)color_list =['r','k','b','g']for i,c in zip(range(len(result)),color_list[:len(result)]):positive_ = np.array(x)[np.array(result) == i]plt.scatter([k[0] for k in positive_],[k[1] for k in positive_] , c=c)plt.show()if __name__ == '__main__':main()#-----result----------------- /usr/bin/python3 /Users/zhengyanzhao/PycharmProjects/tongjixuexi/shixian2/hierarchical_clustering.py [[3], [2, 1, 14, 8, 12, 0, 13], [16, 6, 7, 9, 5, 15, 10, 11, 4]] [1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0]K_means聚類算法
import numpy as np import matplotlib.pyplot as pltclass K_menas:def __init__(self,k=None,p=2):self.k = kself.p = pdef cauclate_dis(self, x1, x2):return np.sum(abs(x1 - x2) ** self.p) ** (1 / self.p)def fit(self,x):self.x = np.array(x)self.n = len(x)initial_c = np.random.choice(list(range(self.n)),self.k,replace=False)self.mean_list = [self.x[i] for i in initial_c]while True:self.c_list = [[] for _ in range(self.k)]self.x_list = [[] for _ in range(self.k)]for i in range(self.n):min_dis = 9999999for j in range(self.k):dis = self.cauclate_dis(self.x[i], self.mean_list[j])if min_dis > dis:min_dis = disc = jself.c_list[c].append(i)self.x_list[c].append(self.x[i])mean_list_ = np.array([np.mean(c,0) for c in self.x_list])if (mean_list_ == self.mean_list).all():breakself.mean_list = mean_list_def predict(self):dict_ = {}for i in range(len(self.c_list)):for j in self.c_list[i]:dict_[j]=ireturn [dict_[i] for i in range(self.n)]def compute_d(self,c):max_ = -1for c1 in range(len(c)):for c2 in range(len(c)):if c1 >= c2:max_ = max(max_,self.cauclate_dis(c[c1],c[c2]))return max_def compute_mean_d(self):return np.mean([self.compute_d(i) for i in self.x_list])def main():x = [[0,2],[0,0],[1,0],[5,0],[5,2],[1,5],[2,4],[3,5],[1,1],[1,4],[5,4],[5,3],[0,1],[-1,0],[1,1],[4,4],[3,4]]k_ = 5KM = K_menas(k_)KM.fit(x)result = KM.predict()print(result)k_list=[]for k in [1,2,3,4,5,6,7]:KM = K_menas(k)KM.fit(x)k_list.append([k,KM.compute_mean_d()])print(k_list)color_list =['r','k','b','g']for i,c in zip(range(len(result)),color_list[:len(result)]):positive_ = np.array(x)[np.array(result) == i]plt.scatter([k[0] for k in positive_],[k[1] for k in positive_] , c=c)plt.show()if __name__ == '__main__':main()#----------result---------------- /usr/bin/python3 /Users/zhengyanzhao/PycharmProjects/tongjixuexi/shixian2/K_means.py [3, 3, 3, 4, 2, 0, 0, 1, 3, 0, 2, 2, 3, 3, 3, 1, 1] [[1, 7.211102550927978], [2, 4.319596107466319], [3, 3.7863912010391503], [4, 1.9715873793431686], [5, 1.612899020449196], [6, 1.0107491837076632], [7, 1.0337239967856842]]總結
以上是生活随笔為你收集整理的统计学习方法第十四章作业:聚类—层次聚类聚合/分裂算法、K_means聚类算法 代码实现的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 再战FGM!Tensorflow2.0
- 下一篇: 统计学习方法第十五章作业:SVD矩阵分解