python实现的基于NMF的多图聚类算法
生活随笔
收集整理的這篇文章主要介紹了
python实现的基于NMF的多图聚类算法
小編覺得挺不錯的,現在分享給大家,給大家做個參考。
python實現的基于NMF的多圖聚類算法的封裝
代碼均由論文復現
文章目錄
- python實現的基于NMF的多圖聚類算法的封裝
- 前言
- 參考論文
- 一、NMF
- 二、Mjnmf
- 總結
前言
怕忘記做過這件事,先在此記錄,詳細說明後面再補充。
參考論文
- Identification of multi-layer networks community by fusing nonnegative matrix factorization and topological structural information
- Graph Learning for Multiview Clustering
一、NMF
代碼
# -*- coding: utf-8 -*-import os import sys import numpy as np import networkx as nx import sklearn.preprocessing as nordef graphsToMatrix(graphs):"""## 返回圖的臨接矩陣表示"""if isinstance(graphs, list):return [np.asarray(nx.adjacency_matrix(g,weight='weight').todense(),dtype='float32') for g in graphs]else:return np.asarray(nx.adjacency_matrix(graphs,weight='weight').todense(),dtype='float32')def getRandomData(xNum,yNum):"""# 獲取隨機的數據"""return np.random.rand(xNum,yNum)class NMF(object):# 最小分母minTrueNum = 10 ** -8maxIterNum = 1000alpha = 10 ** -2# 聚類的數量k = 4# 節點的數量nodesNum = 0layerNum = 0# 圖數據的存儲graphs = []# 圖的臨接矩陣表示wItems = []#相似度矩陣sItems = []# 左側的綜合矩陣B = []# 右側的系數矩陣fItems = []# 錯誤的分數j = [100 ** 4]def __init__(self, graphs, k):self.graphs = graphsself.wItems = graphsToMatrix(graphs)self.sItems = [self.simplify(matrix) for matrix in self.wItems]# self.wItems = self.sItemsself.k = kself.nodesNum = graphs[0].number_of_nodes()self.layerNum = len(graphs)self.__initialize()def __initialize(self):self.B = getRandomData(self.nodesNum, self.k)self.fItems = [getRandomData(self.k,self.nodesNum) for i in range(self.layerNum)]def calCulateErr(self):"""## 計算本輪的誤差值"""itemErr = sum([np.sum((w - self.B @k) ** 2) for w,k in zip(self.wItems,self.fItems)])self.j.append(itemErr)return itemErrdef updateB(self):"""## 迭代更新B矩陣"""molecule = sum([w @ f.T for w,f in zip(self.wItems,self.fItems)])denominator = self.B @ sum([f @ f.T for f in self.fItems])self.B = self.B * (molecule / np.maximum(denominator, self.minTrueNum))def updateK(self):"""## 更新右側的系數矩陣"""molecules = [self.B.T @ w for w in self.wItems]denominators = [self.B.T @ self.B @ f for f in self.fItems]self.fItems = [f * (k1 / np.maximum(k2, self.minTrueNum)) for k1,k2,f in zip(molecules, denominators,self.fItems)]# self.fItems = [f * ((self.B.T @ w) / np.maximum(self.B.T @ self.B @ f , self.minTrueNum)) # for f,w in zip(self.fItems, self.wItems)]def simplify(self, w):"""# 計算獲得相似度矩陣"""cs = nor.normalize(w,axis=0,norm="l2")re = cs.T 
@csnp.fill_diagonal(re, 1)return redef fit(self):"""## 運行進行迭代更新"""for i in range(self.maxIterNum):self.updateB()self.updateK()errRate = self.calCulateErr()# print(f"iter:: {i} err -> {errRate}")if errRate < self.alpha:print(f"end iter in {i} 輪 -> errRate::{errRate}")def getClusterLabel(self):"""## 獲得得到的分類標簽"""labels = np.argmax(self.B, 1)return labels.tolist()# from tools import getGraphData# def runTest(): # graphs = getGraphData("CELE") # m = NMF(graphs, 5)# m.fit() # print(m.getClusterLabel())if __name__ == '__main__':# runTest()pass二、Mjnmf
代碼(同樣由論文復現)
# -*- coding: utf-8 -*-
"""Mjnmf: multi-layer joint NMF community detection.

Fuses nonnegative matrix factorization with topological structural
information: each layer's kernel matrix is factorized as W_i ≈ B @ F_i.T
while a consensus indicator matrix H is learned across all layers.
"""
from copy import deepcopy
from logging import error

import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import scipy.sparse as sp
import sklearn.preprocessing as nor
from numpy.linalg import inv
from scipy.io import loadmat
from sklearn import metrics
from sklearn.cluster import KMeans
from munkres import Munkres


def getRandomData(xNum, yNum):
    """Return an (xNum, yNum) matrix of uniform random values in [0, 1)."""
    return np.random.rand(xNum, yNum)


def read_adj(filename):
    """Read a multi-layer edge list (``layerID source target weight`` rows).

    Returns:
        graphs: dense float32 adjacency matrix of every layer
        nodes:  sorted node list of every layer
        layers: the distinct layer ids
    """
    df = pd.read_csv(filename, header=None, sep=' ',
                     names=['layerID', 'source', 'target', 'weight'],
                     dtype='int32')
    layers = list(set(df['layerID'].tolist()))
    graphs = []
    nodes = []
    for i in layers:
        g = nx.from_pandas_edgelist(df[df['layerID'] == i],
                                    source='source', target='target',
                                    edge_attr='weight')
        nodelist = sorted(g.nodes())
        adj = np.asarray(nx.adjacency_matrix(g, nodelist=nodelist,
                                             weight='weight').todense(),
                         dtype='float32')
        graphs.append(adj)
        nodes.append(nodelist)
    return graphs, nodes, layers


class Mjnmf(object):
    """Mjnmf solver: joint NMF over all layers plus a consensus matrix H."""

    # Floor for every denominator, avoids division by zero.
    lower_control = 10 ** -10
    # Order of the high-order proximity expansion.
    l = 4
    # Decay factor of the high-order terms.
    theta = 0.1
    # Weight of the F ≈ U @ H coupling term.
    beta = 0.1
    # Weight of the similarity-reconstruction term.
    alpha = 1
    # Weight of the orthogonality regularizer on H.
    lamda = 10
    iterNum = 700
    # Relative-error threshold for early stopping.
    errorradio = 10 ** -6

    def __init__(self, dataSet, k=4, layerNum=None, nodeNum=None):
        """dataSet: array of per-layer adjacency matrices; k: community count;
        layerNum / nodeNum default to the shape of dataSet."""
        self.dataSet = dataSet
        self.layerNum = dataSet.shape[0] if layerNum is None else layerNum
        self.nodeNum = dataSet[0].shape[1] if nodeNum is None else nodeNum
        print(f"初始化 layerNum{self.layerNum} :: nodeNum{self.nodeNum}")
        self.k = k
        # Error history seeded with a large sentinel.
        # Fix: was a class attribute — a mutable list shared (and mutated)
        # across every instance; now per-instance.
        self.e = [10 ** 7]
        self.errItems = []
        # Gaussian-kernel matrix of every layer.
        self.WItems = [self.constructkernal(item) for item in dataSet]
        self.__runInitial()

    def __runInitial(self):
        """Randomly initialize the factor matrices and precompute the
        high-order / similarity matrices."""
        # Fix: one F per layer — the original allocated nodeNum of them
        # (range(self.nodeNum)), wasting memory; only layerNum are used.
        self.fItems = [getRandomData(self.nodeNum, self.k)
                       for _ in range(self.layerNum)]
        self.h = getRandomData(self.nodeNum, self.k)
        self.b = getRandomData(self.nodeNum, self.k)
        self.wGroups = [self.high_order(WItem) for WItem in self.WItems]
        self.sItems = [self.similarity(adj) for adj in self.dataSet]
        for index, item in enumerate(self.sItems):
            np.fill_diagonal(self.sItems[index], 1)
        self.uItems = [self.cal_sim(sItem) for sItem in self.sItems]
        self.wGroups = [self.cal_sim(wItem) for wItem in self.wGroups]

    def caldistance(self, adj):
        """Pairwise Euclidean distances between the columns of adj."""
        pingfanghe = np.sum(adj ** 2, 0)
        jiaocha = adj.T @ adj
        # Fix: clamp the radicand at 0 — floating-point error can make it
        # slightly negative, which would put NaNs into the kernel.
        distance = np.sqrt(np.maximum(
            pingfanghe.reshape(self.nodeNum, 1)
            + pingfanghe.reshape(1, self.nodeNum)
            - 2 * jiaocha, 0))
        return distance

    def constructkernal(self, adjcency):
        """Build a layer's Gaussian-kernel matrix W (bandwidth = median of
        the pairwise distances, diagonal zeroed)."""
        dis = self.caldistance(adjcency)
        sig = np.median(dis)
        fenzi = dis ** 2
        fenmu = max(2 * (sig ** 2), self.lower_control)
        wkernal = np.exp(-fenzi / fenmu)
        np.fill_diagonal(wkernal, 0)
        return wkernal

    def high_order(self, W):
        """Truncated series P = Σ_{i<l} θ^i · W^{i+1} / (i+1)!  — injects
        higher-order topological proximity into W."""
        P = np.zeros((self.nodeNum, self.nodeNum))
        temp = 1
        for i in range(self.l):
            if i == 0:
                A = deepcopy(W)
            else:
                A = A @ W
            temp *= (i + 1)
            P += (A * (self.theta ** i)) / temp
        return P

    def similarity(self, W):
        """Column-wise cosine-similarity matrix of W."""
        cs = nor.normalize(W, axis=0, norm="l2")
        return cs.T @ cs

    def cal_sim(self, s):
        """Row-normalize s (L1) so every row sums to 1."""
        return nor.normalize(s, axis=1, norm="l1")

    def b_update(self, B):
        """Multiplicative update of the shared basis matrix B."""
        fenzi = sum(w @ f for w, f in zip(self.wGroups, self.fItems))
        fenmu = B @ sum(f.T @ f for f in self.fItems)
        return B * (fenzi / np.maximum(fenmu, self.lower_control))

    def f_update(self, w, u, f):
        """Multiplicative update of one layer's coefficient matrix F."""
        fenzi = w.T @ self.b + self.beta * (u @ self.h)
        fenmu = f @ (self.b.T @ self.b) + self.beta * f
        return f * (fenzi / np.maximum(fenmu, self.lower_control))

    def h_update(self, H):
        """Update the consensus indicator H — multiplicative rule derived
        from the KKT conditions of the joint objective (per the paper)."""
        uuh = 2 * self.beta * sum(u.T @ (u @ H) for u in self.uItems)
        sh = 4 * self.alpha * sum(s.T @ H for s in self.sItems)
        uf = 2 * self.beta * sum(u.T @ f
                                 for u, f in zip(self.uItems, self.fItems))
        hhh = 8 * (self.layerNum * self.alpha + self.lamda) * (H @ (H.T @ H))
        fenzi = -uuh + np.sqrt((uuh * uuh)
                               + (2 * hhh) * (sh + uf + 4 * self.lamda * H))
        return H * np.sqrt(fenzi / np.maximum(hhh, self.lower_control))

    def u_update(self, u, f):
        """Multiplicative update of one layer's mapping matrix U."""
        fenzi = f @ self.h.T
        fenmu = u @ self.h @ self.h.T
        return u * (fenzi / np.maximum(fenmu, self.lower_control))

    def error(self, w, f, s, u):
        """One layer's objective value: reconstruction + similarity +
        coupling terms (the orthogonality penalty is intentionally omitted,
        as in the original)."""
        e1 = np.sum((w - self.b @ f.T) ** 2)
        e2 = self.alpha * np.sum((s - self.h @ self.h.T) ** 2)
        e3 = self.beta * np.sum((f - u @ self.h) ** 2)
        return e1 + e2 + e3

    def runIter(self):
        """Alternate all updates until the relative change of the total
        error falls below errorradio, or iterNum iterations elapse."""
        for i in range(self.iterNum):
            self.b = self.b_update(self.b)
            self.fItems = [self.f_update(w, u, f)
                           for w, u, f in zip(self.wGroups, self.uItems,
                                              self.fItems)]
            self.uItems = [self.u_update(u, f)
                           for u, f in zip(self.uItems, self.fItems)]
            self.h = self.h_update(self.h)
            self.errItems = [self.error(w, f, s, u)
                             for w, f, s, u in zip(self.wGroups, self.fItems,
                                                   self.sItems, self.uItems)]
            self.e.append(sum(self.errItems))
            if abs(self.e[-1] - self.e[-2]) / self.e[-1] <= self.errorradio:
                print(f"結束于 {i}")
                break

    def getRes(self):
        """Return each node's community label (argmax over the rows of H)."""
        return np.argmax(self.h, 1)


def dataTrans(fileName):
    """Load ./DataSet/<fileName>.txt and return (adjacency matrices,
    per-layer node lists)."""
    graphs_adj, nodesF, _ = read_adj('./DataSet/' + fileName + '.txt')
    return graphs_adj, nodesF


def main(fileName: str, clusterNum: int = 4):
    """Run Mjnmf on the named data set.

    :param fileName: data-set file name (without extension)
    :param clusterNum: number of communities to extract
    :return: (label list, node list of the first layer)
    """
    data, nodes = dataTrans(fileName)
    d = np.array(data)
    m = Mjnmf(d, k=clusterNum)
    m.runIter()
    return m.getRes().tolist(), nodes[0]


if __name__ == '__main__':
    # Fix: the original called main("CELE") without the then-required
    # clusterNum argument, raising TypeError; clusterNum now defaults to 4.
    print(main("CELE"))
數據
網盤
提取碼: es4o
總結
有時間來補充非負矩陣分解的知識
總結
以上是生活随笔為你收集整理的python实现的基于NMF的多图聚类算法的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Tomcat 集群部署
- 下一篇: texstudio 使用方法_Latex