数据挖掘 —— 无监督学习(关联)
生活随笔
收集整理的這篇文章主要介紹了
数据挖掘 —— 无监督学习(关联)
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
數據挖掘 —— 無監督學習(關聯)
- Apriori算法
- 代碼
- 結果
Apriori算法
代碼
from itertools import combinations


def comb(lst):
    """Return every non-empty combination of ``lst`` as a list of tuples.

    Combinations are ordered by size (1 up to ``len(lst)``) and, within one
    size, in the order produced by ``itertools.combinations``.  An empty
    ``lst`` gives an empty list.  The result holds ``2 ** len(lst) - 1``
    tuples, so this is only practical for short inputs.
    """
    ret = []
    for size in range(1, len(lst) + 1):
        ret.extend(combinations(lst, size))
    return ret


class AprLayer(object):
    """Container for all itemset nodes of one size.

    ``d`` maps each ``AprNode`` to itself, so the node that carries the
    running count can be looked up from an equal, freshly built node.
    """

    def __init__(self):
        self.d = dict()


class AprNode(object):
    """One itemset together with its occurrence count and its supersets.

    Attributes:
        s: the items of this itemset, as a ``set``.
        size: number of distinct items in ``s``.
        lnk_nodes: dict whose keys are the nodes one item larger than this
            one (values are those nodes' item sets).
        num: how many times the itemset has been counted so far.

    Nodes hash and compare by their item set, so ``s`` must not be mutated
    while the node is used as a dict key.
    """

    def __init__(self, node):
        self.s = set(node)
        self.size = len(self.s)
        self.lnk_nodes = dict()
        self.num = 0

    def __hash__(self):
        # Hash the items themselves rather than a joined string, so that
        # {"a__b"} and {"a", "b"} (or {1} and {"1"}) are not confused.
        return hash(frozenset(self.s))

    def __eq__(self, other):
        if not isinstance(other, AprNode):
            return NotImplemented
        return self.s == other.s

    def isSubnode(self, node):
        """Return True when this node's items are a subset of ``node``'s."""
        return self.s.issubset(node.s)

    def incNum(self, num=1):
        """Increase the occurrence count by ``num`` (default 1)."""
        self.num += num

    def addLnk(self, node):
        """Register ``node`` as a linked (larger) itemset of this node."""
        self.lnk_nodes[node] = node.s


class AprBlk():
    """Counts every itemset occurring in a list of transactions.

    ``apr_layers`` maps an itemset size to the ``AprLayer`` holding all nodes
    of that size; ``data_num`` is the number of transactions.

    Every non-empty subset of every transaction is enumerated, so the cost
    grows exponentially with the length of a transaction.
    """

    def __init__(self, data):
        """Build the itemset counts and subset -> superset links.

        Args:
            data: a sized iterable of transactions; each transaction is an
                iterable of hashable items.
        """
        self.apr_layers = dict()
        self.data_num = len(data)
        for datum in data:
            # Drop repeated items (keeping first-seen order) so an itemset is
            # counted at most once per transaction.
            unique_items = list(dict.fromkeys(datum))
            nodes = [AprNode(items) for items in comb(unique_items)]
            for node in nodes:
                if node.size not in self.apr_layers:
                    self.apr_layers[node.size] = AprLayer()
                layer = self.apr_layers[node.size].d
                if node not in layer:
                    layer[node] = node
                layer[node].incNum()
            for node in nodes:
                if node.size == 1:
                    continue
                # Link the stored node (the one that carries the count), not
                # the throw-away node built for this transaction.
                counted = self.apr_layers[node.size].d[node]
                smaller = self.apr_layers[node.size - 1].d
                for item in node.s:
                    smaller[AprNode(node.s - {item})].addLnk(counted)

    def getFreqItems(self, thd=1, hd=1):
        """Return the most frequent itemsets.

        Args:
            thd: minimum count an itemset needs to be reported.
            hd: maximum number of entries returned.

        Returns:
            A list of ``(item_set, count)`` tuples sorted by count,
            highest first; ties keep their insertion order.
        """
        freq_items = []
        for layer in self.apr_layers.values():
            for node in layer.d.values():
                if node.num < thd:
                    continue
                freq_items.append((node.s, node.num))
        freq_items.sort(key=lambda x: x[1], reverse=True)
        return freq_items[:hd]

    def getConf(self, low=True, h_thd=10, l_thd=1, hd=1):
        """Return confidences of itemset -> one-item-larger-superset rules.

        The confidence of a rule is ``count(superset) / count(itemset)``.

        Args:
            low: when True return the lowest confidences (ascending),
                otherwise the highest (descending).
            h_thd: minimum count required of the base itemset.
            l_thd: minimum count required of the linked superset.
            hd: maximum number of rules returned; ``None`` returns all.

        Returns:
            A list of ``[items, count, superset_items, superset_count,
            confidence]`` lists.
        """
        confidence = []
        for layer in self.apr_layers.values():
            for node in layer.d.values():
                if node.num < h_thd:
                    continue
                for lnk_node in node.lnk_nodes:
                    if lnk_node.num < l_thd:
                        continue
                    conf = float(lnk_node.num) / float(node.num)
                    confidence.append(
                        [node.s, node.num, lnk_node.s, lnk_node.num, conf]
                    )
        confidence.sort(key=lambda x: x[4])
        if not low:
            # Reverse first, then truncate: the previous ``[-hd::-1]`` slice
            # skipped the highest entries whenever hd was greater than 1.
            confidence.reverse()
        return confidence[:hd]


class AssctAnaClass():
    """Small estimator-style wrapper around ``AprBlk``."""

    def fit(self, data):
        """Count the itemsets of ``data`` and return ``self``."""
        self.apr_blk = AprBlk(data)
        return self

    def get_freq(self, thd=1, hd=1):
        """Return up to ``hd`` itemsets whose count is at least ``thd``."""
        return self.apr_blk.getFreqItems(thd=thd, hd=hd)

    def get_conf_high(self, thd, h_thd=10, hd=None):
        """Return rules ordered by descending confidence.

        Args:
            thd: minimum count required of the superset.
            h_thd: minimum count required of the base itemset.
            hd: maximum number of rules; the default ``None`` returns every
                qualifying rule.
        """
        return self.apr_blk.getConf(low=False, h_thd=h_thd, l_thd=thd, hd=hd)

    def get_conf_low(self, thd, hd, l_thd=1):
        """Return the ``hd`` rules with the lowest confidence.

        Note that here ``thd`` is the minimum count of the base itemset and
        ``l_thd`` the minimum count of the superset.
        """
        return self.apr_blk.getConf(h_thd=thd, l_thd=l_thd, hd=hd)


def main():
    """Run the association analysis on a small shopping-basket example."""
    data = [
        ["牛奶", "啤酒", "尿布"],
        ["牛奶", "啤酒", "咖啡", "尿布"],
        ["香腸", "牛奶", "餅干"],
        ["尿布", "果汁", "啤酒"],
        ["釘子", "啤酒"],
        ["尿布", "毛巾", "香腸"],
        ["啤酒", "毛巾", "尿布", "餅干"],
    ]
    model = AssctAnaClass().fit(data)
    print("Freq", model.get_freq(thd=3, hd=10))
    print("Conf", model.get_conf_high(thd=3, h_thd=3))


if __name__ == "__main__":
    main()

# 結果 (Result): output printed by running the script
Freq [({'啤酒'}, 5), ({'尿布'}, 5), ({'啤酒', '尿布'}, 4), ({'牛奶'}, 3)]
Conf [[{'尿布'}, 5, {'啤酒', '尿布'}, 4, 0.8], [{'啤酒'}, 5, {'啤酒', '尿布'}, 4, 0.8]]
by CyrusMay 2022 04 05
總結
以上是生活随笔為你收集整理的数据挖掘 —— 无监督学习(关联)的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 数据挖掘 —— 无监督学习(聚类)
- 下一篇: 数据挖掘 —— 半监督学习(标签传播算法)