Mining Disease Association Rules with the FP-Growth Algorithm
Experiment topic:
To study the associations among several internal diseases, a hospital randomly sampled the records of seventy patients. The findings are recorded in 數據集.txt (the full file is reproduced in the appendix at the end of this article).
Cardiacfailure denotes heart failure, Myocardialinfarction denotes myocardial infarction, uremia denotes uremia, diabetes denotes diabetes, Renalfailure denotes renal failure, and Other denotes other diseases.
Requirement: choose a minimum support and a minimum confidence appropriate to the actual data, and use the FP-Growth algorithm to mine the association rules among these diseases.
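To pick a reasonable minimum support, it helps to see how often each disease occurs in the file. The short tally below is a sketch and is not part of the original write-up; it assumes 數據集.txt sits in the working directory and is formatted as in the appendix, one patient record per line with a leading index.

```python
# Tally how often each disease appears across the records, to guide the choice
# of a minimum support threshold (sketch; file format as in the appendix:
# "<index> <disease> <disease> ...", one record per line).
from collections import Counter

with open("數據集.txt", "r") as f:
    records = [line.split()[1:] for line in f if line.strip()]

counts = Counter(disease for record in records for disease in record)
for disease, freq in counts.most_common():
    print(disease, freq)
```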
1. Experiment code
```python
import pprint


def loadDataSet():
    # Read the transaction file: each line is "<index> <disease> <disease> ...";
    # the leading index is dropped.
    f2 = open("數據集.txt", "r")
    # f2 = open("data.txt", "r")
    lines = f2.readlines()
    f2.close()
    retData = []
    for line in lines:
        items = line.strip().split(' ')
        retData.append([items[i] for i in range(1, len(items))])
    return retData


def transfer2FrozenDataSet(dataSet):
    # Key each transaction by its frozenset of items; identical disease
    # combinations are collapsed into a single entry with count 1.
    frozenDataSet = {}
    for elem in dataSet:
        frozenDataSet[frozenset(elem)] = 1
    return frozenDataSet


class TreeNode:
    def __init__(self, nodeName, count, nodeParent):
        self.nodeName = nodeName
        self.count = count
        self.nodeParent = nodeParent
        self.nextSimilarItem = None  # link to the next node holding the same item
        self.children = {}

    def increaseC(self, count):
        self.count += count

    def disp(self, ind=1):
        # Render the tree as indented text.
        print(' ' * ind, self.nodeName, ' ', self.count)
        for child in self.children.values():
            child.disp(ind + 1)


def createFPTree(frozenDataSet, minSupport):
    # First scan of the data set: drop items whose support is below the threshold.
    headPointTable = {}
    for items in frozenDataSet:
        for item in items:
            headPointTable[item] = headPointTable.get(item, 0) + frozenDataSet[items]
    headPointTable = {k: v for k, v in headPointTable.items() if v >= minSupport}
    frequentItems = set(headPointTable.keys())
    if len(frequentItems) == 0:
        return None, None
    for k in headPointTable:
        headPointTable[k] = [headPointTable[k], None]

    fptree = TreeNode("null", 1, None)
    # Second scan of the data set: keep only the frequent items of each record,
    # order them by descending support, and insert them into the tree.
    for items, count in frozenDataSet.items():
        frequentItemsInRecord = {}
        for item in items:
            if item in frequentItems:
                frequentItemsInRecord[item] = headPointTable[item][0]
        if len(frequentItemsInRecord) > 0:
            # Sort by item name first so equal counts are ordered deterministically,
            # then by support count in descending order.
            frequentItemsInRecord = sorted(frequentItemsInRecord.items(), key=lambda v: v[0])
            orderedFrequentItems = [v[0] for v in sorted(frequentItemsInRecord,
                                                         key=lambda v: v[1], reverse=True)]
            updateFPTree(fptree, orderedFrequentItems, headPointTable, count)
    return fptree, headPointTable


def updateFPTree(fptree, orderedFrequentItems, headPointTable, count):
    # Handle the first item of the ordered record.
    if orderedFrequentItems[0] in fptree.children:
        fptree.children[orderedFrequentItems[0]].increaseC(count)
    else:
        fptree.children[orderedFrequentItems[0]] = TreeNode(orderedFrequentItems[0], count, fptree)
        # Update the header table's node-link for this item.
        if headPointTable[orderedFrequentItems[0]][1] is None:
            headPointTable[orderedFrequentItems[0]][1] = fptree.children[orderedFrequentItems[0]]
        else:
            updateHeadPointTable(headPointTable[orderedFrequentItems[0]][1],
                                 fptree.children[orderedFrequentItems[0]])
    # Recurse on the remaining items.
    if len(orderedFrequentItems) > 1:
        updateFPTree(fptree.children[orderedFrequentItems[0]], orderedFrequentItems[1:],
                     headPointTable, count)


def updateHeadPointTable(headPointBeginNode, targetNode):
    # Append targetNode to the end of the node-link chain.
    while headPointBeginNode.nextSimilarItem is not None:
        headPointBeginNode = headPointBeginNode.nextSimilarItem
    headPointBeginNode.nextSimilarItem = targetNode


def mineFPTree(headPointTable, prefix, frequentPatterns, minSupport):
    # For every item in the header table, collect its conditional prefix paths,
    # build the conditional FP-tree, and recurse until no conditional tree remains.
    headPointItems = [v[0] for v in sorted(headPointTable.items(), key=lambda v: v[1][0])]
    if len(headPointItems) == 0:
        return
    for headPointItem in headPointItems:
        newPrefix = prefix.copy()
        newPrefix.add(headPointItem)
        support = headPointTable[headPointItem][0]
        frequentPatterns[frozenset(newPrefix)] = support

        prefixPath = getPrefixPath(headPointTable, headPointItem)
        if prefixPath != {}:
            conditionalFPtree, conditionalHeadPointTable = createFPTree(prefixPath, minSupport)
            if conditionalHeadPointTable is not None:
                mineFPTree(conditionalHeadPointTable, newPrefix, frequentPatterns, minSupport)


def getPrefixPath(headPointTable, headPointItem):
    # Walk the node-link chain of headPointItem and collect the path above each node.
    prefixPath = {}
    beginNode = headPointTable[headPointItem][1]
    prefixs = ascendTree(beginNode)
    if prefixs != []:
        prefixPath[frozenset(prefixs)] = beginNode.count
    while beginNode.nextSimilarItem is not None:
        beginNode = beginNode.nextSimilarItem
        prefixs = ascendTree(beginNode)
        if prefixs != []:
            prefixPath[frozenset(prefixs)] = beginNode.count
    return prefixPath


def ascendTree(treeNode):
    # Collect the item names on the path from treeNode's parent up to the root.
    prefixs = []
    while (treeNode.nodeParent is not None) and (treeNode.nodeParent.nodeName != 'null'):
        treeNode = treeNode.nodeParent
        prefixs.append(treeNode.nodeName)
    return prefixs


def rulesGenerator(frequentPatterns, minConf, rules):
    for frequentset in frequentPatterns:
        if len(frequentset) > 1:
            getRules(frequentset, frequentset, rules, frequentPatterns, minConf)


def removeStr(set, str):
    # Return a frozenset equal to `set` with the element `str` removed.
    tempSet = []
    for elem in set:
        if elem != str:
            tempSet.append(elem)
    tempFrozenSet = frozenset(tempSet)
    return tempFrozenSet


def getRules(frequentset, currentset, rules, frequentPatterns, minConf):
    for frequentElem in currentset:
        subSet = removeStr(currentset, frequentElem)
        # confidence(subSet -> frequentset - subSet)
        confidence = frequentPatterns[frequentset] / frequentPatterns[subSet]
        if confidence >= minConf:
            flag = False
            for rule in rules:
                if rule[0] == subSet and rule[1] == frequentset - subSet:
                    flag = True
            if flag == False:
                rules.append((subSet, frequentset - subSet, confidence))
            if len(subSet) >= 2:
                getRules(frequentset, subSet, rules, frequentPatterns, minConf)


if __name__ == '__main__':
    dataSet = loadDataSet()
    frozenDataSet = transfer2FrozenDataSet(dataSet)
    min_sup = 3
    fptree, headPointTable = createFPTree(frozenDataSet, min_sup)
    fptree.disp()

    frequentPatterns = {}
    prefix = set([])
    mineFPTree(headPointTable, prefix, frequentPatterns, min_sup)
    print("")
    print("Frequent patterns:")
    pprint.pprint(frequentPatterns)

    min_conf = 0.7
    rules = []
    rulesGenerator(frequentPatterns, min_conf, rules)
    print("Association rules:")
    pprint.pprint(rules)
    print('Total number of rules:', len(rules))
```
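One detail worth keeping in mind when reading the results: transfer2FrozenDataSet keys every transaction by its frozenset, so patients with exactly the same combination of diseases are collapsed into a single entry with count 1, and support is therefore counted over distinct disease combinations rather than over all seventy records. If per-patient counts were wanted instead, a variant along the following lines could be used; this is a sketch, not part of the original experiment, and the function name is made up here.

```python
# Hypothetical variant of transfer2FrozenDataSet that keeps multiplicities:
# identical records contribute their full number of occurrences to support
# instead of being collapsed to a single count of 1.
def transfer2FrozenDataSetCounted(dataSet):
    frozenDataSet = {}
    for elem in dataSet:
        key = frozenset(elem)
        frozenDataSet[key] = frozenDataSet.get(key, 0) + 1
    return frozenDataSet
```

With this variant the same min_sup threshold would select a different set of frequent patterns, so the figures reported below correspond to the deduplicated counting used in the listing above.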
2. Experiment results
The FP-tree constructed from the data (shown as a screenshot in the original post):
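Since the screenshot is not reproduced here, the toy run below illustrates the text format that fptree.disp() produces: one node per line, indented by depth, with the node name followed by its count and the artificial root printed as null. The two-record data set is purely illustrative and is not the experiment's data; the snippet assumes the listing above has already been loaded.

```python
# Illustrative only: disp() output for a tiny made-up data set,
# not the FP-tree of the 70-patient file.
toy = transfer2FrozenDataSet([["diabetes", "uremia"], ["diabetes"]])
toyTree, toyTable = createFPTree(toy, 1)
toyTree.disp()
# Prints roughly:
#   null   1
#    diabetes   2
#     uremia   1
```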
The minimum support and minimum confidence (set in the script as min_sup = 3 and min_conf = 0.7):
The frequent patterns and association rules obtained from the mining (shown as a screenshot in the original post):
3. Result analysis
根據以上關聯規則可得出以下結論:
最小支持度為3,最小置信度為70%時:
- 糖尿病、尿毒癥、腎衰竭三種疾病之間較強的關聯關系,心力衰竭、腎衰竭、糖尿病三種疾病之間有很強的關聯關系。?
- 對于糖尿病和尿毒癥的疾病人群而言,有80%的患者會并發腎衰竭。
- 對于尿毒癥和腎衰竭的疾病人群而言,也有80%的患者會并發糖尿病。
- 對于腎衰竭和心力衰竭的疾病人群而言,有100%的患者會并發糖尿病。
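These percentages are confidences in the sense used by getRules: the support of the full itemset divided by the support of the antecedent. The fragment below is a sketch of how one such figure can be read off the mined patterns; it assumes the main script has already run, so that frequentPatterns is populated.

```python
# Confidence of the rule {diabetes, uremia} -> {Renalfailure}, computed the
# same way getRules does (assumes frequentPatterns was filled by mineFPTree).
antecedent = frozenset(["diabetes", "uremia"])
full_itemset = frozenset(["diabetes", "uremia", "Renalfailure"])
confidence = frequentPatterns[full_itemset] / frequentPatterns[antecedent]
print(confidence)  # the write-up reports 0.8 for this rule
```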
Appendix: 數據集.txt
```text
1 Cardiacfailure Myocardialinfarction Other
2 Cardiacfailure
3 Cardiacfailure uremia Myocardialinfarction
4 Renalfailure Cardiacfailure diabetes uremia
5 uremia Cardiacfailure Renalfailure diabetes
6 diabetes
7 diabetes Cardiacfailure Myocardialinfarction Other
8 diabetes uremia
9 diabetes
10 Renalfailure diabetes uremia
11 diabetes
12 Cardiacfailure diabetes uremia Renalfailure
13 uremia diabetes Renalfailure Cardiacfailure
14 Renalfailure
15 Other Renalfailure
16 Renalfailure diabetes
17 Myocardialinfarction Cardiacfailure
18 uremia Renalfailure
19 Renalfailure
20 uremia diabetes Renalfailure
21 uremia Renalfailure
22 uremia
23 Cardiacfailure uremia Renalfailure diabetes Myocardialinfarction
24 Renalfailure diabetes uremia Cardiacfailure
25 Myocardialinfarction Cardiacfailure Other
26 diabetes Renalfailure uremia Cardiacfailure
27 uremia Renalfailure diabetes Cardiacfailure Myocardialinfarction
28 uremia diabetes Renalfailure Myocardialinfarction
29 diabetes uremia
30 Myocardialinfarction
31 diabetes Renalfailure uremia Cardiacfailure
32 Cardiacfailure diabetes Other
33 Renalfailure diabetes
34 uremia diabetes Renalfailure
35 Myocardialinfarction Cardiacfailure
36 uremia Renalfailure
37 Other Renalfailure Myocardialinfarction
38 Renalfailure diabetes uremia
39 Cardiacfailure Myocardialinfarction Other
40 Myocardialinfarction Other
41 uremia Renalfailure diabetes
42 Cardiacfailure diabetes uremia Renalfailure
43 Myocardialinfarction
44 diabetes uremia Renalfailure
45 Myocardialinfarction Renalfailure
46 Cardiacfailure Myocardialinfarction
47 diabetes
48 Myocardialinfarction Cardiacfailure
49 diabetes Renalfailure uremia
50 Renalfailure
51 diabetes uremia
52 uremia Renalfailure diabetes
53 Other Renalfailure Myocardialinfarction
54 Renalfailure diabetes uremia Cardiacfailure
55 Renalfailure diabetes uremia
56 Myocardialinfarction
57 Renalfailure diabetes
58 Cardiacfailure Myocardialinfarction Other
59 uremia diabetes Renalfailure
60 Cardiacfailure diabetes Other
61 uremia Renalfailure
62 Myocardialinfarction
63 uremia diabetes Renalfailure Myocardialinfarction
64 Renalfailure diabetes Cardiacfailure
65 uremia Renalfailure
66 diabetes
67 Cardiacfailure diabetes Other
68 diabetes Renalfailure uremia Cardiacfailure
69 uremia Renalfailure diabetes
70 Cardiacfailure Myocardialinfarction
```