# ID3 algorithm: feature selection by information gain
def ID3_chooseBestFeatureToSplit(dataset):
    """Return the index of the feature with the largest information gain.

    Every column of ``dataset`` except the last one is treated as a
    candidate feature. For each candidate, the rows are partitioned by
    the distinct values of that column (via ``splitdataset``) and the
    size-weighted sum of ``jisuanEnt`` over the partitions is subtracted
    from ``jisuanEnt(dataset)`` to obtain the gain.

    Args:
        dataset: Non-empty sequence of rows; each row is an indexable
            sequence whose last element is not considered a feature.

    Returns:
        The column index with the highest gain, or ``-1`` when no column
        achieves a gain strictly greater than ``0.0``.
    """
    feature_count = len(dataset[0]) - 1
    base_entropy = jisuanEnt(dataset)
    total_rows = float(len(dataset))

    def weighted_entropy(column):
        # Distinct values taken by this feature across all rows.
        distinct_values = set([row[column] for row in dataset])
        accumulated = 0.0
        for current in distinct_values:
            # NOTE(review): splitdataset presumably returns the rows whose
            # `column` entry equals `current` -- confirm against its definition.
            partition = splitdataset(dataset, column, current)
            weight = len(partition) / total_rows
            accumulated += weight * jisuanEnt(partition)
        return accumulated

    winner = -1
    top_gain = 0.0
    for column in range(feature_count):
        gain = base_entropy - weighted_entropy(column)
        # Strict comparison: ties keep the earliest feature, and a feature
        # with zero gain is never selected.
        if gain > top_gain:
            top_gain = gain
            winner = column
    return winner
# Build a decision tree using the ID3 algorithm
def ID3_createTree(dataset, labels):
    """Recursively build an ID3 decision tree as nested dictionaries.

    Args:
        dataset: Non-empty sequence of rows; the last element of each row
            is the class value, the preceding elements are feature values.
        labels: Feature names, positionally aligned with the feature
            columns of ``dataset``. This sequence is NOT modified.

    Returns:
        Either a single class value (leaf) or a dict of the form
        ``{feature_label: {feature_value: subtree, ...}}``.
    """
    classList = [example[-1] for example in dataset]

    # All rows share one class: nothing left to split.
    if classList.count(classList[0]) == len(classList):
        return classList[0]

    # Only the class column remains: fall back to the majority class.
    if len(dataset[0]) == 1:
        return majorityCnt(classList)

    bestFeat = ID3_chooseBestFeatureToSplit(dataset)

    # A negative index means no feature produced a positive information
    # gain. Using it as-is would index labels[-1] / example[-1] and split
    # on the class column itself, so stop with a majority-class leaf.
    if bestFeat < 0:
        return majorityCnt(classList)

    bestFeatLabel = labels[bestFeat]
    ID3Tree = {bestFeatLabel: {}}

    # Drop the chosen feature's label from a private copy so the caller's
    # `labels` stays intact (the previous in-place `del` corrupted it).
    subLabels = list(labels)
    del subLabels[bestFeat]

    # One branch per distinct value of the chosen feature.
    featValues = [example[bestFeat] for example in dataset]
    uniqueVals = set(featValues)
    for value in uniqueVals:
        sub_dataset = splitdataset(dataset, bestFeat, value)
        # Each recursive call receives its own copy of the remaining labels.
        ID3Tree[bestFeatLabel][value] = ID3_createTree(sub_dataset, subLabels[:])

    # Debug trace of the (sub)tree built at this node, kept from the original.
    print(ID3Tree)
    return ID3Tree