svd 做协同过滤
svd 做協同過濾
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Item-based collaborative filtering, optionally in an SVD-reduced space.

Rating matrices are user-by-item: ``dataMat[user, item]`` holds the rating
and ``0`` means "not rated".

Created on Wed Jun 20 21:02:58 2018

@author: luogan
"""
import numpy as np
from numpy import linalg as la

# Number of leading characters read from each line of the image text file.
_IMG_WIDTH = 32


def loadExData():
    """Return a small 7x5 example rating matrix as a list of rows."""
    return [[0, 0, 0, 2, 2],
            [0, 0, 0, 3, 3],
            [0, 0, 0, 1, 1],
            [1, 1, 1, 0, 0],
            [2, 2, 2, 0, 0],
            [5, 5, 5, 0, 0],
            [1, 1, 1, 0, 0]]


def loadExData2():
    """Return a sparser 11x11 example rating matrix as a list of rows."""
    return [[0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
            [0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
            [0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
            [3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
            [5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
            [0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
            [4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
            [0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
            [0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
            [0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
            [1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0]]


def ecludSim(inA, inB):
    """Return the Euclidean-distance similarity ``1 / (1 + ||inA - inB||)``.

    The result lies in (0, 1]; identical vectors give 1.0.
    """
    return 1.0 / (1.0 + la.norm(inA - inB))


def pearsSim(inA, inB):
    """Return the Pearson correlation of two column vectors rescaled to [0, 1].

    With fewer than three observations the vectors are treated as perfectly
    correlated and 1.0 is returned.
    """
    if len(inA) < 3:
        return 1.0
    return 0.5 + 0.5 * np.corrcoef(inA, inB, rowvar=False)[0][1]


def cosSim(inA, inB):
    """Return the cosine similarity of two vectors rescaled to [0, 1].

    Accepts column matrices as well as plain arrays. When either vector has
    zero norm the cosine is undefined; it is treated as 0 (orthogonal), so
    0.5 is returned instead of NaN.
    """
    a = np.asarray(inA, dtype=float).ravel()
    b = np.asarray(inB, dtype=float).ravel()
    denom = la.norm(a) * la.norm(b)
    if denom == 0:
        return 0.5
    return 0.5 + 0.5 * (float(np.dot(a, b)) / denom)


def standEst(dataMat, user, simMeas, item):
    """Estimate ``user``'s rating of ``item`` from the raw rating matrix.

    For every item ``j`` the user has rated, the similarity between ``item``
    and ``j`` is measured over the users who rated both, and the user's
    ratings are averaged with those similarities as weights.

    Args:
        dataMat: user-by-item rating matrix (0 = unrated).
        user: row index of the user.
        simMeas: callable ``(colA, colB) -> similarity`` on column vectors.
        item: column index of the item to estimate.

    Returns:
        The similarity-weighted average rating, or 0 when the total
        similarity is zero.
    """
    dataMat = np.asmatrix(dataMat)
    n = dataMat.shape[1]
    simTotal = 0.0
    ratSimTotal = 0.0
    # Loop-invariant: which users rated the target item.
    itemRated = np.asarray(dataMat[:, item]) > 0
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0:
            continue
        otherRated = np.asarray(dataMat[:, j]) > 0
        overLap = np.nonzero(np.logical_and(itemRated, otherRated))[0]
        if len(overLap) == 0:
            similarity = 0
        else:
            similarity = simMeas(dataMat[overLap, item], dataMat[overLap, j])
        print('the %d and %d similarity is: %f' % (item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    return ratSimTotal / simTotal


def svdEst(dataMat, user, simMeas, item, numSV=4):
    """Estimate ``user``'s rating of ``item`` in an SVD-reduced item space.

    Items are projected onto the leading ``numSV`` singular directions
    (``dataMat.T * U_k * inv(Sigma_k)``) and similarities are measured
    between the projected item vectors.

    Args:
        dataMat: user-by-item rating matrix (0 = unrated).
        user: row index of the user.
        simMeas: callable ``(colA, colB) -> similarity`` on column vectors.
        item: column index of the item to estimate.
        numSV: number of singular values to keep; clamped to the number
            available so small matrices do not fail.

    Returns:
        The similarity-weighted average rating, or 0 when the total
        similarity is zero.
    """
    dataMat = np.asmatrix(dataMat)
    n = dataMat.shape[1]
    simTotal = 0.0
    ratSimTotal = 0.0
    U, Sigma, _ = la.svd(dataMat)
    k = min(numSV, len(Sigma))
    sigK = np.asmatrix(np.diag(Sigma[:k]))  # leading singular values on the diagonal
    xformedItems = dataMat.T * np.asmatrix(U)[:, :k] * sigK.I  # one row per item
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0 or j == item:
            continue
        similarity = simMeas(xformedItems[item, :].T, xformedItems[j, :].T)
        print('the %d and %d similarity is: %f' % (item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    return ratSimTotal / simTotal


def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst):
    """Return the top ``N`` unrated items for ``user`` with estimated ratings.

    Args:
        dataMat: user-by-item rating matrix (0 = unrated).
        user: row index of the user.
        N: maximum number of recommendations to return.
        simMeas: similarity function handed to ``estMethod``.
        estMethod: estimator called as ``estMethod(dataMat, user, simMeas, item)``.

    Returns:
        A list of ``(item, estimatedScore)`` tuples sorted by descending
        score, or the string ``'you rated everything'`` when the user has
        no unrated items.
    """
    dataMat = np.asmatrix(dataMat)
    unratedIndex = np.nonzero(np.asarray(dataMat[user, :]) == 0)
    # Diagnostic prints are kept so the output matches the recorded run below.
    print(unratedIndex)
    unratedItems = unratedIndex[1]
    print(unratedItems)
    if len(unratedItems) == 0:
        return 'you rated everything'
    itemScores = [(item, estMethod(dataMat, user, simMeas, item))
                  for item in unratedItems]
    return sorted(itemScores, key=lambda jj: jj[1], reverse=True)[:N]


def printMat(inMat, thresh=0.8):
    """Print a 2-D matrix as rows of 1/0, where 1 marks entries above ``thresh``."""
    mask = np.atleast_2d(np.asarray(inMat, dtype=float)) > thresh
    for row in mask:
        print(' '.join('1' if flag else '0' for flag in row))


def imgCompress(numSV=3, thresh=0.8, fileName='0_5.txt'):
    """Print a 0/1 text image before and after a rank-``numSV`` SVD reconstruction.

    Args:
        numSV: number of singular values used for the reconstruction;
            clamped to the number available.
        thresh: reconstructed values above this print as 1, others as 0.
        fileName: text file with one row of digits per line; the first 32
            characters of every non-blank line are used.
    """
    with open(fileName) as fh:
        myl = [[int(ch) for ch in line.rstrip()[:_IMG_WIDTH]]
               for line in fh if line.strip()]
    myMat = np.asmatrix(myl)
    print("****original matrix******")
    printMat(myMat, thresh)
    U, Sigma, VT = la.svd(myMat)
    numSV = min(numSV, len(Sigma))
    SigRecon = np.asmatrix(np.diag(Sigma[:numSV]))
    reconMat = np.asmatrix(U)[:, :numSV] * SigRecon * np.asmatrix(VT)[:numSV, :]
    print("****reconstructed matrix using %d singular values******" % numSV)
    printMat(reconMat, thresh)


if __name__ == '__main__':
    print("begin")
    myData = loadExData2()
    myMat = np.asmatrix(myData)
    recommend(myMat, 2, 3, cosSim, svdEst)

# Output recorded with the original script:
#
# begin
# (array([0, 0, 0, 0, 0, 0, 0, 0]), array([ 0, 1, 2, 3, 5, 6, 8, 10]))
# [ 0 1 2 3 5 6 8 10]
# the 0 and 4 similarity is: 0.487100
# the 0 and 7 similarity is: 0.996341
# the 0 and 9 similarity is: 0.490280
# the 1 and 4 similarity is: 0.485583
# the 1 and 7 similarity is: 0.995886
# the 1 and 9 similarity is: 0.490272
# the 2 and 4 similarity is: 0.485739
# the 2 and 7 similarity is: 0.995963
# the 2 and 9 similarity is: 0.490180
# the 3 and 4 similarity is: 0.450495
# the 3 and 7 similarity is: 0.482175
# the 3 and 9 similarity is: 0.522379
# the 5 and 4 similarity is: 0.506795
# the 5 and 7 similarity is: 0.494716
# the 5 and 9 similarity is: 0.496130
# the 6 and 4 similarity is: 0.434401
# the 6 and 7 similarity is: 0.479543
# the 6 and 9 similarity is: 0.583833
# the 8 and 4 similarity is: 0.490037
# the 8 and 7 similarity is: 0.997067
# the 8 and 9 similarity is: 0.490078
# the 10 and 4 similarity is: 0.512896
# the 10 and 7 similarity is: 0.524970
# the 10 and 9 similarity is: 0.493617
#
# [Original post link]
posted on 2018-06-20 21:11 luoganttcc 閱讀(...) 評論(...) 編輯 收藏
總結
- 上一篇: pymysql dataframe 写入
- 下一篇: Python 位运算符号