用户标签
代码:用户标签
通过标签将用户和物品联系起来
# What tags are used for:
#   1. Item-related: attributes of the item (time, creator, ...).
#   2. User-related: the user's opinion of the item, or a task ("to read", ...).
#
# Algorithms:
#   1. Simple algorithm: recommend the popular items found under the tags a
#      user applies most often. The preference of user u for item i is
#      $p(u,i)=\sum_b \frac{n_{u,b}}{\log(1+n_b^{(u)})}\frac{n_{i,b}}{\log(1+n_i^{(u)})}$
import math
from operator import itemgetter

import pandas as pd


def _top_keys(counts, n):
    """Return the keys of *counts* holding the *n* largest values, largest first.

    The sort is stable, so keys with equal values keep the iteration order of
    *counts*.
    """
    ranked = sorted(counts.items(), key=itemgetter(1), reverse=True)
    return [key for key, _ in ranked[:n]]


class SimpleTagBased:
    """Recommend items to a user through the tags the user has applied.

    All tables are built per instance in ``__init__`` (they are not shared
    between instances):

    * ``user_tag``   -- {user: {tag: times the user applied the tag}}
    * ``user_item``  -- {user: [items the user tagged]}; these are filtered
      out of the recommendations
    * ``tag_item``   -- {tag: {item: times the tag was applied to the item}}
    * ``tag_count``  -- {tag: total times the tag was applied}
    * ``item_count`` -- {item: number of tagged rows that mention the item}
    * ``item_tag``   -- {item: {tag: times the tag was applied to the item}}
    """

    def __init__(self, data):
        """Build the count tables from *data*.

        *data* must provide ``itertuples(index=False)`` yielding
        ``(user, item, tags)`` rows, where ``tags`` is a space-separated
        string. Tags are lower-cased before counting.
        """
        self.user_tag = {}
        self.user_item = {}
        self.tag_item = {}
        self.tag_count = {}
        self.item_count = {}
        self.item_tag = {}

        for user, item, tags in data.itertuples(index=False):
            # A missing tag field arrives as NaN (a float); the row carries
            # no tag information, so it is skipped entirely.
            if isinstance(tags, float):
                continue

            self.user_item.setdefault(user, []).append(item)
            self.item_count[item] = self.item_count.get(item, 0) + 1
            item_tags = self.item_tag.setdefault(item, {})
            user_tags = self.user_tag.setdefault(user, {})

            for tag in tags.split(' '):
                tag = tag.lower()
                user_tags[tag] = user_tags.get(tag, 0) + 1
                tag_items = self.tag_item.setdefault(tag, {})
                tag_items[item] = tag_items.get(item, 0) + 1
                self.tag_count[tag] = self.tag_count.get(tag, 0) + 1
                item_tags[tag] = item_tags.get(tag, 0) + 1

    def recommend(self, user, top_items=5, top_tags=10):
        """Recommend items for *user*.

        Returns ``(userdesc, res)``: ``userdesc`` is the user's *top_tags*
        most-used tags, and ``res`` holds, for each of the *top_items*
        best-scoring unseen items, that item's *top_tags* most-applied tags
        (used as a description of the item).

        Raises ``KeyError`` if *user* has no tagged rows.
        """
        # A set makes the "already seen" test O(1) inside the inner loop.
        viewed = set(self.user_item[user])
        utags = self.user_tag[user]

        rank = {}
        for tag, weight in utags.items():
            tag_norm = math.log(1 + self.tag_count[tag])
            for item, wt in self.tag_item[tag].items():
                if item in viewed:
                    continue
                # Preference of the user for the item, damped by the
                # popularity of both the tag and the item. Counts are >= 1,
                # so both logarithms are strictly positive.
                score = (weight * 1.0 / tag_norm * wt
                         / math.log(1 + self.item_count[item]))
                rank[item] = rank.get(item, 0) + score

        res = [_top_keys(self.item_tag[item], top_tags)
               for item in _top_keys(rank, top_items)]
        userdesc = _top_keys(utags, top_tags)
        return (userdesc, res)


if __name__ == "__main__":
    data = pd.read_csv('delicious.dat', sep='\t', header=None)
    stb = SimpleTagBased(data=data)
    userdesc, res = stb.recommend(104)
    print(userdesc)
    print(res)
结果:
--用户常用标签
['software', 'webdesign', 'tools', 'dev', 'howto', 'free', 'freeware', 'opensource', 'reference', 'linux']
--所推荐物品具有的标签
['css', 'webdesign', 'reference', 'design', 'web', 'development', 'html', 'tools', 'webdev', 'programming'],
['webdesign', 'templates', 'design', 'css', 'opensource', 'web', 'free', 'html', 'layout', 'template'],
['fonts', 'typography', 'webdesign', 'design', 'tools', 'css', 'web', 'font', 'type', 'reference'],
['opensource', 'software', 'freeware', 'linux', 'free', 'windows', 'tools', 'reference', 'download', 'alternative'],
['freeware', 'software', 'utilities', 'tools', 'free', 'reference', 'list', 'windows', 'download', 'opensource']
# Tag expansion: find the tags most related to a given tag, using the cosine
# similarity of the tags' per-item usage counts.
import math
from operator import itemgetter

import pandas as pd


def _build_item_tag(data):
    """Return {item: {tag: times the tag was applied to the item}} from *data*.

    *data* must provide ``itertuples(index=False)`` yielding
    ``(user, item, tags)`` rows with ``tags`` a space-separated string.
    Tags are lower-cased before counting.
    """
    item_tag = {}
    for _user, item, tags in data.itertuples(index=False):
        # Skip rows whose tag field is NaN (a float).
        if isinstance(tags, float):
            continue
        counts = item_tag.setdefault(item, {})
        for tag in tags.split(' '):
            tag = tag.lower()
            counts[tag] = counts.get(tag, 0) + 1
    return item_tag


def recommend(taga, n, item_tag, verbose=False):
    """Return the *n* tags most similar to *taga*, most similar first.

    *item_tag* maps each item to ``{tag: count}``. The similarity between
    *taga* and another tag b is the cosine of their count vectors over items:
    ``sum(a_i * b_i) / (sqrt(sum(a_i ** 2)) * sqrt(sum(b_i ** 2)))``.
    Only tags that share at least one item with *taga* are ranked, so the
    result is empty when *taga* is never used.

    When *verbose* is true, the fraction of items processed so far is printed
    after each item.
    """
    na = 0      # sum of squared counts of taga
    nb = {}     # tag -> sum of squared counts of that tag, over all items
    nab = {}    # tag -> sum of (count of taga * count of tag), over shared items
    total = len(item_tag)

    for done, tags in enumerate(item_tag.values(), 1):
        if verbose:
            print(done * 1.0 / total)
        av = tags.get(taga)
        for tag, v in tags.items():
            if tag == taga:
                na += v * v
                continue
            nb[tag] = nb.get(tag, 0) + v * v
            if av is not None:
                nab[tag] = nab.get(tag, 0) + av * v

    norm_a = math.sqrt(na)
    rank = {tag: dot / norm_a / math.sqrt(nb[tag]) for tag, dot in nab.items()}
    ranked = sorted(rank.items(), key=itemgetter(1), reverse=True)
    return [tag for tag, _ in ranked[:n]]


if __name__ == "__main__":
    data = pd.read_csv('delicious.dat', sep='\t', header=None)
    item_tag = _build_item_tag(data)
    tags = recommend('webdesign', 10, item_tag)
    print(tags)
结果:和webdesign相关的标签
['design', 'css', 'web', 'webdev', 'html', 'web_design', 'inspiration', 'xhtml', 'webdevelopment', 'resources']
#   2. Graph-based algorithm. For the solution, see: probabilistic graphical
#      models.
#
# Recommending tags to a user: when suggesting tags for user u on item i, the
# score of tag $b_k$ is
#   $(1-\alpha)\frac{n_{u,b_k}}{\max_j(n_{u,b_j})}+\alpha\frac{n_{i,b_k}}{\max_j(n_{i,b_j})}$
from operator import itemgetter

import math
import pandas as pd


def _top_keys(counts, n):
    """Return the keys of *counts* holding the *n* largest values, largest first.

    The sort is stable, so keys with equal values keep the iteration order of
    *counts*.
    """
    ranked = sorted(counts.items(), key=itemgetter(1), reverse=True)
    return [key for key, _ in ranked[:n]]


def _build_tag_tables(data):
    """Return ``(item_tag, user_tag)`` count tables built from *data*.

    ``item_tag`` is {item: {tag: count}} and ``user_tag`` is
    {user: {tag: count}}. *data* must provide ``itertuples(index=False)``
    yielding ``(user, item, tags)`` rows with ``tags`` a space-separated
    string. Tags are lower-cased before counting.
    """
    item_tag = {}
    user_tag = {}
    for user, item, tags in data.itertuples(index=False):
        # Skip rows whose tag field is NaN (a float).
        if isinstance(tags, float):
            continue
        item_counts = item_tag.setdefault(item, {})
        user_counts = user_tag.setdefault(user, {})
        for tag in tags.split(' '):
            tag = tag.lower()
            item_counts[tag] = item_counts.get(tag, 0) + 1
            user_counts[tag] = user_counts.get(tag, 0) + 1
    return item_tag, user_tag


def recommend(user, item, user_tag, item_tag, alpha, top_n=10):
    """Suggest tags for *user* to apply to *item*.

    Each tag is scored as a blend of how often the user applies it and how
    often the item receives it, each normalised by the respective maximum
    count; *alpha* is the weight of the item side and ``1 - alpha`` the
    weight of the user side.

    Returns ``(udesc, idesc, res)``: the user's most-used tags, the item's
    most-applied tags, and the recommended tags, each limited to *top_n*
    entries and ordered best first.

    Raises ``KeyError`` for an unknown user or item, and ``ValueError`` if
    either tag table entry is empty.
    """
    utgs = user_tag[user]
    itgs = item_tag[item]
    udesc = _top_keys(utgs, top_n)
    idesc = _top_keys(itgs, top_n)

    # Divide by floats: with an integer alpha (0 or 1) the all-integer
    # expression would truncate to 0 under Python 2 integer division.
    maxu = float(max(utgs.values()))
    maxi = float(max(itgs.values()))

    rank = {}
    for tag, v in utgs.items():
        rank[tag] = rank.get(tag, 0) + (1 - alpha) * v / maxu
    for tag, v in itgs.items():
        rank[tag] = rank.get(tag, 0) + alpha * v / maxi

    res = _top_keys(rank, top_n)
    return udesc, idesc, res


if __name__ == "__main__":
    data = pd.read_csv('delicious.dat', sep='\t', header=None)
    item_tag, user_tag = _build_tag_tables(data)
    udesc, idesc, res = recommend(104, 33911, user_tag, item_tag, 0.8)
    print(udesc)
    print(idesc)
    print(res)
结果:
--用户常用标签
['software', 'webdesign', 'tools', 'dev', 'howto', 'free', 'freeware', 'opensource', 'reference', 'linux']
--物品常被打标签
['web', 'softwareagents', 'java', 'howto', 'moviles', 'documentation', 'semantica', 'hpi', 'api', 'agents']
--推荐标签
['howto', 'web', 'moviles', 'softwareagents', 'hpi', 'agents', 'api', 'jade', 'agentes', 'java']
?
转载于:https://www.cnblogs.com/porco/p/4452435.html
总结
- 上一篇: ios 绘制不规则 图形
- 下一篇: 钻石多少钱一克啊?