推荐算法实现之BMF(pymc3+MovieLens)
生活随笔
收集整理的這篇文章主要介紹了
推荐算法实现之BMF(pymc3+MovieLens)
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
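BMF(Bayesian Matrix Factorization)是筆者根據PMF(Probabilistic Matrix Factorization,http://papers.nips.cc/paper/3208-probabilistic-matrix-factorization.pdf)和BPMF(Bayesian Probabilistic Matrix Factorization,https://www.cs.toronto.edu/~rsalakhu/papers/bpmf.pdf)兩篇論文的思路,自主構建的模型。主要思路是先建立矩陣分解(MF)的概率模型,然後用貝葉斯推斷方法(變分推斷)來求解。參考代碼如下: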
# -*- Encoding:UTF-8 -*-
"""
@author: Jason.F
@date: 2019.07.22
@function: Implementing BMF (Bayesian Matrix Factorization) by VI
    Dataset: MovieLens dataset (ml-1m)
    Evaluating: hit ratio, NDCG
"""
import ast
import heapq
import logging
import math
import sys
import time
from itertools import groupby
from operator import itemgetter

import numpy as np
import pandas as pd

# NOTE: pymc3 / theano are imported inside build_BMF() and main() so that the
# data-loading and ranking helpers below can be imported (and tested) on a
# machine that does not have the Theano stack installed.

TRAIN_PATH = '/data/fjsdata/ctKngBase/ml/ml-1m.train.rating'
TEST_PATH = '/data/fjsdata/ctKngBase/ml/ml-1m.test.negative'


def getTraindata(filePath=TRAIN_PATH):
    """Load the training ratings into a dense user x item matrix.

    Each non-blank line must be tab-separated with at least three fields:
    ``user<TAB>item<TAB>score``; any further fields are ignored.

    Args:
        filePath: path of the rating file (defaults to the ml-1m train split).

    Returns:
        ``np.float32`` array of shape ``(max_user_id + 1, max_item_id + 1)``.
        Cells without a rating stay 0; if a (user, item) pair occurs more than
        once, the last occurrence wins.
    """
    data = []
    max_user = 0
    max_item = 0
    with open(filePath, 'r') as f:
        for line in f:
            # Strip only the line terminator: slicing off the last character
            # would corrupt a final line that has no trailing newline.
            line = line.rstrip('\r\n')
            if not line.strip():
                continue  # tolerate blank lines
            fields = line.split("\t")
            user = int(fields[0])
            item = int(fields[1])
            score = float(fields[2])
            data.append((user, item, score))
            max_user = max(max_user, user)
            max_item = max(max_item, item)
    # The "Num" values printed here are the largest ids seen, as in the
    # original script.
    print("Loading Success!\n"
          "Data Info:\n"
          "\tUser Num: {}\n"
          "\tItem Num: {}\n"
          "\tData Size: {}".format(max_user, max_item, len(data)))
    R = np.zeros([max_user + 1, max_item + 1], dtype=np.float32)
    for user, item, rating in data:
        R[user][item] = rating
    return R


def getTestdata(filePath=TEST_PATH):
    """Load the test split as a flat list of ``[user, item]`` pairs.

    Each non-blank line is tab-separated: the first field is a literal
    ``(user, positive_item)`` tuple and the remaining fields are negative item
    ids. For every line the positive pair is emitted first, followed by one
    pair per negative item.

    Args:
        filePath: path of the test file (defaults to the ml-1m negative split).

    Returns:
        List of ``[user, item]`` lists, in file order.
    """
    testset = []
    with open(filePath, 'r') as fd:
        for line in fd:
            line = line.strip()
            if not line:
                continue  # tolerate blank lines
            arr = line.split('\t')
            # literal_eval only accepts Python literals, so a malformed or
            # hostile data file cannot execute code the way eval() would.
            u, pos_item = ast.literal_eval(arr[0])
            testset.append([u, pos_item])  # one positive item
            for neg in arr[1:]:
                testset.append([u, int(neg)])  # negative items
    return testset


def getHitRatio(ranklist, targetItem):
    """Return 1 if ``targetItem`` occurs in ``ranklist``, otherwise 0."""
    return 1 if targetItem in ranklist else 0


def getNDCG(ranklist, targetItem):
    """Return the NDCG of a single relevant item within ``ranklist``.

    The gain is ``log(2) / log(rank + 2)`` for the 0-based ``rank`` of the
    first occurrence of ``targetItem``; 0 when the item is absent.
    """
    for rank, item in enumerate(ranklist):
        if item == targetItem:
            return math.log(2) / math.log(rank + 2)
    return 0


def evaluateTopN(testset, scores, topn=10):
    """Compute per-user hit ratio and NDCG from a flat test set.

    Consecutive rows of ``testset`` that share a user form one group; the
    first row of each group carries that user's positive item. Every group is
    evaluated, including the last one (the original inline loop only evaluated
    a group when the *next* user showed up, so the final user was dropped).

    Args:
        testset: sequence of ``[user, item]`` pairs, as built by getTestdata.
        scores: 2-D array indexable as ``scores[user, item]``.
        topn: length of the ranked list taken per user.

    Returns:
        ``(hits, ndcgs)``: two lists with one entry per user group.
    """
    hits = []
    ndcgs = []
    for _, group in groupby(testset, key=itemgetter(0)):
        rows = list(group)
        pos_i = rows[0][1]
        # Dict keeps one score per item and preserves first-seen order, which
        # is what heapq.nlargest uses to break ties.
        map_item_score = {i: scores[u, i] for u, i in rows}
        ranklist = heapq.nlargest(topn, map_item_score, key=map_item_score.get)
        hits.append(getHitRatio(ranklist, pos_i))
        ndcgs.append(getNDCG(ranklist, pos_i))
    return hits, ndcgs


def build_BMF(R, K=8, alpha=2, std=0.01):
    """Build the pymc3 model for Bayesian matrix factorization.

    ``U`` (n x K) and ``V`` (m x K) get zero-mean multivariate normal priors
    whose precisions are the inverse of the mean per-row / per-column variance
    of ``R``. The observed node ``nR`` is Normal with mean ``U . V^T`` and
    precision ``alpha``.

    NOTE(review): the whole of ``R`` is passed as ``observed``, so zero cells
    are modelled as observations too -- confirm this is intended.

    Args:
        R: 2-D rating matrix of shape (n, m).
        K: number of latent factors.
        alpha: precision of the observation noise.
        std: scale of the random initial values for ``U`` and ``V``.

    Returns:
        The ``pm.Model`` instance.
    """
    import pymc3 as pm
    import theano.tensor as t

    alpha_u = 1 / R.var(axis=1).mean()
    alpha_v = 1 / R.var(axis=0).mean()
    logging.info('building the BMF model')
    n, m = R.shape
    with pm.Model() as bmf:
        U = pm.MvNormal('U', mu=0, tau=alpha_u * np.eye(K), shape=(n, K),
                        testval=np.random.randn(n, K) * std)
        V = pm.MvNormal('V', mu=0, tau=alpha_v * np.eye(K), shape=(m, K),
                        testval=np.random.randn(m, K) * std)
        nR = pm.Normal('nR', mu=t.dot(U, V.T), tau=alpha * np.ones(R.shape),
                       observed=R)
    logging.info('done building BMF model')
    return bmf


def main():
    """Fit BMF with ADVI on the training split and print HR / NDCG at top 10."""
    import pymc3 as pm

    logging.basicConfig(level=logging.INFO,
                        format='[%(asctime)s]: %(message)s')
    K = 8  # number of latent factors

    # Read data and build BMF model.
    R = getTraindata()
    bmf = build_BMF(R, K=K)

    with bmf:
        # Sample with BMF.
        tstart = time.time()
        logging.info('Start BMF sampling')
        inference = pm.ADVI()
        approx = pm.fit(n=1000, method=inference)
        trace = approx.sample(draws=500)
        # Alternative kept from the original post (MAP start + NUTS):
        #   start = pm.find_MAP()
        #   step = pm.NUTS()
        #   trace = pm.sample(1000, step, start=start, njobs=8)
        elapsed = time.time() - tstart
        logging.info('Complete BMF sampling in %d seconds', int(elapsed))

    with bmf:
        # Evaluation.
        testset = getTestdata()
        ppc = pm.sample_posterior_predictive(trace, progressbar=True)
        # ppc['nR'] has three dims; average over the first (posterior draws).
        nR = np.mean(ppc['nR'], 0)

    hits, ndcgs = evaluateTopN(testset, nR, topn=10)  # default Topn=10
    if not hits:
        logging.warning('Test set is empty; nothing to evaluate')
        return
    hitratio, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
    print("hr: {}, NDCG: {}, At K {}".format(hitratio, ndcg, K))
    # Output reported by the original author (produced by the original loop,
    # which did not score the last user):
    #   hr: 0.10018214936247723, NDCG: 0.04508370537288355, At K 8


if __name__ == "__main__":
    main()

# Author's closing note: anyone interested in building probabilistic models
# for recommendation and solving them with Bayesian methods is welcome to
# get in touch and discuss.
總結
以上是生活随笔為你收集整理的推荐算法实现之BMF(pymc3+MovieLen)的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 推荐经典算法实现之BPMF(pymc3+
- 下一篇: 概率编程库Edward安装