DIN算法代码详细解读(Deep Interest Network)
首先給出論文的地址:Deep Interest Network for Click-Through Rate Prediction
然后給出兩篇對論文進行了詳細介紹的文章:
王喆:推薦系統中的注意力機制——阿里深度興趣網絡(DIN)
梁勇:推薦系統遇上深度學習(十八)-探秘阿里深度興趣網絡淺析及實現
建議先讀完上面兩篇文章,對模型有大概的了解之后再來讀本篇文章。本文主要從代碼的層面出發,逐行去分析理解論文中思想的具體實現。
本文使用的代碼地址為:zhougr1993/DeepInterestNetwork
開始!
源碼介紹
論文中用的是 Amazon Product Data 數據,包含兩個文件 reviews_Electronics_5.json 和 meta_Electronics.json。具體數據形式在本文最后有展示。文件格式鏈接中有說明,其中 reviews 主要是用戶買了相關商品產生的上下文信息,包括商品 id、時間、評論等。meta 文件是關于商品本身的信息,包括商品 id、名稱、類別、買了還買等信息。其中 meta 部分為商品信息,reviewer 為點擊者信息,轉換為 dataframe 格式之后的具體字段如下:
meta_df: ['asin','imUrl','description','categories','title','price','salesRank','related','brand']
reviews_df: ['reviewerID', 'asin', 'reviewerName', 'helpful', 'reviewText','overall', 'summary', 'unixReviewTime','reviewTime']

源碼解析
一、數據預處理
二、生成正負樣本
build_dataset.py代碼詳解如下:
import random import pickle random.seed(1234) withopen('C:/Users/Administrator/Desktop/raw_data/remap.pkl','rb')asf:reviews_df=pickle.load(f)cate_list=pickle.load(f) user_count,item_count,cate_count,example_count=pickle.load(f) ''' pos_list(每個點擊者點擊的商品 ID 組成的 list)例如: [8] [9,6,4,5] [3] [8] ''' train_set=[] test_set=[] for reviewerID,histinreviews_df.groupby('reviewerID'): pos_list=hist['asin'].tolist() defgen_neg(): #取每個用戶點擊列表的第一個商品 neg=pos_list[0] while neg in pos_list: #隨機初始化,即給點擊者隨機初始化一個商品,item_count-1 為商品數 neg=random.randint(0,item_count-1) return negneg_list=[gen_neg() for i in range(len(pos_list))] #如果用戶點擊的商品數大于 1,則循環 for i in range(1,len(pos_list)): hist=pos_list[:i] #print(hist) ''' #下面的 if 語句控制正負樣本的個數和格式),例如某用戶點擊過 abcd 四 個商品,則最終生成的樣本為:(其中 X 為隨機初始化的某商品 ID) ((user_id,a,(b,1)) (user_id,a,(X,0)) (user_id,(a,b),(c,1)) user_id,(a,b),(X0)) (user_id,(a,b,c),(d,1)) (user_id,(a,b,c),(X,0)) '''if i != len(pos_list) - 1: train_set.append((reviewerID,hist,pos_list[i],1)) print(train_set) train_set.append((reviewerID,hist,neg_list[i],0)) print(train_set) #驗證集格式(user_id,a,(b,X)) else: label=(pos_list[i],neg_list[i]) print(label) test_set.append((reviewerID,hist,label)) print(test_set)#最終的數據集里點擊商品數小于 1 的數據刪除掉了 random.shuffle(train_set) random.shuffle(test_set) assertlen(test_set)==user_count with open('dataset.pkl','wb') as f: pickle.dump(train_set,f,pickle.HIGHEST_PROTOCOL) pickle.dump(test_set,f,pickle.HIGHEST_PROTOCOL) pickle.dump(cate_list,f,pickle.HIGHEST_PROTOCOL) pickle.dump((user_count, item_count, cate_count), f, pickle.HIGHEST_PROTOCOL)三、模型部分(包括attention機制實現)
下面介紹本算法的核心model.py文件,每行關鍵的代碼都給出了注釋。
import tensorflow as tf from Dice import diceclass Model(object):def __init__(self,user_count,item_count,cate_count,cate_list):# shape: [B], user id。 (B:batch size)self.u = tf.placeholder(tf.int32, [None, ])# shape: [B] i: 正樣本的itemself.i = tf.placeholder(tf.int32, [None, ])# shape: [B] j: 負樣本的itemself.j = tf.placeholder(tf.int32, [None, ])# shape: [B], y: labelself.y = tf.placeholder(tf.float32, [None, ])# shape: [B, T] #用戶行為特征(User Behavior)中的item序列。T為序列長度self.hist_i = tf.placeholder(tf.int32, [None, None])# shape: [B]; sl:sequence length,User Behavior中序列的真實序列長度(?)self.sl = tf.placeholder(tf.int32, [None, ])#learning rateself.lr = tf.placeholder(tf.float64, [])hidden_units = 128# shape: [U, H], user_id的embedding weight. U是user_id的hash bucket sizeuser_emb_w = tf.get_variable("user_emb_w", [user_count, hidden_units])# shape: [I, H//2], item_id的embedding weight. I是item_id的hash bucket sizeitem_emb_w = tf.get_variable("item_emb_w", [item_count, hidden_units // 2]) # [I, H//2]# shape: [I], biasitem_b = tf.get_variable("item_b", [item_count],initializer=tf.constant_initializer(0.0))# shape: [C, H//2], cate_id的embedding weight.cate_emb_w = tf.get_variable("cate_emb_w", [cate_count, hidden_units // 2])# shape: [C, H//2]cate_list = tf.convert_to_tensor(cate_list, dtype=tf.int64)# 從cate_list中取出正樣本的cateic = tf.gather(cate_list, self.i)# 正樣本的embedding,正樣本包括item和catei_emb = tf.concat(values=[tf.nn.embedding_lookup(item_emb_w, self.i),tf.nn.embedding_lookup(cate_emb_w, ic),], axis=1)# 偏置bi_b = tf.gather(item_b, self.i)# 從cate_list中取出負樣本的catejc = tf.gather(cate_list, self.j)# 負樣本的embedding,負樣本包括item和catej_emb = tf.concat([tf.nn.embedding_lookup(item_emb_w, self.j),tf.nn.embedding_lookup(cate_emb_w, jc),], axis=1)# 偏置bj_b = tf.gather(item_b, self.j)# 用戶行為序列(User Behavior)中的cate序列hc = tf.gather(cate_list, self.hist_i)# 用戶行為序列(User Behavior)的embedding,包括item序列和cate序列h_emb = tf.concat([tf.nn.embedding_lookup(item_emb_w, self.hist_i),tf.nn.embedding_lookup(cate_emb_w, hc),], axis=2)# 
attention操作hist_i = attention(i_emb, h_emb, self.sl) # -- attention end ---hist = tf.layers.batch_normalization(inputs=hist)hist = tf.reshape(hist,[-1,hidden_units])#添加一層全連接層,hist為輸入,hidden_units為輸出維數hist = tf.layers.dense(hist,hidden_units)u_emb = hist#下面兩個全連接用來計算y',i為正樣本,j為負樣本# fcn begindin_i = tf.concat([u_emb, i_emb], axis=-1)din_i = tf.layers.batch_normalization(inputs=din_i, name='b1')d_layer_1_i = tf.layers.dense(din_i, 80, activation=None, name='f1')d_layer_1_i = dice(d_layer_1_i, name='dice_1_i')d_layer_2_i = tf.layers.dense(d_layer_1_i, 40, activation=None, name='f2')d_layer_2_i = dice(d_layer_2_i, name='dice_2_i')d_layer_3_i = tf.layers.dense(d_layer_2_i, 1, activation=None, name='f3')din_j = tf.concat([u_emb, j_emb], axis=-1)din_j = tf.layers.batch_normalization(inputs=din_j, name='b1', reuse=True)d_layer_1_j = tf.layers.dense(din_j, 80, activation=None, name='f1', reuse=True)d_layer_1_j = dice(d_layer_1_j, name='dice_1_j')d_layer_2_j = tf.layers.dense(d_layer_1_j, 40, activation=None, name='f2', reuse=True)d_layer_2_j = dice(d_layer_2_j, name='dice_2_j')d_layer_3_j = tf.layers.dense(d_layer_2_j, 1, activation=None, name='f3', reuse=True)d_layer_3_i = tf.reshape(d_layer_3_i, [-1])d_layer_3_j = tf.reshape(d_layer_3_j, [-1])#預測的(y正-y負)x = i_b - j_b + d_layer_3_i - d_layer_3_j # [B]#預測的(y正)self.logits = i_b + d_layer_3_i# logits for all item:u_emb_all = tf.expand_dims(u_emb, 1)u_emb_all = tf.tile(u_emb_all, [1, item_count, 1])#將所有的除u_emb_all外的embedding,concat到一起all_emb = tf.concat([item_emb_w,tf.nn.embedding_lookup(cate_emb_w, cate_list)], axis=1)all_emb = tf.expand_dims(all_emb, 0)all_emb = tf.tile(all_emb, [512, 1, 1])# 將所有的embedding,concat到一起din_all = tf.concat([u_emb_all, all_emb], axis=-1)din_all = tf.layers.batch_normalization(inputs=din_all, name='b1', reuse=True)d_layer_1_all = tf.layers.dense(din_all, 80, activation=None, name='f1', reuse=True)d_layer_1_all = dice(d_layer_1_all, name='dice_1_all')d_layer_2_all = tf.layers.dense(d_layer_1_all, 40, 
activation=None, name='f2', reuse=True)d_layer_2_all = dice(d_layer_2_all, name='dice_2_all')d_layer_3_all = tf.layers.dense(d_layer_2_all, 1, activation=None, name='f3', reuse=True)d_layer_3_all = tf.reshape(d_layer_3_all, [-1, item_count])self.logits_all = tf.sigmoid(item_b + d_layer_3_all)# -- fcn end -------self.mf_auc = tf.reduce_mean(tf.to_float(x > 0))self.score_i = tf.sigmoid(i_b + d_layer_3_i)self.score_j = tf.sigmoid(j_b + d_layer_3_j)self.score_i = tf.reshape(self.score_i, [-1, 1])self.score_j = tf.reshape(self.score_j, [-1, 1])self.p_and_n = tf.concat([self.score_i, self.score_j], axis=-1)# Step variableself.global_step = tf.Variable(0, trainable=False, name='global_step')self.global_epoch_step = tf.Variable(0, trainable=False, name='global_epoch_step')self.global_epoch_step_op = tf.assign(self.global_epoch_step, self.global_epoch_step + 1)# loss and trainself.loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits,labels=self.y))trainable_params = tf.trainable_variables()self.train_op = tf.train.GradientDescentOptimizer(learning_rate=self.lr).minimize(self.loss)def train(self,sess,uij,l):loss,_ = sess.run([self.loss,self.train_op],feed_dict={#self.u : uij[0],self.i : uij[1],self.y : uij[2],self.hist_i : uij[3],self.sl : uij[4],self.lr : l})return lossdef eval(self, sess, uij):u_auc, socre_p_and_n = sess.run([self.mf_auc, self.p_and_n], feed_dict={#self.u: uij[0],self.i: uij[1],#正樣本self.j: uij[2],#負樣本self.hist_i: uij[3],self.sl: uij[4],})return u_auc, socre_p_and_ndef test(self, sess, uid, hist_i, sl):return sess.run(self.logits_all, feed_dict={self.u: uid,self.hist_i: hist_i,self.sl: sl,})def save(self, sess, path):saver = tf.train.Saver()saver.save(sess, save_path=path)def restore(self, sess, path):saver = tf.train.Saver()saver.restore(sess, save_path=path)def extract_axis_1(data, ind):batch_range = tf.range(tf.shape(data)[0])indices = tf.stack([batch_range, ind], axis=1)res = tf.gather_nd(data, indices)return 
res#item_embedding,history_behivior_embedding,sequence_length def attention(queries,keys,keys_length):'''queries: [B, H] [batch_size,embedding_size]keys: [B, T, H] [batch_size,T,embedding_size]keys_length: [B] [batch_size]#T為歷史行為序列長度'''#(?,32)->(None,32)->32# tile()函數是用來對張量(Tensor)進行擴展的,其特點是對當前張量內的數據進行一定規則的復制。最終的輸出張量維度不變# tf.shape(keys)[1]==T# 對queries的維度進行reshape# (?,T,32)這里是為了讓queries和keys的維度相同而做的操作# (?,T,128)把u和v以及u v的element wise差值向量合并起來作為輸入,# 然后喂給全連接層,最后得出兩個item embedding,比如u和v的權重,即g(Vi,Va)queries_hidden_units = queries.get_shape().as_list()[-1]queries = tf.tile(queries,[1,tf.shape(keys)[1]])queries = tf.reshape(queries,[-1,tf.shape(keys)[1],queries_hidden_units])din_all = tf.concat([queries,keys,queries-keys,queries * keys],axis=-1) # B*T*4H# 三層全鏈接(d_layer_3_all為訓練出來的atteneion權重)d_layer_1_all = tf.layers.dense(din_all, 80, activation=tf.nn.sigmoid, name='f1_att')d_layer_2_all = tf.layers.dense(d_layer_1_all, 40, activation=tf.nn.sigmoid, name='f2_att')d_layer_3_all = tf.layers.dense(d_layer_2_all, 1, activation=None, name='f3_att') #B*T*1#為了讓outputs維度和keys的維度一致outputs = tf.reshape(d_layer_3_all,[-1,1,tf.shape(keys)[1]]) #B*1*T# bool類型 tf.shape(keys)[1]為歷史行為序列的最大長度,keys_length為人為設定的參數,# 如tf.sequence_mask(5,3) 即為array[True,True,True,False,False]# 函數的作用是為了后面補齊行為序列,獲取等長的行為序列做鋪墊key_masks = tf.sequence_mask(keys_length,tf.shape(keys)[1])#在第二維增加一維,也就是由B*T變成B*1*Tkey_masks = tf.expand_dims(key_masks,1) # B*1*T#tf.ones_like新建一個與output類型大小一致的tensor,設置填充值為一個很小的值,而不是0,padding的mask后補一個很小的負數,這樣softmax之后就會接近0paddings = tf.ones_like(outputs) * (-2 ** 32 + 1)#填充,獲取等長的行為序列# tf.where(condition, x, y),condition是bool型值,True/False,返回值是對應元素,condition中元素為True的元素替換為x中的元素,為False的元素替換為y中對應元素#由于是替換,返回值的維度,和condition,x , y都是相等的。outputs = tf.where(key_masks,outputs,paddings) # B * 1 * T# Scale(縮放)outputs = outputs / (keys.get_shape().as_list()[-1] ** 0.5)# Activationoutputs = tf.nn.softmax(outputs) # B * 1 * T# Weighted Sum outputs=g(Vi,Va) keys=Vi#這步為公式中的g(Vi*Va)*Vioutputs = 
tf.matmul(outputs,keys) # B * 1 * H 三維矩陣相乘,相乘發生在后兩維,即 B * (( 1 * T ) * ( T * H ))return outputs下一篇文章中將詳細講述在實際的業務中,如何應用DIN算法,敬請期待。
總結
以上是生活随笔為你收集整理的din算法 代码_DIN算法代码详细解读的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: pythonsocket自定义协议_小渣
- 下一篇: 想当年,我也是斩获20+大厂offer的