-
大小: 746KB 文件類型: .rar 金幣: 2 下載: 1 次 發布日期: 2021-05-22
- 語言: Python
- 標簽: 矩陣分解 movielens 推薦系統
資源簡介
推薦系統中矩陣分解被最廣泛地應用,本項目采用 Python 并在數據集 MovieLens 100K 上進行實現。

代碼片段和文件信息
#!/usr/bin/python
# -*- coding: UTF-8 -*-
#Created on: 2018/3/12
#author: Xiuze Zhou
#e-mail: zhouxiuze@foxmail.com

#-------------------------FUNCTION---------------------------#
# NOTE: the star import stays first so that the explicit imports below win
# any name clash with what pylab re-exports.
from pylab import *
import numpy as np
import random
import math
def SGD(train, test, N, M, gamma, D, lambda_1, Step):
    """Fit a matrix-factorization model with stochastic gradient descent.

    Args:
        train: sequence of [user_index, item_index, rating] records; it is
            traversed once per epoch.
        test: records in the same layout, passed to RMSE() after each epoch.
        N: the number of users (rows of the user-factor matrix).
        M: the number of items (rows of the item-factor matrix).
        gamma: the learning rate.
        D: the number of latent factors.
        lambda_1: the regularization parameter.
        Step: the number of epochs to run.

    Returns:
        A tuple (loss, rmse, p, q): the per-epoch regularized squared error
        accumulated over the training records, the per-epoch test RMSE, the
        N x D user-factor matrix and the M x D item-factor matrix.
    """
    # Factors start from uniform random values in [0, 1).
    p = np.random.random((N, D))
    q = np.random.random((M, D))
    rmse = []
    loss = []
    for ste in range(Step):
        los = 0.0
        for data in train:
            u = data[0]
            i = data[1]
            r = data[2]
            e = r - np.dot(p[u], q[i])
            # Compute both steps from the pre-update factors before writing
            # either back, so the item step does not see the user factors
            # that were modified a moment earlier in this same iteration.
            new_p = p[u] + gamma * (e * q[i] - lambda_1 * p[u])
            new_q = q[i] + gamma * (e * p[u] - lambda_1 * q[i])
            p[u] = new_p
            q[i] = new_q
            # The reported loss pairs the pre-update error with the
            # post-update factor norms.
            los = los + e ** 2 + lambda_1 * (
                np.square(p[u]).sum() + np.square(q[i]).sum())
        loss.append(los)
        rmse.append(RMSE(p, q, test))
        if ste % 10 == 0:
            # Progress marker: one integer every ten epochs.
            print(ste // 10)
    return loss, rmse, p, q

def RMSE(p, q, test):
    """Return the root-mean-square error of the predictions on ``test``.

    Args:
        p: user-factor matrix, indexed by user index.
        q: item-factor matrix, indexed by item index.
        test: sequence of [user_index, item_index, rating] records.

    Returns:
        The square root of the mean squared difference between each rating
        and the dot product of the matching user and item factor rows.
        NaN is returned when ``test`` is empty, because the mean is
        undefined there.
    """
    if len(test) == 0:
        return float("nan")
    p = np.asarray(p)
    q = np.asarray(q)
    users = [t[0] for t in test]
    items = [t[1] for t in test]
    ratings = np.array([t[2] for t in test], dtype=float)
    # Row-wise dot products of the selected user and item factor rows.
    predictions = np.sum(p[users] * q[items], axis=1)
    return np.sqrt(np.mean(np.square(ratings - predictions)))
def Load_data(filedir, ratio):
    """Read a whitespace-separated rating file and split it into train/test.

    Each non-blank line must start with three integer fields: user id,
    item id and rating. Any further columns are ignored. Raw ids are
    remapped to consecutive indices in order of first appearance.

    Args:
        filedir: path of the rating file.
        ratio: fraction of the shuffled records placed in the training set.

    Returns:
        A tuple (N, M, train, test): the number of distinct users, the
        number of distinct items, and two lists of
        [user_index, item_index, rating] records.

    Raises:
        ValueError: if a non-blank line has fewer than three fields or a
            field is not an integer.
    """
    user_set = {}  # raw user id -> consecutive index
    item_set = {}  # raw item id -> consecutive index
    data = []
    # The context manager closes the file even when a line fails to parse.
    with open(filedir) as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines, e.g. a trailing newline
            u, i, r = (int(x) for x in fields[:3])
            # setdefault assigns the next free index on first sight only.
            u_idx = user_set.setdefault(u, len(user_set))
            i_idx = item_set.setdefault(i, len(item_set))
            data.append([u_idx, i_idx, r])
    N = len(user_set)
    M = len(item_set)
    np.random.shuffle(data)
    split = int(len(data) * ratio)
    train = data[:split]
    test = data[split:]
    return N, M, train, test
def Figure(loss, rmse):
    """Plot the loss and RMSE curves in two separate figures and show them.

    Args:
        loss: sequence of loss values, one per iteration.
        rmse: sequence of RMSE values, one per iteration.
    """
    plt.figure('LOSS')
    plt.plot(range(len(loss)), loss, color='g', linewidth=3)
    plt.title('Convergence curve')
    plt.xlabel('Iterations')
    plt.ylabel('Loss')

    plt.figure('RMSE')
    plt.plot(range(len(rmse)), rmse, color='r', linewidth=3)
    plt.title('Convergence curve')
    plt.xlabel('Iterations')
    plt.ylabel('RMSE')

    plt.show()
#----------------------------SELF TEST----------------------------#

def main():
    """Load ./u.data, train the model, print the final RMSE and plot curves."""
    dir_data = "./u.data"
    ratio = 0.8  # fraction of the records used for training
    N, M, train, test = Load_data(dir_data, ratio)

    gamma = 0.005   # learning rate
    D = 10          # number of latent factors
    lambda_1 = 0.1  # regularization parameter
    Step = 50       # number of epochs
    loss, rmse, p, q = SGD(train, test, N, M, gamma, D, lambda_1, Step)
    # Test RMSE after the last epoch.
    print(rmse[-1])
    Figure(loss, rmse)


# Run the self test only when the file is executed as a script.
if __name__ == '__main__':
    main()
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件       1189  2018-03-13 23:54  SGD_MF\README.md
     文件       2946  2018-03-13 00:35  SGD_MF\SGD_MF\SGD_MF.py
     文件    1979173  2018-03-12 05:39  SGD_MF\SGD_MF\u.data
     文件         75  2018-03-13 23:51  SGD_MF\SGD_MF\講解和推導.txt
     目錄          0  2018-03-13 00:42  SGD_MF\SGD_MF
     目錄          0  2018-03-13 23:50  SGD_MF
----------- ---------  ---------- -----  ----
              1983383                    6
評論
共有 條評論