資源簡介
基於用戶最近鄰模型的協同過濾算法的 Python 代碼實現，用戶相似度用 Pearson 相關係數進行計算。
代碼片段和文件信息
#-------------------------------------------------------------------------------
# Name:        PearsonUserNeighCF
# Purpose:     Personalized Recommendation
#
# Author:      Jinkun Wang
# Email:       wangjinkun90@foxmail.com -- if you have any questions about the
#              code, please do not hesitate to contact me.
#
# Created:     10/09/2014
# Copyright:   (c) Jinkun Wang 2014
#-------------------------------------------------------------------------------
from?math?import?sqrt
import?numpy?as?np
import?matplotlib.pyplot?as?plt
def _iter_ratings(path):
    """Yield (userId, itemId, rating) for each non-blank line of `path`.

    Each line is expected to hold four tab-separated fields; the fourth
    field is ignored. Ids are kept as strings, the rating is converted
    to float.
    """
    with open(path) as handle:
        for line in handle:
            record = line.strip()
            if not record:
                # Tolerate blank / trailing empty lines instead of failing
                # on the 4-way unpack below.
                continue
            userId, itemId, rating, _ = record.split('\t')
            yield userId, itemId, float(rating)


def loadData(train_file='ml-100k/u1.base', test_file='ml-100k/u1.test'):
    """Load the training and test rating files.

    Parameters:
        train_file: path of the training set (default 'ml-100k/u1.base').
        test_file:  path of the test set (default 'ml-100k/u1.test').

    Returns a 4-tuple (trainSet, testSet, movieUser, item_in_train):
        trainSet      -- {userId: {itemId: rating}} built from train_file
        testSet       -- {userId: {itemId: rating}} built from test_file
        movieUser     -- inverted index {itemId: [userId, ...]} built from
                         train_file
        item_in_train -- list of the item ids present in movieUser, so
                         callers can detect test items never seen in training

    If the same (user, item) pair occurs more than once in a file, the
    first rating is kept.

    Raises ValueError when a non-blank line does not have exactly four
    tab-separated fields or its rating is not a number; errors from
    opening the files propagate unchanged.
    """
    trainSet = {}
    testSet = {}
    movieUser = {}

    # Load the training set and build the item -> users inverted index.
    for userId, itemId, rating in _iter_ratings(train_file):
        trainSet.setdefault(userId, {}).setdefault(itemId, rating)
        movieUser.setdefault(itemId, []).append(userId.strip())

    # Snapshot of the items that occur in the training data.
    item_in_train = list(movieUser)

    # Load the test set.
    for userId, itemId, rating in _iter_ratings(test_file):
        testSet.setdefault(userId, {}).setdefault(itemId, rating)

    return trainSet, testSet, movieUser, item_in_train
# Compute the average rating of a single user.
def getAverageRating(user):
    """Return the mean of the ratings stored for `user`.

    Looks the user up in the global name `trainSet` (not defined inside
    this function) and divides the sum of that user's rating values by
    the number of rated items. Raises KeyError if `user` is not a key of
    `trainSet`.
    """
    user_ratings = trainSet[user]
    total = sum(user_ratings.values()) * 1.0  # force float division
    return total / len(user_ratings)
#計算用戶相似度
def?UserSimPearson(trainSet):
????userSim?=?{}
????for?u1?in?trainSet.keys():
????????userSim.setdefault(u1{})
????????u1_rated?=?trainSet[u1].keys()
????????for?u2?in?trainSet.keys():
????????????userSim[u1].setdefault(u20)
????????????if?u1?!=?u2:
????????????????u2_rated?=?trainSet[u2].keys()
????????????????co_rated?=?list(set(u1_rated).intersection(set(u2_rated)))
????????????????if?co_rated?==?[]:
????????????????????userSim[u1][u2]?=?0
????????????????else:
????????????????????num?=?0?????#皮爾遜計算公式的分子部分
????????????????????den1?=?0????#皮爾遜計算公式的分母部分1
????????????????????den2?=?0????#皮爾遜計算公式的分母部分2
????????????????????sigma_u1_m?=?0??#計算用戶u1對共同評價項目的評分均值
????????????????????sigma_u2_m?=?0??#計算用戶u2對共同評價項目的評分均值
????????????????????for?m?in?co_rated:
????????????????????????sigma_u1_m?+=?trainSet[u1][m]
????????????????????????sigma_u2_m?+=?trainSet[u2][m]
????????????????????ave_u1_m?=?sigma_u1_m?/?len(co_rated)
????????????????????ave_u2_m?=?sigma_u2_m?/?len(co_rated)
????????????????????for?m?in?co_rated:
????????????????????????num?+=?(trainSet[u1][m]?-?ave_u1_m)?*?(trainSet[u2][m]?-?ave_u2_m)?*?1.0
????????????????????????den1?+=?pow(trainSet[u1][m]?-?ave_u1_m?2)?*?1.0
????????????????????????den2?+=?pow(trainSet[u2][m]?-?ave_u2_m?2)?*?1.0
????????????????????den1?=?sqrt(den1)
????????????????????den2?=?sqrt(den2)
????????????????????if?den1?==
評論
共有 條評論