資源簡介
Python 源碼:基於用戶的協同過濾(UserCF)算法源碼及數據集,可運行

代碼片段和文件信息
#?-*-?coding:?utf-8?-*-
“““
Created?on?Mon?Jun??4?09:41:15?2018
@author:?Administrator
“““
#?-*-coding=utf-8?-*-??
????
import?math??
from?texttable?import?Texttable??
import?imp
??
# Compute the cosine similarity between two users.
def getCosDist(user1, user2):
    """Return the cosine similarity of two users over their co-rated items.

    Only entries whose item id appears in both rating lists contribute to
    the dot product and to the two norms.

    Args:
        user1: Sequence of ``(item_id, rating)`` pairs for the first user.
        user2: Sequence of ``(item_id, rating)`` pairs for the second user.

    Returns:
        ``sum(r1 * r2) / sqrt(sum(r1 ** 2) * sum(r2 ** 2))`` taken over all
        pairs of entries sharing an item id, or ``0.0`` when the dot product
        is zero (which includes the case of no shared items).
    """
    # Index user2's ratings by item id so every user1 entry is matched with
    # a dictionary lookup instead of a scan over the whole of user2.  A list
    # per item keeps repeated item ids paired exactly as a nested loop would.
    ratings_by_item = {}
    for entry in user2:
        ratings_by_item.setdefault(entry[0], []).append(entry[1])

    sum_x = 0.0
    sum_y = 0.0
    sum_xy = 0.0
    for entry in user1:
        rating = entry[1]
        for other_rating in ratings_by_item.get(entry[0], ()):
            sum_x += rating * rating
            sum_y += other_rating * other_rating
            sum_xy += rating * other_rating

    # A zero dot product also covers "no overlap", where both norms are zero
    # and the division below would fail.
    if sum_xy == 0.0:
        return 0.0
    denominator = math.sqrt(sum_x * sum_y)
    return sum_xy / denominator
??
# Read a file line by line; each line becomes one element of the list.
def readFile(filename):
    """Return the lines of a text file as a list of strings.

    Args:
        filename: Path of the file to read.

    Returns:
        The list produced by ``readlines()``; each element keeps its
        trailing newline.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # ISO-8859-1 maps every byte to a character, so decoding never fails.
    # The context manager closes the handle even if reading raises.
    with open(filename, "r", encoding="ISO-8859-1") as f:
        return f.readlines()
??
# Convert the raw lines into a two-dimensional list.
def getRatingInfo(ratings):
    """Parse tab-separated rating lines into ``[user, item, rating]`` rows.

    Args:
        ratings: Iterable of strings whose first three tab-separated fields
            are integers; any further fields are ignored.

    Returns:
        A list with one ``[int, int, int]`` row per non-blank input line.

    Raises:
        ValueError: If one of the first three fields is not an integer.
        IndexError: If a non-blank line has fewer than three fields.
    """
    rates = []
    for line in ratings:
        # Skip blank lines (e.g. a trailing newline at end of file) instead
        # of failing while converting an empty field.
        if not line.strip():
            continue
        rate = line.split("\t")
        rates.append([int(rate[0]), int(rate[1]), int(rate[2])])
    return rates
??
# Build the user-rating lookup structures.
def getUserScoreDataStructure(rates):
    """Group rating rows by user and by item.

    Args:
        rates: Iterable of rows where index 0 is the user id, index 1 the
            item id and index 2 the rating.

    Returns:
        A tuple ``(userDict, itemUser)``:

        * ``userDict[user]`` is the list of ``(item, rating)`` tuples of that
          user, e.g. ``userDict[2] = [(1, 5), (4, 2)]`` means user 2 rated
          item 1 with 5 and item 4 with 2.
        * ``itemUser[item]`` is the list of users that rated that item.

        Both lists keep the order in which the rows were seen.
    """
    userDict = {}
    itemUser = {}
    for row in rates:
        user_id, item_id, score = row[0], row[1], row[2]
        userDict.setdefault(user_id, []).append((item_id, score))
        itemUser.setdefault(item_id, []).append(user_id)
    return userDict, itemUser
??
# Find the neighbours most similar to the given user.
def getNearestNeighbor(userId, userDict, itemUser):
    """Rank every user sharing at least one rated item with ``userId``.

    Args:
        userId: Id of the user to find neighbours for.
        userDict: Mapping of user id to a list of ``(item, rating)`` tuples.
        itemUser: Mapping of item id to the list of users that rated it.

    Returns:
        A list of ``[similarity, neighbor_id]`` pairs sorted in descending
        order (by similarity, then by neighbour id), where the similarity is
        computed by ``getCosDist``.

    Raises:
        KeyError: If ``userId`` is not in ``userDict`` or one of its items is
            not in ``itemUser``.
    """
    own_ratings = userDict[userId]

    # Collect each candidate once.  The set gives constant-time membership
    # checks; seeding it with userId excludes the user from the neighbours.
    seen = {userId}
    neighbors = []
    for item in own_ratings:
        for neighbor in itemUser[item[0]]:
            if neighbor not in seen:
                seen.add(neighbor)
                neighbors.append(neighbor)

    neighbors_dist = [
        [getCosDist(own_ratings, userDict[neighbor]), neighbor]
        for neighbor in neighbors
    ]
    neighbors_dist.sort(reverse=True)
    return neighbors_dist
??
#使用UserFC進行推薦,輸入:文件名用戶ID鄰居數量??
def?recommendByUserFC(filename?userId?k?=?5):??
??????
????#讀取文件??
????contents?=?readFile(filename)??
??
????#文件格式數據轉化為二維數組??
????rates?=?getRatingInfo(contents)??
??
????#格式化成字典數據??
????userDict?itemUser?=?getUserScoreDataStructure(rates)??
??
????#找鄰居??
????neighbors?=?getNearestNeighbor(userId?userDict?itemUser)[:5]??
??
????#建立推薦字典??
????recommand_dict?=?{}??
????for?neighbor?in?neighbors:??
????????neighbor_user_id?=?neighbor[1]??
????????movies?=?userDict[neighbor_user_id]??
????????for?movie?in?movies:??
????????????if?movie[0]?not?in?recommand_dict:??
????????????????recommand_dict[movie[0]]?=?neighbor[0]??
????????????else:??
????????????????recommand_dict[movie[0]]?+=?neighbor[0]??
??
????#建立推薦列表??
????recommand_list?=?[]??
????for?key?in?recomman
?屬性????????????大小?????日期????時間???名稱
-----------?---------??----------?-----??----
?????目錄???????????0??2018-06-06?14:43??ubcf\
?????文件?????????716??2000-07-20?05:09??ubcf\allbut.pl
?????文件?????????643??2000-07-20?05:09??ubcf\mku.sh
?????文件????????6750??2016-01-30?04:26??ubcf\README
?????文件?????1979173??2000-07-20?05:09??ubcf\u.data
?????文件?????????202??2000-07-20?05:09??ubcf\u.genre
?????文件??????????36??2000-07-20?05:09??ubcf\u.info
?????文件??????236344??2000-07-20?05:09??ubcf\u.item
?????文件?????????193??2000-07-20?05:09??ubcf\u.occupation
?????文件???????22628??2000-07-20?05:09??ubcf\u.user
?????文件?????1586544??2001-03-09?02:33??ubcf\u1.ba
?????文件??????392629??2001-03-09?02:32??ubcf\u1.test
?????文件?????1583948??2001-03-09?02:33??ubcf\u2.ba
?????文件??????395225??2001-03-09?02:33??ubcf\u2.test
?????文件?????1582546??2001-03-09?02:33??ubcf\u3.ba
?????文件??????396627??2001-03-09?02:33??ubcf\u3.test
?????文件?????1581878??2001-03-09?02:33??ubcf\u4.ba
?????文件??????397295??2001-03-09?02:33??ubcf\u4.test
?????文件?????1581776??2001-03-09?02:34??ubcf\u5.ba
?????文件??????397397??2001-03-09?02:33??ubcf\u5.test
?????文件?????1792501??2001-03-09?02:34??ubcf\ua.ba
?????文件??????186672??2001-03-09?02:34??ubcf\ua.test
?????文件?????1792476??2001-03-09?02:34??ubcf\ub.ba
?????文件??????186697??2001-03-09?02:34??ubcf\ub.test
?????目錄???????????0??2018-06-06?14:43??ubcf\ubcf\
?????文件????????4669??2018-06-06?13:59??ubcf\ubcf.py
?????文件?????????716??2000-07-20?05:09??ubcf\ubcf\allbut.pl
?????文件?????????643??2000-07-20?05:09??ubcf\ubcf\mku.sh
?????文件????????6750??2016-01-30?04:26??ubcf\ubcf\README
?????文件?????1979173??2000-07-20?05:09??ubcf\ubcf\u.data
?????文件?????????202??2000-07-20?05:09??ubcf\ubcf\u.genre
............此處省略19個文件信息
評論
共有 條評論