資源簡介
今天基於 MovieLens 數據集把《推薦系統實踐》中的部分算法實現了一下，順便鞏固 Python 和 pandas 庫的使用。發現書本上的代碼有很多不靠譜之處（也許是我水平不夠），所以基本都是自己寫的；不當之處，還望指正。
代碼片段和文件信息
# -*- coding: utf-8 -*-
'''
Created on 2015-06-22
@author: Lockvictor
'''
import sys
import random
import math
import os
from operator import itemgetter

# Seed the shared generator with a fixed value so that every step relying on
# the random module produces the same sequence from one run to the next.
random.seed(0)


class ItembasedCF(object):
    ''' TopN recommendation - Item based Collaborative Filtering.

    Ratings are kept as nested dicts of the form
    ``{user: {movie: rating}}`` with integer ratings; user and movie ids
    are stored as the raw strings read from the rating file.
    '''

    def __init__(self):
        ''' Create an empty recommender and report its settings on stderr. '''
        # Rating splits, filled in by generate_dataset().
        self.trainset = {}
        self.testset = {}

        # Reported below as the "similar movie" / "recommended movie" numbers.
        self.n_sim_movie = 20
        self.n_rec_movie = 10

        # Tables populated later by the similarity computation.
        self.movie_sim_mat = {}
        self.movie_popular = {}
        self.movie_count = 0

        print('Similar movie number = %d' % self.n_sim_movie, file=sys.stderr)
        print('Recommended movie number = %d' % self.n_rec_movie,
              file=sys.stderr)

    @staticmethod
    def loadfile(filename):
        ''' Lazily yield the lines of a text file.

        Each line is yielded with its trailing CR/LF characters removed.
        A progress message is written to stderr for line 0 and then every
        100000 lines, and a final message once the file is exhausted.

        filename -- path of the file to read.
        Raises OSError (from open) if the file cannot be opened.
        '''
        # The context manager closes the file even when the consumer stops
        # iterating early or an exception interrupts the loop.
        with open(filename, 'r') as fp:
            for i, line in enumerate(fp):
                yield line.strip('\r\n')
                if i % 100000 == 0:
                    print('loading %s(%s)' % (filename, i), file=sys.stderr)
        print('load %s succ' % filename, file=sys.stderr)

    def generate_dataset(self, filename, pivot=0.7):
        ''' Load rating data and split it into training set and test set.

        Every non-blank line must have four '::'-separated fields:
        user, movie, rating and a fourth field that is ignored. A line goes
        to the training set when random.random() < pivot, otherwise to the
        test set, so pivot is the expected fraction of training data.

        filename -- path of the rating file.
        pivot    -- split threshold in [0, 1]; defaults to 0.7.
        Raises ValueError if a non-blank line does not have exactly four
        fields or its rating is not an integer.
        '''
        trainset_len = 0
        testset_len = 0

        for line in self.loadfile(filename):
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of failing on the tuple unpacking below.
            if not line.strip():
                continue
            user, movie, rating, _ = line.split('::')
            # split the data by pivot
            if random.random() < pivot:
                self.trainset.setdefault(user, {})
                self.trainset[user][movie] = int(rating)
                trainset_len += 1
            else:
                self.testset.setdefault(user, {})
                self.testset[user][movie] = int(rating)
                testset_len += 1

        print('split training set and test set succ', file=sys.stderr)
        print('train set = %s' % trainset_len, file=sys.stderr)
        print('test set = %s' % testset_len, file=sys.stderr)

????def?calc_movie_sim(self):
????????‘‘‘?calculate?movie?similarity?matrix?‘‘‘
????????print(‘counting?movies?number?and?popularity...‘?file=sys.stderr)
????????for?user?movies?in?self.trainset.items():
????????????for?movie?in?movies:
????????????????#?count?item?popularity
????????????????if?movie?not?in?self.movie_popular:
????????????????????self.movie_popular[movie]?=?0
????????????????self.movie_popular[movie]?+=?1
????????print(‘count?movies?number?and?popularity?succ‘?file=sys.stderr)
????????#?save?the?total?number?of?movies
????????self.movie_count?=?len(self.movie_popular)
????????print(‘total?movie?number?=?%d‘?%?self.movie_count?file=sys.stderr)
????????#?count?co-rated?users?between?items
????????itemsim_mat?=?self.movie_sim_mat
????????print(‘building?co-rated?users?matrix...‘?file=sys.stderr)
????????for?user?movies?in?self.trainset.items():
????????????for?m1?in?movies:
????????????????for?m2?in?movies:
????????????????????if?m1?==?m2:
?????????
- 上一篇:操作系統課程設計-進程狀態模擬轉換
- 下一篇:Abaqus 二維voronoi圖插件
評論
共有 條評論