91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 6KB
    文件類型: .py
    金幣: 1
    下載: 0 次
    發布日期: 2021-06-02
  • 語言: Python
  • 標簽: 協同過濾

資源簡介

今天基於MovieLens數據集把《推薦系統實踐》上的部分算法實現了一下,順便鞏固Python和pandas庫的使用,發現書本上的代碼有很多不靠譜之處(也許是我水平不夠),所以基本都是自己寫的,不當之處,還望指正。

資源截圖

代碼片段和文件信息

# -*- coding: utf-8 -*-
'''
Created on 2015-06-22
@author: Lockvictor
'''
import sys
import random
import math
import os
from operator import itemgetter


# Fixed seed so that the random train/test split is reproducible across runs.
random.seed(0)


class?ItembasedCF(object):
????‘‘‘?TopN?recommendation?-?Item?based?Collaborative?Filtering?‘‘‘

????def?__init__(self):
????????self.trainset?=?{}
????????self.testset?=?{}

????????self.n_sim_movie?=?20
????????self.n_rec_movie?=?10

????????self.movie_sim_mat?=?{}
????????self.movie_popular?=?{}
????????self.movie_count?=?0

????????print(‘Similar?movie?number?=?%d‘?%?self.n_sim_movie?file=sys.stderr)
????????print(‘Recommended?movie?number?=?%d‘?%
??????????????self.n_rec_movie?file=sys.stderr)

????@staticmethod
????def?loadfile(filename):
????????‘‘‘?load?a?file?return?a?generator.?‘‘‘
????????fp?=?open(filename?‘r‘)
????????for?i?line?in?enumerate(fp):
????????????yield?line.strip(‘\r\n‘)
????????????if?i?%?100000?==?0:
????????????????print?(‘loading?%s(%s)‘?%?(filename?i)?file=sys.stderr)
????????fp.close()
????????print?(‘load?%s?succ‘?%?filename?file=sys.stderr)

????def?generate_dataset(self?filename?pivot=0.7):
????????‘‘‘?load?rating?data?and?split?it?to?training?set?and?test?set?‘‘‘
????????trainset_len?=?0
????????testset_len?=?0

????????for?line?in?self.loadfile(filename):
????????????user?movie?rating?_?=?line.split(‘::‘)
????????????#?split?the?data?by?pivot
????????????if?random.random()?????????????????self.trainset.setdefault(user?{})
????????????????self.trainset[user][movie]?=?int(rating)
????????????????trainset_len?+=?1
????????????else:
????????????????self.testset.setdefault(user?{})
????????????????self.testset[user][movie]?=?int(rating)
????????????????testset_len?+=?1

????????print?(‘split?training?set?and?test?set?succ‘?file=sys.stderr)
????????print?(‘train?set?=?%s‘?%?trainset_len?file=sys.stderr)
????????print?(‘test?set?=?%s‘?%?testset_len?file=sys.stderr)

????def?calc_movie_sim(self):
????????‘‘‘?calculate?movie?similarity?matrix?‘‘‘
????????print(‘counting?movies?number?and?popularity...‘?file=sys.stderr)

????????for?user?movies?in?self.trainset.items():
????????????for?movie?in?movies:
????????????????#?count?item?popularity
????????????????if?movie?not?in?self.movie_popular:
????????????????????self.movie_popular[movie]?=?0
????????????????self.movie_popular[movie]?+=?1

????????print(‘count?movies?number?and?popularity?succ‘?file=sys.stderr)

????????#?save?the?total?number?of?movies
????????self.movie_count?=?len(self.movie_popular)
????????print(‘total?movie?number?=?%d‘?%?self.movie_count?file=sys.stderr)

????????#?count?co-rated?users?between?items
????????itemsim_mat?=?self.movie_sim_mat
????????print(‘building?co-rated?users?matrix...‘?file=sys.stderr)

????????for?user?movies?in?self.trainset.items():
????????????for?m1?in?movies:
????????????????for?m2?in?movies:
????????????????????if?m1?==?m2:
?????????

評論

共有 條評論