資源簡介

代碼片段和文件信息
import?random
import?math
class?UserbasedCF:
????def?__init__(selfdatafile?=?None):
????????self.datafile?=?datafile
????????self.readData()
????????self.splitData(347)
????def?readData(selfdatafile?=?None):
????????“““
????????read?the?data?from?the?data?file?which?is?a?data?set
????????“““
????????self.datafile?=?datafile?or?self.datafile
????????self.data?=?[]
????????for?line?in?open(self.datafile):
????????????useriditemidrecordmtime?=?line.split(“\t“)
????????????self.data.append((useriditemidint(record)))
????def?splitData(selfkseeddata?=?NoneM?=?8):
????????“““
????????split?the?data?set
????????testdata?is?a?test?data?set
????????traindata?is?a?train?set
????????“““
????????self.testdata?=?{}
????????self.traindata?=?{}
????????data?=?data?or?self.data
????????random.seed(seed)
????????for?useritemrecord?in?self.data:
????????????if?random.randint(0M)?==?k:
????????????????self.testdata.setdefault(user{})
????????????????self.testdata[user][item]?=?record
????????????else:
????????????????self.traindata.setdefault(user{})
????????????????self.traindata[user][item]?=?record
????def?userSimilarity(selftrain?=?None):
????????“““
????????one?method?of?getting?user?similarity?matrix
????????“““
????????train?=?train?or?self.traindata
????????self.userSim?=?dict()
????????for?u?in?train.keys():
????????????for?v?in?train.keys():
????????????????if?u?==?v:
????????????????????continue
????????????????self.userSim.setdefault(u{})
????????????????self.userSim[u][v]?=?len(set(train[u].keys())?&?set(train[v].keys()))
????????????????self.userSim[u][v]?/=?math.sqrt(len(train[u])?*?len(train[v])?*?1.0)
????def?userSimilarityBest(selftrain?=?None):
????????“““
????????the?other?method?of?getting?user?similarity?which?is?better?than?above
????????you?can?get?the?method?on?page?46
????????In?this?experiment?we?use?this?method
????????“““
????????train?=?train?or?self.traindata
????????self.userSimBest?=?dict()
????????item_users?=?dict()
????????#build?inverse?table?for?item_users
????????for?uitem?in?train.items():
????????????for?i?in?item.keys():
????????????????item_users.setdefault(iset())
????????????????item_users[i].add(u)
????????#calculate?co-rated?items?between?users
????????user_item_count?=?dict()
????????count?=?dict()
????????for?itemusers?in?item_users.items():
????????????for?u?in?users:
????????????????user_item_count.setdefault(u0)
????????????????user_item_count[u]?+=?1
????????????????for?v?in?users:
????????????????????if?u?==?v:
????????????????????????continue
????????????????????count.setdefault(u{})
????????????????????count[u].setdefault(v0)
????????????????????count[u][v]?+=?1
????????#calculate?finial?similarity?matrix?W
????????for?urelated_users?in?count.items():
????????????self.userSimBest.setdefault(udict())
????????????for?vcuv?in?related_users.items():
????????????????self.userSimBest[u][v]?=?cuv?/?math.sqrt(us
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件       6527  2015-03-06 14:59  Userba
     文件        266  2015-03-06 15:50  Userba
     文件    1979173  2000-07-19 16:09  Userba
     目錄          0  2015-03-06 15:47  Userba
     目錄          0  2015-03-06 15:47  Userba
----------- ---------  ---------- -----  ----
              1985966                    5
- 上一篇:決策樹回歸算法
- 下一篇:crf--python編碼
評論
共有 條評論