資源簡介
自己寫的比較初級的基於標籤的用戶協同過濾算法,數據集用的是 ml-100k,並對數據集進行了訓練集和測試集的劃分;裡面還有準確率、召回率、標準平均絕對誤差等指標的計算代碼。
代碼片段和文件信息
# coding=utf-8
# -*- coding=utf-8 -*-
#見185
import datetime
import imp
import math
import pickle
import pprint
import random
import sys

from texttable import Texttable
#
#   Compute the cosine distance as |A&B| / sqrt(|A| * |B|)
#
#
#
def calcCosDistSpe(user1, user2):
    """Return the overlap similarity of the two users' above-average items.

    Counts the items that each user rated strictly above that user's own
    mean rating and that appear in both lists, then divides the count by
    ``sqrt(len(user1) * len(user2))``.

    Args:
        user1: Sequence of rating records where index 0 is the item id and
            index 1 is the numeric rating, e.g. ``[(174, 5), (259, 4)]``.
            Item ids must be hashable.
        user2: Rating records of the second user, same layout.

    Returns:
        The similarity as a float; ``0.0`` when either user has no ratings.
    """
    if not user1 or not user2:
        # Without ratings there is no mean and no overlap; returning early
        # also avoids dividing by zero below.
        return 0.0

    # A float start value keeps the mean a true division on every
    # interpreter version.
    avg_x = sum((entry[1] for entry in user1), 0.0) / len(user1)
    avg_y = sum((entry[1] for entry in user2), 0.0) / len(user2)

    # Index user2's above-average items once so each user1 record is matched
    # by lookup instead of rescanning user2. Storing a count keeps repeated
    # item ids contributing once per matching pair.
    liked_by_user2 = {}
    for entry in user2:
        if entry[1] > avg_y:
            liked_by_user2[entry[0]] = liked_by_user2.get(entry[0], 0) + 1

    common = 0
    for entry in user1:
        if entry[1] > avg_x:
            common += liked_by_user2.get(entry[0], 0)

    return common / math.sqrt(len(user1) * len(user2) * 1.0)
#
#   Compute the cosine distance
#
#
def calcCosDist(user1, user2):
    """Return the cosine similarity of two users over their co-rated items.

    Only items present in both rating lists contribute to the dot product
    and to the two norms.

    Args:
        user1: Sequence of rating records where index 0 is the item id and
            index 1 is the numeric rating, e.g. ``[(174, 5), (259, 4)]``.
            Item ids must be hashable.
        user2: Rating records of the second user, same layout.

    Returns:
        ``dot / sqrt(norm1 * norm2)`` as a float, or ``0.0`` when the users
        share no items (including empty input) or the dot product is zero.
    """
    # Group user2's ratings by item id so every user1 record is matched by
    # lookup; a list per item preserves one contribution per matching pair.
    ratings2_by_item = {}
    for entry in user2:
        ratings2_by_item.setdefault(entry[0], []).append(entry[1])

    sum_x = 0.0
    sum_y = 0.0
    sum_xy = 0.0
    for entry in user1:
        rating1 = entry[1]
        for rating2 in ratings2_by_item.get(entry[0], ()):
            sum_xy += rating1 * rating2
            sum_x += rating1 * rating1
            sum_y += rating2 * rating2

    norm = math.sqrt(sum_x * sum_y)
    if sum_xy == 0.0 or norm == 0.0:
        # No overlap, orthogonal ratings, or an underflowed norm.
        return 0.0
    return sum_xy / norm
#
#
#   Similarity cosine distance (computed on mean-centred ratings)
#
#
#
def calcSimlaryCosDist(user1, user2):
    """Return the mean-centred cosine similarity of two users.

    Each rating is first reduced by its user's mean rating (the mean is
    taken over all of that user's records). The dot product and both norms
    are then accumulated over the co-rated items only, which makes the
    result symmetric in its two arguments.

    Args:
        user1: Sequence of rating records where index 0 is the item id and
            index 1 is the numeric rating, e.g. ``[(174, 5), (259, 4)]``.
            Item ids must be hashable.
        user2: Rating records of the second user, same layout.

    Returns:
        The similarity as a float, or ``0.0`` when either user has no
        ratings, the users share no items, or the centred dot product is
        zero.
    """
    if not user1 or not user2:
        # A mean cannot be formed from an empty rating list.
        return 0.0

    # A float start value keeps the mean a true division on every
    # interpreter version.
    avg_x = sum((entry[1] for entry in user1), 0.0) / len(user1)
    avg_y = sum((entry[1] for entry in user2), 0.0) / len(user2)

    # Group user2's centred ratings by item id so every user1 record is
    # matched by lookup rather than by rescanning user2.
    centred2_by_item = {}
    for entry in user2:
        centred2_by_item.setdefault(entry[0], []).append(entry[1] - avg_y)

    sum_x = 0.0
    sum_y = 0.0
    sum_xy = 0.0
    for entry in user1:
        dev_x = entry[1] - avg_x
        for dev_y in centred2_by_item.get(entry[0], ()):
            sum_xy += dev_x * dev_y
            # Both norms are restricted to co-rated items; mixing a full
            # norm for one user with a co-rated norm for the other would
            # make the similarity depend on argument order.
            sum_x += dev_x * dev_x
            sum_y += dev_y * dev_y

    norm = math.sqrt(sum_x * sum_y)
    if sum_xy == 0.0 or norm == 0.0:
        return 0.0
    return sum_xy / norm
def calcSimlaryCosDist1(user1, user2):
    """Return the un-normalised mean-centred dot product of two users.

    Each rating is reduced by its user's mean rating (taken over all of
    that user's records) and the products of the centred ratings are summed
    over the co-rated items. No norm is applied to the result.

    Args:
        user1: Sequence of rating records where index 0 is the item id and
            index 1 is the numeric rating, e.g. ``[(174, 5), (259, 4)]``.
            Item ids must be hashable.
        user2: Rating records of the second user, same layout.

    Returns:
        The summed products as a float; ``0.0`` when either user has no
        ratings or the users share no items.
    """
    if not user1 or not user2:
        # A mean cannot be formed from an empty rating list.
        return 0.0

    # A float start value keeps the mean a true division on every
    # interpreter version.
    avg_x = sum((entry[1] for entry in user1), 0.0) / len(user1)
    avg_y = sum((entry[1] for entry in user2), 0.0) / len(user2)

    # Group user2's centred ratings by item id so every user1 record is
    # matched by lookup rather than by rescanning user2.
    centred2_by_item = {}
    for entry in user2:
        centred2_by_item.setdefault(entry[0], []).append(entry[1] - avg_y)

    sum_xy = 0.0
    for entry in user1:
        dev_x = entry[1] - avg_x
        for dev_y in centred2_by_item.get(entry[0], ()):
            sum_xy += dev_x * dev_y
    return sum_xy
# This function is computed from the cosine similarity
def calcSimlaryCosDist2(users1, user2):
    """Return the set-overlap cosine similarity and the shared-item count.

    The similarity is ``n / sqrt(len(user2) * len(users1))`` where ``n`` is
    the number of item ids that occur in both rating lists; ratings
    themselves are ignored.

    NOTE(review): the return line as received ended in a stray ``n`` after
    the closing parenthesis, read here as a lost ``, n``. Confirm that
    callers unpack two values.

    Args:
        users1: Sequence of rating records where index 0 is the item id,
            e.g. ``[(174, 5), (259, 4), (308, 5), (603, 4)]``. Item ids
            must be hashable.
        user2: Rating records of the second user, same layout.

    Returns:
        A ``(similarity, n)`` tuple; ``(0.0, 0)`` when either list is empty.
    """
    if not users1 or not user2:
        # Nothing can be shared, and the denominator would be zero.
        return 0.0, 0

    # Count user2's item ids once so each users1 record is matched by
    # lookup; the count keeps repeated ids contributing once per pair.
    item_counts = {}
    for entry in user2:
        item_counts[entry[0]] = item_counts.get(entry[0], 0) + 1

    n = sum(item_counts.get(entry[0], 0) for entry in users1)  # shared items
    return n / math.sqrt(len(user2) * len(users1)), n
# Cosine-based similarity computation
def?calcSimlaryCosDist3(users1user2):
????#[(174?5)?(259?4)?(308?5)?(603?4
- 上一篇:12306搶票Python代碼,內含視頻教程
- 下一篇:自動掃雷系統+Python
評論
共有 條評論