資源簡介
這是我自己寫的一個樸素貝葉斯(Naive Bayes)的 Python 代碼,應該是比較直觀易懂的。
代碼片段和文件信息
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 02 14:36:44 2011
@author: Liping
"""
from?__future__?import?division
import?numpy?as?np
class?naive_bayes:
????def?__init__(self):
????????“““?Constructor?“““
????????
????def?read_data(selffilename):
????????fid?=?open(filename“r“)
????????data?=?[]
????????d?=?[]
????????for?line?in?fid.readlines():
????????????d.append(line.strip())
????????for?d1?in?d:
????????????data.append(d1.split(““))
????????fid.close()
????????self.featureNames?=?data[0]
????????self.featureNames?=?self.featureNames[:-1]####################
????????data?=?data[1:]
????????self.classes?=?[]
????????for?d?in?range(len(data)):
????????????self.classes.append(data[d][-1])
????????????data[d]?=?data[d][:-1]
????????return?dataself.classesself.featureNames?
?
????#Calculate?the?Prob.?of?class:cls
????def?NB(selfTr_dataTr_classesTe_data):
????????cls_vals?=?[]
????????for?aclass?in?Tr_classes:
????????????if??cls_vals.count(aclass)==0:
?????????????????cls_vals.append(aclass)?#?Class?names
????????
????????#calculate?the?probability?for?each?class
????????cnt?=?[0]*len(cls_vals)
????????proba_cla?=?[0]*len(cls_vals)
????????
????????for?i?in?range(len(cls_vals)):
????????????for?aclass?in?Tr_classes:
????????????????if??cls_vals[i]?==?aclass:
?????????????????????cnt[i]?+=?1?#?The?number?of?data?for?each?class
????????????#sub_sum[i]?=?cnt[i]
????????????proba_cla[i]?=?float(cnt[i]/len(Tr_data))
?
????????#return?cls_valscntproba_cla
????????
????#Calculate?the?Prob(attr|cls)
????#def?PT(selfdataclasses):
????????num_uni_attr?=?[]
????????pro_uni_attr_cla?=?[]
????????vol_all_attr?=?[]
????????vol_uni_attr?=?[]
????????L?=?[]
????????
????????vol_all_attr?=?np.array(Tr_data).T
????????for?i?in?range(len(vol_all_attr)):
????????????a?=?np.unique(vol_all_attr[i])
????????????vol_uni_attr.append(a)?#?The?unique?values?of?each?feature
????????
????????‘‘‘define?the?pro_uni_attr_cla:?The?probabilites?of?diffirent?values?of?each?attribute?for?diffirent?classes??‘‘‘??
????????‘‘‘this?is?very?important!!!!!!!?How?to?dynamic?create?a?multi-dimensional?array!‘‘‘
????????x?=?len(cls_vals)
????????y?=?len(vol_uni_attr)
????????pro_uni_attr_cla?=?[[None]*y?for?_?in
評論
共有 條評論