資源簡介
decision_tree_v2.py
代碼片段和文件信息
"""
Decision tree algorithm that splits on attributes by information entropy.
"""
import copy
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
class Node:
    """A single node of the decision tree.

    The node is a plain record; how the fields are interpreted is decided by
    the tree-building code that uses it.
    """

    def __init__(self, ntype):
        """Create a node of kind ``ntype`` with no children."""
        self.ntype = ntype
        self.children = []
        # Starts at -1 -- presumably the splitting attribute, set later by
        # the tree builder (NOTE(review): confirm against the caller).
        self.a = -1
        # Starts as the *string* "None", not the None object -- presumably
        # the numeric threshold of a continuous split (confirm against caller).
        self.limit = "None"

    def __str__(self):
        """Return a one-line dump of all four fields."""
        return (
            "node->ntype : %s node->children : %s node->a : %s node->limit : %s"
            % (self.ntype, self.children, self.a, self.limit)
        )
????????
def judge(x, a):
    """Tell whether all rows of ``x`` agree on every attribute in ``a``.

    Args:
        x: table indexable as ``x.loc[:, column]`` (a pandas DataFrame).
        a: iterable of column labels to check.

    Returns:
        1 if, for each listed column, every row holds the same value as the
        first row (also when ``a`` or ``x`` is empty); otherwise 0.
    """
    for ar in a:
        column = x.loc[:, ar]
        # Compare against the first row by position: a label lookup of 0
        # fails on a subset whose index no longer contains the label 0.
        if len(column) and not (column == column.iloc[0]).all():
            return 0
    return 1
def maxy(y):
    """Return the majority label of a 0/1 label sequence.

    Args:
        y: sequence of labels whose sum counts the 1-labels.

    Returns:
        1 if at least half of the labels sum to 1 (ties and the empty
        sequence also give 1), otherwise 0.
    """
    return 1 if sum(y) * 2 >= len(y) else 0
????
def calEnt(p1):  # p1 = sum(y)/len(y)
    """Return the binary information entropy (in bits) for probability ``p1``.

    Args:
        p1: proportion of positive samples, expected in [0, 1].

    Returns:
        0 when one of the two classes is absent, otherwise
        ``-(p1*log2(p1) + p0*log2(p0))`` with ``p0 = 1 - p1``.
    """
    p0 = 1 - p1
    # log2(0) is undefined; a pure set has zero entropy by convention.
    if p1 == 0 or p0 == 0:
        return 0
    return -1 * (math.log2(p1) * p1 + math.log2(p0) * p0)
def getdict(x, av):
    """Count how often each distinct value occurs in column ``av`` of ``x``.

    Args:
        x: table indexable by column label (``x[av]``).
        av: label of the column to count.

    Returns:
        dict mapping each distinct value of the column to its number of
        occurrences.
    """
    d = dict.fromkeys(set(x[av]), 0)
    for value in np.array(x[av]):
        d[value] += 1
    return d
def getp1(x, y, av):
    """Count the positive samples for each distinct value of column ``av``.

    Args:
        x: table indexable by column label (``x[av]``).
        y: labels, one per row of ``x`` in row order; a row counts as
            positive when its label equals 1.
        av: label of the column to group by.

    Returns:
        dict mapping each distinct value of the column to the number of
        rows with that value whose label is 1 (0 when there are none).
    """
    p1 = dict.fromkeys(set(x[av]), 0)
    # Pair values and labels by position so the row index of ``x`` is
    # irrelevant.
    for value, label in zip(np.array(x[av]), y):
        if label == 1:
            p1[value] += 1
    return p1
def getTb(x, av):
    """Build the candidate split thresholds for continuous column ``av``.

    The column values are sorted and the midpoint of every pair of
    neighbouring values is returned. Duplicate values are kept, so a
    threshold may coincide with a column value.

    Args:
        x: table indexable by column label (``x[av]``).
        av: label of a numeric column.

    Returns:
        list of ``len(column) - 1`` midpoints in ascending order (empty for
        fewer than two rows).
    """
    # Read the column positionally; indexing by label 0..n-1 breaks on a
    # frame whose index was not reset.
    Ta = np.sort(np.asarray(x[av]))
    return [(low + high) / 2 for low, high in zip(Ta, Ta[1:])]
def divideByT(x, y, av, tb):
    """Split the samples on ``x[av] >= tb`` and count each side.

    Args:
        x: table indexable by column label (``x[av]``).
        y: labels, one per row of ``x`` in row order; 1 marks a positive.
        av: label of a numeric column.
        tb: threshold; rows with a value ``>= tb`` go to the "true" side.

    Returns:
        Tuple ``(t, f, pt1, pf1)``: size of the true side, size of the false
        side, positives on the true side, positives on the false side.
    """
    t = f = 0
    pt1 = pf1 = 0
    # Walk values and labels by position so the row index of ``x`` is
    # irrelevant.
    for value, label in zip(np.asarray(x[av]), y):
        positive = 1 if label == 1 else 0
        if value >= tb:
            t += 1
            pt1 += positive
        else:
            f += 1
            pf1 += positive
    return t, f, pt1, pf1
def calGain(x, y, av):
    """Compute the information gain of splitting on attribute ``av``.

    A column whose dtype is not ``float64`` is treated as discrete and split
    by value; a ``float64`` column is treated as continuous and split at the
    best of the candidate thresholds from ``getTb``.

    Args:
        x: table whose column ``x[av]`` exposes a ``dtype``.
        y: non-empty sequence of 0/1 labels, one per row of ``x``.
        av: label of the attribute to evaluate.

    Returns:
        Tuple ``(gain, threshold)``. ``threshold`` is ``None`` for a discrete
        attribute. For a continuous attribute with no threshold that leaves
        both sides non-empty, ``(-1, None)`` is returned.
    """
    total = len(y)
    base = calEnt(sum(y) / total)

    if x[av].dtype != "float64":
        d = getdict(x, av)
        p1 = getp1(x, y, av)

        ans = base
        for key, value in d.items():
            ans -= (value / total) * calEnt(p1[key] / value)
        return ans, None

    Max = -1
    Maxt = None
    for tb in getTb(x, av):
        t, f, pt1, pf1 = divideByT(x, y, av, tb)
        # A threshold equal to the smallest value puts every row on one
        # side; such a split carries no information and would divide by 0.
        if t == 0 or f == 0:
            continue
        ans = base - ((t / total) * calEnt(pt1 / t) + (f / total) * calEnt(pf1 / f))
        if ans > Max:
            Max = ans
            Maxt = tb
    return Max, Maxt
????????
def getMaxAv(x, y, a):
    """Pick the attribute in ``a`` with the largest information gain.

    Args:
        x: sample table passed through to ``calGain``.
        y: labels passed through to ``calGain``.
        a: non-empty sequence of candidate attribute labels.

    Returns:
        Tuple ``(attribute, threshold)`` for the attribute with the highest
        gain; ties keep the earliest one. ``threshold`` is whatever
        ``calGain`` reported for that attribute. If no gain exceeds -1 the
        result is ``(a[0], None)``.
    """
    Max = -1
    MaxAv = a[0]
    Maxt = None
    for av in a:
        temp, t = calGain(x, y, av)
        if temp > Max:
            Max = temp
            MaxAv = av
            Maxt = t
    return MaxAv, Maxt
def?getdata(xyavai):
????xt?=?copy.deepcopy(x)
????yt?=?[]?
????for?i?in?range(x.shape[0]):
????????if(x[av][i]?!=?ai):
????????
評論
共有 條評論