資源簡介
資源中包含完整的C4.5決策樹算法Python代碼和測試數據,其中有4個文件:C45.py是算法的實現代碼,treePlotter.py是繪製決策樹的代碼,PlayData.txt是樣本數據,C45test.py用來構建、繪製並測試決策樹。您可以運行C45test.py來依次進行決策樹的構建、剪枝、繪製樹形圖,並對測試樣本進行分類。

代碼片段和文件信息
# -*- coding: cp936 -*-
from?math?import?log
import?operator
import?os
import?re
from?numpy?import?inf
import?copy
# Compute the information (Shannon) entropy
def calcShannonEnt(dataSet, labelIndex):
    # type: (list, int) -> float
    """Return the weighted Shannon entropy (base 2) of the class labels.

    Every sample is a sequence whose last element is the class label and
    whose second-to-last element is the sample weight.  Samples whose value
    at ``labelIndex`` equals the missing-value marker ``'N'`` are ignored.

    Args:
        dataSet: list of samples (each sample is an indexable sequence).
        labelIndex: index of the attribute whose missing values exclude a
            sample from the computation.

    Returns:
        The entropy in bits.  ``0.0`` is returned when no sample with a
        positive weight contributes (empty data set, every value missing,
        or all weights zero).
    """
    numEntries = 0.0  # total weight of the samples that are counted
    labelCounts = {}  # class label -> accumulated weight
    for featVec in dataSet:
        if featVec[labelIndex] == 'N':
            continue  # attribute value is missing: skip the sample
        weight = float(featVec[-2])  # second-to-last column is the weight
        numEntries += weight
        currentLabel = featVec[-1]  # last column is the class label
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0.0) + weight

    # Nothing to measure; also avoids dividing by a zero total weight.
    if numEntries <= 0:
        return 0.0

    shannonEnt = 0.0
    for classWeight in labelCounts.values():
        if classWeight <= 0:
            # A zero-weight class contributes nothing and log(0) is undefined.
            continue
        prob = classWeight / numEntries
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt
def splitDataSet(dataSet, axis, value, LorR='N'):
    # type: (list, int, object, str) -> list
    """Return the subset of ``dataSet`` selected by a split on one attribute.

    Args:
        dataSet: list of samples; each sample is a list.
        axis: index of the attribute to split on.
        value: attribute value to match (discrete split) or the numeric
            threshold (continuous split).
        LorR: split mode.
            ``'N'`` - discrete attribute: keep samples whose attribute equals
            ``value`` and remove that attribute column from each kept sample.
            ``'L'`` - continuous attribute: keep samples whose attribute is
            less than or equal to ``value``.
            ``'R'`` - continuous attribute: keep samples whose attribute is
            greater than ``value``.

    Returns:
        A new list of samples.  For ``'L'``/``'R'`` the attribute column is
        kept, the returned rows are the same objects as in ``dataSet``, and
        samples whose attribute is the missing-value marker ``'N'`` are
        dropped.  Any other ``LorR`` yields an empty list.
    """
    if LorR == 'N':  # discrete attribute
        return [
            featVec[:axis] + featVec[axis + 1:]
            for featVec in dataSet
            if featVec[axis] == value
        ]
    if LorR == 'L':  # continuous attribute, left side of the threshold
        return [
            featVec
            for featVec in dataSet
            if featVec[axis] != 'N' and float(featVec[axis]) <= value
        ]
    if LorR == 'R':  # continuous attribute, right side of the threshold
        return [
            featVec
            for featVec in dataSet
            if featVec[axis] != 'N' and float(featVec[axis]) > value
        ]
    return []
def?splitDataSetWithNull(dataSet?axis?value?LorR=‘N‘):
????“““
????type:?(list?int?string?or?float?string)?->?list
????劃分數據集
????axis:按第幾個特征劃分
????value:劃分特征的值
????LorR:?N?離散屬性;?L?小于等于value值;?R?大于value值
????“““
????retDataSet?=?[]
????nullDataSet?=?[]
????featVec?=?[]
????totalWeightV?=?calcTotalWeight(dataSet?axis?False)??#?非空樣本權重
????totalWeightSub?=?0.0
????if?LorR?==?‘N‘:??#?離散屬性
????????for?featVec?in?dataSet:
????????????if?featVec[axis]?==?value:
????????????????reducedFeatVec?=?featVec[:axis]
????????????????reducedFeatVec.extend(featVec[axis?+?1:])
????????????????retDataSet.append(reducedFeatVec)
????????????elif?featVec[axis]?==?‘N‘:
????????????????reducedNullVec?=?featVec[:axis]
????????????????reducedNullVec.extend(featVec[axis?+?1:])
????????????????nullDataSet.append(reducedNullVec)
????elif?LorR?==?‘L‘:
????????for?featVec?in?dataSet:
????????????if?featVec[axis]?!=?‘N‘:
????????????????if?float(featVec[axis])?????????????????????retDataSet.append(featVec)
????????????elif?featVec[axis]?==?‘N‘:
????????????????nullDataSet.append(featVec)
????elif?LorR?==?‘R‘:
????????for?featVec?in?dataSet:
????????????if?featVec[axis]?!=?‘N‘:
????????????
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件      23623  2018-12-09 22:57  C4.5決策樹\C45.py
     文件       1186  2018-12-09 23:31  C4.5決策樹\C45test.py
     文件        393  2018-12-04 22:19  C4.5決策樹\PlayData.txt
     文件       5610  2018-12-04 22:48  C4.5決策樹\treePlotter.py
     目錄          0  2018-12-09 23:31  C4.5決策樹
----------- ---------  ---------- -----  ----
                30812                    5
- 上一篇:python音樂播放+濾波器
- 下一篇:spider_LOL.py
評論
共有 條評論