-
大小: 8KB文件類型: .rar金幣: 2下載: 0 次發(fā)布日期: 2021-05-28
- 語(yǔ)言: Matlab
- 標(biāo)簽: 數(shù)據(jù)挖掘??ID3??
資源簡(jiǎn)介
ID3算法,使用熵最小策略構(gòu)建決策樹(shù),MATLAB實(shí)現(xiàn)代碼。對(duì)應(yīng)中科大機(jī)器學(xué)習(xí)課程中ID3算法實(shí)現(xiàn).

代碼片段和文件信息
%% ID3: build a decision tree using the minimum-entropy strategy, then print and plot it.
% zzhh@mail.ustc.edu.cn  Created: 2019-03-17
% Tested on: R2018a (9.4.0.813654) win64
function myTree = ID3(dataset, labels)
% Inputs:
%   dataset : data set, cell array or string array; last column holds the class
%   labels  : attribute labels, cell array or string array (one per feature column)
% Output:
%   myTree  : the constructed decision tree, a containers.Map
myTree = createTree(dataset, labels);                    % build the tree recursively
[nodeids, nodevalue, branchvalue] = print_tree(myTree);  % flatten tree into node/branch lists
tree_plot(nodeids, nodevalue, branchvalue);              % draw the tree
end
%% Recursively build a decision tree using the minimum-entropy strategy.
function myTree = createTree(dataset, labels)
% Inputs:
%   dataset : data set, cell array or string array; last column holds the class
%   labels  : attribute labels, cell array or string array
% Output:
%   myTree  : decision tree as a containers.Map, or a char class label at a leaf
% Empty data is an error: there is nothing to split on.
if(isempty(dataset))
    error('必須提供數(shù)據(jù)!')
end
size_data = size(dataset);
% Number of feature columns (all but the last) must match the label count.
if (size_data(2)-1)~=length(labels)
    error('屬性數(shù)量與數(shù)據(jù)集不一致!')
end
classList = dataset(:, size_data(2));
% All samples share one class -> entropy is 0; return that class as a leaf.
if length(unique(classList))==1
    myTree = char(classList(1));
    return
end
% Attribute set exhausted: ideally return the majority class; here 'NONE' is used.
if size_data(2) == 1
    myTree = 'NONE';
    %myTree = char(classList(1));
    return
end
% Pick the feature whose split yields minimum conditional entropy.
bestFeature = chooseFeature(dataset);
bestFeatureLabel = char(labels(bestFeature));
% Tree node: map from feature label to a map of {feature value -> subtree}.
myTree = containers.Map;
leaf = containers.Map;
% Distinct values taken by the chosen feature.
featValues = dataset(:, bestFeature);
uniqueVals = unique(featValues);
% Remove the chosen feature's label before recursing.
labels = [labels(1:bestFeature-1) labels(bestFeature+1:length(labels))];
% Recurse on each subset induced by a value of the chosen feature.
for i=1:length(uniqueVals)
    subLabels = labels(:)';
    value = char(uniqueVals(i));
    subdata = splitDataset(dataset, bestFeature, value);  % rows with this value, column removed
    leaf(value) = createTree(subdata, subLabels);         % recursive call
    myTree(char(bestFeatureLabel)) = leaf;
end
end
%% Compute the Shannon entropy of the class column (last column) of dataset.
function shannonEnt = calShannonEnt(dataset)
% Input:
%   dataset : cell/string array whose last column holds the class labels
% Output:
%   shannonEnt : entropy in bits, sum over classes of -p*log2(p)
data_size = size(dataset);
labels = dataset(:, data_size(2));
numEntries = data_size(1);
% Count occurrences of each class label.
labelCounts = containers.Map;
for i = 1:length(labels)
    label = char(labels(i));
    if labelCounts.isKey(label)
        labelCounts(label) = labelCounts(label)+1;
    else
        labelCounts(label) = 1;
    end
end
shannonEnt = 0.0;
for key = labelCounts.keys
    key = char(key);
    prob = labelCounts(key) / numEntries;
    shannonEnt = shannonEnt - prob*log2(prob);
end
end
%%?選擇熵最小的屬性特征
function?bestFeature=chooseFeature(dataset~)
baseEntropy?=?calShannonEnt(dataset);
data_size?=?size(dataset);
numFeatures?=?data_size(2)?-?1;
minEntropy?=?2.0;
bestFeature?=?0;
for?i?=?1:numFeatures
????uniqueVals?=?unique(dataset(:i));
????newEntropy?=?0.0;
????for?j=1:length(uniqueVals)
????????value?=?uniqueVals(j);
????????subDataset?=?splitDataset(datasetivalue);
????????size_sub?=?size(subDataset);
????????prob?=?size_sub(1)/data_size(1);
????????%ShannonEnt?=?calShannonEnt(subDataset);
????????newEntropy?=?newEntropy?+?prob*calShannonEnt(subDataset);
????end
????%gain?=?baseEntropy-?newEntropy;
????if?newEntropy ????????minEntropy?=?newEntropy;
????????bestFeature?=?i;
????end
end
end
%%?分割數(shù)據(jù)集,取出該特征
?屬性????????????大小?????日期????時(shí)間???名稱
-----------?---------??----------?-----??----
?????文件??????53649??2019-03-17?14:25??ID3_shared\car_data.csv
?????文件????????425??2019-03-17?13:44??ID3_shared\golf.csv
?????文件???????5594??2019-04-21?21:58??ID3_shared\ID3.m
?????文件???????1396??2019-05-22?19:05??ID3_shared\ID3_run.m
?????文件????????464??2019-03-17?14:17??ID3_shared\sale.csv
?????文件????????614??2019-03-11?16:17??ID3_shared\watermelon.csv
?????目錄??????????0??2019-06-05?16:20??ID3_shared
-----------?---------??----------?-----??----
????????????????62142????????????????????7
評(píng)論
共有 條評(píng)論