資源簡介
對於決策樹來說,主要有兩種算法:ID3 算法和 C4.5 算法。本資源使用 MATLAB 編程實現了決策樹分類算法中的 ID3 算法。

代碼片段和文件信息
function [ tree ] = id3( examples, attributes, activeAttributes)
%ID3 Build an ID3 decision tree by recursive information-gain splitting.
%   Reference: https://github.com/gwheaton/ID3-Decision-Tree
%
%   Inputs:
%     examples         - numeric matrix, one row per sample. The first
%                        length(activeAttributes) columns hold attribute
%                        values and the following column holds the class
%                        label. Attribute values and labels are used
%                        directly as matrix indices, so they must be
%                        positive integers (labels in 1..NumofClass).
%     attributes       - cell array of attribute names; attributes{k} names
%                        column k of examples (the label name may follow).
%     activeAttributes - vector with one entry per attribute; a nonzero
%                        entry means the attribute may still be split on.
%
%   Output:
%     tree - struct describing the (sub)tree:
%              value      - class index for a leaf, attribute name for an
%                           internal node
%              NumofChild - number of children (0 for a leaf)
%              child      - cell array of subtrees (internal nodes only)
%              class      - cell array; class{k} is the attribute value
%                           that leads to child{k} (internal nodes only)
%
%   Globals read (never modified here):
%     NumofClass - number of distinct class labels
%     count      - depth guard; when it exceeds 30 a majority leaf is
%                  returned instead of splitting further
%
%   Raises an error when examples is empty.

global NumofClass
global count

% Refuse to build a tree from no data.
if isempty(examples)
    error('必須提供數據!');
end

numberAttributes = length(activeAttributes); % attributes usable for splitting
numberExamples = size(examples, 1);          % number of sample rows
labels = examples(:, numberAttributes + 1);  % class label column

% Create the node.
tree = struct('value', 'null');

% Count how many samples fall into each class.
classCounts = zeros(1, NumofClass);
for c = 1:NumofClass
    classCounts(c) = sum(labels == c);
end

% All samples belong to a single class: this node is a leaf.
pureClass = find(classCounts == numberExamples, 1);
if ~isempty(pureClass)
    tree.value = pureClass;
    tree.NumofChild = 0;
    return;
end

% No attribute left to split on, or the depth guard tripped: return a leaf
% labelled with the most frequent class. The index output of max is used;
% find(max(...)) would always yield 1. The isempty check keeps an
% uninitialised global from breaking the short-circuit operator.
depthExceeded = ~isempty(count) && count > 30;
if sum(activeAttributes) == 0 || depthExceeded
    [~, majorityClass] = max(classCounts);
    tree.value = majorityClass;
    tree.NumofChild = 0;
    return;
end

% Entropy of the class distribution at this node.
p = classCounts(classCounts > 0) / numberExamples;
currentEntropy = -sum(p .* log2(p));

% Information gain of every active attribute; inactive ones keep -1 so
% they can never win the max below.
gains = -1 * ones(1, numberAttributes);
for a = 1:numberAttributes
    if activeAttributes(a) == 0
        continue;
    end
    column = examples(:, a);
    numValues = max(column); % number of value slots, derived from the data

    % counts(v, c): samples with attribute value v and class c.
    counts = zeros(numValues, NumofClass);
    for r = 1:numberExamples
        counts(column(r), labels(r)) = counts(column(r), labels(r)) + 1;
    end

    % Conditional entropy H(class | attribute): entropy inside each value
    % subset (probabilities relative to the subset size), weighted by the
    % share of samples in that subset.
    conditionalEntropy = 0;
    for v = 1:numValues
        subsetSize = sum(counts(v, :));
        if subsetSize == 0
            continue;
        end
        q = counts(v, counts(v, :) > 0) / subsetSize;
        subsetEntropy = -sum(q .* log2(q));
        conditionalEntropy = conditionalEntropy + ...
            (subsetSize / numberExamples) * subsetEntropy;
    end
    gains(a) = currentEntropy - conditionalEntropy;
end

% Split on the attribute with the largest gain (first one wins ties).
[~, bestAttribute] = max(gains);
tree.value = attributes{bestAttribute};

% The chosen attribute must not be reused further down this branch.
activeAttributes(bestAttribute) = 0;

% Partition the samples by the chosen attribute's value and recurse into
% every non-empty partition; values with no samples get no child.
bestColumn = examples(:, bestAttribute);
NumofChild = 0;
for v = 1:max(bestColumn)
    subset = examples(bestColumn == v, :);
    if isempty(subset)
        continue;
    end
    NumofChild = NumofChild + 1;
    tree.child{NumofChild} = id3(subset, attributes, activeAttributes);
    tree.class{NumofChild} = v;
end
tree.NumofChild = NumofChild;
end
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2017-01-12 10:59  ID3\
     文件        6875  2017-01-09 21:26  ID3\BalanceScale.txt
     文件       19889  2016-12-27 09:57  ID3\breast-cancer-wisconsin.data
     文件         220  2017-01-09 20:40  ID3\Bullons.txt
     文件       51867  2016-12-27 10:00  ID3\car.data
     文件       25920  2016-12-30 19:48  ID3\CarEvaluation.txt
     文件        3178  2016-12-30 19:54  ID3\id3.m
     文件        1397  2017-01-12 10:15  ID3\ID3_decision_tree.m
     文件         888  2017-01-12 10:59  ID3\id3_preprocess.m
     文件         984  2017-01-09 20:48  ID3\id3_test.m
     文件        8726  2016-12-27 10:02  ID3\machine.data
     文件        1478  2016-12-30 21:20  ID3\print_tree.m
     文件         707  2017-01-12 10:56  ID3\tree_plot.m
- 上一篇:利用matlab將風場nc文件讀取成txt文件
- 下一篇:matlab心電信號處理
評論
共有 條評論