資源簡介
實現ID3 決策樹算法,并使用MATLAB自帶的工具箱函數畫出決策樹,生成相應的規則
代碼片段和文件信息
function type=Classifier( PValue )
% CLASSIFIER  Classify one sample with the ID3 decision tree.
%
%   type = Classifier(PValue) builds the tree by calling ID3() and walks it
%   from the root (node{1}) down to a node that has no children.
%
%   PValue - vector of attribute values for the sample; it is indexed with
%            the attribute number stored in each internal node (node{n}).
%   type   - content of the node where the walk stops (node{n}); for a
%            leaf produced by TreeNode this is the class label.
%
%   At every internal node the branch whose split value is closest to the
%   sample's value is followed. An exact match has distance 0 and therefore
%   always wins; a value that never occurred in the training data falls
%   back to the nearest known value. This replaces the earlier +/-0.1
%   stepping search, which tested for a miss against the wrong node after
%   a successful descent and relied on exact floating-point equality, so
%   it could fail to terminate.
%
%   NOTE(review): the tree is rebuilt through ID3() on every call; cache
%   the result in the caller if many samples are classified.
    global node child_value child_node_num
    [node,child_value,child_node_num]=ID3();
    n=1;        % start the search at the root of the tree, node{1}
    while ~isempty(child_node_num{n})
        % Distance from the sample's value to each split value of this node.
        [~,branch]=min(abs(child_value{n}-PValue(node{n})));
        n=child_node_num{n}(branch);
    end
    type=node{n};
end
% 函數返回一棵決策樹
function  [node,child_value,child_node_num]=ID3()
% ID3  Build a decision tree from the training file and return it.
%
%   [node,child_value,child_node_num] = ID3() reads 'training data.txt'
%   (four numeric columns followed by one string column per row), then
%   calls TreeNode on all rows with all four attributes available.
%
%   The three outputs are the cell arrays filled in by TreeNode; they are
%   also kept in the global variables of the same names.
    % Reset the globals first: TreeNode appends to them, so stale content
    % from a previous run would end up in the new tree.
    clear global node child_value child_node_num;
    global node child_value child_node_num
    [SL,SW,PL,PW,CN]=textread('training data.txt','%f %f %f %f %s');
    DValue=[SL SW PL PW];    % SL=DValue(:,1)
    A=1:length(SL);          % every row takes part in the first split
    ClassPNum=[1 2 3 4];     % attribute columns still available for splitting
    m=0;                     % 0 means the root has no parent node
    [node,child_value,child_node_num]=TreeNode( DValue,CN,A,ClassPNum,m );
end
% 生成樹結點
% DValue--前四列數據
% A--參與劃分的行號
% CN--屬性值的集合(第5列數據)
% ClassPNum為劃分的剩余屬性編號
% 當前node的父親結點為node{m}
function?[nodechild_valuechild_node_num]=TreeNode(?DValue?CN?A?ClassPNumm)
????global?node?child_value?child_node_num
????%ClassName={‘SL‘?‘SW‘?‘PL‘?‘PW‘};
????n=length(node);
????if?m>0
????????%如果父親結(jié)點存在,將本結(jié)點的序號存入父親結(jié)點的子結(jié)點序號集中
????????k=length(child_node_num{m});
????????child_node_num{m}(k+1)=n+1;
????end
????%?1、樣本為空,則樹為空
????if?isempty(DValue)
????????node{?n+1?}=[];
????????child_value{?n+1?}=[];
????????child_node_num{?n+1?}=[];
????????return;
????end?
????%?2、用于劃分的剩余屬性為空,選擇多數(shù)元組所在的類作為結(jié)點
????if?isempty(?ClassPNum?)?
???????node{?n+1?}=find_most(?CNA?);
???????child_value{?n+1?}=[];
???????child_node_num{?n+1?}=[];
???????return;
????end?
????%?3、樣本中所有數(shù)據(jù)都屬于同一類,將此類作為結(jié)點
????CNRowNum=CN_sta(?CN?A);
????if?length(?find(CNRowNum==0)?)>=2
????????node{?n+1?}=CN{A(1)};
????????child_value{?n+1?}=[];
????????child_node_num{?n+1?}=[];
????????return;
????%?4、樣本中所有數(shù)據(jù)不屬于同一類
????else
????????I=Exp(?CNA?);
????????for?i=1:length(?ClassPNum?)????????????
????????????Entropy(i)=avg_entropy(?DValue(:ClassPNum(i))?A?CN);
????????????Gain(i)=I-Entropy(i);
????????end
????????%?4.1、各屬性的信息增益均小于0,選擇多數(shù)元組所在的類作為結(jié)點
????????if?max(Gain)<=0
????????????node{?n+1?}=find_most(?CNA?);
????????????child_value{?n+1?}=[];
????????????child_node_num{?n+1?}=[];
????????return;
????????%?4.2、在信息增益最大的屬性上進(jìn)行劃分
????????else
????????????maxG=find(?Gain==max(Gain)?);
????????????[PValue?RowNum]=type_sta(?DValue(:ClassPNum(maxG(1)))?A?);
????????????node{?n+1?}=ClassPNum(maxG(1));
????????????child_value{?n+1?}=PValue;
????????????child_node_num{?n+1?}=[];
????????????ClassPNum(maxG)=[];?????%?刪除ClassPNum(maxG)--已經(jīng)進(jìn)行劃分的屬性
????????????for?i=1:length(PValue)
????????????????[nodechild_valuechild_node_num]=TreeNode(?DValue?CN?RowNum{i}?ClassPNum
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件       5297  2014-05-07 21:16  Classifier.m
----------- ---------  ---------- -----  ----
                 5297                    1
評論
共有 條評論