資源簡介
內含原始數據集,測試集和實驗要求,運用sklearn簡單實現決策樹,用于學習python,sklearn基礎,能夠生成決策樹pdf以供入門者參考

代碼片段和文件信息
# -*- coding: utf-8 -*-
"""
Created on Wed Jan  2 13:52:27 2019

Load selected columns of ``adult.data`` into a pandas DataFrame and recode the
``native-country`` column as a continent code (1-4).
"""
import numpy as np
import pandas as pd
from sklearn import tree
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # older scikit-learn releases only ship the legacy module
    from sklearn.cross_validation import train_test_split
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import classification_report
import os
# Raw string: in a normal literal the "\b" of "\bin" is a backspace character,
# which would silently corrupt the directory appended to PATH.
os.environ["PATH"] += os.pathsep + r'C:\Program Files (x86)\Graphviz2.38\bin'
import graphviz
import pydotplus

# NOTE(review): the listing this script was recovered from had lost its commas
# and rendered spaces as '?'. The field separator ',' and the missing-value
# marker ' ?' (space + question mark) are reconstructions -- confirm against
# the data file.
_FIELD_SEPARATOR = ','
_MISSING_MARKER = ' ?'


def _or_none(value):
    """Return *value*, or the placeholder string 'None' if it is the missing marker."""
    return value if value != _MISSING_MARKER else 'None'


agelist = []
marital_statuslist = []
occupationlist = []
capital_gainlist = []
hours_per_weeklist = []
native_countrylist = []
classlist = []

# The context manager guarantees the file is closed even if a row is malformed.
with open('adult.data', 'r') as data_file:
    for raw_line in data_file:
        fields = raw_line.strip().split(_FIELD_SEPARATOR)
        # Reading stops at the first blank line.
        if fields == [""]:
            break
        # Values are kept as raw strings, exactly as split from the line
        # (no per-field stripping is applied).
        agelist.append(_or_none(fields[0]))
        marital_statuslist.append(_or_none(fields[5]))
        occupationlist.append(_or_none(fields[6]))
        capital_gainlist.append(_or_none(fields[10]))
        hours_per_weeklist.append(_or_none(fields[12]))
        native_countrylist.append(_or_none(fields[13]))
        classlist.append(_or_none(fields[14]))

frame = pd.DataFrame({
    'age': agelist,
    'marital-status': marital_statuslist,
    'occupation': occupationlist,
    'capital-gain': capital_gainlist,
    'hours-per-week': hours_per_weeklist,
    'native-country': native_countrylist,
    'class': classlist,
})

# NOTE(review): a few assignments below look geographically questionable
# (e.g. ' Columbia' under Europe, ' Portugal' and ' Honduras' under South
# America). They are kept unchanged so the encoded feature stays the same --
# confirm the intent.
listEurope = [' England', ' Germany', ' Greece', ' Italy', ' Poland', ' Ireland', ' France', ' Columbia', ' Hungary',
              ' Scotland', ' Yugoslavia', ' Holand-Netherlands']
listAsia = [' Cambodia', ' India', ' Japan', ' South', ' China', ' Iran', ' Philippines', ' Vietnam', ' Laos', ' Taiwan',
            ' Thailand', ' Hong']
listNorthAmerica = [' United-States', ' Puerto-Rico', ' Canada', ' Outlying-US(Guam-USVI-etc)',
                    ' Cuba', ' Jamaica', ' Mexico', ' Haiti', ' Nicaragua', ' El-Salvador', ' Trinadad&Tobago']
listSouthAmerica = [' Honduras', ' Portugal', ' Dominican-Republic', ' Ecuador', ' Guatemala', ' Peru']

# Continent codes: Europe 1, Asia 2, North America 3, South America 4.
_continent_code = {}
for _code, _countries in ((1, listEurope), (2, listAsia),
                          (3, listNorthAmerica), (4, listSouthAmerica)):
    _continent_code.update(dict.fromkeys(_countries, _code))
# North America accounts for most rows, so it is used to fill missing values.
_continent_code['None'] = 3

# One vectorised pass instead of chained ``frame[col][k] = value`` writes,
# which pandas does not guarantee will reach the original frame.
# Countries not present in any list are left untouched.
frame['native-country'] = frame['native-country'].map(
    lambda country: _continent_code.get(country, country)
)
#對年齡歸約,填充缺失值
frame[‘age‘].fillna(frame[‘age‘].mean)
k=0
for?a?in?frame[‘age‘]:
????if?int(a)>=0?and?int(a)<10:
????????frame[‘age‘][k]=0
????if?int(a)>=10?and?int(a)<20:
????????frame[‘age‘][k]=1
????if?int(a)>=20?and?int(a)<30:
????????frame[‘age‘][k]=2
????if?int(a)>=30?and?int(a)<40:
????????frame[‘age‘][k]=3
????if?int(a)>=40?and?int(a)<50:
????????frame[‘age‘][k]=4
????if?int(a)>=50?
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件      47971  2016-12-20 13:34  adult\Adult Data Set.docx
     文件    3974305  2016-12-20 13:26  adult\adult.data
     文件       5229  2016-12-20 13:26  adult\adult.names
     文件    2003132  2019-01-03 13:17  adult\adult.test
     文件       8257  2019-01-03 14:37  adult\adult1.dot
     文件      47914  2019-01-03 14:37  adult\adult1.pdf
     文件       8257  2019-01-03 14:35  adult\adult2.dot
     文件      48436  2019-01-03 14:35  adult\adult2.pdf
     文件      47910  2019-01-03 14:35  adult\adult3.pdf
     文件        140  2016-12-20 13:26  adult\Index.txt
     文件       4267  2016-12-20 13:26  adult\old.adult.names
     文件       8516  2019-01-03 14:34  adult\unti
     文件       6544  2019-01-03 13:53  adult\unti
     文件      14612  2019-01-02 23:58  adult\實驗內容.docx
     目錄          0  2019-01-06 15:15  adult
----------- ---------  ---------- -----  ----
              6225490                    15
評論
共有 條評論