資源簡介
ID3決策樹python代碼,有注釋,有數據讀入和處理,正確率統計等功能,非常實用,歡迎下載

代碼片段和文件信息
# -*- coding: utf-8 -*-
"""
Train and evaluate an entropy-criterion decision tree on a CSV data set.

The script reads a CSV file whose first row holds the column names and whose
last column holds the class label, vectorises the feature columns with
``DictVectorizer``, fits a ``DecisionTreeClassifier`` on the leading rows and
prints the accuracy on both the training rows and the remaining rows.

Created on Sun Dec 31 20:53:34 2017
@author: Administrator
"""
# csv is needed to read the input data file.
import csv

from sklearn import preprocessing
from sklearn import tree
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

try:
    from sklearn.externals.six import StringIO
except ImportError:
    # Newer scikit-learn releases no longer ship sklearn.externals.six;
    # fall back to the standard library so the name stays available.
    from io import StringIO

# Location of the CSV data set (header row first, label in the last column).
TRAIN_CSV_PATH = r'D:\Spyderworkspcce\data\atrain.csv'
# Rows [0, TRAIN_ROW_COUNT) are used for training, the remainder for testing.
TRAIN_ROW_COUNT = 30164

# featureList holds one {column name: value} dict per data row;
# labelList holds the matching class label (last column) of each row.
featureList = []
labelList = []
# The context manager guarantees the file is closed once it has been read.
with open(TRAIN_CSV_PATH) as traindata:
    reader = csv.reader(traindata)
    # The first row contains the column titles.
    headers = next(reader)
    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line; skip it
            # instead of failing on the label lookup below.
            continue
        labelList.append(row[-1])
        # Every column except the last one is a feature.
        featureList.append({headers[i]: row[i] for i in range(len(row) - 1)})

# Each dict in featureList corresponds to one row of the original data.
vec = DictVectorizer()
train_x = vec.fit_transform(featureList).toarray()
# Split the encoded features into the training and the test partition.
train_xx = train_x[:TRAIN_ROW_COUNT]
test_xx = train_x[TRAIN_ROW_COUNT:]

lb = preprocessing.LabelBinarizer()
train_y = lb.fit_transform(labelList)
# Split the encoded labels the same way as the features.
train_yy = train_y[:TRAIN_ROW_COUNT]
test_yy = train_y[TRAIN_ROW_COUNT:]

model = DecisionTreeClassifier(criterion='entropy', min_samples_leaf=3)
model = model.fit(train_xx, train_yy)

y_train_pred = model.predict(train_xx)
y_test_pred = model.predict(test_xx)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('測試集預測類標 ' + str(y_test_pred))

acc_train = accuracy_score(train_yy, y_train_pred)
acc_test = accuracy_score(test_yy, y_test_pred)

print('\t訓練集準確率: %.4f%%' % (100 * acc_train))
print('\t測試集準確率: %.4f%%\n' % (100 * acc_test))
predict
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件       2118  2018-01-03 20:06  ID3.py
----------- ---------  ---------- -----  ----
                 2118                    1
- 上一篇:django入門-增刪改
- 下一篇:python實現決策樹分類算法
評論
共有 條評論