資源簡介
以鳶尾花(iris)數據為例,裡面包含決策樹的創建、訓練、預測,並畫出創建後的決策樹圖像。已附帶 iris 數據集。
代碼片段和文件信息
#!/usr/bin/python
#?-*-?coding:utf-8?-*-
import?numpy?as?np
import?pandas?as?pd
import?matplotlib.pyplot?as?plt
import?matplotlib?as?mpl
from?sklearn?import?tree
from?sklearn.tree?import?DecisionTreeClassifier
from?sklearn.model_selection?import?train_test_split
from?sklearn.pipeline?import?Pipeline
import?pydotplus
# Column labels for the four iris measurements, in dataset column order:
# sepal length, sepal width, petal length, petal width.
# Each name is a tuple with one entry per label; the separating commas are
# required, because adjacent string literals without commas would be
# concatenated into a single string.

# English feature names (passed as feature_names when exporting the tree).
iris_feature_E = 'sepal length', 'sepal width', 'petal length', 'petal width'
# Chinese feature names, in the same order as iris_feature_E.
iris_feature = u'花萼長度', u'花萼寬度', u'花瓣長度', u'花瓣寬度'
# Class labels (passed as class_names when exporting the tree).
iris_class = 'Iris-setosa', 'Iris-versicolor', 'Iris-virginica'
if?__name__?==?“__main__“:
????mpl.rcParams[‘font.sans-serif‘]?=?[u‘SimHei‘]
????mpl.rcParams[‘axes.unicode_minus‘]?=?False
????path?=?‘..\\8.Regression\\iris.data‘??#?數據文件路徑
????data?=?pd.read_csv(path?header=None)
????x?=?data[range(4)]
????y?=?pd.Categorical(data[4]).codes
????#?為了可視化,僅使用前兩列特征
????x?=?x.iloc[:?:2]
????x_train?x_test?y_train?y_test?=?train_test_split(x?y?train_size=0.7?random_state=1)
????print?y_test.shape
????#?決策樹參數估計
????#?min_samples_split?=?10:如果該結點包含的樣本數目大于10,則(有可能)對其分支
????#?min_samples_leaf?=?10:若將某結點分支后,得到的每個子結點樣本數目都大于10,則完成分支;否則,不進行分支
????model?=?DecisionTreeClassifier(criterion=‘entropy‘)
????model.fit(x_train?y_train)
????y_test_hat?=?model.predict(x_test)??????#?測試數據
????#?保存
????#?dot?-Tpng?my.dot?-o?my.png
????#?1、輸出
????with?open(‘iris.dot‘?‘w‘)?as?f:
????????tree.export_graphviz(model?out_file=f)
????#?2、給定文件名
????#?tree.export_graphviz(model?out_file=‘iris1.dot‘)
????#?3、輸出為pdf格式
????dot_data?=?tree.export_graphviz(model?out_file=None?feature_names=iris_feature_E?class_names=iris_class
????????????????????????????????????filled=True?rounded=True?special_characters=True)
????graph?=?pydotplus.graph_from_dot_data(dot_data)
????graph.write_pdf(‘iris.pdf‘)
????f?=?open(‘iris.png‘?‘wb‘)
????f.write(graph.create_png())
????f.close()
????#?畫圖
????N?M?=?50?50??#?橫縱各采樣多少個值
????x1_min?x2_min?=?x.min()
????x1_max?x2_max?=?x.max()
????t1?=?np.linspace(x1_min?x1_max?N)
????t2?=?np.linspace(x2_min?x2_max?M)
????x1?x2?=?np.meshgrid(t1?t2)??#?生成網格采樣點
????x_show?=?np.stack((x1.flat?x2.flat)?axis=1)??#?測試點
????print?x_show.shape
????#?#?無意義,只是為了湊另外兩個維度
????#?#?打開該注釋前,確保注釋掉x?=?x[:?:2]
????#?x3?=?np.ones(x1.size)?*?np.average(x[:?2])
????#?x4?=?np.ones(x1.size)?*?np.average(x[:?3])
????#?x_test?=?np.stack((x1.flat?x2.flat?x3?x4)?axis=1)??#?測試點
????cm_light?=?mpl.colors.ListedColormap([‘#A0FFA0‘?‘#FFA0A0‘?‘#A0A0FF‘])
????cm_dark?=?mpl.colors.ListedColormap([‘g‘?‘r‘?‘b‘])
????y_show_hat?=?model.predict(x_show)??#?預測值
????print?y_show_hat.shape
????print?y_show_hat
????y_show_hat?=?y_show_hat.reshape(x1.shape)??#?使之與輸入的形狀相同
????print?y_show_hat
????plt.figure(facecolor=‘w‘)
????plt.pcolormesh(x1?x2?y_show_hat?cmap=cm_light)??#?預測值的顯示
????plt.scatter(x_test[0]?x_test[1]?c=y_test.ravel()?edgecolors=‘k‘?s=150?zorder=10?cmap=cm_dark?marker=‘*‘)??#?測試數據
????plt.scatter(x[0
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2017-04-03 20:08  10.RandomForest\
     目錄           0  2017-04-03 20:08  10.RandomForest\.idea\
     文件         459  2017-04-03 13:20  10.RandomForest\.idea\10.RandomForest.iml
     文件         687  2017-04-03 13:20  10.RandomForest\.idea\misc.xml
     文件         282  2017-04-03 15:50  10.RandomForest\.idea\modules.xml
     文件       33260  2017-04-03 15:51  10.RandomForest\.idea\workspace.xml
     文件        4872  2017-04-03 13:49  10.RandomForest\10.1.Iris_DecisionTree.py
     文件        2620  2017-04-03 14:50  10.RandomForest\10.2.Iris_DecisionTree_Enum.py
     文件        1282  2017-02-10 22:47  10.RandomForest\10.3.DecisionTreeRegressor.py
     文件        1475  2017-04-03 14:38  10.RandomForest\10.4.MultiOutput_DTR.py
     文件        2754  2017-04-03 14:50  10.RandomForest\10.5.Iris_RandomForest_Enum.py
     文件        2026  2017-04-03 14:51  10.RandomForest\10.6.Bagging.py
     文件    35831808  2016-11-03 12:11  10.RandomForest\graphviz-2.38.msi
     文件        5632  2017-04-03 13:48  10.RandomForest\iris.dot
     文件       30639  2017-04-03 13:48  10.RandomForest\iris.pdf
     文件      567161  2017-04-03 13:48  10.RandomForest\iris.png
評論
共有 條評論