資源簡介
kmeans聚類算法的python實現程序
Ch10
├── Portland.png
├── kMeans.py
├── kMeans.pyc
├── places.txt
├── portlandClubs.txt
├── testSet.txt
└── testSet2.txt
0 directories, 7 files
代碼片段和文件信息
'''
Created on Feb 16, 2011
k Means Clustering for Ch10 of Machine Learning in Action
@author: Peter Harrington
'''
from numpy import *
def loadDataSet(fileName):
    """Parse a tab-delimited text file of floats into a list of rows.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A list with one entry per non-blank line; each entry is the list of
        that line's tab-separated fields converted with float().

    Raises:
        ValueError: If a field cannot be converted to float.
    """
    dataMat = []
    # The context manager closes the file even if a field fails to parse.
    with open(fileName) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                continue  # tolerate blank lines such as a trailing newline
            curLine = stripped.split('\t')
            # list(...) so each row is a real list on Python 3, where map() is lazy.
            dataMat.append(list(map(float, curLine)))
    return dataMat
def distEclud(vecA, vecB):
    """Return the Euclidean distance between two vectors.

    Args:
        vecA: First vector (NumPy array or matrix).
        vecB: Second vector, same shape as vecA.

    Returns:
        The square root of the summed squared element-wise differences.
    """
    # sqrt, sum and power are expected to be NumPy's, brought in by the
    # module-level `from numpy import *`; the builtin sum would not reduce a
    # 1 x n matrix to a scalar.
    return sqrt(sum(power(vecA - vecB, 2)))
def randCent(dataSet, k):
    """Create k random centroids inside the bounding box of dataSet.

    Args:
        dataSet: 2-D NumPy matrix with one data point per row.
        k: Number of centroids to generate.

    Returns:
        A k x n matrix (n = number of columns of dataSet) whose column j is
        drawn uniformly between the minimum and maximum of dataSet column j.
    """
    n = shape(dataSet)[1]
    # asmatrix instead of the `mat` alias, which NumPy 2.0 removed.
    centroids = asmatrix(zeros((k, n)))
    for j in range(n):
        # Scale uniform [0, 1) samples into [min, max) of this dimension.
        minJ = min(dataSet[:, j])
        rangeJ = float(max(dataSet[:, j]) - minJ)
        centroids[:, j] = asmatrix(minJ + rangeJ * random.rand(k, 1))
    return centroids
????
def kMeans(dataSet, k, distMeas=distEclud, createCent=randCent):
    """Cluster the rows of dataSet into k groups with the k-means algorithm.

    Args:
        dataSet: 2-D NumPy matrix with one data point per row.
        k: Number of clusters.
        distMeas: Callable(vecA, vecB) returning the distance between two rows.
        createCent: Callable(dataSet, k) returning the initial k x n centroids.

    Returns:
        A tuple (centroids, clusterAssment). centroids is the k x n matrix of
        final cluster centers. clusterAssment is an m x 2 matrix holding, for
        each data point, the index of its assigned centroid (column 0) and
        the squared distance to that centroid (column 1).
    """
    m = shape(dataSet)[0]
    # asmatrix instead of the `mat` alias, which NumPy 2.0 removed.
    clusterAssment = asmatrix(zeros((m, 2)))
    centroids = createCent(dataSet, k)
    clusterChanged = True
    # Iterate until a full pass leaves every assignment unchanged.
    while clusterChanged:
        clusterChanged = False
        for i in range(m):
            # Assign point i to the closest centroid.
            minDist = inf
            minIndex = -1
            for j in range(k):
                distJI = distMeas(centroids[j, :], dataSet[i, :])
                if distJI < minDist:
                    minDist = distJI
                    minIndex = j
            if clusterAssment[i, 0] != minIndex:
                clusterChanged = True
            clusterAssment[i, :] = minIndex, minDist ** 2
        # Progress trace; the parenthesized single-argument form prints the
        # same text on Python 2 and Python 3.
        print(centroids)
        for cent in range(k):
            # Rows of dataSet currently assigned to this cluster.
            ptsInClust = dataSet[nonzero(clusterAssment[:, 0].A == cent)[0]]
            # An empty cluster keeps its previous centroid: the mean of zero
            # rows is NaN and would poison every later distance comparison.
            if shape(ptsInClust)[0] > 0:
                centroids[cent, :] = mean(ptsInClust, axis=0)
    return centroids, clusterAssment
def?biKmeans(dataSet?k?distMeas=distEclud):
????m?=?shape(dataSet)[0]
????clusterAssment?=?mat(zeros((m2)))
????centroid0?=?mean(dataSet?axis=0).tolist()[0]
????centList?=[centroid0]?#create?a?list?with?one?centroid
????for?j?in?range(m):#calc?initial?Error
????????clusterAssment[j1]?=?distMeas(mat(centroid0)?dataSet[j:])**2
????while?(len(centList)?????????lowestSSE?=?inf
????????for?i?in?range(len(centList)):
????????????ptsInCurrCluster?=?dataSet[nonzero(clusterAssment[:0].A==i)[0]:]#get?the?data?points?currently?in?cluster?i
????????????centroidMat?splitClustAss?=?kMeans(ptsInCurrCluster?2?distMeas)
????????????sseSplit?=?sum(splitClustAss[:1])#compare?the?SSE?to?the?currrent?minimum
????????????sseNotSplit?=?sum(clusterAssment[nonzero(clusterAssment[:0].A!=i)[0]1])
????????????print?“sseSplit?and?notSplit:?“sseSplitsseNotSplit
???????
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2011-12-28 15:30  Ch10\
     文件        3105  2011-03-17 21:17  Ch10\portlandClubs.txt
     目錄           0  2020-11-16 01:04  __MACOSX\
     目錄           0  2020-11-16 01:04  __MACOSX\Ch10\
     文件         222  2011-03-17 21:17  __MACOSX\Ch10\._portlandClubs.txt
     文件      459112  2011-03-18 12:00  Ch10\Portland.png
     文件         222  2011-03-18 12:00  __MACOSX\Ch10\._Portland.png
     文件        6397  2011-12-28 15:30  Ch10\kMeans.pyc
     文件         222  2011-12-28 15:30  __MACOSX\Ch10\._kMeans.pyc
     文件        1600  2011-03-15 13:03  Ch10\testSet.txt
     文件         222  2011-03-15 13:03  __MACOSX\Ch10\._testSet.txt
     文件        6419  2011-12-28 15:52  Ch10\kMeans.py
     文件         222  2011-12-28 15:52  __MACOSX\Ch10\._kMeans.py
     文件        1194  2011-03-16 10:16  Ch10\testSet2.txt
     文件         222  2011-03-16 10:16  __MACOSX\Ch10\._testSet2.txt
     文件        4693  2011-03-18 09:56  Ch10\places.txt
     文件         222  2011-03-18 09:56  __MACOSX\Ch10\._places.txt
     文件         222  2011-12-28 15:30  __MACOSX\._Ch10
- 上一篇:Python各種樹木制作代碼
- 下一篇:Python-霍蘭德人格分析圖繪制
評論
共有 條評論
相關資源
- 《機器學習實戰》源代碼Python3
- python新浪微博爬蟲,爬取微博和用戶
- 《機器學習實戰》Python3代碼
- 機器學習實戰 Python實現
- 機器學習實戰python實現
- 人工智能-python機器學習實戰高清完整
- 《機器學習實戰》pdf及所和數據集文
- 機器學習實戰Python 開發 高清 非掃描
- python 機器學習實戰 pdf 中文文字版
- 《Python 3數據分析與機器學習實戰》自
- 《Python 3數據分析與機器學習實戰》隨
- 《機器學習實戰》python3完美運行代碼
- Python——機器學習實戰——Apriori算法
- 老唐的1——python數據分析與機器學習
- 機器學習實戰python2SVM 訓練數據
- 機器學習實戰:基于 Scikit-Learn 和 T
- 《機器學習實戰》源代碼
- 機器學習實戰python2SVM與核函數 訓練數
- Python——機器學習實戰——AdaBoost分類
- 《機器學習實戰》中決策樹python2.7代