資源簡介
用 Python 實現 k-means 算法,將結果保存,同時以畫圖的方式顯示;需先完成 Python 2.7.12、numpy、scipy、matplotlib 的基本配置。

代碼片段和文件信息
# k-means
# coding: utf-8
from numpy import *
import time
import matplotlib.pyplot as plt
# calculate the Euclidean distance between two vectors
def euclDistance(vector1, vector2):
    """Return the Euclidean (L2) distance between two vectors.

    Args:
        vector1: Array-like of numbers (list, ndarray or 1 x n matrix).
        vector2: Array-like of numbers broadcastable against ``vector1``.

    Returns:
        The distance as a NumPy floating-point scalar.
    """
    # asarray lets plain sequences and matrices be subtracted uniformly.
    difference = asarray(vector2, dtype=float) - asarray(vector1, dtype=float)
    # The .sum() method always reduces to a scalar, whichever `sum`
    # (builtin or NumPy's) happens to be bound at module level.
    return sqrt(power(difference, 2).sum())
# init centroids with random samples
def initCentroids(dataSet, k):
    """Pick ``k`` rows of ``dataSet`` at random as the initial centroids.

    Rows are drawn without replacement whenever ``k`` does not exceed the
    number of samples, so two centroids never start from the same sample.
    When ``k`` is larger than the number of samples, rows are drawn with
    replacement so the call still returns ``k`` centroids.

    Args:
        dataSet: 2-D array-like (ndarray, matrix or nested list), one
            sample per row.
        k: Number of centroids to create.

    Returns:
        A ``(k, columns)`` float ndarray; it is a copy, so changing it does
        not modify ``dataSet``.

    Raises:
        ValueError: If ``dataSet`` is not 2-D, or has no rows while
            ``k`` is positive.
    """
    samples = asarray(dataSet, dtype=float)
    if samples.ndim != 2:
        raise ValueError("dataSet must be 2-dimensional (samples x features)")
    row = samples.shape[0]
    if row == 0 and k > 0:
        raise ValueError("dataSet must contain at least one sample")
    # `random` is numpy.random here (brought in by `from numpy import *`).
    indices = random.choice(row, size=k, replace=bool(k > row)) if k > 0 else []
    # Indexing with an index list returns a fresh array, not a view.
    return samples[list(indices), :]
# k-means cluster
def kmeans(dataSet, k):
    """Cluster the rows of ``dataSet`` into ``k`` groups with k-means.

    Args:
        dataSet: 2-D ndarray or matrix, one sample per row.
        k: Number of clusters.

    Returns:
        A tuple ``(centroids, clusterAssment)``. ``centroids`` is whatever
        ``initCentroids`` returned, updated in place. ``clusterAssment`` is
        a ``(rows, 2)`` matrix: column 0 holds the index of the cluster each
        sample belongs to, column 1 the squared distance between the sample
        and its centroid.
    """
    row, col = dataSet.shape
    # first column stores which cluster this sample belongs to
    # second column stores the error between this sample and its centroid
    clusterAssment = mat(zeros((row, 2)))
    # Start from an impossible cluster index so that samples landing in
    # cluster 0 on the first pass are still counted as "changed".
    clusterAssment[:, 0] = -1
    clusterChanged = True

    # step 1: init centroids
    centroids = initCentroids(dataSet, k)

    while clusterChanged:
        clusterChanged = False
        # for each sample
        for i in range(row):
            # An infinite sentinel works for data at any scale.
            minDistance = float("inf")
            minIndex = 0
            # step 2: find the closest centroid
            for j in range(k):
                distance = euclDistance(centroids[j, :], dataSet[i, :])
                if distance < minDistance:
                    minDistance = distance
                    minIndex = j
            # step 3: update its cluster
            if clusterAssment[i, 0] != minIndex:
                clusterChanged = True
            # Always refresh the error: the centroid may have moved even
            # when the sample stayed in the same cluster.
            clusterAssment[i, :] = minIndex, minDistance ** 2

        # step 4: update the centroids
        for j in range(k):
            pointsInCluster = dataSet[nonzero(clusterAssment[:, 0].A == j)[0]]
            # Keep the previous centroid for an empty cluster; the mean of
            # zero rows would be NaN.
            if pointsInCluster.shape[0] > 0:
                centroids[j, :] = mean(pointsInCluster, axis=0)

    print('Congratulations k-means cluster is completed!!!!!!!!!!!!!!!')
    return centroids, clusterAssment
# show your cluster; only available with 2D data
def showCluster(dataSet, k, centroids, clusterAssment):
    """Plot the samples and centroids of a 2-D clustering result.

    Args:
        dataSet: 2-D ndarray or matrix with exactly two columns.
        k: Number of clusters; must not exceed the number of marker styles.
        centroids: Indexable as ``centroids[j, 0]`` / ``centroids[j, 1]``
            for each cluster ``j``.
        clusterAssment: Indexable as ``clusterAssment[i, 0]``, giving the
            cluster index of sample ``i``.

    Returns:
        ``1`` when nothing was drawn (data is not 2-D or ``k`` is too
        large); ``None`` after the plot window has been shown.
    """
    row, col = dataSet.shape
    if col != 2:
        print("Sorry! I can not draw because the dimension of your data is  not 2")
        return 1

    # NOTE(review): the tail of this list was truncated in the source;
    # the last two entries ('<r', 'pr') are a reconstruction -- confirm.
    mark = ['or', 'ob', 'og', 'ok', '^r', '+r', 'sr', 'dr', '<r', 'pr']
    if k > len(mark):
        print("Sorry! Your k is too large! Please choose smaller k")
        return 1

    # draw all samples
    for i in range(row):
        markIndex = int(clusterAssment[i, 0])
        plt.plot(dataSet[i, 0], dataSet[i, 1], mark[markIndex])

    # NOTE(review): as above, the last two entries ('<b', 'pb') are a
    # reconstruction of text truncated in the source -- confirm.
    mark = ['Dr', 'Db', 'Dg', 'Dk', '^b', '+b', 'sb', 'db', '<b', 'pb']
    # draw the centroids
    for j in range(k):
        plt.plot(centroids[j, 0], centroids[j, 1], mark[j], markersize=12)

    plt.show()
#write?centroids?and?clusterAssment?in?txt
def?writeResult(dataSet?k?centroids?clusterAssment):
????fileOut?=?open(“C:\Users\Administrator\Desktop\kmeans-python\centroids.txt“?“w“)
????out?=?open(“C:\Users\Administrator\Desktop\kmeans-python\clusterAssment.txt“?“w“)
????for?i?in?range(k):
????????fileOut.write(str(c
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件         93  2016-12-06 17:01  kmeans-python\centroids.txt
     文件       3229  2016-12-06 17:01  kmeans-python\clusterAssment.txt
     文件        237  2016-12-06 15:41  kmeans-python\data.txt
     文件       1930  2016-12-06 15:40  kmeans-python\data2.txt
     文件         27  2016-12-06 16:35  kmeans-python\haha.txt
     文件       3283  2016-12-06 17:01  kmeans-python\kmeans.py
     文件       3393  2016-12-06 17:01  kmeans-python\kmeans.pyc
     文件        234  2016-12-06 16:35  kmeans-python\test.py
     文件        750  2016-12-06 16:47  kmeans-python\test_kmeans.py
     目錄          0  2016-12-06 17:01  kmeans-python
----------- ---------  ---------- -----  ----
                13176                    10
- 上一篇:Kmeans算法python實現
- 下一篇:Maya Python游戲與影視編程指南
評論
共有 條評論