資源簡介
這是我在網上搜到的一個 Python 實現的 k-means 算法，我對其中的著色方法做了一定的修改。代碼不長，且可以演示算法的運行過程。
代碼片段和文件信息
#a simple k-means algorithm
#
#usage
#step1: specify N, the number of training data, and k, the cluster number
#step2: make data
# X = makeTrainingData(N, k)
#step3: call kmeans algorithm
# kmeans(X, k, observer)
#
#for example
#N = 300
#k = 5
#X = makeTrainingData(N, k)
#kmeans(X, k, observer)
from?__future__?import?with_statement
import?cPickle?as?pickle
from?matplotlib?import?pyplot
from?numpy?import?zeros?array?tile
from?scipy.linalg?import?norm
import?numpy.matlib?as?ml
import?random
?
def observer(iter, k, labels, centers):
    """Report and plot the clustering state for one k-means iteration.

    Prints the iteration number, then redraws the scatter plot: every data
    point is coloured by its current cluster label, and the cluster centers
    are drawn on top with larger markers.

    NOTE: the data points are read from the module-level name ``X``; it is
    not a parameter, so ``X`` must be defined before this function runs.

    Args:
        iter: iteration number, printed as ``iter <n>.``.
        k: number of clusters; one colour value is generated per cluster.
        labels: sequence of cluster indices, one per plotted point, used to
            look up each point's colour value.
        centers: array whose columns 0 and 1 are the plotted coordinates of
            the cluster centers.
    """
    # Parenthesised form prints the same text under the Python 2 print
    # statement and the Python 3 print function.
    print("iter %d." % iter)
    # Generate one scalar colour value per cluster, spread over 0..255.
    # With a single cluster the divisor (k - 1) would be zero, so clamp it.
    divisor = max(k - 1, 1)
    colors = zeros(k)
    for i in range(k):
        colors[i] = i * 255 / divisor
    # NOTE(review): the ``hold`` keyword and ``pyplot.hold`` appear to have
    # been removed in Matplotlib 3.0 -- confirm the targeted Matplotlib
    # version before relying on these two calls.
    pyplot.plot(hold=False)  # clear previous plot
    pyplot.hold(True)
    # draw points
    data_colors = [colors[lbl] for lbl in labels]
    pyplot.scatter(X[:, 0], X[:, 1], c=data_colors, alpha=0.5)
    # draw centers
    pyplot.scatter(centers[:, 0], centers[:, 1], s=200, c=colors)
    #pyplot.savefig('kmeans/iter_%02d.png' % iter, format='png')
def?kmeans(X?k?observer=None?threshold=1e-15?maxiter=300):
????N?=?len(X)
????labels?=?zeros(N?dtype=int)
????centers?=?array(random.sample(X?k))
????iter?=?0
?
????def?calc_J():
????????sum?=?0
????????for?i?in?xrange(N):
????????????sum?+=?norm(X[i]-centers[labels[i]])
????????return?sum
?
????def?distmat(X?Y):
????????n?=?len(X)
- 上一篇:問卷星爬蟲帶驗證碼
- 下一篇:反冪法求接近給定值對應的矩陣特征值
評論
共有 條評論