資源簡介
使用 Python 實現的 DP-means 與 k-means 聚類算法比較,內附數據集。

代碼片段和文件信息
#!/usr/bin/env?python
import math
import random
import sys
from pprint import pprint

import timer
class?kmeans(object):
def __init__(self, _X, _k, _xVal=0, _stop=False):
    """Set up a k-means run over the samples in ``_X``.

    Args:
        _X: Sequence of samples; each sample is a sequence of
            ``len(_X[0])`` feature values.
        _k: Number of clusters.
        _xVal: Number of records at the END of ``_X`` to hold out for
            cross-validation. To use this the input data must be
            randomized beforehand.
        _stop: When True, iteration is meant to stop once the
            out-of-sample (cross-validation) error starts to rise.

    Raises:
        ValueError: from ``random.sample`` if ``_k`` is larger than the
            number of training records.
    """
    self.nFeatures = len(_X[0])
    self.xValSize = _xVal
    self.allSize = len(_X)
    # Training records are X[0:size]; held-out records are X[size:allSize].
    self.size = self.allSize - self.xValSize
    self.X = _X
    self.k = _k
    self.stop = _stop

    # Cluster index for every record (training and held-out); -1 = unassigned.
    self.dataClusterId = [-1] * self.allSize

    # Seed the centers with k distinct random TRAINING points; held-out
    # records are never used as a starting center.
    seeds = random.sample(range(self.size), self.k)
    self.clusters = {idx: self.X[i] for idx, i in enumerate(seeds)}

    # Output records.
    self.record = []
    self.errorRecord = []
def dSquared(self, x, y):
    """Return the squared Euclidean distance between ``x`` and ``y``.

    Components are paired with ``zip``, so if the two sequences differ
    in length the extra trailing components are ignored.
    """
    dist2 = 0.0
    for a, b in zip(x, y):
        dist2 += (a - b) ** 2
    return dist2
def error(self):
    """Return ``(training_error, cross_validation_error)``.

    Each value is the mean squared distance between a record and the
    center of the cluster it is currently assigned to. The training
    error covers records ``0..size-1``; the cross-validation error
    covers the held-out records ``size..allSize-1`` and is 0.0 when
    their summed distance is not positive (e.g. nothing is held out).
    """
    # Error on the training data.
    res = 0.0
    for i in range(0, self.size):
        res += self.dSquared(self.X[i], self.clusters[self.dataClusterId[i]])

    # Error on the non-training (held-out) data.
    res1 = 0.0
    err1 = 0.0
    for i in range(self.size, self.allSize):
        res1 += self.dSquared(self.X[i], self.clusters[self.dataClusterId[i]])
    if res1 > 0.0:
        err1 = res1 / self.xValSize

    return res / self.size, err1
def nearestCluster(self, x):
    """Return ``(cluster_index, distance)`` of the center closest to ``x``.

    The distance is the Euclidean distance (square root of
    ``dSquared``). On ties the cluster visited first while iterating
    ``self.clusters`` wins, because only a strictly smaller distance
    replaces the current best. If there are no clusters the sentinels
    ``(-sys.maxsize, sys.maxsize)`` are returned.
    """
    # sys.maxsize stands in for the Python 2-only sys.maxint.
    cmin = sys.maxsize
    cidx = -sys.maxsize
    for j in self.clusters:
        dist = math.sqrt(self.dSquared(x, self.clusters[j]))
        if dist < cmin:
            cmin = dist
            cidx = j
    return cidx, cmin
def assign(self):
    """Assign every record to its nearest cluster center.

    All ``allSize`` records are assigned, including the held-out
    cross-validation records, so that their error can be measured.
    The result is stored in ``self.dataClusterId``.
    """
    for i in range(0, self.allSize):
        # The distance returned alongside the index is not needed here.
        self.dataClusterId[i], _dmin = self.nearestCluster(self.X[i])
def updateClusters(self):
    """Move each cluster center to the mean of its assigned training records.

    Only the training records (indices ``0..size-1``) contribute; the
    held-out cross-validation records are ignored. A cluster with no
    assigned training records keeps its previous center.
    """
    nfeat = self.nFeatures

    # Per-cluster feature sums (floats, so the division below is true
    # division on Python 2 as well) and member counts.
    sums = {j: [0.0] * nfeat for j in range(0, self.k)}
    counts = {j: 0 for j in range(0, self.k)}

    # Only calculate clusters on the training set, not the
    # cross-validation set.
    for i in range(0, self.size):
        cid = self.dataClusterId[i]
        row = self.X[i]
        for j in range(0, nfeat):
            sums[cid][j] += row[j]
        counts[cid] += 1

    for c in self.clusters:
        if counts[c] != 0:
            self.clusters[c] = [sums[c][j] / counts[c] for j in range(0, nfeat)]
        # else: no members in this cluster -> leave its center unchanged
    return
def?run(self?nmax?=?100?eps?=?1e-7):
prev?=?0.0
prevXVal?=?float(sys.maxint)
for?iter?in?range(0nmax):
#?update?assignments
self.assign()
#?calculate?error
err?errXVal?=?self.error()
#
if?self.stop?and?errXVal?-?prevXVal?>=?0.0:
sys.stderr.write(“Cross-validation?error?increasing?at?step?%d\n“%iter)
break
prevXVal?=?errXVal
#
if?abs(err-prev)? sys.stderr.write(“Tolerance?reached?a
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2014-12-01 17:25  Python-DP-Means-Clustering-master\
     文件         295  2014-12-01 17:25  Python-DP-Means-Clustering-master\.gitignore
     文件        1748  2014-12-01 17:25  Python-DP-Means-Clustering-master\DPopt.py
     文件        1317  2014-12-01 17:25  Python-DP-Means-Clustering-master\LICENSE
     文件        6058  2014-12-01 17:25  Python-DP-Means-Clustering-master\README
     文件        6124  2014-12-01 17:25  Python-DP-Means-Clustering-master\cluster.py
     文件         354  2014-12-01 17:25  Python-DP-Means-Clustering-master\costTest.bash
     文件        1232  2014-12-01 17:25  Python-DP-Means-Clustering-master\createTestData.py
     目錄           0  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\
     文件       18195  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\2d-sample-data.png
     文件       96576  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\3d-sample-data.png
     文件       20568  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\error.png
     文件      241453  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\iters.png
     文件       18088  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\opt_error.png
     文件      174533  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\opt_iters.png
     文件       20973  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_errors.png
     文件       22520  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_errors_20.png
     文件       30375  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_errors_20_annotated.png
     文件       24465  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error.png
     文件       26844  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_10.png
     文件       27245  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_12.png
     文件       27581  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_15.png
     文件       26937  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_18.png
     文件       24941  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_2.png
     文件       26406  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_20.png
     文件       27115  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_3.png
     文件       26713  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_4.png
     文件       27006  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_5.png
     文件       26386  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times-error_8.png
     文件       21904  2014-12-01 17:25  Python-DP-Means-Clustering-master\img\test_times.png
     目錄           0  2014-12-01 17:25  Python-DP-Means-Clustering-master\input\
............此處省略14個文件信息
評論
共有 條評論