資源簡介
k-means代碼

代碼片段和文件信息
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 17 16:13:56 2016
@author: phl
"""
print("k-means算法程序")
# The star import supplies the bare NumPy names used by the functions in
# this file (sqrt, sum, power, zeros, random, nonzero, mean, ...).
from numpy import *
import time
import matplotlib.pyplot as plt
# Euclidean distance computation
def euclDistance(vector1, vector2):
    """Return the Euclidean distance between ``vector1`` and ``vector2``.

    Both arguments must support element-wise subtraction with each other
    (for example two NumPy arrays of the same length).  ``sqrt``, ``sum``
    and ``power`` are the NumPy functions brought in by the file's
    ``from numpy import *``.
    """
    return sqrt(sum(power(vector2 - vector1, 2)))
# Randomly initialise the cluster centres
def initCentriods(dataSet, k):
    """Pick ``k`` rows of ``dataSet`` at random as the initial centroids.

    Parameters:
        dataSet: 2-D array-like exposing ``.shape`` and ``[rows, :]``
            indexing; each row is one sample.
        k: number of centroids to pick.

    Returns:
        A ``(k, dim)`` float array whose rows are copies of sampled rows.

    When ``k`` does not exceed the number of samples the rows are drawn
    without replacement, so two centroids never start on the same sample
    (a duplicated start leaves one cluster empty).  For larger ``k`` rows
    are drawn with replacement, since distinct rows are impossible.
    """
    print(dataSet)
    numSamples, dim = dataSet.shape  # dim is the number of columns
    centroids = zeros((k, dim))
    print("行數:", numSamples)
    print("列數:", dim)
    if k <= numSamples:
        indices = random.choice(numSamples, size=k, replace=False)
    else:
        indices = random.randint(0, numSamples, size=k)
    for i, index in enumerate(indices):
        centroids[i, :] = dataSet[int(index), :]
    return centroids
# k-means cluster
def kmeans(dataSet, k):
    """Cluster the rows of ``dataSet`` into ``k`` groups with k-means.

    Parameters:
        dataSet: 2-D array-like exposing ``.shape`` and row indexing; each
            row is one sample.
        k: number of clusters.

    Returns:
        ``(centroids, clusterAssment)`` where ``centroids`` is the array
        returned by ``initCentriods`` after refinement and
        ``clusterAssment`` is a ``(numSamples, 2)`` matrix holding, per
        sample, the cluster index (column 0) and the squared distance to
        that cluster's centroid (column 1).
    """
    numSamples = dataSet.shape[0]  # number of rows (samples)
    print("行數:", numSamples)
    # asmatrix replaces mat(), which was removed in NumPy 2.0; the result
    # is still a numpy matrix, so existing callers are unaffected.
    clusterAssment = asmatrix(zeros((numSamples, 2)))
    clusterChanged = True
    ## step 1: initialise the cluster centres
    centroids = initCentriods(dataSet, k)
    print("隨機初始化的兩個點:", centroids)
    ## iterate until no sample changes cluster
    while clusterChanged:
        clusterChanged = False
        for i in range(numSamples):
            # Infinity rather than a fixed large number, so data on any
            # scale still finds a nearest centroid.
            minDist = float("inf")
            minIndex = 0
            ## step 2: find the nearest centroid
            for j in range(k):
                distance = euclDistance(centroids[j, :], dataSet[i, :])
                if distance < minDist:
                    minDist = distance
                    minIndex = j  # minIndex is the cluster label
            ## step 3: update the cluster assignment
            if clusterAssment[i, 0] != minIndex:
                clusterChanged = True
            # Always refresh the stored distance: centroids move between
            # passes even when a sample keeps its label.
            clusterAssment[i, :] = minIndex, minDist ** 2
        ## step 4: update the cluster centres
        labels = asarray(clusterAssment[:, 0]).ravel()
        for j in range(k):
            pointsInCluster = dataSet[nonzero(labels == j)[0]]
            # An empty cluster keeps its previous centroid; taking the
            # mean of zero rows would turn it into NaN.
            if len(pointsInCluster) > 0:
                centroids[j, :] = mean(pointsInCluster, axis=0)
    print('恭喜你,聚類完成')
    return centroids, clusterAssment
# show your cluster only available with 2-D data
def showCluster(dataSet, k, centroids, clusterAssment):
    """Plot 2-D samples coloured by cluster, plus the cluster centroids.

    Parameters:
        dataSet: 2-D array-like of samples; must have exactly 2 columns.
        k: number of clusters.
        centroids: indexable as ``centroids[i, 0]`` / ``centroids[i, 1]``.
        clusterAssment: indexable as ``clusterAssment[i, 0]``, giving the
            cluster index of sample ``i``.

    Returns:
        ``1`` if the data is not 2-D or ``k`` exceeds the number of
        available marker styles; otherwise ``None`` after showing the plot.
    """
    numSamples, dim = dataSet.shape
    if dim != 2:
        print("Sorry! I can not draw because the dimension of your data is not 2!")
        return 1

    # NOTE(review): the tail of this list was lost in the available copy of
    # the source; the last two entries ('<r', 'pr') are assumed - confirm.
    mark = ['or', 'ob', 'og', 'ok', '^r', '+r', 'sr', 'dr', '<r', 'pr']
    if k > len(mark):
        print("Sorry! Your k is too large! please contact Zouxy")
        return 1

    # draw all samples
    for i in range(numSamples):
        markIndex = int(clusterAssment[i, 0])
        plt.plot(dataSet[i, 0], dataSet[i, 1], mark[markIndex])

    # NOTE(review): same truncation as above; '<b' and 'pb' are assumed.
    mark = ['Dr', 'Db', 'Dg', 'Dk', '^b', '+b', 'sb', 'db', '<b', 'pb']
    # draw the centroids
    for i in range(k):
        plt.plot(centroids[i, 0], centroids[i, 1], mark[i], markersize=12)
    plt.show()
def showData(dataSet):
    """Draw a scatter plot of the first two values of every item in ``dataSet``.

    Parameters:
        dataSet: iterable of items indexable with ``[0]`` and ``[1]``; both
            values must be convertible with ``float``.

    NOTE(review): the available copy of the source may be cut off after
    this point; nothing here calls ``plt.show()``, so the figure is only
    created, not displayed - confirm against the full file.
    """
    x = []
    y = []
    plt.figure(figsize=(9, 6))
    for i in dataSet:
        x.append([float(i[0])])
        y.append([float(i[1])])
    plt.scatter(x, y, c="b", s=25, alpha=0.4, marker='o')
    # c: colour of the scatter points
    # s: size of the scatter points
??
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2019-03-22 11:01  k_means\__pycache__\
     文件        2552  2018-03-22 19:13  k_means\__pycache__\k_means.cpython-36.pyc
     文件        3306  2018-03-22 19:13  k_means\k_means.py
     文件         753  2018-04-24 09:34  k_means\test_kmeans.py
     文件          59  2018-02-21 18:02  k_means\testSet.txt
評論
共有 條評論