資源簡介
使用 Python 實現 K-Means 算法,注釋詳盡,並與 sklearn 中 KMeans 算法的結果進行比較。
代碼片段和文件信息
import random

import numpy as np
from matplotlib import image as mpimg
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans

# IPython/Jupyter magic: only valid when this code runs in a notebook cell.
%matplotlib inline
def handleData(image, N):
    """Normalise an image array and flatten it to ``N`` rows of 3 channels.

    Args:
        image: Image array. The ``.png`` branch indexes it with three axes
            and keeps channels ``0:3`` of the last axis.
        N: Number of rows of the returned array (passed to ``reshape``).

    Returns:
        ``image`` reshaped to ``(N, 3)`` after the per-format adjustment.

    NOTE(review): ``filename`` is not a parameter; it is read from the
    enclosing (module/notebook) scope and must be defined before this
    function is called -- confirm against the caller.
    """
    suffix = filename[-4:]
    if suffix == '.jpg':
        # Scale the values down by 256.
        # presumably the input is 8-bit (0..255) -- TODO confirm
        image = image / 256
    elif suffix == '.png':
        # Keep only the first three channels of the last axis.
        # presumably this drops an alpha channel -- TODO confirm
        image = image[:, :, 0:3]

    data = image.reshape((N, 3))
    return data
def calDis(a, b):
    """Return the squared Euclidean distance between ``a`` and ``b``.

    No square root is taken: the result is ``sum((a - b) ** 2)``.

    Args:
        a: First vector (anything supporting ``a - b`` with NumPy semantics).
        b: Second vector, broadcast-compatible with ``a``.

    Returns:
        The sum of the squared element-wise differences.
    """
    # An L1 (Manhattan) alternative would be: np.sum(np.fabs(a - b))
    return np.sum(np.power(a - b, 2))
def initCenter(data, K, seed=None):
    """Draw ``K`` initial cluster centres uniformly inside the data's bounding box.

    For every column of ``data`` the centre coordinate is sampled uniformly
    from ``[min, max)`` of that column.

    Args:
        data: 2-D array-like of shape ``(n_samples, n_features)``.
        K: Number of centres to generate.
        seed: Optional seed. When given, the returned centres are
            reproducible for the same ``data``, ``K`` and ``seed``.

    Returns:
        ``np.matrix`` of shape ``(K, n_features)`` holding the centres.
    """
    if seed is not None:
        # The seed is still applied to the ``random`` module, as before.
        # The centres themselves come from NumPy, so a NumPy generator is
        # derived from that seeded state; any seed type ``random.seed``
        # accepts therefore works.
        random.seed(seed)
        rng = np.random.RandomState(random.getrandbits(32))
    else:
        rng = np.random

    values = np.asarray(data)
    n_features = values.shape[1]
    lows = values.min(axis=0)
    spans = values.max(axis=0) - lows

    # Draw column by column (K values per feature) so the sequence of random
    # numbers consumed matches a per-column loop.
    draws = rng.rand(n_features, K).T

    # An alternative initialisation is to pick K samples from the data:
    # random.sample(list(data), K)
    return np.asmatrix(lows + spans * draws)
def?myKMeans(dataKseed=None):
#???parameters?check
????if?K<=0:
????????raise?ValueError(“Invalid?number?of?initializations:?K?must?be?bigger?than?0“)
????if?data.ndim!=2:
????????raise?ValueError(“Invalid?number?of?initializations:?data‘s?dimention?must?be?2“)
????if?data.shape[0]<=0?or?data.shape[1]<=0:
????????raise?ValueError(“Invalid?number?of?initializations:?data?can?not?be?empty“)
????
????N=data.shape[0]
????#labels存儲每個像素點對應的聚類中心向量
????labels=np.zeros(N)
????#centers存儲聚類中心向量值
????centers=initCenter(dataKseed)
????print(centers)
????
????#損失函數值
????J=np.Inf
????dis_sum=0
????is_continue=True
????while?is_continue:
????????dis_sum=0
????????is_continue=False
????????#?計算各個像素點距離哪個聚類中心最近
????????for?i?in?range(N):
????????????min_dis=np.Inf
????????????min_center=-1
????????????for?j?in?range(K):
??
- 上一篇:Python淘寶評論爬取
- 下一篇:Python實現PCA
評論
共有 條評論