91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

資源簡介

數據挖掘 k-means k-medoids python 代碼實現 含測試數據

資源截圖

代碼片段和文件信息

import matplotlib.pyplot as plt
import numpy as np
from numpy import *


# Euclidean distance between two samples
def euclDistance(vector1, vector2):
    """Return the Euclidean (L2) distance between two vectors.

    Args:
        vector1: NumPy array or matrix holding the first sample.
        vector2: NumPy array or matrix broadcast-compatible with ``vector1``.

    Returns:
        The square root of the sum of squared element-wise differences.
    """
    return np.sqrt(np.sum(np.power(vector2 - vector1, 2)))

# Initialise centroids from randomly chosen samples
def initCentroids(dataSet, k):
    """Pick ``k`` rows of ``dataSet`` at random to serve as initial centroids.

    Rows are drawn independently, so the same sample may be chosen more
    than once.

    Args:
        dataSet: 2-D array or matrix with one sample per row.
        k: Number of centroids to draw.

    Returns:
        A ``(k, dim)`` array whose rows are copies of the chosen samples.
    """
    numSamples, dim = dataSet.shape
    centroids = np.zeros((k, dim))
    for i in range(k):
        # Draw a float in [0, numSamples) and truncate it to a row index.
        index = int(np.random.uniform(0, numSamples))
        centroids[i, :] = dataSet[index, :]
    return centroids


# k-means clustering
def kmeans(dataSet, k):
    """Partition the rows of ``dataSet`` into ``k`` clusters.

    Alternates between assigning every sample to its nearest centroid and
    moving each centroid to the mean of its assigned samples, until a full
    pass leaves every assignment unchanged.

    Args:
        dataSet: 2-D array or matrix with one sample per row.
        k: Number of clusters.

    Returns:
        A ``(centroids, clusterAssment)`` tuple. ``centroids`` is a
        ``(k, dim)`` array. ``clusterAssment`` is an ``(N, 2)`` matrix:
        column 0 holds each sample's cluster index, column 1 the squared
        distance from the sample to the centroid it was assigned to.
    """
    numSamples = dataSet.shape[0]
    # np.asmatrix replaces the ``mat`` alias, which NumPy 2.0 removed.
    clusterAssment = np.asmatrix(np.zeros((numSamples, 2)))
    # -1 is never a valid cluster index, so the first pass always registers
    # as a change and the centroids get at least one refinement.
    clusterAssment[:, 0] = -1
    clusterChanged = True

    # Step 1: seed the centroids with random samples.
    centroids = initCentroids(dataSet, k)

    while clusterChanged:
        clusterChanged = False
        for i in range(numSamples):
            minDist = np.inf
            minIndex = 0
            # Step 2: find the closest centroid for this sample.
            for j in range(k):
                distance = euclDistance(centroids[j, :], dataSet[i, :])
                if distance < minDist:
                    minDist = distance
                    minIndex = j

            # Step 3: record the assignment. The squared error is refreshed
            # on every pass because centroids move even when the label
            # stays the same.
            if clusterAssment[i, 0] != minIndex:
                clusterChanged = True
            clusterAssment[i, :] = minIndex, minDist ** 2

        # Step 4: move each centroid to the mean of its members.
        for j in range(k):
            members = np.nonzero(clusterAssment[:, 0].A == j)[0]
            if members.size == 0:
                # An empty cluster keeps its previous centroid instead of
                # becoming NaN from the mean of zero rows.
                continue
            centroids[j, :] = np.mean(dataSet[members], axis=0)

    print('Congratulations, cluster complete!')
    return centroids, clusterAssment
#k-medoids中用于尋找質(zhì)心
def?findMedioio(pointsInCluster):
????minDist?=?10000000.0
????minIndex=0
????for?i?in?range(pointsInCluster.shape[0]):#循環(huán)作為質(zhì)心
????????currDist=0
????????for?j?in?range(pointsInCluster.shape[0]):
????????????currDist+=euclDistance(p

 屬性            大小     日期    時間   名稱
-----------?---------??----------?-----??----
?????目錄???????????0??2019-01-06?15:43??k-means_k-medoids\
?????文件????????4859??2019-01-06?15:21??k-means_k-medoids\KMeans.py
?????文件????????4107??2019-01-02?19:04??k-means_k-medoids\KMeans2.py
?????文件????????1112??2019-01-06?15:42??k-means_k-medoids\MainPro.py
?????文件???????15288??2019-01-06?11:32??k-means_k-medoids\test.csv
?????文件????????1755??2019-01-01?16:17??k-means_k-medoids\testSet.txt
?????文件????????1126??2019-01-06?10:15??k-means_k-medoids\__init__.py
?????文件????????1139??2019-01-06?15:38??k-means_k-medoids\__init__2.py

評論

共有 條評論