資源簡介
Kmeans算法的python3.5實現 帶數據可以直接運行

代碼片段和文件信息
import os

import numpy as np
from matplotlib import pyplot as plt
from numpy import *
# classify data
def loadDataSet(path):
    """Load a space-separated, labelled data file.

    Every non-blank line holds feature values followed by a label in the
    last column. The column count is taken from the first non-blank line;
    on each row the first ``column count - 1`` fields are parsed as floats
    and the last field is kept as the label.

    Args:
        path: Path of the text file to read.

    Returns:
        A tuple ``(dataMat, labelMat)``. ``dataMat`` is a list of rows, each
        a list of floats; ``labelMat`` is the list of labels as strings.

    Raises:
        IndexError: If a row has fewer fields than the first row.
        ValueError: If a feature field cannot be parsed as a float.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # A single handle inside a context manager: the file is closed even if
    # parsing fails part-way through.
    with open(path) as fr:
        for line in fr:
            curline = line.strip().split(' ')
            if curline == ['']:
                # Blank line (e.g. a trailing newline): nothing to parse.
                continue
            if numFeat is None:
                numFeat = len(curline)
            dataMat.append([float(curline[i]) for i in range(numFeat - 1)])
            labelMat.append(curline[-1])
    return dataMat, labelMat
# cluster data
def loadData(path):
    """Load the first two tab-separated columns of a text file.

    Args:
        path: Path of the text file to read. Each non-blank line must hold
            at least two tab-separated numeric fields; further fields are
            ignored.

    Returns:
        A ``numpy.matrix`` of floats with one row per non-blank line and two
        columns.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If one of the first two fields is not a number.
    """
    dataset = []
    # The context manager closes the file even if a line fails to parse.
    with open(path) as fr:
        for line in fr:
            lineArr = line.strip().split('\t')
            if lineArr == ['']:
                # Blank line: skip instead of failing on float('').
                continue
            dataset.append([float(lineArr[0]), float(lineArr[1])])
    # ``mat`` is no longer available in NumPy 2.0; ``asmatrix`` performs the
    # same conversion and exists in both old and new releases.
    return np.asmatrix(dataset)
def caeuclDistance(vect1, vect2):
    """Return the Euclidean distance between two vectors.

    Args:
        vect1: First vector (list, ndarray or single-row matrix).
        vect2: Second vector, broadcast-compatible with ``vect1``.

    Returns:
        The square root of the sum of squared element-wise differences.
    """
    # Work on plain float arrays so lists, ndarrays and matrix rows all take
    # the same code path.
    diff = np.asarray(vect1, dtype=float) - np.asarray(vect2, dtype=float)
    return np.sqrt(np.sum(np.power(diff, 2)))
def initCentroids(dataSet, k):
    """Pick ``k`` rows of ``dataSet`` at random as starting centroids.

    Args:
        dataSet: Two-dimensional array or matrix, one sample per row.
        k: Number of centroids to pick.

    Returns:
        A float ``numpy.ndarray`` of shape ``(k, dim)`` whose rows are copies
        of randomly chosen samples.

    Raises:
        ValueError: If ``k`` is positive but ``dataSet`` has no rows.
    """
    numSamples, dim = dataSet.shape
    if k > 0 and numSamples == 0:
        raise ValueError('cannot pick centroids from an empty data set')
    # Draw distinct rows whenever there are enough samples: picking the same
    # row twice yields identical centroids, one of which then owns no points.
    # Only fall back to repeats when k exceeds the number of samples.
    indices = np.random.choice(numSamples, size=k, replace=k > numSamples)
    return np.array(np.asarray(dataSet)[indices, :], dtype=float)
def kmeans(dataset, k):
    """Cluster the rows of ``dataset`` into ``k`` groups with k-means.

    Starting centroids come from ``initCentroids``. Each pass assigns every
    sample to its nearest centroid (by ``caeuclDistance``) and then moves
    each centroid to the mean of its members; iteration stops once a pass
    changes no assignment.

    Args:
        dataset: Two-dimensional array or matrix, one sample per row.
        k: Number of clusters.

    Returns:
        A tuple ``(centroids, clusterAssment)``. ``centroids`` is the
        ``(k, dim)`` array of cluster centres. ``clusterAssment`` is a
        ``numpy.matrix`` with one row per sample: column 0 is the assigned
        cluster index and column 1 the squared distance to that centroid.
    """
    numSamples = dataset.shape[0]
    clusterAssment = np.asmatrix(np.zeros((numSamples, 2)))
    # -1 is never a valid label, so the first pass always registers as a
    # change, including for samples that end up in cluster 0.
    clusterAssment[:, 0] = -1
    centroids = initCentroids(dataset, k)
    clusterChange = True
    while clusterChange:
        clusterChange = False
        # Assignment step: nearest centroid for every sample.
        for i in range(numSamples):
            # inf rather than a fixed large number, so data on any scale
            # still finds a nearest centroid.
            minDist = np.inf
            clusterlabel = 0
            for j in range(k):
                distance = caeuclDistance(centroids[j, :], dataset[i, :])
                if distance < minDist:
                    minDist = distance
                    clusterlabel = j
            if clusterAssment[i, 0] != clusterlabel:
                clusterChange = True
            # Written on every pass: centroids move between passes, so the
            # stored distance would go stale if only updated on label change.
            clusterAssment[i, :] = clusterlabel, minDist ** 2
        # Update step: move each centroid to the mean of its members.
        for j in range(k):
            # ``.A`` turns the label column into a plain ndarray so the
            # comparison yields a boolean array; ``nonzero(...)[0]`` is then
            # the row indices of the samples assigned to cluster j.
            members = np.nonzero(clusterAssment[:, 0].A == j)[0]
            if members.size == 0:
                # Keep the previous centroid: the mean of no points is NaN.
                continue
            centroids[j, :] = np.mean(dataset[members], axis=0)
    print('cluster complete')
    return centroids, clusterAssment
def plot(dataSet, k, centroids, clusterAssment):
    """Draw a 2-D scatter plot of the clustered samples and their centroids.

    Args:
        dataSet: Two-dimensional array or matrix, one sample per row.
        k: Number of clusters.
        centroids: Indexable as ``centroids[i, 0]`` / ``centroids[i, 1]`` for
            each cluster ``i``.
        clusterAssment: Indexable as ``clusterAssment[i, 0]``, giving the
            cluster index of sample ``i``.

    Returns:
        ``1`` if nothing was drawn (data is not two-dimensional, or ``k``
        exceeds the number of available marker styles); ``None`` otherwise.
    """
    numSamples, dim = dataSet.shape
    if dim != 2:
        print('Sorry! I can not draw the pictrue bescause the dimension '
              'of your data is not 2!')
        return 1
    # Marker style per cluster for the samples.
    # NOTE(review): the last two entries were lost in the copy of this file
    # under review and are reconstructed here -- confirm against the original.
    mark = ['or', 'ob', 'og', 'ok', '^r', '+r', 'sr', 'dr', '<r', 'pr']
    if k > len(mark):
        print('Sorry! Your k is too large!')
        return 1
    for i in range(numSamples):
        markindex = int(clusterAssment[i, 0])
        plt.plot(dataSet[i, 0], dataSet[i, 1], mark[markindex])
    # Separate, larger marker styles for the centroids.
    # NOTE(review): last two entries reconstructed as above -- confirm.
    markc = ['Dr', 'Db', 'Dg', 'Dk', '^b', '+b', 'sb', 'db', '<b', 'pb']
    for i in range(k):
        # Use the centroid styles; this list was previously built but the
        # sample styles were used instead.
        plt.plot(centroids[i, 0], centroids[i, 1], markc[i], markersize=12)
    plt.show()
def main():
    """Run the demo: load the sample data, cluster it into 4 groups, plot."""
    print('load data....')
    # Build the path with os.path and anchor it at this script's directory:
    # the previous backslash literal only worked on Windows (and relied on an
    # invalid ``\d`` escape), and only when run from the script's directory.
    here = os.path.dirname(os.path.abspath(__file__))
    filepath = os.path.join(here, '..', 'data', 'data.txt')
    dataset = loadData(filepath)
    k = 4
    print('cluster data')
    centroids, clusterAssment = kmeans(dataset, k)
    print('show the result!')
    plot(dataset, k, centroids, clusterAssment)
# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2017-08-06 20:42  kmeans\
     目錄           0  2017-08-01 22:10  kmeans\.idea\
     文件           6  2017-08-01 22:10  kmeans\.idea\.name
     文件         164  2017-08-01 22:10  kmeans\.idea\encodings.xml
     文件         284  2017-08-01 22:10  kmeans\.idea\kmeans.iml
     文件        1137  2017-08-01 22:10  kmeans\.idea\misc.xml
     文件         264  2017-08-01 22:10  kmeans\.idea\modules.xml
     目錄           0  2017-08-01 22:10  kmeans\.idea\scopes\
     文件         143  2017-08-01 22:10  kmeans\.idea\scopes\scope_settings.xml
     文件         164  2017-08-01 22:10  kmeans\.idea\vcs.xml
     文件       25780  2017-08-01 22:10  kmeans\.idea\workspace.xml
     目錄           0  2017-08-06 20:54  kmeans\data\
     文件        1598  2017-08-06 20:55  kmeans\data\data.txt
     目錄           0  2017-08-10 15:18  kmeans\src\
     文件        2975  2017-08-10 15:17  kmeans\src\kmeans.py
     文件          30  2017-08-06 20:42  kmeans\src\__init__.py
評論
共有 條評論