資源簡介
要求是對鳶尾花數據集(IRIS)做聚類分析。課上講了幾種算法:系統聚類、分解聚類、C 均值及其改進、ISODATA,這裡分享一下自己的思路與代碼。不過提到聚類,首先想到的往往是 k 均值、mean shift 這些算法,可能課堂上講的都是些最經典、最基礎的方法吧。
代碼片段和文件信息
import csv
import itertools

import numpy as np
# Path of the CSV file holding the iris samples (read by the script entry point).
dataname = 'iris.csv'
# Read the data and build a list of feature rows: [[...], [...], ...]
def loadIRISdata(filename):
    """Load the numeric feature columns of a CSV file.

    The first row of the file is skipped (presumably a header -- confirm
    against the actual ``iris.csv``).  For every following non-empty row,
    columns 1 to 4 are converted to ``float``; column 0 and any column after
    the fourth feature are ignored.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one ``[float, float, float, float]`` entry per data row,
        in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a feature value cannot be parsed as a float.
    """
    # newline='' is what the csv module asks for when handing it a file object.
    with open(filename, 'r', newline='') as f:
        rows = list(csv.reader(f))
    # csv.reader yields [] for blank lines; skip them instead of failing.
    return [list(map(float, row[1:5])) for row in rows[1:] if row]
# Euclidean distance between two vectors
def eularDist(a, b):
    """Return the Euclidean distance between two equally long vectors.

    Args:
        a: First vector (any sequence NumPy can convert to an array).
        b: Second vector, same length as ``a``.

    Returns:
        The square root of the summed squared element-wise differences,
        as a NumPy float.
    """
    diff = np.asarray(a) - np.asarray(b)
    return np.sqrt(np.sum(diff ** 2))
# Compute the new center of one cluster
def centerGet(data, k, assignments):
    """Return the mean of the samples currently assigned to cluster ``k``.

    Args:
        data: Sequence of samples, each a sequence of numbers.
        k: Cluster label whose center is wanted.
        assignments: Sequence of cluster labels, ``assignments[i]`` being the
            label of ``data[i]``.

    Returns:
        A NumPy array holding the element-wise mean of the member samples.

    Raises:
        ValueError: If no sample is assigned to cluster ``k`` (the mean of an
            empty cluster is undefined).
    """
    members = [sample for sample, label in zip(data, assignments) if label == k]
    if not members:
        raise ValueError('cluster %r has no samples assigned to it' % (k,))
    # Sum along axis 0 adds the samples component by component.
    return np.sum(members, axis=0) / len(members)
def c_means_clustering(data, assignments, c0, c1, c2):
    """Cluster ``data`` into three groups with the C-means (k-means) iteration.

    Each pass assigns every sample to the nearest of the three centers
    (Euclidean distance, ties going to the lowest label) and then moves each
    center to the mean of its members.  The loop stops as soon as a pass
    leaves all three centers exactly unchanged.

    Args:
        data: Sequence of samples, each a sequence of numbers of equal length.
        assignments: Mutable sequence with one slot per sample; it is
            overwritten in place with the labels 0, 1 or 2.
        c0: Initial center of cluster 0.
        c1: Initial center of cluster 1.
        c2: Initial center of cluster 2.

    Returns:
        The same ``assignments`` object, holding the final labels.
    """
    samples = np.asarray(data, dtype=float)
    if samples.size == 0:
        # Nothing to cluster; leave the assignments untouched.
        return assignments

    centers = np.asarray([c0, c1, c2], dtype=float)
    count = 0
    while True:
        # Distance of every sample to every center, shape (n_samples, 3).
        diffs = samples[:, np.newaxis, :] - centers[np.newaxis, :, :]
        labels = np.sqrt((diffs ** 2).sum(axis=2)).argmin(axis=1)
        for i, label in enumerate(labels):
            assignments[i] = int(label)

        new_centers = centers.copy()
        for k in range(len(centers)):
            members = samples[labels == k]
            # An empty cluster has no mean; keep its previous center.
            if len(members):
                new_centers[k] = members.sum(axis=0) / len(members)

        if np.array_equal(centers, new_centers):
            return assignments
        centers = new_centers
        count += 1
        print('這是第', count, '次循環')
# Compute the clustering accuracy
def acc(result, class_size=50):
    """Count how many samples were clustered consistently with their class.

    The true class of a sample is taken from its position: the first
    ``class_size`` entries belong to class 0, the next ``class_size`` to
    class 1 and the following ``class_size`` to class 2.  Entries beyond
    ``3 * class_size`` are ignored.

    Cluster labels produced by a clustering run are arbitrary, so the count
    is taken under the one-to-one mapping between the three classes and the
    labels 0, 1, 2 that yields the most matches.

    Args:
        result: List of ``(label, sample)`` pairs; only ``entry[0]`` is read.
        class_size: Number of consecutive samples per true class.

    Returns:
        A ``(correct, total)`` tuple: the number of matching samples and the
        number of samples that were scored.

    Raises:
        ValueError: If ``class_size`` is not positive.
    """
    if class_size <= 0:
        raise ValueError('class_size must be positive')
    n_classes = 3
    scored = result[:n_classes * class_size]
    total = len(scored)
    predicted = [entry[0] for entry in scored]
    truth = [i // class_size for i in range(total)]

    correct = max(
        sum(1 for t, p in zip(truth, predicted) if mapping[t] == p)
        for mapping in itertools.permutations(range(n_classes))
    )
    print('正確聚類的結果數量:', correct, '總數:', total)
    return correct, total
if __name__ == "__main__":
    data = loadIRISdata(dataname)
    # Start with every sample assigned to cluster 0.
    assignments = [0] * len(data)

    # Initial cluster centers.  Alternatives that were tried earlier:
    #   c0, c1, c2 = [5.0, 4.0, 1.5, 0.3], [6.0, 3.0, 4.5, 1.5], [6.0, 3.0, 5.5, 2.0]
    #   c0, c1, c2 = data[0], data[1], data[2]
    c0 = data[60]
    c1 = data[70]
    c2 = data[80]

    answer = c_means_clustering(data, assignments, c0, c1, c2)
    result = list(zip(answer, data))

    # List the sample indices of each cluster, breaking the line after
    # every 21 printed indices.
    for i in range(3):
        tag = 0
        print('\n')
        print("第%d類數據有:" % (i + 1))
        for index, (label, _sample) in enumerate(result):
            if label == i:
                print(index, end=' ')
                tag += 1
            if tag > 20:
                print('\n')
                tag = 0
    print('\n')

    correct, total = acc(result)
    # NOTE(review): the label below says "decomposition clustering" although
    # this script runs C-means -- confirm whether the wording should change.
    print('分解聚類法準確度為:%.2f%%' % ((correct / total) * 100))
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2020-06-12 16:53  iris_cluster\
     目錄           0  2020-07-02 00:15  iris_cluster\.idea\
     目錄           0  2020-05-27 11:02  iris_cluster\.idea\inspectionProfiles\
     文件         174  2020-05-27 11:02  iris_cluster\.idea\inspectionProfiles\profiles_settings.xml
     文件         361  2020-05-27 11:02  iris_cluster\.idea\iris_cluster.iml
     文件         203  2020-05-27 11:02  iris_cluster\.idea\misc.xml
     文件         283  2020-05-27 11:02  iris_cluster\.idea\modules.xml
     文件       10770  2020-07-02 00:15  iris_cluster\.idea\workspace.xml
     文件        3020  2020-05-31 12:55  iris_cluster\cmeans_cluster.py
     文件        4946  2020-06-01 22:26  iris_cluster\cmeans_cluster_2.py
     文件        4334  2020-05-27 17:12  iris_cluster\fenjie_cluster.py
     文件        5912  2020-06-01 20:19  iris_cluster\iosdata_cluster.py
     文件        4972  2020-05-27 10:41  iris_cluster\iris.csv
     文件        2702  2020-05-27 11:34  iris_cluster\iris.txt
     文件          38  2020-06-12 16:53  iris_cluster\test.py
     目錄           0  2020-05-27 11:00  iris_cluster\venv\
     目錄           0  2020-07-02 10:01  iris_cluster\venv\Include\
     目錄           0  2020-05-27 11:00  iris_cluster\venv\Lib\
     目錄           0  2020-05-29 20:43  iris_cluster\venv\Lib\site-packages\
     目錄           0  2020-05-29 20:33  iris_cluster\venv\Lib\site-packages\PyYAML-5.3.1.dist-info\
     文件           4  2020-05-29 20:33  iris_cluster\venv\Lib\site-packages\PyYAML-5.3.1.dist-info\INSTALLER
     文件        1101  2020-05-29 20:33  iris_cluster\venv\Lib\site-packages\PyYAML-5.3.1.dist-info\LICENSE
     文件        1690  2020-05-29 20:33  iris_cluster\venv\Lib\site-packages\PyYAML-5.3.1.dist-info\METADATA
     文件        3372  2020-05-29 20:33  iris_cluster\venv\Lib\site-packages\PyYAML-5.3.1.dist-info\RECORD
     文件         106  2020-05-29 20:33  iris_cluster\venv\Lib\site-packages\PyYAML-5.3.1.dist-info\WHEEL
     目錄           0  2020-05-29 20:33  iris_cluster\venv\Lib\site-packages\__pycache__\
     文件       20763  2020-05-29 20:33  iris_cluster\venv\Lib\site-packages\__pycache__\appdirs.cpython-37.pyc
     文件       13303  2020-05-29 20:33  iris_cluster\venv\Lib\site-packages\__pycache__\cfgv.cpython-37.pyc
     文件        9556  2020-05-29 20:33  iris_cluster\venv\Lib\site-packages\__pycache__\filelock.cpython-37.pyc
     文件       37345  2020-05-29 20:33  iris_cluster\venv\Lib\site-packages\__pycache__\nodeenv.cpython-37.pyc
     文件      242445  2020-05-29 20:33  iris_cluster\venv\Lib\site-packages\__pycache__\pyparsing.cpython-37.pyc
............此處省略3198個文件信息
評論
共有 條評論