-
大小: 330KB文件類型: .rar金幣: 2下載: 0 次發布日期: 2021-06-02
- 語言: Python
- 標簽:
資源簡介
使用的是python3版本,自己編寫的,能夠完美運行,只需要運行主程序就行,數據啥的都準備好了

代碼片段和文件信息
import random

import numpy as np
import pandas as pd
def getdata(data):
    """Load a CSV file and return its feature columns as a 2-D array.

    The file is read with ``pandas.read_csv`` and the last column (the
    label column) is removed, so only the features remain.  No sampling is
    done here.

    Args:
        data: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A 2-D ``numpy.ndarray`` containing every column except the last.
    """
    # NOTE(review): read_csv runs with its default header handling, so the
    # first line of the file is consumed as a header row -- confirm that the
    # data file really has one.
    frame = pd.read_csv(data)
    return np.delete(frame.values, -1, axis=1)
def sampledata(filename, k_sample, getdata=getdata):
    """Draw ``k_sample`` distinct random rows from a data file.

    Args:
        filename: Passed straight to ``getdata``.
        k_sample: Number of rows to draw, without replacement.
        getdata: Loader that returns the data set as a row-indexable
            sequence; defaults to the module's ``getdata``.

    Returns:
        A ``numpy.ndarray`` holding the sampled rows, in the order they
        were drawn.

    Raises:
        ValueError: Raised by ``random.sample`` when ``k_sample`` is
            negative or larger than the number of rows.
    """
    data = np.asarray(getdata(filename))
    # Sample row indices, then pick all rows at once with fancy indexing.
    rows = random.sample(range(len(data)), k_sample)
    return data[rows]
def dist(A, B):
    """Return the Euclidean distance between two vectors.

    Args:
        A: First vector (``numpy.ndarray`` or any array-like).
        B: Second vector, same shape as ``A``.

    Returns:
        The square root of the summed squared differences of ``A`` and ``B``.
    """
    diff = np.asarray(A) - np.asarray(B)
    return np.sqrt(np.sum(np.power(diff, 2)))
def randcent(data, k):
    """Pick the initial medoids at random.

    Args:
        data: The data set; only its length is used.
        k: Number of medoids to choose.

    Returns:
        A list of ``k`` distinct row indices into ``data``.

    Raises:
        ValueError: Raised by ``random.sample`` when ``k`` is negative or
            larger than ``len(data)``.
    """
    # The number of medoids comes from ``k``; it used to be hard-coded to 5,
    # which silently ignored the requested cluster count.
    return random.sample(range(len(data)), k)
????
????
# Memo of already-computed distances, keyed by (medoid_index, sample_index).
distances_cache = {}


def totalcost(data, medoids_idx):
    """Assign every sample to its nearest medoid and sum the distances.

    Distances are looked up in the module-level ``distances_cache`` and
    computed with ``dist`` only on a cache miss.

    NOTE: the cache is keyed by index pairs only, so it has to be cleared
    before this function is used with a different data set.

    Args:
        data: Indexable collection of sample vectors.
        medoids_idx: Iterable of indices into ``data`` used as medoids.

    Returns:
        A tuple ``(total_cost, medoids)``: ``total_cost`` is the sum of each
        sample's distance to its closest medoid, and ``medoids`` maps each
        medoid index to the list of sample indices assigned to it.
    """
    total_cost = 0.0
    medoids = {idx: [] for idx in medoids_idx}
    for i, sample in enumerate(data):
        choice = None
        min_cost = np.inf
        for m in medoids:
            tmp = distances_cache.get((m, i))
            if tmp is None:
                tmp = dist(data[m], sample)
                distances_cache[(m, i)] = tmp
            # Strict comparison: on a tie the earlier medoid keeps the sample.
            if tmp < min_cost:
                choice = m
                min_cost = tmp
        medoids[choice].append(i)
        total_cost += min_cost
    return (total_cost, medoids)
def PAM(data, k):
    """Cluster ``data`` with the k-medoids (PAM) swap heuristic.

    Starting from random medoids (``randcent``), each pass tries replacing
    every medoid with every other member of its cluster and remembers the
    cheapest configuration seen (cost from ``totalcost``).  The search stops
    after the first pass in which no swap lowers the cost.

    Args:
        data: Indexable collection of sample vectors.
        k: Number of clusters requested.

    Returns:
        A tuple ``(current_cost, best_choice, best_res)``: the lowest total
        cost found, the list of medoid indices that achieves it, and the
        mapping from each of those medoids to its member sample indices.
    """
    medoids_idx = randcent(data, k)
    pre_cost, medoids = totalcost(data, medoids_idx)

    # Seed the best-so-far with the initial configuration.  This guarantees a
    # valid result and lets the loop terminate even when no swap ever helps
    # (e.g. every sample is already a medoid).
    current_cost = pre_cost
    best_choice = list(medoids_idx)
    best_res = medoids
    iter_count = 0
    while True:
        improved = False
        for m in medoids:
            for item in medoids[m]:
                if item == m:
                    continue
                # Temporarily swap medoid ``m`` for ``item`` and evaluate.
                idx = medoids_idx.index(m)
                medoids_idx[idx] = item
                tmp, medoids_ = totalcost(data, medoids_idx)
                if tmp < current_cost:
                    best_choice = list(medoids_idx)
                    best_res = dict(medoids_)
                    current_cost = tmp
                    improved = True
                # Undo the trial swap before testing the next candidate.
                medoids_idx[idx] = m
        iter_count += 1
        print('while循環次數: ', iter_count)

        if not improved:
            break
        # Continue the search from the best configuration found so far; copy
        # the list so trial swaps never touch ``best_choice``.
        medoids = best_res
        medoids_idx = list(best_choice)
    return (current_cost, best_choice, best_res)
def?main():
????n=?int(input(‘對原數據的采樣數目n=:?‘))
????k?=?int(input(‘聚類數目K=?:‘))
????data?=?sampledata(‘waveform+noise.data‘n)
????totalcostclasslabelclasses?=a?bc?=PAM(datak)
????print(‘總距離是:?‘totalcost)
????print(‘
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件       3404  2017-10-25 15:58  K中心data\PAM.py
     文件    1077986  2017-10-24 21:33  K中心data\waveform+noise.data
     目錄          0  2018-03-13 14:51  K中心data
----------- ---------  ---------- -----  ----
              1081390                    3
- 上一篇:python實現SVM
- 下一篇:基于Python的計算機網絡實驗設計
評論
共有 條評論