資源簡介
Rodriguez A, Laio A. Clustering by fast search and find of density peaks[J]. Science, 2014, 344(6191): 1492-1496. 基于這篇文章實現的最基本的密度聚類算法——密度峰值聚類的 Python 代碼。

代碼片段和文件信息
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import sys
import math
import logging
import numpy as np

# Module-wide logger shared by every function in this file.
logger = logging.getLogger("dpc_cluster")
def load_paperdata(distance_f):
    '''
    Load pairwise distances from a text file.

    Args:
        distance_f : path of the distance file; every non-empty line holds
                     three whitespace-separated fields: index 1, index 2
                     and the distance between the two points
    Returns:
        distances dict (keyed by (id1, id2), stored in both directions),
        max distance, min distance, max continues id
    Raises:
        ValueError if a non-empty line does not hold exactly three fields
        or a field cannot be converted to int / float
    '''
    logger.info("PROGRESS: load data")
    distances = {}
    min_dis, max_dis = sys.float_info.max, 0.0
    max_id = 0
    with open(distance_f, 'r') as fp:
        for line in fp:
            fields = line.split()
            if not fields:
                # tolerate blank lines (e.g. a trailing newline at end of file)
                continue
            x1, x2, d = fields
            x1, x2 = int(x1), int(x2)
            max_id = max(max_id, x1, x2)
            dis = float(d)
            min_dis, max_dis = min(min_dis, dis), max(max_dis, dis)
            # the distance is symmetric, so store it for both orderings
            distances[(x1, x2)] = dis
            distances[(x2, x1)] = dis
    # NOTE(review): this fills the diagonal for ids 0 .. max_id - 1; if the
    # ids in the file are 1-based, (max_id, max_id) is never set -- confirm
    # the id base against the data files before changing the range.
    for i in range(max_id):
        distances[(i, i)] = 0.0
    logger.info("PROGRESS: load end")
    return distances, max_dis, min_dis, max_id
def select_dc(max_id, max_dis, min_dis, distances, auto=False, percent=2.0):
    '''
    Select the local density threshold. The default is the percentile
    method used in the paper; with auto=True the work is delegated to
    'autoselect_dc'.

    Args:
        max_id    : max continues id
        max_dis   : max distance for all points
        min_dis   : min distance for all points
        distances : distance dict
        auto      : use auto dc select or not
        percent   : percentile of the sorted pair distances taken as dc
                    (only used when auto is False)
    Returns:
        dc that local density threshold
    Raises:
        IndexError if distances is empty (and auto is False)
    '''
    logger.info("PROGRESS: select dc")
    if auto:
        return autoselect_dc(max_id, max_dis, min_dis, distances)
    position = int(max_id * (max_id + 1) / 2 * percent / 100)
    ordered = sorted(distances.values())
    # The index is doubled and shifted by max_id: this matches a dict that
    # holds every pair in both directions plus max_id zero-valued diagonal
    # entries, which sort to the front.
    # Clamp to the last element so small inputs do not run off the end.
    index = min(position * 2 + max_id, len(ordered) - 1)
    dc = ordered[index]
    logger.info("PROGRESS: dc - " + str(dc))
    return dc
def autoselect_dc(max_id, max_dis, min_dis, distances):
    '''
    Auto select the local density threshold so that the average number of
    neighbors is 1-2 percent of all nodes.

    Bisects the interval [min_dis, max_dis] until the fraction of stored
    distances below dc falls inside [0.01, 0.02] or the interval becomes
    narrower than 0.0001.

    Args:
        max_id    : max continues id
        max_dis   : max distance for all points
        min_dis   : min distance for all points
        distances : distance dict
    Returns:
        dc that local density threshold
    '''
    values = list(distances.values())
    # float() keeps the ratio fractional even under Python 2, where an
    # int / int division would truncate it to 0 on every iteration
    total = float(max_id) ** 2
    dc = (max_dis + min_dis) / 2

    while True:
        nneighs = sum(1 for v in values if v < dc) / total
        if 0.01 <= nneighs <= 0.02:
            break
        # binary search
        if nneighs < 0.01:
            # too few neighbors: dc must grow
            min_dis = dc
        else:
            # too many neighbors: dc must shrink
            max_dis = dc
        dc = (max_dis + min_dis) / 2
        if max_dis - min_dis < 0.0001:
            # interval exhausted; settle for the current midpoint
            break
    return dc
def?local_density(max_id?distances?dc?guass=True?cutoff=False):
????‘‘‘
????Compute?all?points‘?local?density
????Args:
????????????max_id????:?max?continues?id
????????????distances?:?distance?dict
????????????gauss?????:?use?guass?func?or?not(can‘t?use?together?with?cutoff)
????????????cutoff??
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2017-12-08 04:35  DensityPeakCluster-master\
     文件         552  2017-12-08 04:35  DensityPeakCluster-master\.gitignore
     文件        1072  2017-12-08 04:35  DensityPeakCluster-master\LICENSE
     文件        1053  2017-12-08 04:35  DensityPeakCluster-master\README.md
     文件        7765  2017-12-08 04:35  DensityPeakCluster-master\cluster.py
     目錄           0  2017-12-08 04:35  DensityPeakCluster-master\data\
     目錄           0  2017-12-08 04:35  DensityPeakCluster-master\data\data_in_paper\
     文件        4525  2017-12-08 04:35  DensityPeakCluster-master\data\data_in_paper\cluster_dp.m
     文件    29771107  2017-12-08 04:35  DensityPeakCluster-master\data\data_in_paper\example_distances.dat
     目錄           0  2017-12-08 04:35  DensityPeakCluster-master\data\data_iris_flower\
     文件        2399  2017-12-08 04:35  DensityPeakCluster-master\data\data_iris_flower\iris.data
     文件      174202  2017-12-08 04:35  DensityPeakCluster-master\data\data_iris_flower\iris.forcluster
     文件        2151  2017-12-08 04:35  DensityPeakCluster-master\data\data_iris_flower\iris.label
     目錄           0  2017-12-08 04:35  DensityPeakCluster-master\distance\
     文件         130  2017-12-08 04:35  DensityPeakCluster-master\distance\__init__.py
     文件        2946  2017-12-08 04:35  DensityPeakCluster-master\distance\distance.py
     文件        1358  2017-12-08 04:35  DensityPeakCluster-master\distance\distance_builder.py
     文件         444  2017-12-08 04:35  DensityPeakCluster-master\distance\distance_builder_data_iris_flower.py
     文件         319  2017-12-08 04:35  DensityPeakCluster-master\distance\error_wrongvec.py
     文件        2598  2017-12-08 04:35  DensityPeakCluster-master\plot.py
     文件        1712  2017-12-08 04:35  DensityPeakCluster-master\plot_utils.py
     文件         706  2017-12-08 04:35  DensityPeakCluster-master\step1_choose_center.py
     文件         933  2017-12-08 04:35  DensityPeakCluster-master\step2_cluster.py
評論
共有 條評論