資源簡介
仿照weka自帶的簡單K均值聚類算法,實現的一個簡單的模糊C均值聚類算法。
代碼片段和文件信息
/*
 * SimpleFuzzyCMeans.java
 *
 * 本算法是在weka自帶的SimpleKMeans算法的基礎上修改而成的,
 * 在Eclipse平臺下運行實現。
 *
 * 本算法保留了SimpleKMeans算法的基本構架,
 * 對其算法的核心代碼進行了修改,
 * 將核心代碼換為模糊C均值聚類算法。
 *
 * SimpleKMeans算法支持Euclidean和Manhattan距離,
 * 本算法出于簡便,只支持Euclidean距離。
 *
 * 算法關于數據的預處理以及程序結果的輸出采用SimpleKMeans算法的代碼,
 * 并進行了少量的修改(主要是參數項的設置)。
 *
 * 程序運行前先修改weka.gui包中的文件GenericObjectEditor.props,
 * 在其語句“# Lists the Clusterers I want to choose from”下
 * 加入語句“weka.clusterers.SimpleFuzzyCMeans,\”,然后保存。
 *
 * 運行weka.gui包中的文件GUIChooser.java,
 * 便可打開weka的圖形界面,然后選擇數據,
 * 在其聚類算法模塊下選擇SimpleFuzzyCMeans,
 * 對算法的參數進行適當的設置,
 * 即可運行模糊C均值聚類算法。
 *
 */
package?weka.clusterers;
import?weka.classifiers.rules.DecisionTableHashKey;
import?weka.core.Attribute;
import?weka.core.Capabilities;
import?weka.core.DistanceFunction;
import?weka.core.EuclideanDistance;
import?weka.core.Instance;
import?weka.core.Instances;
import?weka.core.Option;
import?weka.core.Utils;
import?weka.core.WeightedInstancesHandler;
import?weka.core.Capabilities.Capability;
import?weka.filters.Filter;
import?weka.filters.unsupervised.attribute.ReplaceMissingValues;
import?java.util.Enumeration;
import?java.util.HashMap;
import?java.util.Random;
import?java.util.Vector;
public?class?SimpleFuzzyCMeans?extends?RandomizableClusterer?implements
NumberOfClustersRequestable?WeightedInstancesHandler?{
/**
 * for serialization
 */
static?final?long?serialVersionUID?=?3235809600124455123L;
/**
 * replace missing values in training instances
 */
private?ReplaceMissingValues?m_ReplaceMissingFilter;
/**
 * number of clusters to generate
 */
private?int?m_NumClusters?=?2;
/**
 * 實例數據
 */
private?Instances?instances;
/**
 * holds the cluster centroids
 */
private?Instances?m_ClusterCentroids;
/**
 * Holds the standard deviations of the numeric attributes in each cluster
 */
private?Instances?m_ClusterStdDevs;
/**
 * For each cluster, holds the frequency counts for the values of each
 * nominal attribute
 */
private?int[][][]?m_ClusterNominalCounts;
private?int[][]?m_ClusterMissingCounts;
/**
 * Stats on the full data set for comparison purposes. If the attribute
 * is numeric, the value is the mean (as used with the Euclidean distance);
 * if the attribute is nominal, its mode is saved.
 */
private?double[]?m_FullMeansOrModes;
private?double[]?m_FullStdDevs;
private?int[][]?m_FullNominalCounts;
private?int[]?m_FullMissingCounts;
/**
 * Display standard deviations for numeric atts
 */
private?boolean?m_displayStdDevs?=?true;
/**
 * Replace missing values globally?
 */
private?boolean?m_dontReplaceMissing?=?false;
/**
 * The number of instances in each cluster
 */
private?int[]?m_ClusterSizes;
/**
 * 隸屬度矩陣
 */
private?double[][]?membershipMatrix;
/**
 * 模糊系數
 */
private?double?fuzzyCoefficient?=?(double)?2.0;
/**
 * 定義誤差精度
 */
評論
共有 條評論