資源簡介
對應於前面的MahoutCanopy.jar文件的源代碼,可以參考來看;前面的那個是工具,這個是源碼。

代碼片段和文件信息
package?mahout.fansy.canopy;
import?java.io.IOException;
import?org.apache.hadoop.conf.Configuration;
import?org.apache.hadoop.fs.Path;
import?org.apache.hadoop.io.Text;
import?org.apache.hadoop.mapreduce.Job;
import?org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import?org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import?org.apache.hadoop.util.ToolRunner;
import?org.apache.mahout.clustering.Cluster;
import?org.apache.mahout.clustering.canopy.CanopyConfigKeys;
import?org.apache.mahout.clustering.iterator.ClusterWritable;
import?org.apache.mahout.common.AbstractJob;
import?org.apache.mahout.common.ClassUtils;
import?org.apache.mahout.common.HadoopUtil;
import?org.apache.mahout.common.commandline.DefaultOptionCreator;
import?org.apache.mahout.common.distance.DistanceMeasure;
import?org.apache.mahout.math.VectorWritable;
import?org.slf4j.Logger;
import?org.slf4j.LoggerFactory;
/**
 * 改編原mahout代碼,使輸入數據可以直接為文本
 * 主要改編的是Mapper的key和value的格式
 * @author Administrator
 *
 */
public?class?CanopyDriver?extends?AbstractJob{
/**?
?*?@param?args
?*/
private?static?final?Logger?log?=?LoggerFactory.getLogger(CanopyDriver.class);
public?static?void?main(String[]?args)?throws?Exception?{
ToolRunner.run(new?Configuration()?new?CanopyDriver()?args);
}
@Override
public?int?run(String[]?arg0)?throws?Exception?{
addInputOption();
????addOutputOption();
????addOption(DefaultOptionCreator.distanceMeasureOption().create());
????addOption(DefaultOptionCreator.t1Option().create());
????addOption(DefaultOptionCreator.t2Option().create());
????addOption(DefaultOptionCreator.t3Option().create());
????addOption(DefaultOptionCreator.t4Option().create());
????addOption(DefaultOptionCreator.clusterFilterOption().create());
????addOption(DefaultOptionCreator.overwriteOption().create());
????addOption(DefaultOptionCreator.clusteringOption().create());
????addOption(DefaultOptionCreator.methodOption().create());
????addOption(DefaultOptionCreator.outlierThresholdOption().create());
????if?(parseArguments(arg0)?==?null)?{
??????return?-1;
????}
????Path?input?=?getInputPath();
????Path?output?=?getOutputPath();
????Configuration?conf?=?getConf();
????if?(hasOption(DefaultOptionCreator.OVERWRITE_OPTION))?{
??????HadoopUtil.delete(conf?output);
????}
????String?measureClass?=?getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
????double?t1?=?Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
????double?t2?=?Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
????double?t3?=?t1;
????if?(hasOption(DefaultOptionCreator.T3_OPTION))?{
??????t3?=?Double.parseDouble(getOption(DefaultOptionCreator.T3_OPTION));
????}
????double?t4?=?t2;
????if?(hasOption(DefaultOptionCreator.T4_OPTION))?{
??????t4?=?Double.parseDouble(getOption(DefaultOptionCreator.T4_OPTION));
????}
????int?clusterFilter?=?0;
????if?(hasOption(Defaul
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件        6693  2013-07-23 16:48  CanopyDriver.java
     文件        2445  2013-07-23 17:21  CanopyMapper.java
     文件        2196  2013-07-23 17:21  CanopyReducer.java
評論
共有 條評論