資源簡介
包含兩種平臺上運行的kmeans算法:一種是在Hadoop系統上的并行化kmeans算法,支持讀文件,執行聚類算法,輸出質心文件,將每個數據的聚類信息輸出到控制臺上;另一種是串行的聚類算法,支持讀文件數據,執行kmeans算法,將每個數據的聚類信息輸出到文件中。代碼注釋清晰。

代碼片段和文件信息
package?com.kmeans;
import?java.io.IOException;
import?org.apache.hadoop.conf.Configuration;
import?org.apache.hadoop.fs.FSDataInputStream;
import?org.apache.hadoop.fs.FileStatus;
import?org.apache.hadoop.fs.FileSystem;
import?org.apache.hadoop.fs.Path;
import?org.apache.hadoop.io.DoubleWritable;
import?org.apache.hadoop.io.LongWritable;
import?org.apache.hadoop.io.NullWritable;
import?org.apache.hadoop.io.Text;
import?org.apache.hadoop.io.WritableComparable;
import?org.apache.hadoop.io.WritableComparator;
import?org.apache.hadoop.mapreduce.Counter;
import?org.apache.hadoop.mapreduce.Job;
import?org.apache.hadoop.mapreduce.Mapper;
import?org.apache.hadoop.mapreduce.Reducer;
import?org.apache.hadoop.mapreduce.Reducer.Context;
import?org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import?org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import?org.apache.hadoop.util.LineReader;
class?Center{
protected?static?int?k?=?3; //質心的個數
protected?static?int?dimension?=?2;?//數據的維度
//從初始的質心文件中加載質心,并返回質心文件字符串,質心之間用tab分割
public?String?loadInitCenter(Path?path)?throws?IOException?{
StringBuffer?sb?=?new?StringBuffer();
Configuration?conf?=?new?Configuration();
FileSystem?hdfs?=?FileSystem.get(conf);
FSDataInputStream?dis?=?hdfs.open(path);
LineReader?in?=?new?LineReader(dis?conf);
Text?line?=?new?Text();
while(in.readLine(line)?>?0)?{
sb.append(line.toString().trim());//trim():去掉字符串兩端多余的空格
sb.append(“\t“);
}
return?sb.toString().trim();
}
//從每次迭代的質心文件里讀取質心,并返回字符串
public?String?loadCenter(Path?path)?throws?IOException?{
StringBuffer?sb?=?new?StringBuffer();
Configuration?conf?=?new?Configuration();
FileSystem?hdfs?=?FileSystem.get(conf);
//獲取文件列表
FileStatus[]?files?=?hdfs.listStatus(path);
for(int?i?=?0;?i? Path?filePath?=?files[i].getPath();
if(!filePath.getName().contains(“part“))?continue;
FSDataInputStream?dis?=?hdfs.open(filePath);
LineReader?in?=?new?LineReader(dis?conf);
Text?line?=?new?Text();
while(in.readLine(line)?>?0)?{
sb.append(line.toString().trim());
sb.append(“\t“);
}
}
return?sb.toString().trim();
}
}
public?class?Kmeans?{
private?static?String?FLAG?=?“a“;//用于存聚類中心信息
//計算兩個向量之間的?歐式距離
public?static?double?distance(double[]?a?double[]?b)?{
if(a?==?null?||?b?==?null?||?a.length?!=?b.length)?return?Double.MAX_VALUE;
double?d?=?0;
for(int?i?=?0;?i? d?+=?Math.pow(a[i]?-?b[i]?2);
}
return?Math.sqrt(d);
}
public?static?class?mapper?extends?Mapperject?Text?Text?Text>{
double[][]?centers?=?new?double[Center.k][];//存儲每個簇中心的信息
String[]?centerstrArray?=?null;//用于存儲聚類中心的字符串連接信息
public?void?setup(Context?context)?{
//將放在context中的聚類中心轉換為數組的形式,方便使用
String?kmeansS?=?context.getConfiguration().get(FLAG);
centerstrArray?=?kmeansS.split(“\t“);
for(int?i?=?0;?i? String[]?segs?=?centerstrArray[i].split(““);
centers[i]?=?new?double[segs
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2019-05-27 02:44  hadoop并行化和非并行化的kmeans算法\
     目錄           0  2019-05-27 02:44  hadoop并行化和非并行化的kmeans算法\并行化kmeans算法\
     文件        7808  2019-05-26 11:49  hadoop并行化和非并行化的kmeans算法\并行化kmeans算法\Kmeans.java
     目錄           0  2019-05-27 02:44  hadoop并行化和非并行化的kmeans算法\非并行化kmeans算法\
     文件        4515  2019-05-26 12:27  hadoop并行化和非并行化的kmeans算法\非并行化kmeans算法\Kmeans.java
- 上一篇:opencv_3rdparty中所有ffmpeg庫
- 下一篇:職工工資管理系統
評論
共有 條評論