資源簡介
在 Hadoop 集群中,使用 MapReduce 分布式計算 TF-IDF。
代碼片段和文件信息
package?eb.cloud.mapreduce.MR.guoruonan;
import?org.apache.hadoop.conf.Configuration;
import?org.apache.hadoop.fs.FileStatus;
import?org.apache.hadoop.fs.FileSystem;
import?org.apache.hadoop.fs.Path;
import?org.apache.hadoop.io.LongWritable;
import?org.apache.hadoop.io.Text;
import?org.apache.hadoop.mapreduce.Job;
import?org.apache.hadoop.mapreduce.Mapper;
import?org.apache.hadoop.mapreduce.Reducer;
import?org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import?org.apache.hadoop.mapreduce.lib.input.FileSplit;
import?org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import?org.apache.hadoop.util.GenericOptionsParser;
import?java.io.IOException;
import?java.util.ArrayList;
import?java.util.Collections;
import?java.util.Comparator;
public?class?Tfidf?{
????public?static?class?Mapper0?extends?Mapper?{
????????String?filename;
????????public?void?map(LongWritable?key?Text?value?Context?context)
????????????????throws?IOException?InterruptedException?{
????????????FileSplit?split?=?(FileSplit)?context.getInputSplit();
????????????filename?=?split.getPath().getName();
????????????String?newString?=?value.toString().toLowerCase();
????????????String?results[]?=?newString.split(“[^a-zA-Z]“);
????????????int?flag?=?0;
????????????for?(String?val?:?results)?{
????????????????if?(val.equals(““))
????????????????????continue;
????????????????context.write(new?Text(filename)?new?Text(val));
????????????}
????????}
????}
????public?static?class?Reducer0?extends?Reducer?{
????????public?void?reduce(Text?key?Iterable?values?Context?context)
????????????????throws?IOException?InterruptedException?{
????????????ArrayList?array?=?new?ArrayList();
????????????for?(Text?t?:?values)?{
????????????????array.add(t.toString());
????????????}
????????????for?(String?str?:?array)?{
????????????????context.write(new?Text(key.toString()?+?“?“+str)?new?Text(““
????????????????????????+?array.size()));
????????????????//?
????????????}
????????}
????}
????public?static?class?Mapper1?extends?Mapper?{
????????public?void?map(LongWritable?key?Text?value?Context?context)
????????????????throws?IOException?InterruptedException?{
????????????String?line?=?value.toString();
????????????int?index?=?line.indexOf(“ “);
????????????context.write(new?Text(line.substring(0?index))
????????????????????new?Text(line.substring(index?+?1)));
????????}
????}
????public?static?class?Reducer1?extends?Reducer?{
????????public?void?reduce(Text?key?Iterable?values?Context?context)
????????????????throws?IOException?InterruptedException?{
????????????ArrayList?array?=?new?ArrayList();
????????????int?ciNum?=?1;
????????????for?(Text?val?:?values)?{
????????????????array.add(val.toString());
????????????????ciNum?=
- 上一篇:java漢諾塔動畫實現
- 下一篇:jsr173_1.0_api.jar
評論
共有 條評論