資源簡介
用 MapReduce 實現 TF-IDF，Hadoop 版本是 2.7.7，參考某教程親自手寫的，可以運行，有問題可以留言。

代碼片段和文件信息
package com.sxt.hadoop.mr.tfidf;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public?class?FirstJob?{
static?{
????try?{
???? System.load(“C:\\Windows\\System32\\hadoop.dll“);
????}?catch?(UnsatisfiedlinkError?e)?{
??????System.err.println(“Native?code?library?failed?to?load.\n“?+?e);
??????System.exit(1);
????}
}
public?static?void?main(String[]?args)?{
Configuration?conf?=?new?Configuration();
conf.set(“mapreduce.app-submission.coress-paltform“?“true“);
conf.set(“mapreduce.framework.name“?“l(fā)ocal“);
try?{
FileSystem?fs?=?FileSystem.get(conf);
Job?job?=?Job.getInstance(conf);
job.setJarByClass(FirstJob.class);
job.setJobName(“weibo1“);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setNumReduceTasks(4);
job.setPartitionerClass(FirstPartition.class);
job.setMapperClass(FirstMapper.class);
job.setCombinerClass(FirstReduce.class);
job.setReducerClass(FirstReduce.class);
FileInputFormat.addInputPath(job?new?Path(“/data/weibo.txt“));
Path?path?=?new?Path(“/data/output/weibo1“);
if?(fs.exists(path))?{
fs.delete(path?true);
}
FileOutputFormat.setOutputPath(job?path);
boolean?f?=?job.waitForCompletion(true);
if?(f)?{
}
}?catch?(Exception?e)?{
e.printStackTrace();
}
}
}
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2019-05-18 01:12  tfidf\
     文件        1636  2019-04-30 11:04  tfidf\FirstJob.java
     文件        1471  2019-04-30 10:30  tfidf\FirstMapper.java
     文件         522  2019-03-30 19:57  tfidf\FirstPartition.java
     文件         777  2019-04-30 10:33  tfidf\FirstReduce.java
     文件        2245  2019-05-18 01:31  tfidf\LastJob.java
     文件        3704  2019-05-18 01:45  tfidf\LastMapper.java
     文件         582  2019-04-30 13:03  tfidf\LastReduce.java
     文件        1814  2019-05-18 01:31  tfidf\TwoJob.java
     文件        1269  2019-05-18 01:35  tfidf\TwoMapper.java
     文件         421  2019-05-18 01:17  tfidf\TwoPartition.java
     文件         535  2019-03-30 19:57  tfidf\TwoReduce.java
評論
共有 條評論