Hadoop MapReduce程序的模板框架
生活随笔
收集整理的這篇文章主要介紹了
Hadoop MapReduce程序的模板框架
小編覺得挺不錯的，現在分享給大家，幫大家做個參考。
這里放了兩個Hadoop MapReduce程序的模板框架，包括一些基本的包import語句、Mapper基類、Reducer基類、map()方法、reduce()方法，后面還有一些作業job的驅動程序，具體說是配置作業名、配置Mapper類、Reducer類、Combiner類的類名等等。
寫MR程序時，程序員需要實現相應的map()函數和reduce()函數。
一、
/** MapReduce程序模板,一些必要的語句* 寫MR程序時,復制該文件,修改類名,實現相應的map、reduce函數等 */import java.io.IOException; import java.util.StringTokenizer; // 分隔字符串 import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; // 相當于int類型 import org.apache.hadoop.io.LongWritable; // 相當于long類型 import org.apache.hadoop.io.Text; // 相當于String類型 import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; public class HadoopMRTemplate extends Configured implements Tool{public static class MapTemplate extends Mapper<LongWritable, Text, Text, IntWritable> { // TODO: some preprocessing operations before map() functionpublic void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {// map函數中參數key是偏移量,value是每一行的內容// TODO: implements map() function} // map( )} // class MapTemplatepublic static class ReduceTemplate extends Reducer<Text, IntWritable, Text, IntWritable> { //實現reduce函數// TODO: some preprocessing operations before reduce() functionpublic void reduce(Text key, Iterable<IntWritable> values, Context context)throws IOException, InterruptedException {// TODO: implements reduce() function} // reduce( ) } // class ReduceTemplatepublic int run(String[] args) throws Exception {Job job = new Job(getConf()); job.setJobName("HadoopMRTemplate"); // 作業名job.setOutputKeyClass(Text.class); // 類名.class生成class對象job.setOutputValueClass(IntWritable.class);job.setMapperClass(MapTemplate.class); job.setReducerClass(ReduceTemplate.class); job.setInputFormatClass(TextInputFormat.class); 
job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.setInputPaths(job, new Path(args[0])); // 作業的輸入路徑FileOutputFormat.setOutputPath(job, new Path(args[1])); // 作業的輸出路徑return (job.waitForCompletion(true)? 0 : 1); } //run()public static void main(String[] args) throws Exception { // 調用ToolRunner.run( )int exitCode = ToolRunner.run(new HadoopMRTemplate(), args); System.exit(exitCode); } //main() } // class HadoopMRTemplate二、
/** MapReduce程序模板,一些必要的語句* 寫MR程序時,復制該文件,修改類名,實現相應的map、reduce函數等 */import java.io.IOException; import java.util.StringTokenizer; // 分隔字符串import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; // 相當于int類型 import org.apache.hadoop.io.Text; // 相當于String類型 import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser;public class HadoopMRTemplate2 {public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable>{// TODO: some preprocessing operations before map() functionpublic void map(Object key, Text value, Context context) throws IOException, InterruptedException {// map函數中參數key是偏移量,value是每一行的內容// TODO: implements map() function} //map()} // class TokenizerMapperpublic static class IntSumReducer extends Reducer<Text,IntWritable,Text,IntWritable> {// TODO: some preprocessing operations before map() functionpublic void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {// TODO: implements reduce() function} // reduce()} // class IntSumReducerpublic static void main(String[] args) throws Exception {Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();if (otherArgs.length != 2) {System.err.println("Usage: wordcount <in> <out>");System.exit(2);}Job job = new Job(conf, "HadoopMRTemplate2"); // 作業名job.setJarByClass(HadoopMRTemplate2.class);job.setMapperClass(TokenizerMapper.class);job.setCombinerClass(IntSumReducer.class);job.setReducerClass(IntSumReducer.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);FileInputFormat.addInputPath(job, new Path(otherArgs[0])); // 
作業的輸入路徑FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); // 作業的輸出路徑System.exit(job.waitForCompletion(true) ? 0 : 1);} //main() } // class HadoopMRTempalte2總結
以上是生活随笔為你收集整理的Hadoop MapReduce程序的模板框架的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Python变量和对象类型速记手册
- 下一篇: 怎样在 Markdown 中使程序代码带