Hadoop MapReduce程序的模板框架
生活随笔
收集整理的這篇文章主要介紹了
Hadoop MapReduce程序的模板框架
小編覺得挺不錯的，現在分享給大家，幫大家做個參考。
這里放了兩個Hadoop MapReduce程序的模板框架，包括一些基本的包import語句、Mapper基類、Reducer基類、map()方法、reduce()方法，后面還有一些作業job的驅動程序，具體說是配置作業名、配置Mapper類、Reducer類、Combiner類的類名等等。
寫MR程序時，程序員需要實現相應的map()函數和reduce()函數。
一、
/** MapReduce程序模板,一些必要的語句* 寫MR程序時,復制該文件,修改類名,實現相應的map、reduce函數等 */import java.io.IOException; import java.util.StringTokenizer; // 分隔字符串 import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; // 相當于int類型 import org.apache.hadoop.io.LongWritable; // 相當于long類型 import org.apache.hadoop.io.Text; // 相當于String類型 import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; public class HadoopMRTemplate extends Configured implements Tool{public static class MapTemplate extends Mapper<LongWritable, Text, Text, IntWritable> { // TODO: some preprocessing operations before map() functionpublic void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {// map函數中參數key是偏移量,value是每一行的內容// TODO: implements map() function} // map( )} // class MapTemplatepublic static class ReduceTemplate extends Reducer<Text, IntWritable, Text, IntWritable> { //實現reduce函數// TODO: some preprocessing operations before reduce() functionpublic void reduce(Text key, Iterable<IntWritable> values, Context context)throws IOException, InterruptedException {// TODO: implements reduce() function} // reduce( ) } // class ReduceTemplatepublic int run(String[] args) throws Exception {Job job = new Job(getConf()); job.setJobName("HadoopMRTemplate"); // 作業名job.setOutputKeyClass(Text.class); // 類名.class生成class對象job.setOutputValueClass(IntWritable.class);job.setMapperClass(MapTemplate.class); job.setReducerClass(ReduceTemplate.class); job.setInputFormatClass(TextInputFormat.class); 
job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.setInputPaths(job, new Path(args[0])); // 作業的輸入路徑FileOutputFormat.setOutputPath(job, new Path(args[1])); // 作業的輸出路徑return (job.waitForCompletion(true)? 0 : 1); } //run()public static void main(String[] args) throws Exception { // 調用ToolRunner.run( )int exitCode = ToolRunner.run(new HadoopMRTemplate(), args); System.exit(exitCode); } //main() } // class HadoopMRTemplate二、
/** MapReduce程序模板,一些必要的語句* 寫MR程序時,復制該文件,修改類名,實現相應的map、reduce函數等 */import java.io.IOException; import java.util.StringTokenizer; // 分隔字符串import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; // 相當于int類型 import org.apache.hadoop.io.Text; // 相當于String類型 import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser;public class HadoopMRTemplate2 {public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable>{// TODO: some preprocessing operations before map() functionpublic void map(Object key, Text value, Context context) throws IOException, InterruptedException {// map函數中參數key是偏移量,value是每一行的內容// TODO: implements map() function} //map()} // class TokenizerMapperpublic static class IntSumReducer extends Reducer<Text,IntWritable,Text,IntWritable> {// TODO: some preprocessing operations before map() functionpublic void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {// TODO: implements reduce() function} // reduce()} // class IntSumReducerpublic static void main(String[] args) throws Exception {Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();if (otherArgs.length != 2) {System.err.println("Usage: wordcount <in> <out>");System.exit(2);}Job job = new Job(conf, "HadoopMRTemplate2"); // 作業名job.setJarByClass(HadoopMRTemplate2.class);job.setMapperClass(TokenizerMapper.class);job.setCombinerClass(IntSumReducer.class);job.setReducerClass(IntSumReducer.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);FileInputFormat.addInputPath(job, new Path(otherArgs[0])); // 
作業的輸入路徑FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); // 作業的輸出路徑System.exit(job.waitForCompletion(true) ? 0 : 1);} //main() } // class HadoopMRTempalte2總結
以上是生活随笔為你收集整理的Hadoop MapReduce程序的模板框架的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Python变量和对象类型速记手册
- 下一篇: 怎样在 Markdown 中使程序代码带