使用 IntelliJ IDEA 在本地开发、调试 Hadoop 程序的方法

转载请注明出处： http://blog.csdn.net/programmer_wei/article/details/45286749

我使用的 IntelliJ IDEA 版本是 14，Hadoop 版本是 2.6，并以《Hadoop 权威指南》中的天气统计（最高气温）源码作为示例。

下面附上源码。数据集可以从 http://hadoopbook.com/code.html 下载，这里使用 1901 和 1902 两年的数据：

[java] MaxTemperatureMapper.java：
package com.hadoop.maxtemperature;  
  
import java.io.IOException;  
import org.apache.hadoop.io.IntWritable;  
import org.apache.hadoop.io.LongWritable;  
import org.apache.hadoop.io.Text;  
import org.apache.hadoop.mapreduce.Mapper;  
  
public class MaxTemperatureMapper  
        extends Mapper<LongWritable, Text, Text, IntWritable> {  //注1  
    private static final int MISSING = 9999;  
    @Override  
    public void map(LongWritable key, Text value, Context context)  
            throws IOException, InterruptedException {  
        String line = value.toString();  
        String year = line.substring(15, 19);  
        int airTemperature;  
        if (line.charAt(87) == '+') { // parseInt doesn't like leading plus signs  
            airTemperature = Integer.parseInt(line.substring(88, 92));  
        } else {  
            airTemperature = Integer.parseInt(line.substring(87, 92));  
        }  
        String quality = line.substring(92, 93);  
        if (airTemperature != MISSING && quality.matches("[01459]")) {  
            context.write(new Text(year), new IntWritable(airTemperature));  
        }  
    }  
}  

[java] MaxTemperatureReducer.java：
package com.hadoop.maxtemperature;  
  
  
import java.io.IOException;  
import org.apache.hadoop.io.IntWritable;  
import org.apache.hadoop.io.Text;  
import org.apache.hadoop.mapreduce.Reducer;  
  
public class MaxTemperatureReducer  
        extends Reducer<Text, IntWritable, Text, IntWritable> {  
    @Override  
    public void reduce(Text key, Iterable<IntWritable> values,  
                       Context context)  
            throws IOException, InterruptedException {  
        int maxValue = Integer.MIN_VALUE;  
        for (IntWritable value : values) {  
            maxValue = Math.max(maxValue, value.get());  
        }  
        context.write(key, new IntWritable(maxValue));  
    }  
}  

[java] MaxTemperature.java：
package com.hadoop.maxtemperature;  
  
  
import org.apache.hadoop.fs.Path;  
import org.apache.hadoop.io.IntWritable;  
import org.apache.hadoop.io.Text;  
import org.apache.hadoop.mapreduce.Job;  
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;  
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;  
  
public class MaxTemperature {  
    public static void main(String[] args) throws Exception {  
        if (args.length != 2) {  
            System.err.println("Usage: MaxTemperature <input path> <output path>");  
            System.exit(-1);  
        }  
        Job job = new Job();  
        job.setJarByClass(MaxTemperature.class);  
        job.setJobName("Max temperature");  
  
        FileInputFormat.addInputPath(job, new Path(args[0]));  
        FileOutputFormat.setOutputPath(job, new Path(args[1]));  
  
        job.setMapperClass(MaxTemperatureMapper.class);  
        job.setReducerClass(MaxTemperatureReducer.class);  
  
        job.setOutputKeyClass(Text.class);              //注1  
        job.setOutputValueClass(IntWritable.class);  
  
        System.exit(job.waitForCompletion(true) ? 0 : 1);  
    }  
}