【发布时间】:2020-05-20 01:38:35
【问题描述】:
我试图使用自定义类 DateIncome 的对象作为 Mapper 的输出值,并在 Reducer 中使用它们。但是,如果我删除 DateIncome 类的默认(无参)构造函数,reduce() 方法就不会被调用,应用程序也会直接终止。我的代码如下:
Driver(驱动类):
package it.polito.bigdata.hadoop.lab;
import com.sun.xml.internal.ws.policy.privateutil.PolicyUtils;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.File;
/**
 * Driver of the MapReduce program: configures the job, submits it and waits for it to finish.
 *
 * <p>Input is read from {@code input/} through {@link KeyValueTextInputFormat} and the result is
 * written as text to {@code output}. The mapper emits {@code (Text, DateIncome)} pairs and the
 * reducer emits {@code (Text, FloatWritable)} pairs.
 */
public class DriverBigData extends Configured implements Tool {

    /**
     * Configures and runs the job.
     *
     * @param args command-line arguments; not used, the input and output paths are fixed
     * @return {@code 0} when the job completes successfully, {@code 1} otherwise
     * @throws Exception if a stale output directory cannot be removed or the job cannot be run
     */
    @Override
    public int run(String[] args) throws Exception {
        Path inputPath = new Path("input/");
        Path outputPath = new Path("output");
        int numberOfReducer = 1;

        // The job refuses to start when the output directory already exists, so remove the
        // result of a previous run. forceDelete fails on a missing path, hence the guard:
        // without it the very first run (no output directory yet) would abort here.
        File staleOutput = new File("output/");
        if (staleOutput.exists()) {
            FileUtils.forceDelete(staleOutput);
        }

        Configuration configuration = this.getConf();
        Job job = Job.getInstance(configuration);
        job.setJobName("myJob");
        job.setJarByClass(DriverBigData.class);

        // Input / output locations and formats.
        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // Map phase: (Text, Text) -> (Text, DateIncome).
        job.setMapperClass(MapperBigData.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DateIncome.class);

        // Reduce phase: (Text, DateIncome) -> (Text, FloatWritable).
        job.setReducerClass(ReducerBigData.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FloatWritable.class);
        job.setNumReduceTasks(numberOfReducer);

        // Execute the job and wait for completion.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Main of the driver.
     *
     * @param args command-line arguments, forwarded to {@link ToolRunner}
     * @throws Exception if the job cannot be run
     */
    public static void main(String[] args) throws Exception {
        // Exploit the ToolRunner class to "configure" and run the Hadoop application
        int res = ToolRunner.run(new Configuration(), new DriverBigData(), args);
        System.exit(res);
    }
}
映射器:
package it.polito.bigdata.hadoop.lab;
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import javax.swing.plaf.synth.ColorType;
/**
* Lab - Mapper
*/
/* Set the proper data types for the (key,value) pairs */
class MapperBigData extends Mapper<
Text, // Input key type
Text, // Input value type
Text, // Output key type
DateIncome> {// Output value type
protected void map(
Text key, // Input key type
Text value, // Input value type
Context context) throws IOException, InterruptedException {
try {
DateIncome income = new DateIncome(key.toString(),Float.parseFloat(value.toString()));
context.write(key, income);
}catch (Exception e){
System.err.println(e.toString());
}
}
}
Reducer:
package it.polito.bigdata.hadoop.lab;
import java.io.IOException;
import java.util.*;
import com.google.common.collect.Multimap;
import javafx.util.Pair;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Lab - Reducer.
 *
 * <p>Tracks the highest income seen across all {@code reduce} calls of this reducer instance
 * and emits a single {@code (date, income)} pair for it from {@code cleanup}. When several
 * records share the highest income, the one encountered last wins.
 */
class ReducerBigData extends Reducer<
        Text, // Input key type
        DateIncome, // Input value type
        Text, // Output key type
        FloatWritable> { // Output value type

    /** Highest income seen so far; only meaningful once {@link #hasRecord} is true. */
    float maxIncome = 0;

    /** Date belonging to {@link #maxIncome}. */
    String maxDAte = "";

    /** Whether at least one record has been processed by this reducer instance. */
    private boolean hasRecord = false;

    /**
     * Updates the running maximum with the values of one key.
     *
     * @param key input key (not used for the comparison)
     * @param values records grouped under {@code key}
     * @param context job context; nothing is written here, output happens in {@code cleanup}
     * @throws IOException declared by the overridden method
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    protected void reduce(Text key, Iterable<DateIncome> values, Context context)
            throws IOException, InterruptedException {
        System.out.println("reducer");
        for (DateIncome dateIncome : values) {
            System.out.println(dateIncome.getDate() + " " + dateIncome.getIncome());
            float income = dateIncome.getIncome();
            // The first record always becomes the maximum, so negative incomes are handled
            // correctly instead of losing against the initial 0.
            if (!hasRecord || maxIncome <= income) {
                maxIncome = income;
                maxDAte = dateIncome.getDate();
                hasRecord = true;
            }
        }
    }

    /**
     * Emits the maximum found by this reducer instance, if any record was seen.
     *
     * @param context job context the result is written to
     * @throws IOException if writing the result fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        super.cleanup(context);
        // Without any input there is no maximum; do not emit a placeholder ("", 0.0) row.
        if (hasRecord) {
            context.write(new Text(maxDAte), new FloatWritable(maxIncome));
        }
    }
}
DateIncome:
package it.polito.bigdata.hadoop.lab;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Hadoop {@link Writable} value holding a date string and the income for that date.
 *
 * <p>Wire format: the income as a float followed by the date in modified UTF-8
 * ({@link DataOutput#writeUTF(String)}).
 */
public class DateIncome implements Writable {
    private String date;
    private float income;

    /**
     * Creates an empty instance.
     *
     * <p>This constructor must stay: Hadoop creates {@code Writable} instances reflectively
     * through the no-argument constructor and then fills them via {@link #readFields}, so
     * deserialization on the reduce side fails without it.
     */
    public DateIncome() {
    }

    /**
     * Creates an instance with the given content.
     *
     * @param date date string
     * @param income income for that date
     */
    public DateIncome(String date, float income) {
        this.date = date;
        this.income = income;
    }

    /** @return the date string; may be {@code null} if it was never set */
    public String getDate() {
        return date;
    }

    /** @param dateValue the new date string */
    public void setDate(String dateValue) {
        date = dateValue;
    }

    /** @return the income */
    public float getIncome() {
        return income;
    }

    /** @param incomeValue the new income */
    public void setIncome(float incomeValue) {
        income = incomeValue;
    }

    /**
     * Restores the fields from {@code in}, in the order {@link #write} produced them.
     *
     * @param in source to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        income = in.readFloat();
        date = in.readUTF();
    }

    /**
     * Serializes the fields to {@code out}: income first, then the date.
     *
     * @param out sink to write to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeFloat(income);
        // writeUTF rejects null; an unset date is serialized as the empty string so that an
        // instance created with the no-argument constructor can still be written.
        out.writeUTF(date == null ? "" : date);
    }

    /** @return a text form {@code "date:<date> income:<income>"} */
    @Override
    public String toString() {
        return "date:" + date + " income:" + income;
    }
}
输入.txt:
2015-11-01 1000
2015-11-02 1305
2015-12-01 500
2015-12-02 750
2016-01-01 345
2016-01-02 1145
2016-02-03 200
2016-02-04 500
输出:
2015-11-02 1305.0
所以,我的问题是,如果我删除 DateIncome 类的默认构造函数,则不会调用 reducer 的 reduce() 方法。为什么 Hadoop 需要默认构造函数,尽管提供了另一个构造函数?
【问题讨论】: