【发布时间】:2015-07-18 20:30:59
【问题描述】:
我正在尝试在 EMR 上运行以下代码,但它给出了上述异常。有谁知道可能出了什么问题?我正在使用 avro-tools-1.7.7 来编译我的架构。
经过一番研究,我开始觉得这可能是一个 avro 问题,可以通过使用 Maven 编译和编辑依赖项或将 amazon hadoop 版本更改为以前的版本来修复。但是,我从未使用过 Maven,而且更改 hadoop 版本会弄乱我的许多其他代码。
/**
 * Hadoop MapReduce driver that builds an inverted index.
 *
 * <p>Input records are read as {@code AvroKey<DocumentSchema>}; the mapper emits
 * {@code (Text, IndexValue)} pairs and the reducer writes Avro key/value records
 * with a string key schema and an int value schema.
 *
 * <p>Command-line arguments: {@code <input path> <output path> <s3 bucket>}.
 */
public class MapReduceIndexing extends Configured implements Tool {

    /** Job-configuration key under which the document count is published to tasks. */
    static final String NUMBER_OF_DOCUMENTS_KEY = "indexing.number_of_documents";

    /**
     * Number of objects found in the bucket named by {@code args[2]}.
     * Assigned in {@link #main(String[])}, i.e. in the driver JVM only.
     */
    static int number_of_documents;

    static DynamoStorage ds = new DynamoStorage();

    /** Emits one {@code (word, IndexValue)} pair per word of the incoming document. */
    public static class IndexMapper extends Mapper<AvroKey<DocumentSchema>, NullWritable, Text, IndexValue> {
        @Override
        public void map(AvroKey<DocumentSchema> key, NullWritable value, Context context)
                throws IOException, InterruptedException {
            System.out.println("inside map start");
            // NOTE(review): on a cluster, tasks run in their own JVMs, so read the document
            // count via context.getConfiguration().getInt(NUMBER_OF_DOCUMENTS_KEY, 0)
            // rather than from the static field.
            //some mapper code e.g.
            for (String word : all_words.keySet()) {
                context.write(new Text(word), iv);
            }
            System.out.println("inside map end");
        }
    }

    /** Reduces all {@code IndexValue}s of one word into Avro key/value output. */
    public static class IndexReducer extends Reducer<Text, IndexValue, AvroKey<CharSequence>, AvroValue<Integer>> {
        @Override
        public void reduce(Text key, Iterable<IndexValue> iterable_values, Context context)
                throws IOException, InterruptedException {
            System.out.println("inside reduce start");
            //some reducer code
            System.out.println("inside reduce end");
        }
    }

    /**
     * Configures and runs the indexing job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 if the job succeeded, 1 if it failed, 2 if too few arguments were given
     * @throws Exception if job setup or submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: MapReduceIndexing <input path> <output path> <s3 bucket>");
            return 2;
        }

        // Use the Configuration injected by ToolRunner so that generic options
        // (-D, -conf, -files, ...) are honoured instead of being discarded.
        Job job = Job.getInstance(getConf(), "Making inverted index");
        job.setJarByClass(MapReduceIndexing.class);

        // Publish the driver-side count through the job configuration, which is
        // shipped to every task; static fields set in main() are not.
        job.getConfiguration().setInt(NUMBER_OF_DOCUMENTS_KEY, number_of_documents);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setInputFormatClass(AvroKeyInputFormat.class);
        job.setMapperClass(IndexMapper.class);
        AvroJob.setInputKeySchema(job, DocumentSchema.getClassSchema());
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IndexValue.class);

        job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
        job.setReducerClass(IndexReducer.class);
        AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
        AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Counts the objects in the S3 bucket {@code args[2]}, then launches the job
     * through {@code ToolRunner} and exits with its status code.
     *
     * @param args input path, output path and S3 bucket name, in that order
     * @throws Exception if the S3 listing or the job fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 3) {
            System.err.println("Usage: MapReduceIndexing <input path> <output path> <s3 bucket>");
            System.exit(2);
        }

        //setting input and output directories
        // NOTE(review): credentials should not be hard-coded in source; prefer the
        // SDK's credential provider chain or the instance role when running on EMR.
        AWSCredentials credentials = new BasicAWSCredentials("access key", "secret key");
        AmazonS3 s3 = new AmazonS3Client(credentials);

        // A single listObjects call returns only one page of results, so follow
        // the truncation marker until every page has been counted.
        ObjectListing object_listing = s3.listObjects(new ListObjectsRequest().withBucketName(args[2]));
        int document_count = object_listing.getObjectSummaries().size();
        while (object_listing.isTruncated()) {
            object_listing = s3.listNextBatchOfObjects(object_listing);
            document_count += object_listing.getObjectSummaries().size();
        }
        number_of_documents = document_count;

        int res = ToolRunner.run(new MapReduceIndexing(), args);
        System.exit(res);
    }
}
【问题讨论】:
标签: java hadoop amazon-s3 avro amazon-emr