【发布时间】:2015-08-08 01:32:00
【问题描述】:
我正在尝试通过 MapReduce 作业将 MongoDb 中的集合文档导入 HDFS。我正在使用旧的 Api。这是驱动代码
package my.pac;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import com.mongodb.hadoop.mapred.MongoInputFormat;
import com.mongodb.hadoop.util.MongoConfigUtil;
public class ImportDriver extends Configured implements Tool {
public static void main(String[] args) throws Exception {
int exitCode = ToolRunner.run(new ImportDriver(), args);
System.exit(exitCode);
}
@Override
public int run(String[] args) throws Exception {
JobConf conf = new JobConf();
MongoConfigUtil.setInputURI(conf,"mongodb://127.0.0.1:27017/SampleDb.shows");
conf.setJarByClass(ImportDriver.class);
conf.addResource(new Path("/usr/lib/hadoop/hadoop-1.2.1/conf/core-site.xml"));
conf.addResource(new Path("/usr/lib/hadoop/hadoop-1.2.1/conf/hdfs-site.xml"));
FileOutputFormat.setOutputPath(conf, new Path(args[0]));
conf.setInputFormat(MongoInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
conf.setMapperClass(ImportMapper.class);
conf.setMapOutputKeyClass(Text.class);
conf.setMapOutputKeyClass(Text.class);
JobClient.runJob(conf);
return 0;
}
}
这是我的映射器代码:
package my.pac;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.bson.BSONObject;
import com.mongodb.hadoop.io.BSONWritable;
public class ImportMapper extends MapReduceBase implements Mapper<BSONWritable, BSONWritable, Text, Text>{
@Override
public void map(BSONWritable key, BSONWritable value,
OutputCollector<Text, Text> o, Reporter arg3)
throws IOException {
String val = ((BSONObject) value).get("_id").toString();
System.out.println(val);
o.collect( new Text(val), new Text(val));
}
}
我正在使用
- Ubuntu-14.0
- Hadoop-1.2.1
- MongoDb-3.0.4
我添加了以下罐子:
- mongo-2.9.3.jar
- mongo-hadoop-core-1.3.0.jar
- mongo-java-driver-2.13.2.jar
当我运行它时,我收到这样的错误:
java.lang.Exception: java.lang.ClassCastException: com.mongodb.hadoop.io.BSONWritable cannot be cast to org.bson.BSONObject
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:354)
Caused by: java.lang.ClassCastException: com.mongodb.hadoop.io.BSONWritable cannot be cast to org.bson.BSONObject
at my.pac.ImportMapper.map(ImportMapper.java:18)
at my.pac.ImportMapper.map(ImportMapper.java:1)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:50)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:430)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:366)
at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:223)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
我该如何解决这个问题?
【问题讨论】: