在使用 Map/Reduce 时,有些业务需要在同一个 Job 中将数据写入多个 HBase 表,下面是一种实现方式。
原文地址:http://lookfirst.com/2011/07/hbase-multitableoutputformat-writing-to.html
HBase MultiTableOutputFormat writing to multiple tables in one Map Reduce Job
static class TsvImporter extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> { @Override public void map(LongWritable offset, Text value, Context context) throws IOException { // contains the line of tab separated data we are working on (needs to be parsed out). byte[] lineBytes = value.getBytes(); // rowKey is the hbase rowKey generated from lineBytes Put put = new Put(rowKey); // Create your KeyValue object put.add(kv); context.write("actions", put); // write to the actions table // rowKey2 is the hbase rowKey Put put = new Put(rowKey2); // Create your KeyValue object put.add(kv); context.write("actions_index", put); // write to the actions table } } public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException { String pathStr = args[0]; Path inputDir = new Path(pathStr); Job job = new Job(conf, "my_custom_job"); job.setJarByClass(TsvImporter.class); FileInputFormat.setInputPaths(job, inputDir); job.setInputFormatClass(TextInputFormat.class); // this is the key to writing to multiple tables in hbase job.setOutputFormatClass(MultiTableOutputFormat.class); job.setMapperClass(TsvImporter.class); job.setNumReduceTasks(0); TableMapReduceUtil.addDependencyJars(job); TableMapReduceUtil.addDependencyJars(job.getConfiguration()); return job; }