在进行Map/Reduce时,有的业务需要在一个job中将数据写入到多个HBase的表中,下面是实现方式。

原文地址:http://lookfirst.com/2011/07/hbase-multitableoutputformat-writing-to.html

HBase MultiTableOutputFormat writing to multiple tables in one Map Reduce Job

 
/**
 * Map-only importer that turns each line of tab separated input into two
 * {@link Put}s and routes them to two different HBase tables.
 *
 * <p>The output key is the destination table name wrapped in an
 * {@link ImmutableBytesWritable}; the output value is the {@link Put} to
 * apply to that table.
 */
static class TsvImporter extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
    /** Output key that routes a Put to the "actions" table. */
    private static final ImmutableBytesWritable ACTIONS_TABLE =
            new ImmutableBytesWritable("actions".getBytes(java.nio.charset.StandardCharsets.UTF_8));

    /** Output key that routes a Put to the "actions_index" table. */
    private static final ImmutableBytesWritable ACTIONS_INDEX_TABLE =
            new ImmutableBytesWritable("actions_index".getBytes(java.nio.charset.StandardCharsets.UTF_8));

    /**
     * Emits one Put for the "actions" table and one for the "actions_index" table.
     *
     * @param offset  key supplied by the input format for this line
     * @param value   the line of tab separated data to import
     * @param context task context used to emit (table name, Put) pairs
     * @throws IOException          if emitting a record fails
     * @throws InterruptedException if the task is interrupted while emitting
     */
    @Override
    public void map(LongWritable offset, Text value, Context context)
            throws IOException, InterruptedException {
        // contains the line of tab separated data we are working on (needs to be parsed out).
        // Text.getBytes() hands back the backing buffer, so only the first
        // lineLength bytes belong to the current line.
        byte[] lineBytes = value.getBytes();
        int lineLength = value.getLength();

        // rowKey is the hbase rowKey generated from lineBytes
        Put put = new Put(rowKey);
        // Create your KeyValue object
        put.add(kv);
        context.write(ACTIONS_TABLE, put); // write to the actions table

        // rowKey2 is the hbase rowKey
        Put indexPut = new Put(rowKey2);
        // Create your KeyValue object
        indexPut.add(kv);
        context.write(ACTIONS_INDEX_TABLE, indexPut); // write to the actions_index table
    }
}

/**
 * Builds a map-only job that reads text input and writes to HBase through
 * {@link MultiTableOutputFormat}.
 *
 * @param conf configuration the job is created from
 * @param args command line arguments; {@code args[0]} is the input directory
 * @return the configured job, not yet submitted
 * @throws IOException              if the job cannot be created or configured
 * @throws IllegalArgumentException if the input directory argument is missing
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    // Fail with a clear message instead of an ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 1) {
        throw new IllegalArgumentException("Missing required argument: input directory (args[0])");
    }
    String pathStr = args[0];
    Path inputDir = new Path(pathStr);
    Job job = new Job(conf, "my_custom_job");
    job.setJarByClass(TsvImporter.class);
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(TextInputFormat.class);

    // this is the key to writing to multiple tables in hbase
    job.setOutputFormatClass(MultiTableOutputFormat.class);
    job.setMapperClass(TsvImporter.class);
    // Map-only job: the mapper's output goes straight to the output format.
    job.setNumReduceTasks(0);

    TableMapReduceUtil.addDependencyJars(job);
    // NOTE(review): called with no extra classes; likely redundant with the
    // call above — confirm before removing.
    TableMapReduceUtil.addDependencyJars(job.getConfiguration());
    return job;
}
 

 

相关文章: