将user表、group表、order表关联;(类似于多表关联查询)
测试准备:
首先同步时间,然后开启hdfs集群、开启yarn集群;在本地"/home/hadoop/test/"目录创建user表、group表、order表的文件;
user文件:
group文件:
order文件:
测试目标:
得到3张表关联后的结果;
测试代码:
一定要把握好输出键值的类型,否则有可能造成有输出目录,但是没有文件内容的问题;
package com.mmzs.bigdata.yarn.mapreduce;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

/**
 * Map stage of a reduce-side join over the {@code user}, {@code group} and
 * {@code order} tables.
 *
 * <p>Every record is keyed by the group id so that rows from all three tables
 * meet in the same reduce call. The value is the remaining columns, tagged with
 * {@code -->&lt;fileName&gt;} so the reducer can tell which table each row came
 * from:
 * <ul>
 *   <li>{@code group} file: key = column 0 (group id), value = column 1.</li>
 *   <li>any other file ({@code user}, {@code order}): key = column 2 (group id),
 *       value = columns 0 and 1 joined by a tab.</li>
 * </ul>
 *
 * <p>Input: {@code (byte offset, line)}; output: {@code (group id, tagged row)}.
 */
public class UserGroupMapper01 extends Mapper<LongWritable, Text, Text, Text> {

    // Output key/value are allocated once in setup() and reused per record to
    // avoid creating two objects for every input line.
    private Text outKey;
    private Text outValue;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        outKey = new Text();
        outValue = new Text();
    }

    /**
     * Splits one input line on whitespace and emits a (group id, tagged row)
     * pair. Blank or malformed lines are skipped instead of crashing the task.
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // trim() so leading whitespace cannot produce an empty first token
        // from split("\\s+"), which would corrupt the join key.
        String line = value.toString().trim();
        if (line.isEmpty()) {
            return; // skip blank lines; unguarded they would throw AIOOBE below
        }
        String[] fields = line.split("\\s+");

        // The source file name tells us which table this split belongs to.
        FileSplit split = (FileSplit) context.getInputSplit();
        String fileName = split.getPath().getName();

        String keyStr;
        String valueStr;
        if ("group".equalsIgnoreCase(fileName)) {
            if (fields.length < 2) {
                return; // malformed group row: need id + name
            }
            keyStr = fields[0];
            // "-->" tags the record with its source file so the reducer can
            // tell the tables apart when it splits the value back up.
            valueStr = fields[1] + "-->" + fileName;
        } else {
            if (fields.length < 3) {
                return; // malformed user/order row: need two columns + group id
            }
            keyStr = fields[2];
            valueStr = fields[0] + "\t" + fields[1] + "-->" + fileName;
        }

        outKey.set(keyStr);
        outValue.set(valueStr);
        context.write(outKey, outValue);
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Drop the reused buffers so they can be collected once the task ends.
        outKey = null;
        outValue = null;
    }
}