将user表、group表、order表关联(类似于多表关联查询);

 

测试准备:

首先同步时间,然后开启HDFS集群和YARN集群;在本地"/home/hadoop/test/"目录下创建user表、group表、order表的文件;

user文件:

YARN集群的mapreduce测试(三)

group文件:

YARN集群的mapreduce测试(三)

order文件:

 YARN集群的mapreduce测试(三)

YARN集群的mapreduce测试(三)

测试目标:

得到3张表关联后的结果;

 

测试代码

一定要把握好输出键值的类型,否则有可能造成有输出目录,但是没有文件内容的问题;

package com.mmzs.bigdata.yarn.mapreduce;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

/**
 * Map-side tagging step for joining records that come from several input files.
 *
 * <p>Each input line is split on whitespace. The emitted key is the column the
 * files are joined on, and the emitted value is the remaining payload followed
 * by {@code "-->"} and the name of the file the record came from, so that a
 * downstream consumer can tell the sources apart.
 *
 * <ul>
 *   <li>Input file named {@code group} (case-insensitive): key = column 0,
 *       value = {@code column1-->fileName}.</li>
 *   <li>Any other input file: key = column 2,
 *       value = {@code column0<TAB>column1-->fileName}.
 *       NOTE(review): presumably these are user records whose third column is
 *       the group id; confirm against the job's input paths.</li>
 * </ul>
 *
 * <p>Blank lines and lines with too few columns are skipped instead of failing
 * the task; skipped malformed lines are counted in the
 * {@code UserGroupMapper01 / MALFORMED_RECORDS} counter.
 */
public class UserGroupMapper01 extends Mapper<LongWritable, Text, Text, Text> {

    /** Name of the input file whose records use the "group" column layout. */
    private static final String GROUP_FILE_NAME = "group";

    /** Marker placed between the payload and the source file name in every emitted value. */
    private static final String SOURCE_TAG_SEPARATOR = "-->";

    /** Minimum number of columns needed by the "group" layout (columns 0 and 1 are read). */
    private static final int GROUP_MIN_FIELDS = 2;

    /** Minimum number of columns needed by the non-group layout (columns 0, 1 and 2 are read). */
    private static final int OTHER_MIN_FIELDS = 3;

    /** Counter group and name used to report skipped malformed lines. */
    private static final String COUNTER_GROUP = "UserGroupMapper01";
    private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

    // Reused across map() calls to avoid allocating two Text objects per record.
    private Text outKey;
    private Text outValue;

    /**
     * Allocates the reusable output key/value holders.
     *
     * @param context task context supplied by the framework (unused here)
     */
    @Override
    protected void setup(Mapper<LongWritable, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {
        outKey = new Text();
        outValue = new Text();
    }

    /**
     * Emits one (joinKey, payload-->fileName) pair per well-formed input line.
     *
     * @param key     input key supplied by the input format (not used)
     * @param value   one line of the input file
     * @param context task context; must be backed by a {@link FileSplit}
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {
        FileSplit split = (FileSplit) context.getInputSplit();
        String fileName = split.getPath().getName();

        // Trim first: leading whitespace would otherwise yield an empty first
        // token from split() and shift every column index by one.
        String line = value.toString().trim();
        if (line.isEmpty()) {
            // Blank lines (e.g. a trailing newline in the file) carry no record.
            return;
        }

        String[] fields = line.split("\\s+");
        boolean fromGroupFile = GROUP_FILE_NAME.equalsIgnoreCase(fileName);
        int requiredFields = fromGroupFile ? GROUP_MIN_FIELDS : OTHER_MIN_FIELDS;
        if (fields.length < requiredFields) {
            // Too few columns: skip the record rather than crash the task with
            // ArrayIndexOutOfBoundsException, but keep it visible via a counter.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        String keyStr;
        String valueStr;
        if (fromGroupFile) {
            keyStr = fields[0];
            valueStr = fields[1] + SOURCE_TAG_SEPARATOR + fileName;
        } else {
            keyStr = fields[2];
            // The "-->" marker is appended so the value can later be split on it
            // to tell which file the record originated from.
            valueStr = fields[0] + "\t" + fields[1] + SOURCE_TAG_SEPARATOR + fileName;
        }

        outKey.set(keyStr);
        outValue.set(valueStr);
        context.write(outKey, outValue);
    }

    /**
     * Drops the references to the reusable output holders.
     *
     * @param context task context supplied by the framework (unused here)
     */
    @Override
    protected void cleanup(Mapper<LongWritable, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {
        outKey = null;
        outValue = null;
    }

}
UserGroupMapper01

相关文章: