使用到的资源
hadoop-eclipse-plugin-2.6.4.jar
hadoop-2.7.7
日志文件 user_login.txt
上述资源以及实验源码均已上传到百度云盘,可自行获取
链接:https://pan.baidu.com/s/1sprb5Ytue3rxMEAMTXZoVQ
提取码:fec7
复制这段内容后打开百度网盘手机App,操作更方便哦

搭建IDE开发环境,创建MapReduce工程MemberCount

1.将插件hadoop-eclipse-plugin-2.6.4.jar复制到eclipse的dropins目录下
MapReduce编程入门-日志访问次数统计任务
2.菜单选择Window – Perspective – Open Perspective – Other,在对话框中选中 Map/Reduce,点击OK
MapReduce编程入门-日志访问次数统计任务
3.点击小象图标增加 Hadoop 集群的连接
MapReduce编程入门-日志访问次数统计任务
4.集群信息按自己的 Hadoop 集群实际信息填写,点击Finish
MapReduce编程入门-日志访问次数统计任务
5.创建连接后,刷新 HDFS 列表即可获得文件目录最新状态
MapReduce编程入门-日志访问次数统计任务
6.在主菜单上选择Window – Preferences,选中Hadoop Map/Reduce,添加 hadoop-2.7.7 文件所在路径,导入MapReduce运行依赖的jar包
MapReduce编程入门-日志访问次数统计任务
7.新建工程 Map/Reduce Project
MapReduce编程入门-日志访问次数统计任务
8.工程命名为 MemberCount ,点击 Finish
MapReduce编程入门-日志访问次数统计任务
9.建好后的工程目录,可见jar包已经导入
MapReduce编程入门-日志访问次数统计任务
通过MapReduce编程解决统计访问次数
10.编写完业务代码后,点击主菜单File – Export,选择 Runnable JAR file ,点击 Next
MapReduce编程入门-日志访问次数统计任务
任务一逻辑代码如下:

package mapreduce.util;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class dailyAccessCount {
	public static class MyMapper
		extends Mapper<Object,Text,Text,IntWritable>{
			private final static IntWritable one = new IntWritable(1);
			public void map(Object key,Text value,Context context)
				throws IOException,InterruptedException{
				String line = value.toString();
				//指定逗号为分隔符,组成数组
				String array[] = line.split(",");
				//提取数组中的访问日期作为Key
				String keyOutput = array[1];
				//组成键值对
				context.write(new Text(keyOutput),one);
			}
	}
	
	public static class MyReducer
		extends Reducer<Text,IntWritable,Text,IntWritable>{
			private IntWritable result = new IntWritable();
			public void reduce(Text key,Iterable<IntWritable> values,Context context)
				throws IOException,InterruptedException {
					//定义累加器,初始值为0
					int sum = 0;
					for (IntWritable val : values) {
						//将相同键的所有值进行累加
						sum += val.get();
					}
					result.set(sum);
					context.write(key,result);
			}
	}
	
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf,"Daily Access Count");
		job.setJarByClass(dailyAccessCount.class);
		job.setMapperClass(MyMapper.class);
		job.setReducerClass(MyReducer.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		
		for (int i = 0; i < args.length - 1; ++ i) {
			FileInputFormat.addInputPath(job, new Path(args[i]));
		}
		
		FileOutputFormat.setOutputPath(job, new Path(args[args.length - 1]));
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}

11.选择相应的类,点击Finish
MapReduce编程入门-日志访问次数统计任务
12.使用Xftp工具将日志文件和业务jar包传输到虚拟机的opt文件夹中
MapReduce编程入门-日志访问次数统计任务
13.上传需要统计的用户登录日志文件到HDFS文件系统中
创建/user/root目录

/opt/hadoop/bin/hdfs dfs -mkdir -p /user/root/

上传日志文件到hdfs文件系统中

/opt/hadoop/bin/hdfs dfs -copyFromLocal /opt/user_login.txt /user/root/

MapReduce编程入门-日志访问次数统计任务
14.执行用户日志统计程序
执行命令:

/opt/hadoop/bin/hadoop jar /opt/dailyAccessCount.jar /user/root/user_login.txt /user/root/AccessCount

运行结果:
MapReduce编程入门-日志访问次数统计任务
15.统计结果生成的文件
MapReduce编程入门-日志访问次数统计任务
16.查看part-r-00000文件内容可见统计结果
MapReduce编程入门-日志访问次数统计任务
通过MapReduce编程解决按访问次数进行排序
17.编写任务二业务代码生成jar包上传到虚拟机中
MapReduce编程入门-日志访问次数统计任务
任务二逻辑代码:

package mapreduce.util;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class accessTimesSort {
	
	public static class MyMapper 
		extends Mapper<Object, Text,IntWritable,Text>{
			public void map(Object key, Text value, Context context
					) throws IOException, InterruptedException {
	
						String lines = value.toString();  
						//指定tab为分隔符,组成数组
						String array[] = lines.split("\t");  
						//提取访问次数做为Key
						int keyOutput = Integer.parseInt(array[1]); 
						//提取访问日期做为Values
						String valueOutput = array[0];              
						context.write(new IntWritable(keyOutput), new Text(valueOutput)); 
			}
	}

	public static class MyReducer 
		extends Reducer<IntWritable,Text,Text,IntWritable> {

			public void reduce(IntWritable key, Iterable<Text> values, 
                Context context)
                		throws IOException, InterruptedException {
							for(Text value : values){
								context.write(value, key);
							}
			}
	}

	public static void main(String[] args) throws Exception{
	    Configuration conf = new Configuration();
	    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
	    if (otherArgs.length < 2) {
	      System.err.println("Usage: wordcount <in> [<in>...] <out>");
	      System.exit(2);
	    }
	    Job job = new Job(conf, "Access Time Sort");
	    job.setJarByClass(accessTimesSort.class);
	    job.setMapperClass(MyMapper.class);
	    job.setReducerClass(MyReducer.class);
	    job.setMapOutputKeyClass(IntWritable.class);
	    job.setMapOutputValueClass(Text.class);
	    job.setOutputKeyClass(Text.class);
	    job.setOutputValueClass(IntWritable.class);
	    
	    for (int i = 0; i < otherArgs.length - 1; ++i) {
	      FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
	    }
	    FileOutputFormat.setOutputPath(job,
	      new Path(otherArgs[otherArgs.length - 1]));
	    System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}

18.执行任务二统计程序
执行命令

/opt/hadoop/bin/hadoop jar /opt/accessTimesSort.jar /user/root/AccessCount /user/root/TimesSort

MapReduce编程入门-日志访问次数统计任务
19.统计生成的结果文件
MapReduce编程入门-日志访问次数统计任务
20.打开查看统计结果,可见内容已经按照访问次数升序排序
MapReduce编程入门-日志访问次数统计任务
至此,统计实验完毕。

相关文章:

  • 2021-12-25
  • 2021-12-25
  • 2022-01-07
  • 2021-09-05
  • 2022-12-23
  • 2022-12-23
  • 2021-06-06
  • 2021-08-23
猜你喜欢
  • 2021-10-05
  • 2022-12-23
  • 2022-03-03
  • 2021-05-17
  • 2022-12-23
  • 2022-02-22
  • 2022-12-23
相关资源
相似解决方案