利用hadoop查询两两之间有共同好友及他俩的共同好友都是谁_软件编程

利用hadoop查询两两之间有共同好友及他俩的共同好友都是谁

发布时间：2026-01-11

点击量：

编码思路：

第一步是可以把好友当作key，value是拥有key好友的用户，例如:拥有好友B的是：A,F,J,E用户

第二步在第一步结果后，双重for循环进行两两之间进行拼接，这样就可以得出正确结果

具体代码实现：

第一步：

package com.zsy.mr.commonfriend;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class commonFriendStepOne {
	static class commonFriendStepOneMapper extends Mapper<LongWritable, Text, Text, Text>{
		Text k = new Text();
		Text v = new Text();
		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			//通过过冒号分割
			String[] splits = value.toString().split(":");
			//获取拥有好友的用户名
			String name = splits[0];
			//获取该用户下的好友列表
			String[] friends = StringUtils.isNotBlank(splits[1])?  splits[1].split(","):null;
			if(friends != null) {
				//循环好友，好友当作key，拥有好友的用户名当作value
				for (String friend : friends) {
					k.set(friend);
					v.set(name);
					context.write(k, v);
				}
			}
		}
	}
	
	static class commonFriendStepOneReducer extends Reducer<Text, Text, Text, Text>{
		Text v = new Text();
		@Override
		protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			List<String> resultList = new ArrayList<String>();//实际生产代码不建议用list接收，应该是直接处理掉
			//处理数据，该数据是拥有key好友的所有用户
			for (Text value : values) {
				resultList.add(value.toString());
			}
			v.set(StringUtils.join(resultList, ","));
			context.write(key, v);
		}
	}
	
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		/*conf.set("mapreduce.framework.name", "yarn");
        conf.set("yarn.resoucemanger.hostname", "hadoop01");*/
		Job job = Job.getInstance(conf);
		
		job.setJarByClass(commonFriendStepOne.class);
		
		//指定本业务job要使用的业务类
		job.setMapperClass(commonFriendStepOneMapper.class);
		job.setReducerClass(commonFriendStepOneReducer.class);
		
		//指定mapper输出的k v类型  如果map的输出和reduce的输出一样，只需要设置输出即可
		//job.setMapOutputKeyClass(Text.class);
		//job.setMapOutputValueClass(IntWritable.class);
		
		//指定最终输出kv类型（reduce输出类型）
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		
		//指定job的输入文件所在目录
		FileInputFormat.setInputPaths(job, new Path(args[0]));
		//指定job的输出结果目录
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		//将job中配置的相关参数，以及job所有的java类所在 的jar包，提交给yarn去运行
		//job.submit();无结果返回，建议不使用它
		boolean res = job.waitForCompletion(true);
		
		System.exit(res?0:1);
	}
}

结果：

第二步：

代码实现

package com.zsy.mr.commonfriend;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class commonFriendStepTwo {

	static class commonFriendStepTwoMapper extends Mapper<LongWritable, Text, Text, Text>{
		Text k = new Text();
		Text v = new Text();
		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			String[] splits = value.toString().split("\t");
			//获取好友
			String friend = splits[0];
			//获取拥有该好友所有的用户信息
			String[] names = splits[1].split(",");
			//进行排序，防止计算数据重复，例如：A-B和B-A其实一个对
			Arrays.sort(names);
			//进行双重for循环
			for (int i = 0; i < names.length-1; i++) {
				String string = names[i];
				for (int j = i+1; j < names.length; j++) {
					String string2 = names[j];
					k.set(string+"-"+string2);
					v.set(friend);
					context.write(k, v);
				}
			}
		}
	}
	
	static class commonFriendStepTwoReducer extends Reducer<Text, Text, Text, NullWritable>{
		Text k = new Text();
		@Override
		protected void reduce(Text key, Iterable<Text> value, Reducer<Text, Text, Text, NullWritable>.Context context)
				throws IOException, InterruptedException {
			List<String> resultList = new ArrayList<String>();//实际生产代码不建议用list接收，应该是直接处理掉
			for (Text text : value) {
				resultList.add(text.toString());
			}
			k.set(key.toString()+":"+ StringUtils.join(resultList,","));
			context.write(k, NullWritable.get());
		}
	}
	
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		/*conf.set("mapreduce.framework.name", "yarn");
        conf.set("yarn.resoucemanger.hostname", "hadoop01");*/
		Job job = Job.getInstance(conf);
		
		job.setJarByClass(commonFriendStepTwo.class);
		
		//指定本业务job要使用的业务类
		job.setMapperClass(commonFriendStepTwoMapper.class);
		job.setReducerClass(commonFriendStepTwoReducer.class);
		
		//指定mapper输出的k v类型  如果map的输出和reduce的输出一样，只需要设置输出即可
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		
		//指定最终输出kv类型（reduce输出类型）
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(NullWritable.class);
		
		//指定job的输入文件所在目录
		FileInputFormat.setInputPaths(job, new Path(args[0]));
		//指定job的输出结果目录
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		//将job中配置的相关参数，以及job所有的java类所在 的jar包，提交给yarn去运行
		//job.submit();无结果返回，建议不使用它
		boolean res = job.waitForCompletion(true);
		
		System.exit(res?0:1);
	}
}

结果：

这样就可以找到正确结果

总结

标签：# 好友列表 # Path # fs # Configuration # LongWritable # Mapper # lib # input # Reducer # FileInputFormat # 过过 # 该用户 # Hadoop 求共同好友 # 使用它 # 以此类推 # 应该是 # 的是 # 就可以 # 要使 # 只需要 # 第二步 # redis实现共同好友的思路详解 # 利用Python查看微信共同好友功能的实现代码 # Hadoop 共同好友

上一篇：mybatis like模糊查询特殊字符报错转义处理方式

下一篇：Socket通信原理和实践

利用hadoop查询两两之间有共同好友及他俩的共同好友都是谁

发布时间：2026-01-11

点击量：

目录

编码思路：

具体代码实现：

第一步：

第二步：

总结

返回

4008888355