Hadoop中如何实现分组
这篇文章主要为大家展示了“Hadoop中如何实现分组”,内容简单易懂,条理清晰,希望能够帮助大家解决疑惑。下面让小编带领大家一起研究并学习一下“Hadoop中如何实现分组”这篇文章吧。

为徽县等地区用户提供了全套网页设计制作服务,及徽县网站建设行业解决方案。主营业务为成都网站设计、网站建设、徽县网站设计,以传统方式定制建设网站,并提供域名空间备案等一条龙服务,秉承以专业、用心的态度为用户提供真诚的服务。我们深信只要达到每一位用户的要求,就会得到认可,从而选择与我们长期合作。这样,我们也可以走得更远!
package grounp;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Custom grouping example: for every distinct value of the first column the
 * job emits the largest value found in the second column.
 *
 * <p>Sample input (one pair per line; the mapper splits the two columns on a
 * tab character):
 * <pre>
 * 3 3
 * 3 2
 * 3 1
 * 2 2
 * 2 1
 * 1 1
 * </pre>
 *
 * <p>Resulting output:
 * <pre>
 * 1 1
 * 2 2
 * 3 3
 * </pre>
 *
 * @author Xr
 */
public class groupApp {
    /** HDFS location the job reads its input from. */
    public static final String INPUT_PATH = "hdfs://hadoop:9000/data";
    /** HDFS location the job writes its result to; removed before each run. */
    public static final String OUTPUT_PATH = "hdfs://hadoop:9000/datas";

    /**
     * Configures and runs the job, blocking until it finishes.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the output directory cannot be cleared or the job
     *         cannot be submitted or monitored
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        deleteOutputIfPresent(conf);

        Job job = new Job(conf, groupApp.class.getName());
        // Lets the framework find the jar holding the mapper/reducer/key
        // classes when the job is submitted to a cluster.
        job.setJarByClass(groupApp.class);

        FileInputFormat.setInputPaths(job, INPUT_PATH);
        job.setMapperClass(MyMapper.class);
        // Custom composite key.
        job.setMapOutputKeyClass(NewKey.class);
        job.setMapOutputValueClass(LongWritable.class);
        // Custom grouping: decides which keys share a single reduce() call.
        job.setGroupingComparatorClass(NewGroupCompator.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(LongWritable.class);
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));

        // Surface a failed job through the process exit status instead of
        // silently returning as if it had succeeded.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
    }

    /**
     * Recursively deletes {@link #OUTPUT_PATH} if it already exists, so the
     * job can create it afresh.
     *
     * @param conf configuration used to obtain the file system
     * @throws IOException if the file system cannot be reached or the delete fails
     * @throws URISyntaxException if {@link #OUTPUT_PATH} is not a valid URI
     */
    private static void deleteOutputIfPresent(Configuration conf)
            throws IOException, URISyntaxException {
        FileSystem fs = FileSystem.get(new URI(OUTPUT_PATH), conf);
        Path output = new Path(OUTPUT_PATH);
        if (fs.exists(output)) {
            fs.delete(output, true);
        }
    }
}
/**
 * Parses each tab-separated input line into a composite {@link NewKey}
 * (first column, second column) and emits the second column as the value.
 */
class MyMapper extends Mapper<LongWritable, Text, NewKey, LongWritable> {
    /**
     * Maps one line of text to a (NewKey, second column) pair.
     *
     * @param key     input key supplied by the input format (unused)
     * @param value   one line of text holding two tab-separated longs
     * @param context sink for the emitted pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        // Skip blank lines (e.g. a trailing newline in the input file) rather
        // than failing the whole task on an unparsable empty field.
        if (line.trim().isEmpty()) {
            return;
        }
        String[] columns = line.split("\t");
        long first = Long.parseLong(columns[0]);
        long second = Long.parseLong(columns[1]);

        NewKey k2 = new NewKey();
        k2.set(first, second);
        context.write(k2, new LongWritable(second));
    }
}
/**
 * Emits, for each group of keys, the key's first field together with the
 * largest value seen in that group.
 */
class MyReducer extends Reducer<NewKey, LongWritable, LongWritable, LongWritable> {
    /**
     * Reduces one group to a single (first field, maximum value) pair.
     *
     * @param key2    composite key representing the current group
     * @param values  all values that belong to the group
     * @param context sink for the emitted pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(NewKey key2, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long max = Long.MIN_VALUE;
        for (LongWritable v2 : values) {
            max = Math.max(max, v2.get());
        }
        context.write(new LongWritable(key2.first), new LongWritable(max));
    }
}
/**
 * Composite map-output key made of two longs, ordered by {@code first} and
 * then by {@code second}, both ascending.
 */
class NewKey implements WritableComparable<NewKey> {
    long first;
    long second;

    /**
     * Serializes the key as two consecutive longs: {@code first}, then
     * {@code second}.
     *
     * @param out destination stream
     * @throws IOException if the stream cannot be written
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(this.first);
        out.writeLong(this.second);
    }

    /**
     * Assigns both fields of the key.
     *
     * @param parseLong  new value of {@code first}
     * @param parseLong2 new value of {@code second}
     */
    public void set(long parseLong, long parseLong2) {
        this.first = parseLong;
        this.second = parseLong2;
    }

    /**
     * Restores the key from the layout produced by {@link #write(DataOutput)}.
     *
     * @param in source stream
     * @throws IOException if the stream cannot be read
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.first = in.readLong();
        this.second = in.readLong();
    }

    /**
     * Orders keys by {@code first}, breaking ties with {@code second}.
     *
     * @param o key to compare against
     * @return negative, zero or positive when this key sorts before, equal to
     *         or after {@code o}
     */
    @Override
    public int compareTo(NewKey o) {
        int byFirst = Long.compare(this.first, o.first);
        return byFirst != 0 ? byFirst : Long.compare(this.second, o.second);
    }

    /**
     * Hashes on {@code first} only. Hash-based partitioning then sends every
     * key sharing the same {@code first} to the same reducer, which grouping
     * on {@code first} relies on when more than one reducer runs.
     */
    @Override
    public int hashCode() {
        return (int) (this.first ^ (this.first >>> 32));
    }

    /** Two keys are equal when both fields match; consistent with {@link #compareTo}. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof NewKey)) {
            return false;
        }
        NewKey other = (NewKey) obj;
        return this.first == other.first && this.second == other.second;
    }
}
/**
 * Grouping comparator that treats two {@link NewKey}s as belonging to the same
 * group when their {@code first} fields match, ignoring {@code second}.
 */
class NewGroupCompator implements RawComparator<NewKey> {
    /** Serialized width of {@code NewKey.first}: one long written first by the key. */
    private static final int FIRST_FIELD_BYTES = 8;

    /**
     * Compares two deserialized keys on {@code first} only, matching the
     * grouping rule applied by the byte-level overload.
     *
     * @param o1 first key
     * @param o2 second key
     * @return zero when both keys have the same {@code first}; otherwise a
     *         negative or positive value
     */
    @Override
    public int compare(NewKey o1, NewKey o2) {
        return Long.compare(o1.first, o2.first);
    }

    /**
     * Compares the leading {@code first} field of two serialized keys.
     *
     * <p>Only the first {@value #FIRST_FIELD_BYTES} bytes of each key are
     * examined, so keys that differ only in {@code second} compare as equal.
     * The comparison is byte-wise, which is exact for equality.
     * NOTE(review): for mixed-sign values the ordering may differ from the
     * signed ordering used by the object-level overload; only the
     * zero/non-zero outcome should be relied on.
     *
     * @param b1 buffer holding the first serialized key
     * @param s1 offset of the first key within {@code b1}
     * @param l1 length in bytes of the first key (unused)
     * @param b2 buffer holding the second serialized key
     * @param s2 offset of the second key within {@code b2}
     * @param l2 length in bytes of the second key (unused)
     * @return zero when the {@code first} fields are identical, non-zero otherwise
     */
    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        return WritableComparator.compareBytes(
                b1, s1, FIRST_FIELD_BYTES, b2, s2, FIRST_FIELD_BYTES);
    }
}
以上是“Hadoop中如何实现分组”这篇文章的所有内容,感谢各位的阅读!相信大家都有了一定的了解,希望分享的内容对大家有所帮助,如果还想学习更多知识,欢迎关注创新互联行业资讯频道!
网站标题:Hadoop中如何实现分组
URL网址:http://www.jxjierui.cn/article/psocpg.html


咨询
建站咨询
