hadoop sortカスタムソート(3つの数の比較方法)



 
0目的:
 
ファイルを第1列で昇順にソートし、第1列が同じ場合は第2列で昇順に、第2列も同じ場合は第3列で昇順にソートします。
入力データ:
3,3,3
3,2,4
3,2,0
2,2,1
2,1,4
1,1,0
 
 
mapreduce:
 
1. map と reduce のステージでソートする際に比較されるのは k2 であり、v2 はソートの比較には関与しません。もし v2 もソートに含めたい場合は、k2 と v2 を新しいクラスにまとめ、それを k2 として使うことで比較に参加させることができます。
 
2. グループ分け(グルーピング)の場合も k2 を比較します。
 
 
1コード:コアはhadoop map outputのkeyをカスタマイズして、中に比較の書き方を書きます
 
package sort;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class MyThreeSortApp {

	// HDFS locations: filesystem root, input file, and job output directory.
	static final String FILE_ROOT = "hdfs://master:9000/";
	static final String INPUT_PATH = "hdfs://master:9000/hello";
	static final String OUT_PATH = "hdfs://master:9000/out";

	/**
	 * Job driver: sorts comma-separated triples of longs ascending by the
	 * first, then second, then third column, by using {@link NewKey3} as the
	 * map output key so the shuffle phase performs the comparison.
	 *
	 * @param args command-line arguments (unused)
	 * @throws Exception if HDFS access or job submission fails
	 */
	public static void main(String[] args) throws Exception{

			Configuration conf = new Configuration();
			FileSystem fileSystem = FileSystem.get(new URI(FILE_ROOT),conf);
			Path outpath = new Path(OUT_PATH);
			// Hadoop refuses to start if the output directory already exists,
			// so remove any leftover from a previous run.
			if(fileSystem.exists(outpath)){
				fileSystem.delete(outpath, true);
			}

			// Job.getInstance replaces the deprecated new Job(conf) constructor.
			Job job = Job.getInstance(conf);
			// Required for cluster execution so the job jar can be located.
			job.setJarByClass(MyThreeSortApp.class);

			// 1.1 input: read the source file from HDFS; TextInputFormat feeds
			// one line per map() call as <byte offset, line text>.
			FileInputFormat.setInputPaths(job, INPUT_PATH);
			job.setInputFormatClass(TextInputFormat.class);

			// 1.2 map phase: emit <NewKey3, NullWritable> pairs
			job.setMapperClass(MyMapper3.class);
			job.setMapOutputKeyClass(NewKey3.class);
			job.setMapOutputValueClass(NullWritable.class);

			// 1.3 a single reducer yields one globally sorted output file
			job.setNumReduceTasks(1);

			// 2.2 reduce phase: write each sorted key as a text line
			job.setReducerClass(MyReducer3.class);
			job.setOutputKeyClass(Text.class);
			job.setOutputValueClass(NullWritable.class);

			// 2.3 output location and format
			FileOutputFormat.setOutputPath(job, outpath);
			job.setOutputFormatClass(TextOutputFormat.class);

			// Block until the job finishes; exit non-zero on failure so
			// shell scripts can detect errors.
			System.exit(job.waitForCompletion(true) ? 0 : 1);

		}
}

/**
 * Mapper: parses each comma-separated line of three longs into a
 * {@link NewKey3} composite key so the shuffle sorts on all three columns.
 * Values carry no payload, so NullWritable is emitted.
 *
 * NOTE: the raw {@code extends Mapper} in the original does not compile —
 * without type parameters, {@code map(LongWritable, Text, Context)} fails
 * to override the erased {@code map(Object, Object, Context)}.
 */
class MyMapper3 extends Mapper<LongWritable, Text, NewKey3, NullWritable>{

	@Override
	protected void map(LongWritable k1, Text v1, Context context)throws IOException, InterruptedException {
		String lineStr = v1.toString();
		System.out.println("map the line: " + lineStr);
		// Expected line format: "a,b,c" with three long fields.
		String[] split = lineStr.split(",");
		NewKey3 newKey3 = new NewKey3(Long.parseLong(split[0]),Long.parseLong(split[1]),Long.parseLong(split[2]));
		context.write(newKey3, NullWritable.get());
	}

}

class MyReducer3 extends Reducer{

	protected void reduce(NewKey3 k2, Iterable v2s, org.apache.hadoop.mapreduce.Reducer.Context context)
			throws IOException, InterruptedException {
		System.out.println("reduce the key is: " + k2.toString());
		context.write(new Text(k2.toString()), NullWritable.get());
	}

	
	
}


//       hadoop map output key   ,        
/**
 * Custom map-output key holding three long columns. Hadoop sorts map output
 * by the key, so implementing {@link WritableComparable} here is what gives
 * the job its three-level ascending sort order.
 */
class NewKey3 implements WritableComparable<NewKey3>{

	private long first;
	private long second;
	private long third;

	/** No-arg constructor required by Hadoop's reflection-based deserialization. */
	public NewKey3(){}

	public NewKey3(long first,long second,long third){
		this.first = first;
		this.second = second;
		this.third = third;
	}

	@Override
	public int hashCode() {
		final int prime = 31;
		int result = 1;
		result = prime * result + (int) (first ^ (first >>> 32));
		result = prime * result + (int) (second ^ (second >>> 32));
		result = prime * result + (int) (third ^ (third >>> 32));
		return result;
	}

	@Override
	public boolean equals(Object obj) {
		if (this == obj)
			return true;
		if (obj == null)
			return false;
		if (getClass() != obj.getClass())
			return false;
		NewKey3 other = (NewKey3) obj;
		if (first != other.first)
			return false;
		if (second != other.second)
			return false;
		if (third != other.third)
			return false;
		return true;
	}

	/** Renders the key as the output line format: "first second third". */
	@Override
	public String toString() {
		return first + " " + second + " " + third ;
	}

	@Override
	public void write(DataOutput out) throws IOException {
		out.writeLong(this.first);
		out.writeLong(this.second);
		out.writeLong(this.third);
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		// Must read fields in exactly the order write() emitted them.
		this.first = in.readLong();
		this.second = in.readLong();
		this.third = in.readLong();
	}

	/**
	 * Three-level ascending comparison: first, then second, then third.
	 * Uses Long.compare instead of subtraction — the original
	 * {@code (int)(a - b)} overflows/truncates for large differences
	 * (e.g. a difference of 2^32 compared as equal), corrupting the sort.
	 */
	@Override
	public int compareTo(NewKey3 other) {
		int result = Long.compare(this.first, other.first);
		if (result == 0) {
			result = Long.compare(this.second, other.second);
		}
		if (result == 0) {
			result = Long.compare(this.third, other.third);
		}
		return result;
	}

}

 
 
2実行結果:
[root@master local]# hadoop fs -text /out/part-r-00000
Warning: $HADOOP_HOME is deprecated.

1 1 0
2 1 4
2 2 1
3 2 0
3 2 4
3 3 3