多属性TopK——NRAアルゴリズム(JAVA)実現


この2,3日マルチプロパティのTopKを見て、NRA、google之について見ました.
Javaで実現してみましたが、間違いがあるかもしれません.間違いを指摘してください.
英語の注釈もよくないので、間に合います.
アルゴリズムでは、AndOrと重み付けのいくつかの操作が使用されます.
(1)Resultのデータ構造,ここでのResultは実は前回検索した出力(項目の1つのデータ構造)である.


public class Result implements Cloneable{
	private String id = null;
	private float score = 0;
	
	public Result(String id){
		this(id, 0);
	}
	public Result(String id, float score){
		this.id = id;
		this.score = score;
	}
	public String getId(){
		return this.id;
	}
	
	public void setId(String id){
		this.id = id;
	}
	
	public float getScore(){
		return this.score;
	}
	
	public void setScore(float score){
		this.score = score;
	}
	
	@Override
	public boolean equals(Object obj) {
		if (obj instanceof Result) {
			Result name = (Result) obj;
			return (id.equals(name.id)
					&&Math.abs(score - name.score) < 0.001);
		}
		return super.equals(obj);
	}

	@Override
	protected Object clone() throws CloneNotSupportedException {
		return super.clone();
	}
	
	@Override
	public String toString(){
		return this.id+"="+this.score;
	}
}

 
(2)複数のResultからなるList

import java.util.ArrayList;
import java.util.List;


public class ResultList implements Cloneable{
	private List<Result> resultList;
	private float weight;
	
	public ResultList(){
		this.resultList = new ArrayList<Result>();
		this.weight = 1;
	}
	
	public ResultList(int num){
		this(num,1);
	}
	
	public ResultList(int num, float weight){
		 this.resultList = new ArrayList<Result>(num);
		 this.weight = weight;
	}
	
	public List<Result> getResultList(){
		return this.resultList;
	}
	
	public Result getResult(int i){
		return this.resultList.get(i);
	}
	
	public float getWeight(){
		return this.weight;
	}
	
	public void setWeight(float weight){
		this.weight = weight;
	}
	
	public void setResult(Result result, int i){
		Result temp = this.resultList.get(i);
		temp.setId(result.getId());
		temp.setScore(result.getScore());
	}
	
	public void addResult(Result result){
		this.resultList.add(result);
	}
	
	public int getSize(){
		return this.resultList.size();
	}
	@Override
	public boolean equals(Object obj) {
		if (obj instanceof ResultList) {
			ResultList name = (ResultList) obj;
			boolean flags = true;
			for(int i=0;i<resultList.size();i++){
				flags = flags&&resultList.get(i).equals(name.resultList.get(i));
			}
			return (flags&&Math.abs(resultList.size()-name.resultList.size())<0.001);
		}
		return super.equals(obj);
	}
	@Override
	protected Object clone() throws CloneNotSupportedException {
		ResultList resultListCloneList = null;
		if(resultList != null){
			resultListCloneList = new ResultList();
			for(Result result:resultList){
				resultListCloneList.resultList.add((Result) result.clone());
			}
		}
		return resultListCloneList;
		
	}
	@Override
	public String toString(){
		return this.resultList.toString()+"
"+this.weight; } }

 
(3)操作データ構造
public enum Operation {AND,OR,WEIGHT}
(4)アルゴリズムの真の実現


import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Random;
import java.util.TreeSet;
import java.util.Map.Entry;

import org.junit.Test;

import process.search.engine.Operation;
import process.search.engine.Result;
import process.search.engine.ResultList;


public class TestNRA {
	class Node implements Comparable<TestNRA.Node>{
		private String id;
		private float worst;
		private float best;
		private BitSet visited;
		
		public Node(String id){
			this(id, 0, 0);
		}
		
		public Node(String id, float worst, float best){
			this.id = id;
			this.worst = worst;
			this.best = best;
			this.visited = new BitSet();
		}
		
		public Node(Result r){
			this(r.getId(),r.getScore(),r.getScore());
		}
		
		public void setVisit(int i){
			this.visited.set(i, true);
		}
		
		public boolean getVisit(int i){
			return this.visited.get(i);
		}
		
		public BitSet getVisits(){
			return this.visited;
		}
		
		public String getId(){
			return this.id;
		}
		
		public void setId(String id ){
			this.id = id;
		}
		
		public float getWorst(){
			return this.worst;
		}
		
		public void setWorst(float minValue){
			this.worst = minValue;
		}
		
		public float getBest(){
			return this.best;
		}
		
		public void setBest(float best){
			this.best = best;
		}
		
		@Override
		public int compareTo(Node node) {
			if(getWorst() > node.getWorst())
				return 1;
			else if( getWorst() == node.getWorst()){
				return this.getId().compareTo(node.getId());
			}else
				return -1;
		}	
		
		@Override
		public boolean equals(Object o){
			if(o instanceof Node)
				return this.id.equals(((Node)o).id);
			if(o instanceof Result)
				return this.id.equals(((Result)o).getId());
			return false;
		}
		
		@Override
		public String toString(){
			return this.id +"=[" + this.worst + "," + this.best + "] : " + this.visited;
		}
	}
	
	public Node findWorstNode(ArrayList<Node> list, boolean direction){
		Node node = null;
		if(list.size()>0)
			node = list.get(0);
		for(int i = 1 ; i < list.size(); i++){
			if(direction){
				if(1==node.compareTo(list.get(i)))
					node = list.get(i);
			}else{
				if(-1==node.compareTo(list.get(i)))
					node = list.get(i);
			}
		}
		return node;
	}
	
	public Node findBestNode(ArrayList<Node> list){		
		Node node = null;
		if(list.size()>0)
			node = list.get(0);
		for(int i = 1; i < list.size(); i++){
			if(node.getBest()<list.get(i).getBest())
				node = list.get(i);
		}
		return node;
	}
	
	private float findMax(ArrayList<Float> list, BitSet visited){
		float ff = 0;
		for(int i = 0; i < list.size() ; i++){
			if ( ff < list.get(i) && !visited.get(i)){
				ff = list.get(i);
			}			
		}
		return ff;
	}
	
	private float findMin(ArrayList<Float> list, BitSet visited){
		float ff = Float.MAX_VALUE;
		for(int i = 0 ; i < list.size(); i++){
			if( ff > list.get(i) && !visited.get(i)){
				ff = list.get(i);
			}
		}
		return ff;
	}
	public ResultList execute(ResultList[] resultSet, Operation op, int topk){
		// the column number of result
		int colNum = resultSet.length;
		
		// result list of top K
		ResultList result = new ResultList(topk);
		
		// hash map source record all element and there attributes' range [worst, best]
		HashMap<String,Node> source = new HashMap<String,Node>();
		
		// tree set top records the element of highest [worst] value 
		//TreeSet<Node> top = new TreeSet<Node>();
		ArrayList<Node> top = new ArrayList<Node>();
		// tree set candidate records the source of element apart from the top set 
		//TreeSet<Node> candidate = new TreeSet<Node>();
		ArrayList<Node> candidate = new ArrayList<Node>();
		// if the operation is weight, limits records the the attribute value of the elements in the last time.
		ArrayList<Float> limits = new ArrayList<Float>(colNum);
		
		// local temple variable
		Result r = null;
		Node node = null, tnode = null;
		Node topWorstNode=null,canWorstNode=null,canBestNode = null;
		float best = 0, max = 0, min = 0;
		

		
		for(int i = 0; i < colNum; i++){
			best += (resultSet[i].getResult(0).getScore()*resultSet[i].getWeight());
			limits.add(resultSet[i].getResult(0).getScore());
		}

		boolean stop = true;
		// start the algorithm, j stands for the line of the result set
		for(int j = 0; ; j++){
			// i stands for the column of the result set
			for(int i = 0; i < colNum; i++){
				// the element equals to resultSet[j][i];
				if(j>=resultSet[i].getSize()){
					stop = true;
					continue;					
				}
				stop = false;
				
				r = resultSet[i].getResult(j);
				
				if(op==Operation.WEIGHT)
					best = best - (limits.get(i)-r.getScore())*resultSet[i].getWeight();
				else
					limits.set(i,r.getScore());
					
				// if the element is in the source map
				if(source.containsKey(r.getId())){
					// get the element from the source
					node = source.get(r.getId());
					// set the attribute of the element has been visited
					node.setVisit(i);
					// base different operation
					if(op == Operation.OR){
						// or operation need to assure the largest value
						if(node.getBest()<r.getScore()){
							node.setBest(r.getScore());
							node.setWorst(r.getScore());
						}else if(node.getWorst()<r.getScore())
							node.setWorst(r.getScore());						
					}else if(op == Operation.AND){
						// and operation need to assure the smallest value
						if(node.getBest()>r.getScore()){
							node.setWorst(r.getScore());
							node.setBest(r.getScore());
						}
						
					}else {
						// weight operation need to assure all the attribute value with weight
						node.setWorst(node.getWorst()+r.getScore()*resultSet[i].getWeight());
					}
				}else{
					// if the element is not in the source map, add a new node into source
					node = new Node(r);
					// if the operation is weight, set the best value of the element to best
					if(op==Operation.WEIGHT){
						node.setBest(best);
					}
					else if(op==Operation.OR){
						max = findMax(limits,node.getVisits());						
						if(max>r.getScore())
							node.setBest(max);
					}else{
						min = findMin(limits,node.getVisits());
						node.setWorst(0);
						node.setBest(min);
					}
						
					node.setVisit(i);
					source.put(r.getId(), node);
					// if top size less than topk
					if(top.size()<topk){						
						top.add(node);						
					}
					// else add the node into candidates
					else{						
						candidate.add(node);						
					}						
				}
							
				// reset the [worst, best] range of all the element in the source map according
				// to each operation.
				if(op==Operation.OR){
					for(Entry<String,Node> element : source.entrySet()){
						tnode = element.getValue();
						if(!tnode.getVisit(i)){
							if(tnode.equals(node)){
								if(tnode.getBest()<node.getWorst()){
									tnode.setBest(node.getWorst());
								}
								if(tnode.getWorst()<node.getWorst()){
									tnode.setWorst(node.getWorst());
								}
							}else{
								if(tnode.getBest()>node.getBest()&&tnode.getWorst()<node.getBest())
									tnode.setBest(node.getBest());
							}
						}
					}
				} else if( op == Operation.AND){
					for(Entry<String,Node> element : source.entrySet()){
						tnode = element.getValue();
						if(!tnode.getVisit(i)){
							if(tnode.getBest()>node.getBest()){
								tnode.setBest(node.getBest());
								tnode.setWorst(0);
							}
						}
					}
				} else {
					for(Entry<String,Node> element : source.entrySet()){
						node = element.getValue();
						if(!node.getVisit(i)){
							if(node.equals(r)){
								node.setWorst(node.getWorst()+r.getScore()*resultSet[i].getWeight());
							}else{
								node.setBest(node.getBest()-(limits.get(i)-r.getScore())*resultSet[i].getWeight());
							}
						}else{
							if(node.equals(r)&&node.getVisits().length()!=1)
								node.setBest(node.getBest()-(limits.get(i)-r.getScore())*resultSet[i].getWeight());
						}
					}
				}
				
				
				// if candidate size > 0, compare the node of candidate with the largest worst value 
				// to the node of top with smallest value. if it is larger, swap two node. 
				if(candidate.size()>0){
					topWorstNode = findWorstNode(top,true);
					canWorstNode = findWorstNode(candidate,false);
					canBestNode = findBestNode(candidate);
					if(topWorstNode.getWorst()<canWorstNode.getWorst()){
						top.remove(topWorstNode);
						top.add(canWorstNode);
						candidate.remove(canWorstNode);
						candidate.add(topWorstNode);					
					}
				}
			}
			
			if(stop)
				break;
			// record the values of element last visited.
			if(op == Operation.WEIGHT){
				limits.clear();
				for(int i = 0; i < colNum; i++)
					limits.add(resultSet[i].getResult(j).getScore());
			}
			
			
			// if the largest best value of candidate smaller than the smallest worst value 
			if(candidate.size() != 0){
				// find the largest best value of the element in the candidate	
				if(canBestNode.getBest()<topWorstNode.getWorst()){
					System.out.println("top : "+top);
					System.out.println("candidate: " +candidate);
					System.out.println("===================================================");
					System.out.println("execute at the line : " + j);
					break;
					//}
				}
			}
			
		}
		
		Iterator<Node> it = top.iterator();
		while(it.hasNext()){
			Node tn = it.next();
			Result rr = new Result(tn.getId(),tn.getWorst());
			result.addResult(rr);
		}
		return result;
	}

	
	

	@Test
	public void test(){
		
		ResultList[] resultSet = new ResultList[3];
		
		String[][] idList = {{"a","b","c","d","e","f","g","h","i","j"},
							{"e","i","b","a","f","d","g","h","j","c"},
							{"g","j","b","f","i","h","c","d","a","e"}};
		
		float[][] score = {{100,98,82,73,45,32,21,14,11,6},
						{80,75,62,55,53,45,39,37,26,19},
						{93,70,61,57,44,36,26,19,17,3}};
		
		for(int i = 0 ; i < 3 ; i++){
			resultSet[i] = new ResultList();
			for(int j = 0 ; j < 10; j++){
				Result r = new Result(idList[i][j],score[i][j]);
				resultSet[i].addResult(r);
			}
			resultSet[i].setWeight(1);
		}
		ResultList rl = execute(resultSet,Operation.WEIGHT,3);
		System.out.println(rl);
	}
	
	
}