多属性TopK——NRAアルゴリズム(JAVA)実現
この2,3日マルチプロパティのTopKを見て、NRA、google之について見ました.
Javaで実現してみましたが、間違いがあるかもしれません.間違いを指摘してください.
英語の注釈もよくないので、間に合います.
アルゴリズムでは、AndOrと重み付けのいくつかの操作が使用されます.
(1)Resultのデータ構造,ここでのResultは実は前回検索した出力(項目の1つのデータ構造)である.
(2)複数のResultからなるList
(3)操作データ構造
public enum Operation {AND,OR,WEIGHT}
(4)アルゴリズムの真の実現
Javaで実現してみましたが、間違いがあるかもしれません.間違いを指摘してください.
英語の注釈もよくないので、間に合います.
アルゴリズムでは、AndOrと重み付けのいくつかの操作が使用されます.
(1)Resultのデータ構造,ここでのResultは実は前回検索した出力(項目の1つのデータ構造)である.
public class Result implements Cloneable{
private String id = null;
private float score = 0;
public Result(String id){
this(id, 0);
}
public Result(String id, float score){
this.id = id;
this.score = score;
}
public String getId(){
return this.id;
}
public void setId(String id){
this.id = id;
}
public float getScore(){
return this.score;
}
public void setScore(float score){
this.score = score;
}
@Override
public boolean equals(Object obj) {
if (obj instanceof Result) {
Result name = (Result) obj;
return (id.equals(name.id)
&&Math.abs(score - name.score) < 0.001);
}
return super.equals(obj);
}
@Override
protected Object clone() throws CloneNotSupportedException {
return super.clone();
}
@Override
public String toString(){
return this.id+"="+this.score;
}
}
(2)複数のResultからなるList
import java.util.ArrayList;
import java.util.List;
public class ResultList implements Cloneable{
private List<Result> resultList;
private float weight;
public ResultList(){
this.resultList = new ArrayList<Result>();
this.weight = 1;
}
public ResultList(int num){
this(num,1);
}
public ResultList(int num, float weight){
this.resultList = new ArrayList<Result>(num);
this.weight = weight;
}
public List<Result> getResultList(){
return this.resultList;
}
public Result getResult(int i){
return this.resultList.get(i);
}
public float getWeight(){
return this.weight;
}
public void setWeight(float weight){
this.weight = weight;
}
public void setResult(Result result, int i){
Result temp = this.resultList.get(i);
temp.setId(result.getId());
temp.setScore(result.getScore());
}
public void addResult(Result result){
this.resultList.add(result);
}
public int getSize(){
return this.resultList.size();
}
@Override
public boolean equals(Object obj) {
if (obj instanceof ResultList) {
ResultList name = (ResultList) obj;
boolean flags = true;
for(int i=0;i<resultList.size();i++){
flags = flags&&resultList.get(i).equals(name.resultList.get(i));
}
return (flags&&Math.abs(resultList.size()-name.resultList.size())<0.001);
}
return super.equals(obj);
}
@Override
protected Object clone() throws CloneNotSupportedException {
ResultList resultListCloneList = null;
if(resultList != null){
resultListCloneList = new ResultList();
for(Result result:resultList){
resultListCloneList.resultList.add((Result) result.clone());
}
}
return resultListCloneList;
}
@Override
public String toString(){
return this.resultList.toString()+"
"+this.weight;
}
}
(3)操作データ構造
public enum Operation {AND,OR,WEIGHT}
(4)アルゴリズムの真の実現
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Random;
import java.util.TreeSet;
import java.util.Map.Entry;
import org.junit.Test;
import process.search.engine.Operation;
import process.search.engine.Result;
import process.search.engine.ResultList;
public class TestNRA {
class Node implements Comparable<TestNRA.Node>{
private String id;
private float worst;
private float best;
private BitSet visited;
public Node(String id){
this(id, 0, 0);
}
public Node(String id, float worst, float best){
this.id = id;
this.worst = worst;
this.best = best;
this.visited = new BitSet();
}
public Node(Result r){
this(r.getId(),r.getScore(),r.getScore());
}
public void setVisit(int i){
this.visited.set(i, true);
}
public boolean getVisit(int i){
return this.visited.get(i);
}
public BitSet getVisits(){
return this.visited;
}
public String getId(){
return this.id;
}
public void setId(String id ){
this.id = id;
}
public float getWorst(){
return this.worst;
}
public void setWorst(float minValue){
this.worst = minValue;
}
public float getBest(){
return this.best;
}
public void setBest(float best){
this.best = best;
}
@Override
public int compareTo(Node node) {
if(getWorst() > node.getWorst())
return 1;
else if( getWorst() == node.getWorst()){
return this.getId().compareTo(node.getId());
}else
return -1;
}
@Override
public boolean equals(Object o){
if(o instanceof Node)
return this.id.equals(((Node)o).id);
if(o instanceof Result)
return this.id.equals(((Result)o).getId());
return false;
}
@Override
public String toString(){
return this.id +"=[" + this.worst + "," + this.best + "] : " + this.visited;
}
}
public Node findWorstNode(ArrayList<Node> list, boolean direction){
Node node = null;
if(list.size()>0)
node = list.get(0);
for(int i = 1 ; i < list.size(); i++){
if(direction){
if(1==node.compareTo(list.get(i)))
node = list.get(i);
}else{
if(-1==node.compareTo(list.get(i)))
node = list.get(i);
}
}
return node;
}
public Node findBestNode(ArrayList<Node> list){
Node node = null;
if(list.size()>0)
node = list.get(0);
for(int i = 1; i < list.size(); i++){
if(node.getBest()<list.get(i).getBest())
node = list.get(i);
}
return node;
}
private float findMax(ArrayList<Float> list, BitSet visited){
float ff = 0;
for(int i = 0; i < list.size() ; i++){
if ( ff < list.get(i) && !visited.get(i)){
ff = list.get(i);
}
}
return ff;
}
private float findMin(ArrayList<Float> list, BitSet visited){
float ff = Float.MAX_VALUE;
for(int i = 0 ; i < list.size(); i++){
if( ff > list.get(i) && !visited.get(i)){
ff = list.get(i);
}
}
return ff;
}
public ResultList execute(ResultList[] resultSet, Operation op, int topk){
// the column number of result
int colNum = resultSet.length;
// result list of top K
ResultList result = new ResultList(topk);
// hash map source record all element and there attributes' range [worst, best]
HashMap<String,Node> source = new HashMap<String,Node>();
// tree set top records the element of highest [worst] value
//TreeSet<Node> top = new TreeSet<Node>();
ArrayList<Node> top = new ArrayList<Node>();
// tree set candidate records the source of element apart from the top set
//TreeSet<Node> candidate = new TreeSet<Node>();
ArrayList<Node> candidate = new ArrayList<Node>();
// if the operation is weight, limits records the the attribute value of the elements in the last time.
ArrayList<Float> limits = new ArrayList<Float>(colNum);
// local temple variable
Result r = null;
Node node = null, tnode = null;
Node topWorstNode=null,canWorstNode=null,canBestNode = null;
float best = 0, max = 0, min = 0;
for(int i = 0; i < colNum; i++){
best += (resultSet[i].getResult(0).getScore()*resultSet[i].getWeight());
limits.add(resultSet[i].getResult(0).getScore());
}
boolean stop = true;
// start the algorithm, j stands for the line of the result set
for(int j = 0; ; j++){
// i stands for the column of the result set
for(int i = 0; i < colNum; i++){
// the element equals to resultSet[j][i];
if(j>=resultSet[i].getSize()){
stop = true;
continue;
}
stop = false;
r = resultSet[i].getResult(j);
if(op==Operation.WEIGHT)
best = best - (limits.get(i)-r.getScore())*resultSet[i].getWeight();
else
limits.set(i,r.getScore());
// if the element is in the source map
if(source.containsKey(r.getId())){
// get the element from the source
node = source.get(r.getId());
// set the attribute of the element has been visited
node.setVisit(i);
// base different operation
if(op == Operation.OR){
// or operation need to assure the largest value
if(node.getBest()<r.getScore()){
node.setBest(r.getScore());
node.setWorst(r.getScore());
}else if(node.getWorst()<r.getScore())
node.setWorst(r.getScore());
}else if(op == Operation.AND){
// and operation need to assure the smallest value
if(node.getBest()>r.getScore()){
node.setWorst(r.getScore());
node.setBest(r.getScore());
}
}else {
// weight operation need to assure all the attribute value with weight
node.setWorst(node.getWorst()+r.getScore()*resultSet[i].getWeight());
}
}else{
// if the element is not in the source map, add a new node into source
node = new Node(r);
// if the operation is weight, set the best value of the element to best
if(op==Operation.WEIGHT){
node.setBest(best);
}
else if(op==Operation.OR){
max = findMax(limits,node.getVisits());
if(max>r.getScore())
node.setBest(max);
}else{
min = findMin(limits,node.getVisits());
node.setWorst(0);
node.setBest(min);
}
node.setVisit(i);
source.put(r.getId(), node);
// if top size less than topk
if(top.size()<topk){
top.add(node);
}
// else add the node into candidates
else{
candidate.add(node);
}
}
// reset the [worst, best] range of all the element in the source map according
// to each operation.
if(op==Operation.OR){
for(Entry<String,Node> element : source.entrySet()){
tnode = element.getValue();
if(!tnode.getVisit(i)){
if(tnode.equals(node)){
if(tnode.getBest()<node.getWorst()){
tnode.setBest(node.getWorst());
}
if(tnode.getWorst()<node.getWorst()){
tnode.setWorst(node.getWorst());
}
}else{
if(tnode.getBest()>node.getBest()&&tnode.getWorst()<node.getBest())
tnode.setBest(node.getBest());
}
}
}
} else if( op == Operation.AND){
for(Entry<String,Node> element : source.entrySet()){
tnode = element.getValue();
if(!tnode.getVisit(i)){
if(tnode.getBest()>node.getBest()){
tnode.setBest(node.getBest());
tnode.setWorst(0);
}
}
}
} else {
for(Entry<String,Node> element : source.entrySet()){
node = element.getValue();
if(!node.getVisit(i)){
if(node.equals(r)){
node.setWorst(node.getWorst()+r.getScore()*resultSet[i].getWeight());
}else{
node.setBest(node.getBest()-(limits.get(i)-r.getScore())*resultSet[i].getWeight());
}
}else{
if(node.equals(r)&&node.getVisits().length()!=1)
node.setBest(node.getBest()-(limits.get(i)-r.getScore())*resultSet[i].getWeight());
}
}
}
// if candidate size > 0, compare the node of candidate with the largest worst value
// to the node of top with smallest value. if it is larger, swap two node.
if(candidate.size()>0){
topWorstNode = findWorstNode(top,true);
canWorstNode = findWorstNode(candidate,false);
canBestNode = findBestNode(candidate);
if(topWorstNode.getWorst()<canWorstNode.getWorst()){
top.remove(topWorstNode);
top.add(canWorstNode);
candidate.remove(canWorstNode);
candidate.add(topWorstNode);
}
}
}
if(stop)
break;
// record the values of element last visited.
if(op == Operation.WEIGHT){
limits.clear();
for(int i = 0; i < colNum; i++)
limits.add(resultSet[i].getResult(j).getScore());
}
// if the largest best value of candidate smaller than the smallest worst value
if(candidate.size() != 0){
// find the largest best value of the element in the candidate
if(canBestNode.getBest()<topWorstNode.getWorst()){
System.out.println("top : "+top);
System.out.println("candidate: " +candidate);
System.out.println("===================================================");
System.out.println("execute at the line : " + j);
break;
//}
}
}
}
Iterator<Node> it = top.iterator();
while(it.hasNext()){
Node tn = it.next();
Result rr = new Result(tn.getId(),tn.getWorst());
result.addResult(rr);
}
return result;
}
@Test
public void test(){
ResultList[] resultSet = new ResultList[3];
String[][] idList = {{"a","b","c","d","e","f","g","h","i","j"},
{"e","i","b","a","f","d","g","h","j","c"},
{"g","j","b","f","i","h","c","d","a","e"}};
float[][] score = {{100,98,82,73,45,32,21,14,11,6},
{80,75,62,55,53,45,39,37,26,19},
{93,70,61,57,44,36,26,19,17,3}};
for(int i = 0 ; i < 3 ; i++){
resultSet[i] = new ResultList();
for(int j = 0 ; j < 10; j++){
Result r = new Result(idList[i][j],score[i][j]);
resultSet[i].addResult(r);
}
resultSet[i].setWeight(1);
}
ResultList rl = execute(resultSet,Operation.WEIGHT,3);
System.out.println(rl);
}
}