Javaテーブルはmysqlを模倣して基本操作(接続、パケットソート、統計など)を実現する

15037 ワード

          
package cn.cgh.table;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * 
 * @author xiegonghai
 *
 */
public class TableOperation
{

    /**
     * the separator of multiKey
     */
    public static String separator = "@&";

    /**
     * Join A and B according to one column
     * 
     * @param indexA
     * @param indexB
     * @return New table after innerJoin
     */
    public static List> innerJoin(List> A,
					       List> B,
					       int indexA,
					       int indexB)
    {

	List> res = new ArrayList>();
	// colMap  B        
	Map> colMap = new HashMap>();
	int i = 0;
	for (List row : B)
	{
	    String key = row.get(indexB);
	    if (colMap.containsKey(key))
	    {
		colMap.get(key).add(i);
	    } else
	    {
		ArrayList arrTmp = new ArrayList();
		arrTmp.add(i);
		colMap.put(key, arrTmp);
	    }
	    i = i + 1;
	}
	//   A     
	for (List row : A)
	{
	    String tarStr = row.get(indexA);
	    if (colMap.containsKey(tarStr))
	    {
		ArrayList idList = colMap.get(tarStr);
		for (Integer id : idList)
		{
		    // deep copy
		    List newRow = new ArrayList(row);
		    Set sList = new HashSet();
		    sList.add(indexB);
		    List rightB = getRowExcept(B.get(i), sList);
		    newRow.addAll(rightB);
		    res.add(newRow);
		}
	    }
	}
	System.out.println(A);
	System.out.println(B);
	return res;
    }

    /**
     * Join A and B according to multiColumn
     * 
     * @param indexAList id list of table A
     * @param indexBList id list of table B
     * @return New table after innerJoin
     */
    public static List> innerJoin(List> A,
					       List> B,
					       List indexAList,
					       List indexBList)
    {

	List> res = new ArrayList>();
	// colMap  B        
	Map> colMap = new HashMap>();
	int i = 0;
	for (List row : B)
	{
	    long murmurHashKeyB = getLineMultiKeyHash(row, indexBList);
	    if (colMap.containsKey(murmurHashKeyB))
	    {
		colMap.get(murmurHashKeyB).add(i);
	    } else
	    {
		ArrayList arrTmp = new ArrayList();
		arrTmp.add(i);
		colMap.put(murmurHashKeyB, arrTmp);
	    }
	    i = i + 1;
	}
	//   A     
	for (List row : A)
	{
	    long murmurHashValA = getLineMultiKeyHash(row, indexAList);
	    if (colMap.containsKey(murmurHashValA))
	    {
		ArrayList idList = colMap.get(murmurHashValA);
		for (Integer id : idList)
		{
		    List newRow = new ArrayList(row);
		    Set sList = new HashSet();
		    sList.addAll(indexBList);
		    List rightB = getRowExcept(B.get(id), sList);
		    newRow.addAll(rightB);
		    res.add(newRow);
		}
	    }
	}
	System.out.println(A);
	System.out.println(B);
	return res;
    }

    /**
     * 
     * @param row One line data
     * @param idList Clear the data that belong to idList
     * @return
     */
    public static List getRowExcept(List row,
					    Set idList)
    {
	List newRow = new ArrayList();
	int size = row.size();
	for (int i = 0; i < size; ++i)
	{
	    if (!idList.contains(i))
	    {
		newRow.add(row.get(i));
	    }
	}
	return newRow;
    }

    /**
     * Join A and B according to multiColumn
     * 
     * @param indexAList id list of table A
     * @param indexBList id list of table B
     * @return New table after innerJoin
     */
    public static List> leftJoin(List> A,
					      List> B,
					      List indexAList,
					      List indexBList)
    {

	List> res = new ArrayList>();
	// colMap  B        
	Map> colMap = new HashMap>();
	int i = 0;
	for (List row : B)
	{
	    long murmurHashKeyB = getLineMultiKeyHash(row, indexBList);
	    if (colMap.containsKey(murmurHashKeyB))
	    {
		colMap.get(murmurHashKeyB).add(i);
	    } else
	    {
		ArrayList arrTmp = new ArrayList();
		arrTmp.add(i);
		colMap.put(murmurHashKeyB, arrTmp);
	    }
	    i = i + 1;
	}
	//   A     
	for (List row : A)
	{
	    long murmurHashValA = getLineMultiKeyHash(row, indexAList);
	    if (colMap.containsKey(murmurHashValA))
	    {
		ArrayList idList = colMap.get(murmurHashValA);
		for (Integer id : idList)
		{
		    List newRow = new ArrayList(row);
		    Set sList = new HashSet();
		    sList.addAll(indexBList);
		    List rightB = getRowExcept(B.get(id), sList);
		    newRow.addAll(rightB);
		    res.add(newRow);
		}
	    } else
	    {
		List newRow = new ArrayList(row);
		List rightB = new ArrayList();
		int bSize = B == null ? 0 : B.size();
		int rolCols = bSize == 0 ? 0 : B.get(0).size();
		int indexbSize = indexBList == null ? 0 : indexBList.size();
		for (int k = 0; k < rolCols - indexbSize; ++k)
		{
		    rightB.add("-");
		}
		newRow.addAll(rightB);
		res.add(newRow);
	    }
	}
	System.out.println(A);
	System.out.println(B);
	return res;
    }

    /**
     *          
     * 
     * @param table
     * @param orderList
     */
    public static void groupByIdList(List> table,
				     int val)
    {
	SortedTable sort = new SortedTable(table, val);
    }

    /**
     *          
     * @param table 
     * @param orderList
     */
    public static List> countGroupBy(List> table,int sortCols,int colIndex)
    {
	List> newTable = new ArrayList>(table);
	
	SortedTable sort = new SortedTable(newTable,sortCols);
	int rowSize = newTable.size();
	for(int i = 0;i< rowSize;++i)
	{
	    
	}
	return newTable;
    }

    /**
     * 64 bit MurmurHash value
     * 
     * @param row String of list
     * @param idList multiKey of one line
     * @return the 64 bit of hash value
     */
    public static long getKeyHash(String key)
    {
	return MurmurHash.hash64(key);
    }

    /**
     * 64 bit MurmurHash value
     * 
     * @param row String of list
     * @param idList multiKey of one line
     * @return the 64 bit of hash value
     */
    public static long getLineMultiKeyHash(List row,
					   List idList)
    {
	StringBuilder sb = new StringBuilder();
	for (int i : idList)
	{
	    sb.append(row.get(i));
	    sb.append(separator);
	}
	return MurmurHash.hash64(sb.toString());
    }

}

以上は、mysqlの自然接続と左接続を実現するデータマトリクスの基本操作であり、まだ完了せず、更新を継続しています.
次にgroupbyを実現するためにデータマトリクスをマルチレベルソートする
package cn.cgh.table;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

/**
 *   table   (<=3)  
 * 
 * @author xiegonghai
 *
 */
public class SortedTable
{
    public List> mList;  
    public List>> mCmpList = new ArrayList>>();  
    public SortedTable(List> list,int val){  
        mList = list;
        if(val>=1)mCmpList.add(compareFirASC);  
        if(val>=2)mCmpList.add(compareSecASC);
        if(val>=3)mCmpList.add(compareThirASC);  
        sort(mList, mCmpList);  
    }  
    public void sort(List> list, final List>> comList) {  
        if (comList == null)  
            return;  
        Comparator> cmp = new Comparator>() {  
            public int compare(List o1, List o2) {  
                for (Comparator> comparator : comList) {  
                    if (comparator.compare(o1, o2) > 0) {  
                        return 1;  
                    } else if (comparator.compare(o1, o2) < 0) {  
                        return -1;  
                    }  
                }  
                return 0;  
            }  
        };  
        Collections.sort(list, cmp);  
    }  
  
    private Comparator> compareFirASC = new Comparator>() {  
  
        public int compare(List o1, List o2) {  
            return o1.get(0).compareTo(o2.get(0)); 
        }  
    };  
  
    private Comparator> compareSecASC = new Comparator>() {  
  
        public int compare(List o1, List o2) {  
            return o1.get(1).compareTo(o2.get(1));  
        }  
    };  
    
    private Comparator> compareThirASC = new Comparator>() {  
	  
        public int compare(List o1, List o2) {  
            return o1.get(2).compareTo(o2.get(2));  
        }  
    }; 
}

以下はmurmurhash実装クラス(hadoop,redis,nginxともに適用)であり,任意の長い文字列から一意の32,64ビット整数型へのマッピングを実現する.
package cn.cgh.table;

/**
 * 
 * @author reference to someone
 *
 */
public final class MurmurHash
{

    /**
     * Generates 32 bit hash from byte array of the given length and seed.
     * 
     * @param data byte array to hash
     * @param length length of the array to hash
     * @param seed initial seed value
     * @return 32 bit hash of the given array
     */
    public static int hash32(final byte[] data,
			     int length,
			     int seed)
    {
	// 'm' and 'r' are mixing constants generated offline.
	// They're not really 'magic', they just happen to work well.
	final int m = 0x5bd1e995;
	final int r = 24;
	// Initialize the hash to a random value
	int h = seed ^ length;
	int length4 = length / 4;

	for (int i = 0; i < length4; i++)
	{
	    final int i4 = i * 4;
	    int k = (data[i4 + 0] & 0xff) + ((data[i4 + 1] & 0xff) << 8) + ((data[i4 + 2] & 0xff) << 16)
		    + ((data[i4 + 3] & 0xff) << 24);
	    k *= m;
	    k ^= k >>> r;
	    k *= m;
	    h *= m;
	    h ^= k;
	}

	// Handle the last few bytes of the input array
	switch (length % 4)
	{
	case 3:
	    h ^= (data[(length & ~3) + 2] & 0xff) << 16;
	case 2:
	    h ^= (data[(length & ~3) + 1] & 0xff) << 8;
	case 1:
	    h ^= (data[length & ~3] & 0xff);
	    h *= m;
	}

	h ^= h >>> 13;
	h *= m;
	h ^= h >>> 15;

	return h;
    }

    /**
     * Generates 32 bit hash from byte array with default seed value.
     * 
     * @param data byte array to hash
     * @param length length of the array to hash
     * @return 32 bit hash of the given array
     */
    public static int hash32(final byte[] data,
			     int length)
    {
	return hash32(data, length, 0x9747b28c);
    }

    /**
     * Generates 32 bit hash from a string.
     * 
     * @param text string to hash
     * @return 32 bit hash of the given string
     */
    public static int hash32(final String text)
    {
	final byte[] bytes = text.getBytes();
	return hash32(bytes, bytes.length);
    }

    /**
     * Generates 32 bit hash from a substring.
     * 
     * @param text string to hash
     * @param from starting index
     * @param length length of the substring to hash
     * @return 32 bit hash of the given string
     */
    public static int hash32(final String text,
			     int from,
			     int length)
    {
	return hash32(text.substring(from, from + length));
    }

    /**
     * Generates 64 bit hash from byte array of the given length and seed.
     * 
     * @param data byte array to hash
     * @param length length of the array to hash
     * @param seed initial seed value
     * @return 64 bit hash of the given array
     */
    public static long hash64(final byte[] data,
			      int length,
			      int seed)
    {
	final long m = 0xc6a4a7935bd1e995L;
	final int r = 47;

	long h = (seed & 0xffffffffl) ^ (length * m);

	int length8 = length / 8;

	for (int i = 0; i < length8; i++)
	{
	    final int i8 = i * 8;
	    long k = ((long) data[i8 + 0] & 0xff) + (((long) data[i8 + 1] & 0xff) << 8)
		    + (((long) data[i8 + 2] & 0xff) << 16) + (((long) data[i8 + 3] & 0xff) << 24)
		    + (((long) data[i8 + 4] & 0xff) << 32) + (((long) data[i8 + 5] & 0xff) << 40)
		    + (((long) data[i8 + 6] & 0xff) << 48) + (((long) data[i8 + 7] & 0xff) << 56);

	    k *= m;
	    k ^= k >>> r;
	    k *= m;

	    h ^= k;
	    h *= m;
	}

	switch (length % 8)
	{
	case 7:
	    h ^= (long) (data[(length & ~7) + 6] & 0xff) << 48;
	case 6:
	    h ^= (long) (data[(length & ~7) + 5] & 0xff) << 40;
	case 5:
	    h ^= (long) (data[(length & ~7) + 4] & 0xff) << 32;
	case 4:
	    h ^= (long) (data[(length & ~7) + 3] & 0xff) << 24;
	case 3:
	    h ^= (long) (data[(length & ~7) + 2] & 0xff) << 16;
	case 2:
	    h ^= (long) (data[(length & ~7) + 1] & 0xff) << 8;
	case 1:
	    h ^= (long) (data[length & ~7] & 0xff);
	    h *= m;
	}
	;

	h ^= h >>> r;
	h *= m;
	h ^= h >>> r;

	return h;
    }

    /**
     * Generates 64 bit hash from byte array with default seed value.
     * 
     * @param data byte array to hash
     * @param length length of the array to hash
     * @return 64 bit hash of the given string
     */
    public static long hash64(final byte[] data,
			      int length)
    {
	return hash64(data, length, 0xe17a1465);
    }

    /**
     * Generates 64 bit hash from a string.
     * 
     * @param text string to hash
     * @return 64 bit hash of the given string
     */
    public static long hash64(final String text)
    {
	final byte[] bytes = text.getBytes();
	return hash64(bytes, bytes.length);
    }

    /**
     * Generates 64 bit hash from a substring.
     * 
     * @param text string to hash
     * @param from starting index
     * @param length length of the substring to hash
     * @return 64 bit hash of the given array
     */
    public static long hash64(final String text,
			      int from,
			      int length)
    {
	return hash64(text.substring(from, from + length));
    }
}