Javaテーブルはmysqlを模倣して基本操作(接続、パケットソート、統計など)を実現する
15037 ワード
package cn.cgh.table;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
*
* @author xiegonghai
*
*/
public class TableOperation
{
/**
* the separator of multiKey
*/
public static String separator = "@&";
/**
* Join A and B according to one column
*
* @param indexA
* @param indexB
* @return New table after innerJoin
*/
public static List> innerJoin(List> A,
List> B,
int indexA,
int indexB)
{
List> res = new ArrayList>();
// colMap B
Map> colMap = new HashMap>();
int i = 0;
for (List row : B)
{
String key = row.get(indexB);
if (colMap.containsKey(key))
{
colMap.get(key).add(i);
} else
{
ArrayList arrTmp = new ArrayList();
arrTmp.add(i);
colMap.put(key, arrTmp);
}
i = i + 1;
}
// A
for (List row : A)
{
String tarStr = row.get(indexA);
if (colMap.containsKey(tarStr))
{
ArrayList idList = colMap.get(tarStr);
for (Integer id : idList)
{
// deep copy
List newRow = new ArrayList(row);
Set sList = new HashSet();
sList.add(indexB);
List rightB = getRowExcept(B.get(i), sList);
newRow.addAll(rightB);
res.add(newRow);
}
}
}
System.out.println(A);
System.out.println(B);
return res;
}
/**
* Join A and B according to multiColumn
*
* @param indexAList id list of table A
* @param indexBList id list of table B
* @return New table after innerJoin
*/
public static List> innerJoin(List> A,
List> B,
List indexAList,
List indexBList)
{
List> res = new ArrayList>();
// colMap B
Map> colMap = new HashMap>();
int i = 0;
for (List row : B)
{
long murmurHashKeyB = getLineMultiKeyHash(row, indexBList);
if (colMap.containsKey(murmurHashKeyB))
{
colMap.get(murmurHashKeyB).add(i);
} else
{
ArrayList arrTmp = new ArrayList();
arrTmp.add(i);
colMap.put(murmurHashKeyB, arrTmp);
}
i = i + 1;
}
// A
for (List row : A)
{
long murmurHashValA = getLineMultiKeyHash(row, indexAList);
if (colMap.containsKey(murmurHashValA))
{
ArrayList idList = colMap.get(murmurHashValA);
for (Integer id : idList)
{
List newRow = new ArrayList(row);
Set sList = new HashSet();
sList.addAll(indexBList);
List rightB = getRowExcept(B.get(id), sList);
newRow.addAll(rightB);
res.add(newRow);
}
}
}
System.out.println(A);
System.out.println(B);
return res;
}
/**
*
* @param row One line data
* @param idList Clear the data that belong to idList
* @return
*/
public static List getRowExcept(List row,
Set idList)
{
List newRow = new ArrayList();
int size = row.size();
for (int i = 0; i < size; ++i)
{
if (!idList.contains(i))
{
newRow.add(row.get(i));
}
}
return newRow;
}
/**
* Join A and B according to multiColumn
*
* @param indexAList id list of table A
* @param indexBList id list of table B
* @return New table after innerJoin
*/
public static List> leftJoin(List> A,
List> B,
List indexAList,
List indexBList)
{
List> res = new ArrayList>();
// colMap B
Map> colMap = new HashMap>();
int i = 0;
for (List row : B)
{
long murmurHashKeyB = getLineMultiKeyHash(row, indexBList);
if (colMap.containsKey(murmurHashKeyB))
{
colMap.get(murmurHashKeyB).add(i);
} else
{
ArrayList arrTmp = new ArrayList();
arrTmp.add(i);
colMap.put(murmurHashKeyB, arrTmp);
}
i = i + 1;
}
// A
for (List row : A)
{
long murmurHashValA = getLineMultiKeyHash(row, indexAList);
if (colMap.containsKey(murmurHashValA))
{
ArrayList idList = colMap.get(murmurHashValA);
for (Integer id : idList)
{
List newRow = new ArrayList(row);
Set sList = new HashSet();
sList.addAll(indexBList);
List rightB = getRowExcept(B.get(id), sList);
newRow.addAll(rightB);
res.add(newRow);
}
} else
{
List newRow = new ArrayList(row);
List rightB = new ArrayList();
int bSize = B == null ? 0 : B.size();
int rolCols = bSize == 0 ? 0 : B.get(0).size();
int indexbSize = indexBList == null ? 0 : indexBList.size();
for (int k = 0; k < rolCols - indexbSize; ++k)
{
rightB.add("-");
}
newRow.addAll(rightB);
res.add(newRow);
}
}
System.out.println(A);
System.out.println(B);
return res;
}
/**
*
*
* @param table
* @param orderList
*/
public static void groupByIdList(List> table,
int val)
{
SortedTable sort = new SortedTable(table, val);
}
/**
*
* @param table
* @param orderList
*/
public static List> countGroupBy(List> table,int sortCols,int colIndex)
{
List> newTable = new ArrayList>(table);
SortedTable sort = new SortedTable(newTable,sortCols);
int rowSize = newTable.size();
for(int i = 0;i< rowSize;++i)
{
}
return newTable;
}
/**
* 64 bit MurmurHash value
*
* @param row String of list
* @param idList multiKey of one line
* @return the 64 bit of hash value
*/
public static long getKeyHash(String key)
{
return MurmurHash.hash64(key);
}
/**
* 64 bit MurmurHash value
*
* @param row String of list
* @param idList multiKey of one line
* @return the 64 bit of hash value
*/
public static long getLineMultiKeyHash(List row,
List idList)
{
StringBuilder sb = new StringBuilder();
for (int i : idList)
{
sb.append(row.get(i));
sb.append(separator);
}
return MurmurHash.hash64(sb.toString());
}
}
以上は、mysqlの自然接続と左接続を実現するデータマトリクスの基本操作であり、まだ完了せず、更新を継続しています.
次にgroupbyを実現するためにデータマトリクスをマルチレベルソートする
package cn.cgh.table;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
/**
* table (<=3)
*
* @author xiegonghai
*
*/
public class SortedTable
{
public List> mList;
public List>> mCmpList = new ArrayList>>();
public SortedTable(List> list,int val){
mList = list;
if(val>=1)mCmpList.add(compareFirASC);
if(val>=2)mCmpList.add(compareSecASC);
if(val>=3)mCmpList.add(compareThirASC);
sort(mList, mCmpList);
}
public void sort(List> list, final List>> comList) {
if (comList == null)
return;
Comparator> cmp = new Comparator>() {
public int compare(List o1, List o2) {
for (Comparator> comparator : comList) {
if (comparator.compare(o1, o2) > 0) {
return 1;
} else if (comparator.compare(o1, o2) < 0) {
return -1;
}
}
return 0;
}
};
Collections.sort(list, cmp);
}
private Comparator> compareFirASC = new Comparator>() {
public int compare(List o1, List o2) {
return o1.get(0).compareTo(o2.get(0));
}
};
private Comparator> compareSecASC = new Comparator>() {
public int compare(List o1, List o2) {
return o1.get(1).compareTo(o2.get(1));
}
};
private Comparator> compareThirASC = new Comparator>() {
public int compare(List o1, List o2) {
return o1.get(2).compareTo(o2.get(2));
}
};
}
以下はmurmurhash実装クラス(hadoop,redis,nginxともに適用)であり,任意の長い文字列から一意の32,64ビット整数型へのマッピングを実現する.
package cn.cgh.table;
/**
*
* @author reference to someone
*
*/
public final class MurmurHash
{
/**
* Generates 32 bit hash from byte array of the given length and seed.
*
* @param data byte array to hash
* @param length length of the array to hash
* @param seed initial seed value
* @return 32 bit hash of the given array
*/
public static int hash32(final byte[] data,
int length,
int seed)
{
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
final int m = 0x5bd1e995;
final int r = 24;
// Initialize the hash to a random value
int h = seed ^ length;
int length4 = length / 4;
for (int i = 0; i < length4; i++)
{
final int i4 = i * 4;
int k = (data[i4 + 0] & 0xff) + ((data[i4 + 1] & 0xff) << 8) + ((data[i4 + 2] & 0xff) << 16)
+ ((data[i4 + 3] & 0xff) << 24);
k *= m;
k ^= k >>> r;
k *= m;
h *= m;
h ^= k;
}
// Handle the last few bytes of the input array
switch (length % 4)
{
case 3:
h ^= (data[(length & ~3) + 2] & 0xff) << 16;
case 2:
h ^= (data[(length & ~3) + 1] & 0xff) << 8;
case 1:
h ^= (data[length & ~3] & 0xff);
h *= m;
}
h ^= h >>> 13;
h *= m;
h ^= h >>> 15;
return h;
}
/**
* Generates 32 bit hash from byte array with default seed value.
*
* @param data byte array to hash
* @param length length of the array to hash
* @return 32 bit hash of the given array
*/
public static int hash32(final byte[] data,
int length)
{
return hash32(data, length, 0x9747b28c);
}
/**
* Generates 32 bit hash from a string.
*
* @param text string to hash
* @return 32 bit hash of the given string
*/
public static int hash32(final String text)
{
final byte[] bytes = text.getBytes();
return hash32(bytes, bytes.length);
}
/**
* Generates 32 bit hash from a substring.
*
* @param text string to hash
* @param from starting index
* @param length length of the substring to hash
* @return 32 bit hash of the given string
*/
public static int hash32(final String text,
int from,
int length)
{
return hash32(text.substring(from, from + length));
}
/**
* Generates 64 bit hash from byte array of the given length and seed.
*
* @param data byte array to hash
* @param length length of the array to hash
* @param seed initial seed value
* @return 64 bit hash of the given array
*/
public static long hash64(final byte[] data,
int length,
int seed)
{
final long m = 0xc6a4a7935bd1e995L;
final int r = 47;
long h = (seed & 0xffffffffl) ^ (length * m);
int length8 = length / 8;
for (int i = 0; i < length8; i++)
{
final int i8 = i * 8;
long k = ((long) data[i8 + 0] & 0xff) + (((long) data[i8 + 1] & 0xff) << 8)
+ (((long) data[i8 + 2] & 0xff) << 16) + (((long) data[i8 + 3] & 0xff) << 24)
+ (((long) data[i8 + 4] & 0xff) << 32) + (((long) data[i8 + 5] & 0xff) << 40)
+ (((long) data[i8 + 6] & 0xff) << 48) + (((long) data[i8 + 7] & 0xff) << 56);
k *= m;
k ^= k >>> r;
k *= m;
h ^= k;
h *= m;
}
switch (length % 8)
{
case 7:
h ^= (long) (data[(length & ~7) + 6] & 0xff) << 48;
case 6:
h ^= (long) (data[(length & ~7) + 5] & 0xff) << 40;
case 5:
h ^= (long) (data[(length & ~7) + 4] & 0xff) << 32;
case 4:
h ^= (long) (data[(length & ~7) + 3] & 0xff) << 24;
case 3:
h ^= (long) (data[(length & ~7) + 2] & 0xff) << 16;
case 2:
h ^= (long) (data[(length & ~7) + 1] & 0xff) << 8;
case 1:
h ^= (long) (data[length & ~7] & 0xff);
h *= m;
}
;
h ^= h >>> r;
h *= m;
h ^= h >>> r;
return h;
}
/**
* Generates 64 bit hash from byte array with default seed value.
*
* @param data byte array to hash
* @param length length of the array to hash
* @return 64 bit hash of the given string
*/
public static long hash64(final byte[] data,
int length)
{
return hash64(data, length, 0xe17a1465);
}
/**
* Generates 64 bit hash from a string.
*
* @param text string to hash
* @return 64 bit hash of the given string
*/
public static long hash64(final String text)
{
final byte[] bytes = text.getBytes();
return hash64(bytes, bytes.length);
}
/**
* Generates 64 bit hash from a substring.
*
* @param text string to hash
* @param from starting index
* @param length length of the substring to hash
* @return 64 bit hash of the given array
*/
public static long hash64(final String text,
int from,
int length)
{
return hash64(text.substring(from, from + length));
}
}