BloomFilter_ブロンフィルタ
18369 ワード
ヘッダファイル Common.h BitMap.h BloomFilter.h Common.cn BitMap.c BloomFilter.cn
#pragma once
#ifndef _COMMON_H_
#define _COMMON_H_
#define size_t unsigned long
size_t BKDRHash(const char *str);
size_t SDBMHash(const char *str);
size_t RSHash(const char *str);
size_t APHash(const char *str);
size_t JSHash(const char *str);
#endif //!_COMMON_H_
#pragma once
#ifndef _BITMAP_H_
#define _BITMAP_H_
typedef unsigned long size_t;
typedef struct HashBitMap
{
size_t* _BitMap;
size_t _size;
size_t _capacity;
}BitMap,*pBitMap;
void InitBitMap(pBitMap bm, size_t size);
int InsertBitMap(pBitMap bm, size_t data);
int FindBitMap(pBitMap bm, size_t data);
void Set(pBitMap bm, size_t seat, size_t num);// 1
void ReSet(pBitMap bm, size_t seat, size_t num);// 0
size_t SizeBitMap(BitMap* bmp);
size_t CountBitMap(BitMap* bmp);
void DestroyBitMap(BitMap* bmp);
#endif//!_BITMAP_H_
#pragma once
#ifndef _BLOOMFILTER_H_
#define _BLOOMFILRR_H_
#include"BitMap.h"
#include"Common.h"
typedef char* DataType;
typedef size_t(*PHF)(DataType);
#define FUNCNUM 5
typedef struct BloomFilter
{
BitMap _bmp;
PHF _HashFunc[FUNCNUM];
size_t _size;
}BF;
void InitBloomFilter(BF* bf, PHF hashFunc[FUNCNUM], size_t size);
int InsertBF(BF* bf, DataType key);
int IsInBloomFilter(BF* bf, DataType key);
void DestroyBloomFilter(BF* bf);
#endif // ! _BLOOMFILTER_H_
ソースファイル
#include"Common.h"
size_t BKDRHash(const char *str)
{
register size_t hash = 0;
size_t ch;
while (ch = (size_t)*str++)
{
hash = hash * 131 + ch; // 31、131、1313、13131、131313..
// , :hash = hash << 7 + hash << 1 + hash + ch;
// Intel ,CPU ,
// 100 , 0( Debug , 1/3);
// ARM RISC , ARM Booth's Algorithm 32 , :
// 8-31 1 0 , 1
// 16-31 1 0 , 2
// 24-31 1 0 , 3
// , 4
// , ,
}
return hash;
}
/// @brief SDBM Hash Function
/// @detail SDBM( ) , BKDRHash , 。
size_t SDBMHash(const char *str)
{
register size_t hash = 0;
size_t ch;
while (ch = (size_t)*str++)
{
hash = 65599 * hash + ch;
//hash = (size_t)ch + (hash << 6) + (hash << 16) - hash;
}
return hash;
}
/// @brief RS Hash Function
/// @detail Robert Sedgwicks 《Algorithms in C》 。
size_t RSHash(const char *str)
{
register size_t hash = 0;
size_t magic = 63689;
size_t ch;
while (ch = (size_t)*str++)
{
hash = hash * magic + ch;
magic *= 378551;
}
return hash;
}
/// @brief AP Hash Function
/// @detail Arash Partow hash 。
size_t APHash(const char *str)
{
register size_t hash = 0;
size_t ch;
for (long i = 0; ch = (size_t)*str++; i++)
{
if ((i & 1) == 0)
{
hash ^= ((hash << 7) ^ ch ^ (hash >> 3));
}
else
{
hash ^= (~((hash << 11) ^ ch ^ (hash >> 5)));
}
}
return hash;
}
/// @brief JS Hash Function
/// Justin Sobel hash 。
size_t JSHash(const char *str)
{
if (!*str) // , 0
return 0;
register size_t hash = 1315423911;
size_t ch = 0;;
while (ch = (size_t)*str++)
{
hash ^= ((hash << 5) + ch + (hash >> 2));
}
return hash;
}
#include
#include
#include
#include"BitMap.h"
char* Cou = "\0\1\1\2\1\2\2\3";
void InitBitMap(pBitMap bm, size_t size)
{
bm->_capacity = (size >> 5) + 1;
bm->_size = 0;
bm->_BitMap = (size_t*)malloc(sizeof(size_t) * bm->_capacity);
assert(bm->_BitMap);
for (int i = 0; i < bm->_capacity; i++)
bm->_BitMap[i] = 0;
}
int InsertBitMap(pBitMap bm, size_t data)
{
if (FindBitMap(bm, data) == 1)
return 0;
size_t Addr = data >> 5;
size_t bit = data % 32;
Set(bm, Addr, bit);
bm->_size++;
return 1;
}
void Set(pBitMap bm, size_t Addr, size_t bit)// 1
{
bm->_BitMap[Addr] |= 1 << bit;
}
void ReSet(pBitMap bm, size_t Addr, size_t bit)// 0
{
bm->_BitMap[Addr] &= ~(1 << bit);
}
int FindBitMap(pBitMap bm, size_t data)
{
int Addr = data >> 5;
if (Addr >= bm->_capacity)
return 0;
int bit = data % 32;
return ((bm->_BitMap[Addr] & (1 << bit)) != 0);
}
size_t SizeBitMap(BitMap* bm)
{
assert(bm);
return bm->_size;
}
size_t CountBitMap(BitMap* bm)
{
assert(bm);
char* Bit4 = NULL;
size_t count = 0;
for (size_t i = 0; i < bm->_capacity; i++)
{
Bit4 = (char*)&bm->_BitMap[i];
int j = 0;
while (j < 4)
{
size_t num = *Bit4 & 7;
count += Cou[num];
num = *Bit4 >> 4;
count += Cou[num];
Bit4 ++;
j++;
}
}
return count;
}
void DestroyBitMap(BitMap* bm)
{
assert(bm);
free(bm->_BitMap);
bm->_size = 0;
bm->_capacity = 0;
}
#include
#include
#include"BloomFilter.h"
PHF Func[FUNCNUM] = { BKDRHash,SDBMHash,RSHash,APHash,JSHash };
void InitBloomFilter(BF* bf, PHF hashFunc[FUNCNUM], size_t size)
{
assert(bf);
InitBitMap(&bf->_bmp, size);
for(int i =0; i_HashFunc[i] = hashFunc[i];
bf->_size = 0;
}
int InsertBF(BF* bf, DataType key)
{
assert(bf);
size_t Addr[5] = {0};
int flag = 1;
size_t Max_num = bf->_bmp._capacity * 32;
for (int i = 0; i < FUNCNUM; i++)
{
Addr[i] = bf->_HashFunc[i](key);
if (Addr[i] >= Max_num)
Addr[i] %= Max_num;
}
for (int i = 0; i < FUNCNUM; i++)
{
if (InsertBitMap(&bf->_bmp._BitMap, Addr[i]))
flag = 1;
}
bf->_size += flag;
}
int IsInBloomFilter(BF* bf, DataType key)
{
assert(bf);
size_t Addr[FUNCNUM];
size_t Max_num = bf->_bmp._capacity * 32;
for (int i = 0; i < FUNCNUM; i++)
{
Addr[i] = bf->_HashFunc[i](key);
if (Addr[i] >= Max_num)
Addr[i] %= Max_num;
}
for (int i = 0; i < FUNCNUM; i++)
{
if (0 == FindBitMap(&bf->_bmp, Addr[i]))
return 0;
}
return 1;
}
void DestroyBloomFilter(BF* bf)
{
assert(bf);
DestroyBitMap(&bf->_bmp);
bf->_size = 0;
}
void test()
{
BF bf;
InitBloomFilter(&bf, Func, 1000);
InsertBF(&bf, " ");
InsertBF(&bf, " ");
InsertBF(&bf, " ");
InsertBF(&bf, " ");
if (1 == IsInBloomFilter(&bf, " "))
printf("Is In BlooFilter!!!!
");
else
printf("Is Not In BlooFilter!!!!
");
if (1 == IsInBloomFilter(&bf, " "))
printf("Is In BlooFilter!!!!
");
else
printf("Is Not In BlooFilter!!!!
");
if (1 == IsInBloomFilter(&bf, " "))
printf("Is In BlooFilter!!!!
");
else
printf("Is Not In BlooFilter!!!!
");
if (1 == IsInBloomFilter(&bf, " "))
printf("Is In BlooFilter!!!!
");
else
printf("Is Not In BlooFilter!!!!
");
if (1 == IsInBloomFilter(&bf, " "))
printf("Is In BlooFilter!!!!
");
else
printf("Is Not In BlooFilter!!!!
");
DestroyBloomFilter(&bf);
}
int main()
{
test();
system("pause");
return 0;
}