BloomFilter_ブロンフィルタ

18369 ワード

ヘッダファイル
  • Common.h
  • #pragma once
    
    #ifndef _COMMON_H_
    #define _COMMON_H_
    
    #define size_t unsigned long 
    
    size_t BKDRHash(const char *str);
    
    size_t SDBMHash(const char *str);
    
    size_t RSHash(const char *str);
    
    size_t APHash(const char *str);
    
    size_t JSHash(const char *str);
    
    
    
    #endif //!_COMMON_H_
    
  • BitMap.h
  • #pragma once
    
    
    #ifndef _BITMAP_H_
    #define _BITMAP_H_
    
    typedef  unsigned long  size_t;
    
    typedef struct  HashBitMap
    {
        size_t* _BitMap;
        size_t _size;
        size_t _capacity;
    }BitMap,*pBitMap;
    
    
    void InitBitMap(pBitMap bm, size_t size);
    int InsertBitMap(pBitMap bm, size_t data);
    int FindBitMap(pBitMap bm, size_t data);
    
    void Set(pBitMap bm, size_t seat, size_t num);//    1
    void ReSet(pBitMap bm, size_t seat, size_t num);//    0
    
    size_t SizeBitMap(BitMap* bmp);
    size_t CountBitMap(BitMap* bmp);
    void DestroyBitMap(BitMap* bmp);
    
    
    #endif//!_BITMAP_H_
  • BloomFilter.h
  • #pragma once
    #ifndef  _BLOOMFILTER_H_
    #define  _BLOOMFILRR_H_
    
    #include"BitMap.h"
    #include"Common.h"
    
    typedef char* DataType;
    typedef size_t(*PHF)(DataType);
    #define FUNCNUM 5 
    
    
    typedef struct BloomFilter
    {
        BitMap _bmp;
        PHF _HashFunc[FUNCNUM];
        size_t _size;
    }BF;
    
    void InitBloomFilter(BF* bf, PHF hashFunc[FUNCNUM], size_t size);
    int InsertBF(BF* bf, DataType key);
    int IsInBloomFilter(BF* bf, DataType key);
    void DestroyBloomFilter(BF* bf);
    
    #endif // ! _BLOOMFILTER_H_
    
    
    ソースファイル
  • Common.cn
  • 
    #include"Common.h"
    
    
    size_t BKDRHash(const char *str)
    {
        register size_t hash = 0;
        size_t ch;
        while (ch = (size_t)*str++)
        {
            hash = hash * 131 + ch;   //      31、131、1313、13131、131313..  
                                      //                       ,       :hash = hash << 7 + hash << 1 + hash + ch;  
                                      //     Intel   ,CPU                ,  
                                      //       100         ,           0(   Debug ,             1/3);  
                                      //  ARM  RISC        ,  ARM    Booth's Algorithm   32       ,         :  
                                      //    8-31   1 0 ,  1       
                                      //    16-31   1 0 ,  2       
                                      //    24-31   1 0 ,  3       
                                      //   ,  4       
                                      //   ,         ,                          
        }
        return hash;
    }
    /// @brief SDBM Hash Function  
    /// @detail            SDBM(          )       ,  BKDRHash    ,        。  
    
    size_t SDBMHash(const char *str)
    {
        register size_t hash = 0;
        size_t ch;
        while (ch = (size_t)*str++)
        {
            hash = 65599 * hash + ch;
            //hash = (size_t)ch + (hash << 6) + (hash << 16) - hash;  
        }
        return hash;
    }
    /// @brief RS Hash Function  
    /// @detail  Robert Sedgwicks  《Algorithms in C》        。  
    
    size_t RSHash(const char *str)
    {
        register size_t hash = 0;
        size_t magic = 63689;
        size_t ch;
        while (ch = (size_t)*str++)
        {
            hash = hash * magic + ch;
            magic *= 378551;
        }
        return hash;
    }
    /// @brief AP Hash Function  
    /// @detail  Arash Partow     hash  。  
    size_t APHash(const char *str)
    {
        register size_t hash = 0;
        size_t ch;
        for (long i = 0; ch = (size_t)*str++; i++)
        {
            if ((i & 1) == 0)
            {
                hash ^= ((hash << 7) ^ ch ^ (hash >> 3));
            }
            else
            {
                hash ^= (~((hash << 11) ^ ch ^ (hash >> 5)));
            }
        }
        return hash;
    }
    /// @brief JS Hash Function  
    ///  Justin Sobel     hash  。  
    
    size_t JSHash(const char *str)
    {
        if (!*str)        //        ,            0  
            return 0;
        register size_t hash = 1315423911;
        size_t ch = 0;;
        while (ch = (size_t)*str++)
        {
            hash ^= ((hash << 5) + ch + (hash >> 2));
        }
        return hash;
    }
  • BitMap.c
  • 
    #include
    #include
    #include
    #include"BitMap.h"
    
    char* Cou = "\0\1\1\2\1\2\2\3";
    
    void InitBitMap(pBitMap bm, size_t size)
    {
        bm->_capacity = (size >> 5) + 1;
    
        bm->_size = 0;
    
        bm->_BitMap = (size_t*)malloc(sizeof(size_t) * bm->_capacity);
    
        assert(bm->_BitMap);
    
        for (int i = 0; i < bm->_capacity; i++)
        bm->_BitMap[i] = 0;
    
    }
    
    int InsertBitMap(pBitMap bm, size_t data)
    {
        if (FindBitMap(bm, data) == 1)
            return 0;
    
        size_t Addr = data >> 5;
    
        size_t bit = data % 32;
    
        Set(bm, Addr, bit);
    
        bm->_size++;
    
        return 1;
    }
    
    void Set(pBitMap bm, size_t Addr, size_t bit)//    1
    {
        bm->_BitMap[Addr] |= 1 << bit;
    }
    
    void ReSet(pBitMap bm, size_t Addr, size_t bit)//    0
    {
        bm->_BitMap[Addr] &= ~(1 << bit);
    }
    
    int FindBitMap(pBitMap bm, size_t data)
    {
        int Addr = data >> 5;
    
        if (Addr >= bm->_capacity)
            return 0;
    
        int bit = data % 32;
    
        return ((bm->_BitMap[Addr] & (1 << bit)) != 0);
    }
    
    size_t SizeBitMap(BitMap* bm)
    {
        assert(bm);
    
        return bm->_size;
    }
    size_t CountBitMap(BitMap* bm)
    {
        assert(bm);
    
        char* Bit4 = NULL;
        size_t count = 0;
        for (size_t i = 0; i < bm->_capacity; i++)
        {
            Bit4 = (char*)&bm->_BitMap[i];
    
            int j = 0;
            while (j < 4)
            {
                size_t num = *Bit4 & 7;
                count += Cou[num];
                num = *Bit4 >> 4;
                count += Cou[num];
                Bit4 ++;
                j++;
            }
        }
    
        return count;
    }
    void DestroyBitMap(BitMap* bm)
    {
        assert(bm);
    
        free(bm->_BitMap);
    
        bm->_size = 0;
    
        bm->_capacity = 0;
    }
    
  • BloomFilter.cn
  • #include
    #include
    #include"BloomFilter.h"
    
    PHF Func[FUNCNUM] = { BKDRHash,SDBMHash,RSHash,APHash,JSHash };
    
    void InitBloomFilter(BF* bf, PHF hashFunc[FUNCNUM], size_t size)
    {
        assert(bf);
    
        InitBitMap(&bf->_bmp, size);
    
        for(int i =0; i_HashFunc[i] = hashFunc[i];
    
        bf->_size = 0;
    }
    
    int InsertBF(BF* bf, DataType key)
    {
        assert(bf);
    
        size_t Addr[5] = {0};
        int flag = 1;
    
        size_t Max_num = bf->_bmp._capacity * 32;
    
        for (int i = 0; i < FUNCNUM; i++)
        {
            Addr[i] = bf->_HashFunc[i](key);
            if (Addr[i] >= Max_num)
                Addr[i] %= Max_num;
        }
    
        for (int i = 0; i < FUNCNUM; i++)
        {
            if (InsertBitMap(&bf->_bmp._BitMap, Addr[i]))
                flag = 1;
        }
    
        bf->_size += flag;
    
    }
    
    int IsInBloomFilter(BF* bf, DataType key)
    {
    
        assert(bf);
    
        size_t Addr[FUNCNUM];
    
        size_t Max_num = bf->_bmp._capacity * 32;
    
    
        for (int i = 0; i < FUNCNUM; i++)
        {
            Addr[i] = bf->_HashFunc[i](key);
            if (Addr[i] >= Max_num)
                Addr[i] %= Max_num;
        }
    
        for (int i = 0; i < FUNCNUM; i++)
        {
            if (0 == FindBitMap(&bf->_bmp, Addr[i]))
                return 0;
        }
        return 1;
    }
    
    void DestroyBloomFilter(BF* bf)
    {
        assert(bf);
    
        DestroyBitMap(&bf->_bmp);
    
        bf->_size = 0;
    }
    
    
    void test()
    {
        BF bf;
    
        InitBloomFilter(&bf, Func, 1000);
    
        InsertBF(&bf, "  ");
        InsertBF(&bf, "  ");
        InsertBF(&bf, "  ");
        InsertBF(&bf, "  ");
    
    
        if (1 == IsInBloomFilter(&bf, "  "))
            printf("Is In BlooFilter!!!!
    "
    ); else printf("Is Not In BlooFilter!!!!
    "
    ); if (1 == IsInBloomFilter(&bf, " ")) printf("Is In BlooFilter!!!!
    "
    ); else printf("Is Not In BlooFilter!!!!
    "
    ); if (1 == IsInBloomFilter(&bf, " ")) printf("Is In BlooFilter!!!!
    "
    ); else printf("Is Not In BlooFilter!!!!
    "
    ); if (1 == IsInBloomFilter(&bf, " ")) printf("Is In BlooFilter!!!!
    "
    ); else printf("Is Not In BlooFilter!!!!
    "
    ); if (1 == IsInBloomFilter(&bf, " ")) printf("Is In BlooFilter!!!!
    "
    ); else printf("Is Not In BlooFilter!!!!
    "
    ); DestroyBloomFilter(&bf); } int main() { test(); system("pause"); return 0; }