Boyer-Moore実装

5977 ワード

#include <iostream>
#include <vector>
#include <algorithm>
#include <set>
#include <string>
#include <cstring>

namespace patternMatch{
using namespace std;

class  boyerMoore{
public:
        boyerMoore(void):skip(NULL), shift(NULL), charMax(256){}
        int bmMake(const string &patternStr){
            if( shiftMake(patternStr) < 0 || skipMake(patternStr) < 0)
                    return -1;
            pattern = patternStr;
        }
        ~boyerMoore(void){
            bmDesory();
        }
        const int match(const string &target) const{
                if(pattern.empty() || !shift || !skip || target.size() > pattern.size())
                        return -1;
                int targetLen = target.size();
                int patternLen = pattern.size();
                int i;
                int j = 0;
                while( j <= targetLen - patternLen){
                    for(i = patternLen -1; i >= 0 && target[i + j] == pattern[i]; i--);
                            if(i < 0 )
                                return j;
                            else
                                j =  max(shift[i], skip[pattern[i]] - (patternLen - 1 - i));
                }
        }
        virtual void bmDebug(){
            cout << "pattern string :" << pattern << endl;
            cout << "skip vec :" << endl;
            skipDebug();
            cout << "shift vec:" << endl;
            shiftDebug();
        }
private:
        int skipMake(const string &patternStr);
         /**build assit vec used like kmp*/
        void suffixMake(const string &pattern, int *suffix);
        int shiftMake(const string &patternStr);
        void skipDestory(void) {
            if(skip)
                delete skip;
        }
        void shiftDestory(void) {
            if(shift)
                delete shift;
        }
        void bmDesory(void){
            skipDestory();
            shiftDestory();
            pattern.clear();
        }
        virtual void skipDebug(void){
            if(skip){
                for(int pos = 0; pos < pattern.size(); pos++)
                    cout << skip[pattern[pos]] << " ";
                cout << endl;
            }
        }
        virtual void shiftDebug(void){
            if(shift){
                for(int pos = 0; pos < pattern.size(); pos++)
                    cout << shift[pos] << " ";
                cout << endl;
            }
        }
        int *skip;
        int *shift;
        string pattern;
        const int charMax;
};

int boyerMoore::skipMake(const string &patternStr){
    int pos;
    int patternLen = patternStr.size();
    skipDestory();
    if( NULL == (skip = new int[charMax]))
            return -1;
    else
        memset(skip, 0, charMax);

    for(pos = 0; pos < charMax; pos++)
            skip[pos] = patternLen;
    for(pos = 0; pos <= patternLen - 1; pos++)
            skip[(unsigned char)patternStr[pos]] = patternLen - pos - 1;
    return 0;
}


void boyerMoore::suffixMake(const string &pattern, int *suffix)
{
    int patterLen = pattern.size();
    suffix[patterLen - 1] = patterLen;
    for(int posMatch = patterLen - 2; posMatch >= 0; posMatch--){
            int posCheck = posMatch;
            while(posCheck >= 0 && pattern[posCheck] == pattern[patterLen - 1 - (posMatch - posCheck)]){
                    posCheck--;
            }
            suffix[posMatch] = posMatch - posCheck;
            //cout << posMatch << "-suffix->" << suffix[posMatch] << endl;
    }
}

int boyerMoore::shiftMake(const string &patternStr){
            int pos;
            int patternLen = patternStr.size();
            shiftDestory();
            if( NULL == (shift = new int[patternLen]))
                return -1;
            else
                memset(shift, 0, sizeof(int) * patternLen);

            int *suffix = new int[patternLen];
            if(!suffix)
                return -1;
            else
                memset(suffix, 0, sizeof(int) * patternLen);
            /** make suffix for shift*/
            suffixMake(patternStr, suffix);

            for(int pos = 0; pos < patternLen; pos++)
                    shift[pos] = patternLen;

            int posMark = 0;
            for(int pos = patternLen - 1; pos >= 0; pos--){
                    if(suffix[pos] == pos + 1){
                            int posVal  = patternLen - pos - 1;
                            for(; posMark < posVal; posMark++)
                                if( patternLen == shift[posMark]){
                                    shift[posMark] = posVal;
                                    //cout << posMark << "->" << shift[posMark] << endl;
                            }
                    }
            }
            for(int pos = 0; pos < patternLen -1; pos++)
                    shift[patternLen - suffix[pos] - 1] = patternLen - pos -1;
    if(suffix)
        delete suffix;
    return 0;
}

}

using namespace std;
using namespace patternMatch;

int main()
{
    string pattern, target;
    class boyerMoore bm;
    cout << "intput pattern match string" << endl;
    cin >> pattern;
    if(bm.bmMake(pattern) < 0){
        cout << "Init pattern string fail" << endl;
        return -1;
    }
    bm.bmDebug();
    cout << "intput target  string" << endl;
    return 0;
}

参考記事

  • http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
  • http://wenku.baidu.com/link?url=-GK6uL1f_nvlHtZ3KNXfY_9ydgOTijIXoIapJBweNISBW_pZIvKKS6sshbpELw99VumsxKgxXYur8qRSHxvuN7ub5OiSXY9yPAOEm63NMju###

  • IN BUILDING