Boost.SpiritのSemantic ActionでC/C++のトークンを切り出す


↑にwordcountの説明があるのだが、なかなかに高度なので(またはコンパイルできない)(*´Д`) 自作することにした。


int main
char* line = NULL;
size_t len = 0;

まずはこのようなサンプルからトークンを切り出すことを考える。。

文法定義は次のようになる。


    27    template<typename ScannerT>
    28      struct definition
    29      {
    30          typedef rule<ScannerT> rule_t;
    31          rule_t r; rule_t r_list;
    32          definition( const Abc& self )
    33          {
    34            r = (+alnum_p >> '_' >> alnum_p | +alnum_p >> '*' | *alnum_p )[MyAction()];
    35            r_list = r % +space_p;
    36          }
    37          const rule_t& start() const { return r_list; }
    38      };
    39};

+c++
size_t : +alnum_p >> '_' >> alnum_p
+
++

+c++
char* : +alnum_p >> '*'
+
++

コードを見てみる。。。

 1#include <iostream>
 2#include <string>
 3#include <vector>
 4#include <fstream>
 5
 6#include <boost/spirit.hpp>
 7#include <boost/lambda/lambda.hpp>
 8#include <boost/lambda/bind.hpp>
 9using namespace std;
10using namespace boost::spirit;
11
12struct Abc : grammar<Abc>
13{
14    struct MyAction
15    {
16        template<typename Ite>
17          void operator()( Ite i1, Ite i2 ) const
18            {
19              if(i2 - i1 > 0)
20                {
21                  cout << "# of characters: " << i2 - i1 << endl
22                       << "string: " << string(i1,i2) << endl;
23                }
24            }
25    };
26
27    template<typename ScannerT>
28      struct definition
29      {
30          typedef rule<ScannerT> rule_t;
31          rule_t r; rule_t r_list;
32          definition( const Abc& self )
33          {
34            r = (+alnum_p >> '_' >> alnum_p | +alnum_p >> '*' | *alnum_p )[MyAction()];
35            r_list = r % +space_p;
36          }    
37          const rule_t& start() const { return r_list; }
38      };
39};
40
41#include <typeinfo>
42int main(int argc, char* argv[]){
43    using namespace std;
44
45    if(argc != 2){
46        cerr << "wrong # of args." << endl;
47        cerr << "./spirit_file_read input.txt" << endl;
48        return 1;
49    }
50
51    ifstream ifs(argv[1], ios::in);
52    if(!ifs){
53        cerr << "Error: file not opened." << endl;
54        return 1;
55    }
56
57    string tmp;
58    string str;
59
60    int counter = 0;
61
62    Abc parser;
63
64    while(getline(ifs, tmp)){
65
66        cout << "line:" << counter << ":" << tmp << endl;
67
68       if( parse( tmp.begin(), tmp.end(), Abc() ).full );
69       cout << endl;
70
71       counter++;
72    }
73
74    ifs.close();
75    return 0;
76}

実行してみる。。。


$ g++ spirit_file_read_3.cpp -lboost_system
In file included from spirit_file_read_3.cpp:6:0:
/usr/include/boost/spirit.hpp:18:4: warning: #warning "This header is deprecated. Please use: boost/spirit/include/classic.hpp" [-Wcpp]
 #  warning "This header is deprecated. Please use: boost/spirit/include/classic.hpp"
    ^~~~~~~
$ ./a.out sample.c 
line:0:int main
# of characters: 3
string: int
# of characters: 4
string: main

line:1:char* line = NULL;
# of characters: 5
string: char*
# of characters: 4
string: line

line:2:size_t len = 0;
# of characters: 6
string: size_t
# of characters: 3
string: len

(`ー´)b

これをruleにまとめていけば、なんとかなるのではないだらうか?