Boost Semantic ActionでCSVファイル中のIPv4のアドレスを取り出す


Boost.Spirit (Classic) の Semantic Action（セマンティックアクション）を使って、CSVファイル中のIPv4アドレスを取り出す方法を考えてみる。

ルールとアクションを書く。。。

13struct AddrParse : grammar<AddrParse>  // Grammar for text shaped like <int>.<int>.<int>.<int> (IPv4-like).
14{
15    struct MyAction  // Semantic action: reports the matched range on cout.
16    {
17        template<typename Ite>
18          void operator()( Ite i1, Ite i2 ) const  // i1 / i2: begin / end of the matched input.
19            { cout << "文字数:" << i2 - i1 << endl  // first line: the distance i2 - i1
20                   << " 内容:" << string(i1,i2) << endl; }  // second line: the matched characters
21    };
22
23    template<typename ScannerT>
24      struct definition  // Holds the rule of this grammar for a given scanner type.
25      {
26          typedef rule<ScannerT> rule_t;
27          rule_t r;  // the only rule of the grammar
28          definition( const AddrParse& self )  // self is not used
29          {  // r = four int_p fields separated by '.'; MyAction is attached to the whole sequence.
30            // r = 'a' >> (*ch_p('b'))[MyAction()] >> 'c';
31            r = (int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p)[MyAction()]; // >> +( '*' >> int_p );
32          }  // NOTE(review): int_p looks like a signed parser with no 0-255 check, so this is not a strict IPv4 validator - confirm.
33          const rule_t& start() const { return r; }  // r is the start rule
34      };
35};

31行目で、「整数 . 整数 . 整数 . 整数」の並びにマッチしたら MyAction を呼び出す。なお、int_p は符号付き整数にマッチし、各値が 0〜255 の範囲に収まるかどうかは検査しないので、厳密な IPv4 アドレスの検証にはなっていない点に注意。

+c++
31 r = (int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p)[MyAction()];
+

コードを見てみる。。。

 1#include <iostream>
 2#include <fstream>
 3#include <sstream>
 4#include <string>
 5#include <boost/spirit.hpp>
 6#include <boost/tokenizer.hpp>
 7
 8using namespace std;
 9using namespace boost::spirit;
10
11std::vector<string> v;
12
13struct AddrParse : grammar<AddrParse>  // Grammar for text shaped like <int>.<int>.<int>.<int> (IPv4-like).
14{
15    struct MyAction  // Semantic action: reports the matched range on cout.
16    {
17        template<typename Ite>
18          void operator()( Ite i1, Ite i2 ) const  // i1 / i2: begin / end of the matched input.
19            { cout << "文字数:" << i2 - i1 << endl  // first line: the distance i2 - i1
20                   << " 内容:" << string(i1,i2) << endl; }  // second line: the matched characters
21    };
22
23    template<typename ScannerT>
24      struct definition  // Holds the rule of this grammar for a given scanner type.
25      {
26          typedef rule<ScannerT> rule_t;
27          rule_t r;  // the only rule of the grammar
28          definition( const AddrParse& self )  // self is not used
29          {  // r = four int_p fields separated by '.'; MyAction is attached to the whole sequence.
30            // r = 'a' >> (*ch_p('b'))[MyAction()] >> 'c';
31            r = (int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p)[MyAction()]; // >> +( '*' >> int_p );
32          }  // NOTE(review): int_p looks like a signed parser with no 0-255 check, so this is not a strict IPv4 validator - confirm.
33          const rule_t& start() const { return r; }  // r is the start rule
34      };
35};
36
37std::vector < std::vector< std::string > > parse_csv(const char* filepath)
38{
39    // Reads the file at `filepath` line by line and splits each line into
40    // fields with boost::tokenizer using a default escaped_list_separator.
41    // Returns one vector of field strings per line, in file order. The open
42    // is not checked: if no line can be read, the result is simply empty.
43    typedef boost::tokenizer< boost::escaped_list_separator< char > > field_tokenizer;
44    std::ifstream input(filepath);
45    std::vector< std::vector< std::string > > table;
46    for (std::string record; std::getline(input, record); ) {
47        field_tokenizer fields(record);
48        table.emplace_back();  // open a new row, then fill it in place
49        std::vector< std::string >& row = table.back();
50        for (const std::string& field : fields) {
51            row.push_back(field);
52        }
53    }
54
55    return table;
56}
57
58#include <typeinfo>
59int main(int argc, char* argv[]){
60    // Entry point: expects exactly one argument, the path of the CSV file.
61    // Every cell of the file is run through AddrParse; cells that the
62    // grammar consumes completely are printed as "line:<row index> <cell>".
63    // Returns 1 on a usage error or when the file cannot be opened, else 0.
64    using namespace std;
65
66    if(argc != 2){
67        cerr << "引数の数が間違っています." << endl;
68        cerr << "./spirit_file_read [INPUT_FILE_NAME]" << endl;
69        return 1;
70    }
71
72    // parse_csv() does not report open failures, so probe the file first.
73    // The probe stream is a temporary and is closed again before parsing.
74    if(!ifstream(argv[1], ios::in)){
75        cerr << "Error: file not opened." << endl;
76        return 1;
77    }
78
79    const auto cells = parse_csv(argv[1]);
80    AddrParse parser;
81
82    int counter = 0;  // zero-based index of the current CSV row
83    for (const auto& rows : cells) {
84
85      for (const auto& cell : rows) {
86        parse_info<string::const_iterator> info =
87          parse( cell.begin(), cell.end(), parser );
88
89        // info.full is set only when the whole cell was consumed.
90        if(info.full) {
91          cout << "line:" << counter << " " << cell << endl;
92        }
93      }
94
95      counter++;
96    }
97
98    return 0;
99}

実行してみる。。。


$ g++ -o random_data random_data.cpp
$ ./random_data 3

$ cat random_data.txt 
"2019/07/02 02:02:00.839","2019/07/02 02:02:00","2019/07/02 02:02:00","841","*.178.167.132","25846","iy","*.215.218.225","51321","bu","8MP","VX13Gpdkt","drN","deETa","gyAY4gdZ","8","TJoPPQuOKvrxzAjCd11rpqqSqs","912","198","336","769","278","554","rand-pa1"
"2019/07/02 02:02:52.006","2019/07/02 02:02:52","2019/07/02 02:02:52","478","*.40.197.93","41214","ol","*.213.36.241","23907","Vu","OYF","H6zQlnN5X","yV3","P2VPw","9D3viFsS","5","cogAhSIycmvdYl7RaZNjGCsWqj","953","917","636","718","142","607","rand-pa1"
"2019/07/02 12:12:16.086","2019/07/02 12:12:16","2019/07/02 12:12:16","17","*.175.225.23","15918","Hk","*.156.36.246","8994","u1","Tt2","hihIKl4xd","OX5","uj8uP","hKwtE4iF","8","9EuzqcBTUrBjAago2vY5MMugKb","157","401","130","109","999","219","rand-pa1"

$ ./a.out random_data.txt 
文字数:15
 内容:255.178.167.132
line:0 255.178.167.132
文字数:14
 内容:69.215.218.225
line:0 69.215.218.225
文字数:13
 内容:104.40.197.93
line:1 104.40.197.93
文字数:13
 内容:76.213.36.241
line:1 76.213.36.241
文字数:14
 内容:120.175.225.23
line:2 120.175.225.23
文字数:13
 内容:55.156.36.246
line:2 55.156.36.246

(`ー´)b