Boost Spiritを使ってCSVファイルからIPv4アドレスを取り出す


Boost Spiritを使ってCSVファイルからIPv4アドレスを切り出す方法を考えてみる。

ルールを書く

11struct MyGrammar : grammar<MyGrammar>  // Boost.Spirit grammar matching four integers separated by '.'
12{
13    template<typename ScannerT>
14      struct definition  // holds the rules of this grammar for one scanner type
15      {
16          typedef rule<ScannerT> rule_t;
17          rule_t r;  // the only rule; start() returns it
18          // The constructor builds r. NOTE: int_p accepts a leading sign and sets no 0-255 limit on each part.
19          definition( const MyGrammar& )
20          {    
21            r = int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p; ; // >> +( '*' >> int_p );                                                                                       
22          }
23
24          const rule_t& start() const { return r; }  // rule at which parsing begins
25      };
26};

IPアドレス X.X.X.X にマッチするルールは、21行目のように書く。

```c++
21 r = int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p; ; // >> +( '*' >> int_p );
```

テスト用のログファイルを生成する。

$ g++ -o random_data random_data.cpp
$ ./random_data 3
$ cat random_data.txt
"2019/07/02 08:08:32.547","2019/07/02 08:08:32","2019/07/02 08:08:32","841","..208.171","25846","bn","..12.224","51321","w7","oFD","tBJ3eQQSb","REi","BPo3a","SEhsNFI4","8","VlMhtG15LwnbVVc4TqijRLwLwg","912","198","336","769","278","554","rand-pa1"
"2019/07/02 13:13:51.113","2019/07/02 13:13:51","2019/07/02 13:13:51","478","..190.69","41214","oY","..2.229","23907","LU","Pe4","u3iHsabCn","ryI","Bc9J3","DTx4mO4a","5","ozmapPf9uJUusozrYoTIkfp1m3","953","917","636","718","142","607","rand-pa1"
"2019/07/02 16:16:12.140","2019/07/02 16:16:12","2019/07/02 16:16:12","17","..103.219","15918","Oc","..17.162","8994","zj","JuF","Xl2D2U7M8","0TZ","xZ0IU","cZKwmW0Z","8","pzfXONA0IoxjDiduGFHg59TLvI","157","401","130","109","999","219","rand-pa1"

コードを見てみる。。

 1#include <iostream>
 2#include <fstream>
 3#include <sstream>
 4#include <string>
 5#include <boost/spirit.hpp>
 6#include <boost/tokenizer.hpp>
 7
 8using namespace std;
 9using namespace boost::spirit;
10
11struct MyGrammar : grammar<MyGrammar>  // Boost.Spirit grammar matching four integers separated by '.'
12{
13    template<typename ScannerT>
14      struct definition  // holds the rules of this grammar for one scanner type
15      {
16          typedef rule<ScannerT> rule_t;
17          rule_t r;  // the only rule; start() returns it
18          // The constructor builds r. NOTE: int_p accepts a leading sign and sets no 0-255 limit on each part.
19          definition( const MyGrammar& )
20          {
21            r = int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p; ; // >> +( '*' >> int_p );                                                                                       
22          }
23
24          const rule_t& start() const { return r; }  // rule at which parsing begins
25      };
26};
27
28std::vector < std::vector< std::string > > parse_csv(const char* filepath)  // Reads the file at filepath; returns one vector of fields per input line.
29{
30    std::vector< std::vector< std::string > > cells;  // result: cells[row][column]
31    std::string line;
32    std::ifstream ifs(filepath);  // open result is not checked: on failure the loop never runs and cells stays empty
33
34    while (std::getline(ifs, line)) {
35        // Split the line with Boost's default escaped-list rules: ',' separates, '"' quotes, '\' escapes.
36        std::vector< std::string > data;
37
38        boost::tokenizer< boost::escaped_list_separator< char > > tokens(line);
39        for (const std::string& token : tokens) {
40            data.push_back(token);
41        }
42
43        cells.push_back(data);  // appends a copy of this line's fields
44    }
45
46    return cells;
47}
48 
49#include <typeinfo>
50// Prints every CSV cell of the input file that MyGrammar matches in full.
51// Usage: ./spirit_file_read [INPUT_FILE_NAME]
52// Each hit is reported as "line:<0-based row index> <cell>".
53// Returns 1 on a wrong argument count or an unopenable file, otherwise 0.
54int main(int argc, char* argv[]){
55    using namespace std;
56
57    if(argc != 2){
58        cerr << "引数の数が間違っています." << endl;
59        cerr << "./spirit_file_read [INPUT_FILE_NAME]" << endl;
60        return 1;
61    }
62
63    // parse_csv() does not report open failures, so probe the file here.
64    // The temporary stream is closed again before parse_csv() reopens the file.
65    if(!ifstream(argv[1], ios::in)){
66        cerr << "Error: file not opened." << endl;
67        return 1;
68    }
69
70    const auto cells = parse_csv(argv[1]);
71    MyGrammar parser;
72
73    int counter = 0;  // 0-based index of the current CSV row
74    for (const auto& rows : cells) {
75      for (const auto& cell : rows) {
76        // info.full is true only when the rule consumed the whole cell.
77        parse_info<string::const_iterator> info =
78          parse( cell.begin(), cell.end(), parser );
79
80        if(info.full) {
81          cout << "line:" << counter << " " << cell << endl;
82        }
83      }
84
85      counter++;
86    }
87
88    return 0;
89}

実行してみる。。。

$ g++ -o random_data random_data.cpp
$ ./random_data 3
$ g++ -o ipaddress ipaddress.cpp -lboost_system
In file included from ipaddress.cpp:5:0:
/usr/include/boost/spirit.hpp:18:4: warning: #warning "This header is deprecated. Please use: boost/spirit/include/classic.hpp" [-Wcpp]
# warning "This header is deprecated. Please use: boost/spirit/include/classic.hpp"
^~~~~~
$ ./ipaddress random_data.txt
line:0 *.178.167.132
line:0 *.215.218.225
line:1 *.40.197.93
line:1 *.213.36.241
line:2 *.175.225.23
line:2 *.156.36.246

(`ー´)b