Boost Semantic Actionからマルチマップ(multimap)に値を格納する


下記のようなログから <timestamp, sourceIP, destinationIP>のようなデータ列を作りたいとする。。
「タプルの列」が思い浮かんだ。

 1"2019/07/02 00:00:48.073","2019/07/02 00:00:48","2019/07/02 00:00:48","841","32.249.185.200","25846","Uu","244.210.2.143","51321","V4","K6i","d9ecn1Blv","2lH","Jlx6g","Bo93lPJx","8"\
  ,"lAp3mgEhJPaOxnVYzaQhG6jo7E","912","198","336","769","278","554","rand-pa1"
 2"2019/07/02 03:03:24.826","2019/07/02 03:03:24","2019/07/02 03:03:24","478","80.115.211.4","41214","ov","34.252.129.206","23907","er","dOG","NyhxNslii","tKa","QXniz","uKXh6OT7","5",\
  "cFoCLZMoDrKQSnhgJap936q1Nl","953","917","636","718","142","607","rand-pa1"

*IPアドレスはランダムに生成

タプルについて調べる。。

「標準ライブラリには、単純に2つの型を格納するために定義されたpairというクラステンプレートがあります。」
「これは主に、.... 「関数から2つの値を返したい」といった場面で使われます。」
Boost C++ Libraries 稲葉一浩 秀和システム

tupleはあくまでpairの拡張であって、列(std::mapのような使い方)にはならないらしい。。

そのため、multimapを使って、<timestamp, sourceIP> , <timestamp, destinationIP>というペアを作ることにした。

セマンティックアクション部を見てみる。

// Index of the CSV row currently being parsed; main() increments it after each row.
12static int line_counter = 0;
// Matched address strings keyed by row index (a multimap, so one key may hold several values).
13std::multimap<int, std::string> m;
14
// Spirit grammar that accepts four integers separated by dots ("N.N.N.N").
// NOTE(review): int_p does not limit each field to 0-255 — confirm whether
// range validation of the octets is wanted.
15struct AddrParse : grammar<AddrParse>
16{
    // Semantic action: prints the length and text of the matched range and
    // stores the text in the global multimap `m` under the current `line_counter`.
17    struct MyAction
18    {
        // i1 / i2 delimit the matched input as the half-open range [i1, i2).
19        template<typename Ite>    
20          void operator()( Ite i1, Ite i2 ) const
21            { cout << "文字数:" << i2 - i1 << endl
22                   << " 内容:" << string(i1,i2) << endl;
23              m.insert(std::make_pair(line_counter, string(i1,i2)));
24            }
25    };
26
    // Grammar definition: owns the single rule `r` and exposes it through start().
27    template<typename ScannerT>
28      struct definition
29      {
30          typedef rule<ScannerT> rule_t;
31          rule_t r;
          // `self` is not used; the rule is built from primitives only.
32          definition( const AddrParse& self )
33          {
34            // r = 'a' >> (*ch_p('b'))[MyAction()] >> 'c';
            // NOTE(review): the action is attached to the dotted-quad sequence itself, so it
            // presumably fires as soon as that sequence matches, even if trailing input makes
            // the overall parse non-full — verify against Spirit Classic action semantics.
35            r = (int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p)[MyAction()]; // >> +( '*' >> int_p );
36          }
37          const rule_t& start() const { return r; }
38      };
39};

23行目で、パースした (INT).(INT).(INT).(INT) 形式の文字列を、行番号(line_counter)をキーにしてmultimapに挿入する。

+c++
23 m.insert(std::make_pair(line_counter, string(i1,i2)));
35 r = (int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p)[MyAction()]; // >> +( '*' >> int_p );
+

少し長いが、コードを見てみる。。。

 1#include <iostream>
 2#include <fstream>
 3#include <sstream>
 4#include <string>
 5#include <map>
 6#include <boost/spirit.hpp>
 7#include <boost/tokenizer.hpp>
 8
 9using namespace std;
10using namespace boost::spirit;
11
// Index of the CSV row currently being parsed; main() increments it after each row.
12static int line_counter = 0;
// Matched address strings keyed by row index (a multimap, so one key may hold several values).
13std::multimap<int, std::string> m;
14
// Spirit grammar that accepts four integers separated by dots ("N.N.N.N").
// NOTE(review): int_p does not limit each field to 0-255 — confirm whether
// range validation of the octets is wanted.
15struct AddrParse : grammar<AddrParse>
16{
    // Semantic action: prints the length and text of the matched range and
    // stores the text in the global multimap `m` under the current `line_counter`.
17    struct MyAction
18    {
        // i1 / i2 delimit the matched input as the half-open range [i1, i2).
19        template<typename Ite>    20          void operator()( Ite i1, Ite i2 ) const
21            { cout << "文字数:" << i2 - i1 << endl
22                   << " 内容:" << string(i1,i2) << endl;
23              m.insert(std::make_pair(line_counter, string(i1,i2)));
24            }
25    };
26
    // Grammar definition: owns the single rule `r` and exposes it through start().
27    template<typename ScannerT>
28      struct definition
29      {
30          typedef rule<ScannerT> rule_t;
31          rule_t r;
          // `self` is not used; the rule is built from primitives only.
32          definition( const AddrParse& self )
33          {
34            // r = 'a' >> (*ch_p('b'))[MyAction()] >> 'c';
            // NOTE(review): the action is attached to the dotted-quad sequence itself, so it
            // presumably fires as soon as that sequence matches, even if trailing input makes
            // the overall parse non-full — verify against Spirit Classic action semantics.
35            r = (int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p)[MyAction()]; // >> +( '*' >> int_p );
36          }
37          const rule_t& start() const { return r; }
38      };
39};
40
// Reads the file at `filepath` line by line and splits every line into fields with
// boost::tokenizer using a default-constructed escaped_list_separator.
// Returns one vector of field strings per input line, in file order.
// NOTE(review): the stream is not checked after opening; if the file cannot be opened
// the getline loop never runs and an empty result is returned.
// NOTE(review): std::vector is used but <vector> is not in the visible include list —
// presumably it arrives transitively; confirm.
41std::vector < std::vector< std::string > > parse_csv(const char* filepath)
42{
43    std::vector< std::vector< std::string > > cells;
44    std::string line;
45    std::ifstream ifs(filepath);
46
47    while (std::getline(ifs, line)) {
48
        // Fields of the current line.
49        std::vector< std::string > data;
50
51        boost::tokenizer< boost::escaped_list_separator< char > > tokens(line);
52        for (const std::string& token : tokens) {
53            data.push_back(token);
54        }
55        cells.push_back(data);
56    }
57
58    return cells;
59}
60 
61#include <typeinfo>
// Entry point. Expects exactly one argument (the input file path), splits the file into
// CSV cells with parse_csv(), runs the AddrParse grammar on every cell, prints the cells
// that matched completely, and finally dumps every entry of the global multimap `m`.
// Returns 1 on a wrong argument count or when the file cannot be opened, 0 otherwise.
62int main(int argc, char* argv[]){
63    using namespace std;
64
65    if(argc != 2){
66        cerr << "引数の数が間違っています." << endl;
67        cerr << "./spirit_file_read [INPUT_FILE_NAME]" << endl;
68        return 1;
69    }
70
    // This stream is only used to check that the file can be opened; the contents are
    // read by parse_csv() below, which opens the same path again.
71    ifstream ifs(argv[1], ios::in);
72    if(!ifs){
73        cerr << "Error: file not opened." << endl;
74        return 1;
75    }
76
    // NOTE(review): tmp and str are never used in this function.
77    string tmp;
78    string str;
79
    // Row index printed after "line:"; incremented in lockstep with the global line_counter.
80    int counter = 0;
81
82    const auto cells = parse_csv(argv[1]);
83    AddrParse parser;
    // Each element of `cells` (named `rows` here) is the list of fields of one input line.
84    85    for (const auto& rows : cells) {
86
87      for (const auto& cell : rows) {
88        parse_info<string::const_iterator> info =
89          parse( cell.begin(), cell.end(), parser );
90
        // Only cells reported as fully matched are printed here.
        // NOTE(review): the insertion into `m` happens inside the grammar's semantic action,
        // not in this branch, so `m` may also hold matches from cells that were not fully
        // consumed — confirm whether that is intended.
91        if(info.full) {
92          cout << "line:" << counter << " " << cell << endl;
93          // push_vector(cell);
94        }
95      }
      // Advance the key used by the semantic action for the next row.
96      line_counter++;
97      counter++;
98    }

   100    ifs.close();
   102    cout << "Displaying elements..." << endl;
   103
      // Print every stored (row index, matched text) pair in the multimap's iteration order.
   104    auto begin = m.begin(), end = m.end();
   105    for (auto iter = begin; iter != end; iter++) {
   106      cout << "key = " << iter->first << "\n";
   107      cout << "value = " << iter->second << "\n";
   108    }
   109
   110    /*                                                                                                                                                                                
   111    for (auto& x:m) {                                                                                                                                                                 
   112      std::cout << x.first << " => " << x.second << std::endl;                                                                                                                        
   113    }                                                                                                                                                                                 
   114    */
   115
   116    return 0;
   117}

実行してみる(IPアドレスの値はランダムに生成)


$ g++ -o random_data random_data.cpp
$ ./random_data 3

$ cat random_data.txt 
"2019/07/02 02:02:00.839","2019/07/02 02:02:00","2019/07/02 02:02:00","841","255.178.167.132","25846","iy","69.215.218.225","51321","bu","8MP","VX13Gpdkt","drN","deETa","gyAY4gdZ","8","TJoPPQuOKvrxzAjCd11rpqqSqs","912","198","336","769","278","554","rand-pa1"
"2019/07/02 02:02:52.006","2019/07/02 02:02:52","2019/07/02 02:02:52","478","104.40.197.93","41214","ol","76.213.36.241","23907","Vu","OYF","H6zQlnN5X","yV3","P2VPw","9D3viFsS","5","cogAhSIycmvdYl7RaZNjGCsWqj","953","917","636","718","142","607","rand-pa1"
"2019/07/02 12:12:16.086","2019/07/02 12:12:16","2019/07/02 12:12:16","17","120.175.225.23","15918","Hk","55.156.36.246","8994","u1","Tt2","hihIKl4xd","OX5","uj8uP","hKwtE4iF","8","9EuzqcBTUrBjAago2vY5MMugKb","157","401","130","109","999","219","rand-pa1"

$ g++ ipaddress5.cpp -o ipaddress5 -lboost_system
In file included from ipaddress5.cpp:6:0:
/usr/include/boost/spirit.hpp:18:4: warning: #warning "This header is deprecated. Please use: boost/spirit/include/classic.hpp" [-Wcpp]
 #  warning "This header is deprecated. Please use: boost/spirit/include/classic.hpp"
    ^~~~~~~
$ ./ipaddress5 random_data.txt 
文字数:15
 内容:255.178.167.132
line:0 255.178.167.132
文字数:14
 内容:69.215.218.225
line:0 69.215.218.225
文字数:13
 内容:104.40.197.93
line:1 104.40.197.93
文字数:13
 内容:76.213.36.241
line:1 76.213.36.241
文字数:14
 内容:120.175.225.23
line:2 120.175.225.23
文字数:13
 内容:55.156.36.246
line:2 55.156.36.246
Displaying elements...
key = 0
value = 255.178.167.132
key = 0
value = 69.215.218.225
key = 1
value = 104.40.197.93
key = 1
value = 76.213.36.241
key = 2
value = 120.175.225.23
key = 2
value = 55.156.36.246

↑で、keyは行番号である。
(`ー´)b