Intel TBBのConcurrent Hashmapを使ってみる


Intel TBBは、並列処理を実装するためのライブラリである (Intelが提供している)

Concurrent HashMapは、Intel TBBコンテナのうちの1つである。(そのほかに、ベクターとキューもある)

↓このようにして使う。。


     7#include "tbb/concurrent_hash_map.h"
     8#include "tbb/blocked_range.h"
     9#include "tbb/parallel_for.h"
    10#include "tbb/tick_count.h"
    11#include "tbb/task_scheduler_init.h"
    12#include "tbb/concurrent_vector.h"
    13
    14using namespace std;
    15using namespace tbb;
    16
    17typedef tbb::concurrent_hash_map<std::string, std::string> iTbb_addr_pair;
    18static iTbb_addr_pair Tbb_Addr_Pair;

インサーション


    69          iTbb_addr_pair::accessor t;
    70          Tbb_Addr_Pair.insert(t, src_ipAddr);
    71          t->second = dest_ipAddr;

格納した要素を表示


    80    counter = 0;
    81    for(auto itr = Tbb_Addr_Pair.begin(); itr != Tbb_Addr_Pair.end(); ++itr) {
    82      if(counter > 0)
    83        std::cout << counter << ":" << itr->first << "," << itr->second << std::endl;
    84      counter++;
    85    }

慣れてくると、利用はシンプルである。

例えば、↓のようなデータからIPアドレスのペア (71.153.59.69/149.38.245.191 - IPアドレスはランダムに生成)を格納したいとする。。。


"2019/07/02 00:00:48.033","2019/07/02 00:00:48","2019-07-02T00:00:48Z","841","149.38.245.191","25846","pU","71.153.59.69","51321","Yx","mU6","7gGd0vvjl","5pw","6KLBv","qQOS2G3d","8","nQTqV
mphosHwgZlYtVANbxyXO8","912","198","336","769","278","554","rand-pa1"
"2019/07/02 02:02:54.230","2019/07/02 02:02:54","2019-07-02T02:02:54Z","478","70.146.59.78","41214","Pq","77.21.128.75","23907","Xd","3bt","N1ADubtI0","iJq","XvZpV","TqaGYZOW","5","TCkH2EM
jrPpuVtUhZB3bEpuMpw","953","917","636","718","142","607","rand-pa1"

CSVファイルを読み込み、各行の5列目と8列目 (0始まりのインデックスでは4番と7番) の項目を map(srcIP, destIP) に格納することにする。。

コードを見てみる。。


     1#include <iostream>
     2#include <fstream>
     3#include <string>
     4#include <vector>
     5#include <boost/tokenizer.hpp>
     6
     7#include "tbb/concurrent_hash_map.h"
     8#include "tbb/blocked_range.h"
     9#include "tbb/parallel_for.h"
    10#include "tbb/tick_count.h"
    11#include "tbb/task_scheduler_init.h"
    12#include "tbb/concurrent_vector.h"
    13
    14using namespace std;
    15using namespace tbb;
    16
    17typedef tbb::concurrent_hash_map<std::string, std::string> iTbb_addr_pair;
    18static iTbb_addr_pair Tbb_Addr_Pair;
    19
    20std::vector < std::vector< std::string > > parse_csv(const char* filepath)
    21{
    22    std::vector< std::vector< std::string > > cells;
    23    std::string line;
    24    std::ifstream ifs(filepath);
    25
    26    while (std::getline(ifs, line)) {
    27
    28        std::vector< std::string > data;
    29                                                                                                                                                            
    31        boost::tokenizer< boost::escaped_list_separator< char > > tokens(line);
    32        for (const std::string& token : tokens) {
    33            data.push_back(token);
    34        }
    35                                                                                                                                              
    37        cells.push_back(data);
    38    }
    39
    40    return cells;
    41}
    42
    43int main(int argc, char *argv[])
    44{
    45    int counter = 0;
    46
    47    std::string src_ipAddr;
    48    std::string dest_ipAddr;
    49
    50    const auto cells = parse_csv(argv[1]);
    51    for (const auto& rows : cells) {
    52
    53        counter = 0;
    54        for (const auto& cell : rows) {
    55          // std::cout << " " << std::endl;                                                                                                                           
    56
    57          if(counter == 4)
    58            {
    59              // std::cout << cell << std::endl;                                                                                                                                      
    60             src_ipAddr = string(cell);
    61            }
    62
    63          if(counter == 7)
    64            {
    65              // std::cout << cell << std::endl;                                                                                                                                      
    66             dest_ipAddr = string(cell);
    67            }
    68
    69          iTbb_addr_pair::accessor t;
    70          Tbb_Addr_Pair.insert(t, src_ipAddr);
    71          t->second = dest_ipAddr;
    72
    73          counter++;
    74
    75        }
    76                                                                                                                                               
    78    }
    79
    80    counter = 0;
    81    for(auto itr = Tbb_Addr_Pair.begin(); itr != Tbb_Addr_Pair.end(); ++itr) {
    82      if(counter > 0)
    83        std::cout << counter << ":" << itr->first << "," << itr->second << std::endl;
    84      counter++;
    85    }
    86
    87    return 0;
    88} 

実行してみる。。。


$ g++ tbb.cpp -ltbb

$ head -n 2 random_data.txt 
"2019/07/02 00:00:00.033","2019/07/02 00:00:00","2019-07-02T00:00:00Z","841","68.104.166.4","25846","hY","142.2.153.83","51321","ip","O0I","4s38T52FF","TUy","uuYOm","MSBa7NoD","8","67RgzNBbmggPJsN5p5J7YxQou6","912","198","336","769","278","554","rand-pa1"
"2019/07/02 00:00:00.043","2019/07/02 00:00:00","2019-07-02T00:00:00Z","478","24.65.11.145","41214","0p","40.193.169.129","23907","Pz","Vh8","D7i2u4FKG","mUX","7Eupl","ZjBDfZbs","5","NheJki5vi0XlwrOVS8MFE9vgQ2","953","917","636","718","142","607","rand-pa1"

$ ./a.out random_data.txt 
1:80.7.36.18,87.202.209.244
2:84.197.61.38,28.31.35.245
3:149.38.245.191,71.153.59.69
4:244.201.242.36,3.140.138.200
5:69.208.33.205,206.26.44.221
6:20.174.210.174,191.70.14.196
7:158.40.105.63,35.52.152.35
8:93.16.76.199,193.99.45.197
9:70.146.59.78,77.21.128.75
10:250.210.21.183,47.166.17.227

少しメモ:

STLのハッシュマップを並列処理に使う場合 -> この場合、STLのハッシュマップをmutex(ロック)でラップして、スレッドセーフに実装するのが吉だが、こうすると、コンテナー(ハッシュマップ)内部の並列化が行われないため、マルチコアが活きない模様。

一方で、Intel TBBのハッシュマップは「重い」(逐次実行では STL のコンテナより遅い) ので、並列化によって得られる効果が、その逐次パフォーマンスの低下を上回る場合に使うとよい。

詳しくは、↓を参照
https://www.oreilly.co.jp/books/9784873113555/
https://www.amazon.com/Intel-Threading-Building-Blocks-Parallelism/dp/0596514808

「したがって、追加した並列化がより遅い順次パフォーマンスよりも重要である場合に、高度なコンカレント・コンテナーを利用してください」
とある。(86ページ)

(`ー´)b