CUDA Thrustで時系列データのタイムスタンプでヒストグラムを作成する
CUDA ThrustはC++ STLのGPU版のようなもので、とても便利だ。しかもメチャクチャ速い。(`ー´)b
↓ このようなデータのタイムスタンプごとのヒストグラムを作成したいとする。
"2019/07/02 04:00:00.000","2019/07/02 04:04:04","2019-07-02T04:04:04Z","841","23.152.114.42","25846","9I","158.179.169.214","51321","ee","2hD","JFG2o2oas","MzG","MmWuH","ScsPLgdi","8","8Sd
dEBaWjIbLvBqnlX9j3LQN5I","912","198","336","769","278","554","rand-pa1"
"2019/07/02 04:00:00.000","2019/07/02 04:04:40","2019-07-02T04:04:40Z","478","248.99.6.15","41214","91","30.219.176.148","23907","xY","yJb","dguBRDbrb","N8i","fnSSU","uge0R9Ud","5","GkR6VA
Zr3vbshujEEFYYkSrMn7","953","917","636","718","142","607","rand-pa1"
Thrustでは、このように書くと。。
+c++
102 thrust::sort(key_in.begin(), key_in.end());
103
104 auto new_end = thrust::reduce_by_key(key_in.begin(),
105 key_in.end(),
106 value_in.begin(),
107 key_out.begin(),
108 value_out.begin());
+
↓ こういう感じで出力される。
20190702000000000,24236,
20190702010000000,15036,
20190702020000000,20739,
20190702030000000,18314,
2019/07/02の1時間ごとのヒストグラムになる。(各行の1列目のタイムスタンプをキーにして、4列目の値をキーごとに合計したもの)
サンプルはこちら↓
https://github.com/RuoAndo/qiita/tree/master/gpu/thrust/histogram
コードを見てみる。。。
#include <stdio.h>

#include <bitset>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include <cublas_v2.h>
#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/functional.h>
#include <thrust/generate.h>
#include <thrust/host_vector.h>
#include <thrust/random.h>
#include <thrust/reduce.h>
#include <thrust/sequence.h>
#include <thrust/sort.h>

/*
#include "Utilities.cuh"
#include "TimingGPU.cuh"
*/
#include "csv.hpp"

using namespace std;
25 int main( int argc, char* argv[] )
26 {
27 int N = atoi(argv[2]);
28
29 thrust::host_vector<unsigned long long> h_vec_1(N);
30 thrust::host_vector<long> h_vec_2(N);
31 const string csv_file = std::string(argv[1]);
32 vector<vector<string>> data;
33 Csv objCsv(csv_file);
34 if (!objCsv.getCsv(data)) {
35 cout << "read ERROR" << endl;
36 return 1;
37 }
38 for (int row = 0; row < data.size(); row++) {
39 vector<string> rec = data[row];
40 std::string timestamp = rec[0];
41 std::string bytes = rec[3];
42 for(size_t c = timestamp.find_first_of("\""); c != string::npos; c = c = timestamp.find_first_of("\"")){
43 timestamp.erase(c,1);
44 }
45 for(size_t c = timestamp.find_first_of("\/"); c != string::npos; c = c = timestamp.find_first_of("\/")){
46 timestamp.erase(c,1);
47 }
48 for(size_t c = timestamp.find_first_of("\:"); c != string::npos; c = c = timestamp.find_first_of("\:")){
49 timestamp.erase(c,1);
50 }
51 for(size_t c = timestamp.find_first_of(" "); c != string::npos; c = c = timestamp.find_first_of(" ")){
52 timestamp.erase(c,1);
53 }
54 for(size_t c = timestamp.find_first_of("."); c != string::npos; c = c = timestamp.find_first_of(".")){
55 timestamp.erase(c,1);
56 }
57 for(size_t c = bytes.find_first_of("\""); c != string::npos; c = c = bytes.find_first_of("\"")){
58 bytes.erase(c,1);
59 }
60 /*
61 std::cout << timestamp << std::endl;
62 std::cout << bytes << std::endl;
63 */
64 // h_vec_1.push_back(std::stoull(timestamp.c_str()));
65 h_vec_1[row] = std::stoull(timestamp.c_str());
66 h_vec_2[row] = std::stol(bytes);
67 }
68 int in_size = N;
69 thrust::device_vector<unsigned long long> key_in(N);
70 thrust::device_vector<long> value_in(N);
71 /*
72 thrust::device_vector<unsigned long long> key_in(in_size) = h_vec_1;
73 thrust::device_vector<long> value_in(in_size) = h_vec_2;
74 */
75 thrust::copy(h_vec_1.begin(), h_vec_1.end(), key_in.begin());
76 thrust::copy(h_vec_2.begin(), h_vec_2.end(), value_in.begin());
77 thrust::device_vector<unsigned long long> key_out(in_size, 0);
78 thrust::device_vector<long> value_out(in_size, 0);
79 thrust::sort(key_in.begin(), key_in.end());
80 auto new_end = thrust::reduce_by_key(key_in.begin(),
81 key_in.end(),
82 value_in.begin(),
83 key_out.begin(),
84 value_out.begin());
85 long new_size = new_end.first - key_out.begin();
86
87 for(long i=0; i < new_size;i++)
88 {
89 std::cout << key_out[i] << "," << value_out[i] << "," << std::endl;
90 }
91 std::cout << std::endl;
92 return 0;
93 }
68行目の手前(CSVを読み込むforループの終わり)までで、
h_vec_1に20190702000000000のような時刻データ(1列目のタイムスタンプから記号と空白を取り除いて整数にしたもの)、
h_vec_2に24のような値(4列目)が入っていることになる。(サンプルコードを参照してください)
ここから、75-76行目のthrust::copyで、ホスト側(h_vec_1, h_vec_2)からGPU側(key_in, value_in)に値を転送する。
// Excerpt of the listing (its lines 68-76, numbers kept as the line prefix):
// allocate the device-side input vectors and fill them from the host vectors.
68 int in_size = N;
// One slot per input row for the keys and for the values.
69 thrust::device_vector<unsigned long long> key_in(N);
70 thrust::device_vector<long> value_in(N);
// Disabled alternative declarations, left commented out. As written they are
// not valid C++: a declarator cannot take both a size argument and an
// "= initializer".
71 /*
72 thrust::device_vector<unsigned long long> key_in(in_size) = h_vec_1;
73 thrust::device_vector<long> value_in(in_size) = h_vec_2;
74 */
// Copy the host data into the device vectors: h_vec_1 -> key_in, h_vec_2 -> value_in.
75 thrust::copy(h_vec_1.begin(), h_vec_1.end(), key_in.begin());
76 thrust::copy(h_vec_2.begin(), h_vec_2.end(), value_in.begin());
実行してみる。。。
# g++ random_data.cpp
# time ./a.out 1000
# ./build-gpu.sh 4
# ./4 random_data.txt 1000
20190702000000000,24236,
20190702010000000,15036,
20190702020000000,20739,
20190702030000000,18314,
20190702040000000,25253,
20190702050000000,26673,
20190702060000000,22629,
20190702070000000,21106,
20190702080000000,25918,
20190702090000000,20456,
20190702100000000,20267,
20190702110000000,21369,
20190702120000000,14971,
20190702130000000,24321,
20190702140000000,14919,
20190702150000000,15799,
20190702160000000,27511,
20190702170000000,14720,
20190702180000000,21536,
20190702190000000,17116,
20190702200000000,16300,
20190702210000000,20417,
20190702220000000,22010,
20190702230000000,17901,
# time ./a.out 1000
# ./4 random_data.txt 1000
20190702000000000,22897,
20190702010000000,14236,
20190702020000000,25371,
20190702030000000,16396,
20190702040000000,16508,
20190702050000000,19085,
20190702060000000,22892,
20190702070000000,17482,
20190702080000000,26165,
20190702090000000,22288,
20190702100000000,15953,
20190702110000000,22415,
20190702120000000,22263,
20190702130000000,15309,
20190702140000000,17042,
20190702150000000,25743,
20190702160000000,28322,
20190702170000000,23870,
20190702180000000,24243,
20190702190000000,20595,
20190702200000000,21645,
20190702210000000,12285,
20190702220000000,18816,
20190702230000000,17696,
(`ー´)b
Author And Source
この問題について(CUDA Thrustで時系列データのタイムスタンプでヒストグラムを作成する)、より詳しい情報は次のURLにあります: https://qiita.com/Ruo_Ando/items/51613dc5da856f6861ba
著者帰属:元の著者の情報は、元のURLに含まれています。著作権は原作者に属します。
Content is automatically searched and collected through network algorithms. If there is a violation, please contact us. We will adjust (correct the author information or delete the content) as soon as possible.