Skip to content

Commit 0f948b0

Browse files
committed
add benchmark for SOfieReader
1 parent c6c6f5a commit 0f948b0

File tree

2 files changed

+189
-0
lines changed

2 files changed

+189
-0
lines changed

root/tmva/sofie/CMakeLists.txt

+8
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,14 @@ RB_ADD_GBENCHMARK(RDF_SOFIE_Inference
184184

185185
add_dependencies(RDF_SOFIE_Inference SofieCompileModels)
186186

187+
# Register the SOFIEInference_Reader benchmark, built from SOFIEInference_Reader.cxx.
# RB_ADD_GBENCHMARK is a project macro defined outside this excerpt; the call mirrors
# the RDF_SOFIE_Inference registration just above it.
RB_ADD_GBENCHMARK(SOFIEInference_Reader
188+
SOFIEInference_Reader.cxx
189+
LABEL short
190+
LIBRARIES Core Cling TMVA ROOTTMVASofie ${SOFIE_BLAS_LIBS}
191+
)
192+
193+
# Build the SofieCompileModels target before this benchmark target.
add_dependencies(SOFIEInference_Reader SofieCompileModels)
194+
187195
#
188196
# add optimization flags for best performances (factor 3 on simple Conv1 test)
189197
#
+181
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
// Author: Federico Sossai (fsossai), 2021
2+
3+
#include <benchmark/benchmark.h>
4+
5+
#include <fstream>
6+
#include <thread>
7+
#include <chrono>
8+
#include <utility>
9+
#include <vector>
10+
#include <memory>
11+
#include <functional>
12+
#include <random>
13+
14+
#include "TMVA/RSofieReader.hxx"
15+
16+
#include "TMath.h"
17+
18+
19+
// NOTE(review): file-scope using-directive; the unqualified `vector` uses in this file rely on it.
using namespace std;
20+
// Only referenced from a commented-out line in the visible part of this file.
bool verbose = false;
21+
// When true, inputs are filled with a constant value and the first model output is
// written to "<model_file>.out"; when false, inputs are drawn uniformly from [-1, 1).
bool testOutput = true;
22+
23+
24+
/// Benchmark inference of a model loaded at run time through TMVA::Experimental::RSofieReader.
///
/// \param state       google-benchmark state. `range(0)` is the number of floats of one
///                    event (without the batch dimension); `range(1)` is the batch size
///                    (values <= 0 are treated as 1).
/// \param model_file  Model file name; it is looked up under "input_models/".
///
/// A pool of 64 events is evaluated in every benchmark iteration, `bsize` events per
/// Compute() call. The average wall time per event is published in the "time/evt(ms)"
/// counter. When the global `testOutput` is set, every input value is 1 and the first
/// output of the run is written to "<model_file>.out".
void BM_SOFIE_Inference(benchmark::State &state, std::string model_file)
{
   const std::string model_path = "input_models/" + model_file;
   const size_t inputSize = static_cast<size_t>(state.range(0)); // input size (without batch size)
   const size_t bsize = (state.range(1) > 0) ? static_cast<size_t>(state.range(1)) : 1;
   const size_t nevts = 64;
   // Number of floats handed to one Compute() call: a full batch, not a single event.
   const size_t batchLen = inputSize * bsize;
   // Events actually evaluated per iteration (a trailing partial batch is skipped).
   const size_t nProcessed = (nevts / bsize) * bsize;

   std::vector<float> input(inputSize * nevts);
   if (testOutput) {
      // Fixed data so that the dumped output is reproducible.
      input.assign(input.size(), 1.f);
   } else {
      static std::uniform_real_distribution<float> distribution(-1, 1);
      static std::default_random_engine generator;
      std::generate(input.begin(), input.end(), []() { return distribution(generator); });
   }

   // parse the model
   TMVA::Experimental::RSofieReader r(model_path);

   double totDuration = 0; // accumulated wall time in milliseconds
   int ntimes = 0;         // number of benchmark iterations executed
   std::vector<float> yOut;
   bool first = true;

   for (auto _ : state) {
      const auto t1 = std::chrono::high_resolution_clock::now();
      // `i + bsize <= nevts` keeps every slice inside `input`, even if bsize does not divide nevts.
      for (size_t i = 0; i + bsize <= nevts; i += bsize) {
         const auto batchBegin = input.begin() + inputSize * i;
         std::vector<float> x(batchBegin, batchBegin + batchLen);
         auto y = r.Compute(x);
         if (first) {
            // keep the first result so it can be dumped after the timed loop
            yOut = y;
            first = false;
         }
      }
      const auto t2 = std::chrono::high_resolution_clock::now();
      const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
      totDuration += duration / 1.E3; // in milliseconds
      ntimes++;
   }

   // Write the reference output outside of the timed loop so that file I/O does not
   // leak into the time reported by google-benchmark.
   if (testOutput && ntimes > 0) {
      std::ofstream f(model_file + ".out");
      f << yOut.size();
      for (size_t i = 0; i < yOut.size(); i++) {
         if ((i % 10) == 0)
            f << "\n"; // add endline every 10
         f << yOut[i] << " ";
      }
      f << "\n";
   }

   // Avoid a 0/0 counter when no iteration or no full batch was executed.
   if (ntimes > 0 && nProcessed > 0)
      state.counters["time/evt(ms)"] = totDuration / static_cast<double>(ntimes * nProcessed);
}
97+
#if 0
98+
// inference for model with 3 inputs
99+
template <class S>
100+
void BM_SOFIE_Inference_3(benchmark::State &state)
101+
{
102+
size_t bsize = state.range(0); // batch size
103+
size_t inputSize1 = state.range(1); // input 1 size
104+
size_t inputSize2 = state.range(2); // input 2 size
105+
size_t inputSize3 = state.range(3);
106+
107+
size_t nevts = 64;
108+
size_t nrep = nevts / bsize;
109+
110+
size_t eventSize = inputSize1 + inputSize2+inputSize3;
111+
112+
vector<float> input1(inputSize1*nevts);
113+
vector<float> input2(inputSize2*nevts);
114+
vector<float> input3(inputSize3*nevts);
115+
116+
if (!testOutput) {
117+
static std::uniform_real_distribution<float> distribution(-1, 1);
118+
static std::default_random_engine generator;
119+
std::generate(input1.begin(), input1.end(), []() { return distribution(generator); });
120+
std::generate(input2.begin(), input2.end(), []() { return distribution(generator); });
121+
std::generate(input3.begin(), input3.end(), []() { return distribution(generator); });
122+
}
123+
else {
124+
// generate fixed data
125+
input1 = vector<float>(input1.size(),1.);
126+
input2 = vector<float>(input2.size(),2.);
127+
input3 = vector<float>(input3.size(),3.);
128+
}
129+
130+
S s("");
131+
132+
//std::cout << "init done - do benchmark \n";
133+
134+
double totDuration = 0;
135+
int ntimes = 0;
136+
for (auto _ : state) {
137+
auto t1 = std::chrono::high_resolution_clock::now();
138+
for (int i = 0; i < nevts; i += bsize) {
139+
float * p1 = input1.data()+ inputSize1*i;
140+
float * p2 = input2.data()+ inputSize2*i;
141+
float * p3 = input3.data()+ inputSize3*i;
142+
auto y = s.infer(p1,p2,p3);
143+
}
144+
auto t2 = std::chrono::high_resolution_clock::now();
145+
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
146+
totDuration += duration / 1.E3; // in milliseconds
147+
ntimes++;
148+
}
149+
150+
state.counters["time/evt(ms)"] = totDuration / double(ntimes * nevts);
151+
}
152+
#endif
153+
154+
BENCHMARK_CAPTURE(BM_SOFIE_Inference,higgs_model_dense,"higgs_model_dense.onnx")->Args({7, 1})->Unit(benchmark::kMillisecond);
155+
BENCHMARK_CAPTURE(BM_SOFIE_Inference,Conv2DTranspose_Relu_Sigmoid,"Conv2DTranspose_Relu_Sigmoid.onnx")->Args({15,1})->Unit(benchmark::kMillisecond);
156+
BENCHMARK_CAPTURE(BM_SOFIE_Inference, ConvTrans2dModel_B1,"ConvTrans2dModel_B1.onnx")->Args({4*4*4,1})->Unit(benchmark::kMillisecond);
157+
158+
BENCHMARK_CAPTURE(BM_SOFIE_Inference, SimpleNN_Alice,"SimpleNN_Alice.onnx")->Args({16,1})->Unit(benchmark::kMillisecond);
159+
160+
BENCHMARK_CAPTURE(BM_SOFIE_Inference, Linear_16,"Linear_16.onnx")->Args({100, 16})->Unit(benchmark::kMillisecond);
161+
BENCHMARK_CAPTURE(BM_SOFIE_Inference, Linear_32,"Linear_32.onnx")->Args({100, 32})->Unit(benchmark::kMillisecond);
162+
BENCHMARK_CAPTURE(BM_SOFIE_Inference, Linear_64,"Linear_64.onnx")->Args({100, 64})->Unit(benchmark::kMillisecond);
163+
BENCHMARK_CAPTURE(BM_SOFIE_Inference, Linear_event,"Linear_event.onnx")->Args({100, 1})->Unit(benchmark::kMillisecond);
164+
BENCHMARK_CAPTURE(BM_SOFIE_Inference, Generator_B1,"Generator_B1.onnx")->Args({14, 1})->Unit(benchmark::kMillisecond);
165+
BENCHMARK_CAPTURE(BM_SOFIE_Inference, Generator_B64,"Generator_B64.onnx")->Args({14, 64})->Unit(benchmark::kMillisecond);
166+
167+
BENCHMARK_CAPTURE(BM_SOFIE_Inference, Conv_d100_L14_B1,"Conv_d100_L14_B1.onnx")->Args({100*100, 1})->Unit(benchmark::kMillisecond);
168+
BENCHMARK_CAPTURE(BM_SOFIE_Inference, Conv_d100_L14_B32,"Conv_d100_L14_B32.onnx")->Args({100*100, 32})->Unit(benchmark::kMillisecond);
169+
BENCHMARK_CAPTURE(BM_SOFIE_Inference, Conv_d100_L1_B1,"Conv_d100_L1_B1.onnx")->Args({100*100, 1})->Unit(benchmark::kMillisecond);
170+
171+
BENCHMARK_CAPTURE(BM_SOFIE_Inference, Conv3d_d32_L4_B1,"Conv3d_d32_L4_B1.onnx")->Args({32*32*32, 1})->Unit(benchmark::kMillisecond);
172+
173+
BENCHMARK_CAPTURE(BM_SOFIE_Inference, resnet18v1,"resnet18v1.onnx")->Args({3 * 224 * 224, 1})->Unit(benchmark::kMillisecond);
174+
175+
//Recurrent benchmark
176+
BENCHMARK_CAPTURE(BM_SOFIE_Inference, RNN_d10_L20_h8_B1,"RNN_d10_L20_h8_B1.onnx")->Args({3 * 5, 1})->Unit(benchmark::kMillisecond);
177+
BENCHMARK_CAPTURE(BM_SOFIE_Inference, GRU_d10_L20_h8_B1,"GRU_d10_L20_h8_B1.onnx")->Args({3 * 5, 1})->Unit(benchmark::kMillisecond);
178+
BENCHMARK_CAPTURE(BM_SOFIE_Inference, LSTM_d10_L20_h8_B1,"LSTM_d10_L20_h8_B1.onnx")->Args({1 * 1, 1})->Unit(benchmark::kMillisecond);
179+
180+
181+
BENCHMARK_MAIN();

0 commit comments

Comments
 (0)