|
| 1 | +// Author: Federico Sossai (fsossai), 2021 |
| 2 | + |
| 3 | +#include <benchmark/benchmark.h> |
| 4 | + |
| 5 | +#include <fstream> |
| 6 | +#include <thread> |
| 7 | +#include <chrono> |
| 8 | +#include <utility> |
| 9 | +#include <vector> |
| 10 | +#include <memory> |
| 11 | +#include <functional> |
| 12 | +#include <random> |
| 13 | + |
| 14 | +#include "TMVA/RSofieReader.hxx" |
| 15 | + |
| 16 | +#include "TMath.h" |
| 17 | + |
| 18 | + |
// Unqualified standard-library names (e.g. `vector`) are used further down in this file.
using namespace std;

// Verbosity switch; no active code in this file reads it at the moment.
bool verbose{false};

// When true the benchmarks feed constant inputs and dump the first inference
// result to "<model_file>.out"; when false the inputs are random in [-1, 1).
bool testOutput{true};
| 22 | + |
| 23 | + |
| 24 | +void BM_SOFIE_Inference(benchmark::State &state, std::string model_file) |
| 25 | +{ |
| 26 | + std::string model_path = "input_models/" + model_file; |
| 27 | + size_t inputSize = state.range(0); // input size (without batch size) |
| 28 | + size_t bsize = (state.range(1) > 0) ? state.range(1) : 1; |
| 29 | + size_t nevts = 64; |
| 30 | + size_t nrep = nevts / bsize; |
| 31 | + |
| 32 | + vector<float> input(inputSize*nevts); |
| 33 | + |
| 34 | + if (testOutput) { |
| 35 | + input = std::vector<float>(input.size(),1.); |
| 36 | + } |
| 37 | + else { |
| 38 | + static std::uniform_real_distribution<float> distribution(-1, 1); |
| 39 | + static std::default_random_engine generator; |
| 40 | + std::generate(input.begin(), input.end(), []() { return distribution(generator); }); |
| 41 | + } |
| 42 | + float *input_ptr = input.data(); |
| 43 | + |
| 44 | + |
| 45 | + // parse the model |
| 46 | + TMVA::Experimental::RSofieReader r(model_path); |
| 47 | + |
| 48 | + double totDuration = 0; |
| 49 | + int ntimes = 0; |
| 50 | + std::vector<float> yOut; |
| 51 | + bool first = true; |
| 52 | + bool doWrite = testOutput; |
| 53 | + for (auto _ : state) { |
| 54 | + auto t1 = std::chrono::high_resolution_clock::now(); |
| 55 | + for (int i = 0; i < nevts; i += bsize) { |
| 56 | + std::vector<float> x(input.begin()+inputSize*i, input.begin()+inputSize*(i+1)); |
| 57 | + auto y = r.Compute(x); |
| 58 | + if (first) { |
| 59 | + //std::cout << std::string(typeid(s).name()) << " : " << y[0] << " " << y[1] << std::endl; |
| 60 | + yOut = y; |
| 61 | + first = false; |
| 62 | + } |
| 63 | + } |
| 64 | + auto t2 = std::chrono::high_resolution_clock::now(); |
| 65 | + auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count(); |
| 66 | + totDuration += duration / 1.E3; // in milliseconds |
| 67 | + ntimes++; |
| 68 | + if (doWrite) { |
| 69 | + // write output for test |
| 70 | + //std::cout << "write output " << std::endl; |
| 71 | + std::ofstream f; |
| 72 | + std::string filename = std::string(model_file) + ".out"; |
| 73 | + f.open(filename); |
| 74 | + f << yOut.size(); |
| 75 | + for (size_t i = 0; i < yOut.size(); i++) { |
| 76 | + if ((i % 10) == 0) f << "\n"; // add endline every 10 |
| 77 | + f << yOut[i] << " "; |
| 78 | + } |
| 79 | + f << std::endl; |
| 80 | + f.close(); |
| 81 | + doWrite = false; |
| 82 | + } |
| 83 | + } |
| 84 | + |
| 85 | + state.counters["time/evt(ms)"] = totDuration / double(ntimes * nevts); |
| 86 | + // input[0] = -999; |
| 87 | + // s.inf |
| 88 | + // std::cout << "number of times " << s.itime << std::endl; |
| 89 | + // int n = s.itime - 1; |
| 90 | + // for (size_t i = 0; i < 5; ++i) { |
| 91 | + // double mean = TMath::Mean(n, resTimes[i].data()); |
| 92 | + // double rms = TMath::RMS(n, resfTimes[i].data()); |
| 93 | + // std::cout << "elapsed time for " << i << " : " << mean << " +/- " << rms / sqrt(n) << std::endl; |
| 94 | + // } |
| 95 | + //if (verbose) std::cout << "output : " << output.size() << " : " << output.front() << " ......" << output.back() << std::endl; |
| 96 | +} |
#if 0
// Disabled: benchmark for a model taking three separate input tensors.
// Benchmark arguments: range(0) = batch size, range(1..3) = per-event size of inputs 1..3.
template <class S>
void BM_SOFIE_Inference_3(benchmark::State &state)
{
   const size_t batch = state.range(0);
   const size_t size1 = state.range(1);
   const size_t size2 = state.range(2);
   const size_t size3 = state.range(3);

   const size_t nevts = 64;
   const size_t nrep = nevts / batch;                // currently not used
   const size_t eventSize = size1 + size2 + size3;   // currently not used

   std::vector<float> in1(size1 * nevts);
   std::vector<float> in2(size2 * nevts);
   std::vector<float> in3(size3 * nevts);

   if (testOutput) {
      // generate fixed data
      in1.assign(in1.size(), 1.f);
      in2.assign(in2.size(), 2.f);
      in3.assign(in3.size(), 3.f);
   } else {
      // random data in [-1, 1), drawn for input 1, then input 2, then input 3
      static std::uniform_real_distribution<float> distribution(-1, 1);
      static std::default_random_engine generator;
      for (auto *buffer : {&in1, &in2, &in3}) {
         for (float &value : *buffer)
            value = distribution(generator);
      }
   }

   S s("");

   double elapsedMs = 0;
   int iterations = 0;
   for (auto _ : state) {
      const auto start = std::chrono::high_resolution_clock::now();
      for (size_t evt = 0; evt < nevts; evt += batch) {
         // each call receives the start of the current batch in every input buffer
         float *p1 = in1.data() + size1 * evt;
         float *p2 = in2.data() + size2 * evt;
         float *p3 = in3.data() + size3 * evt;
         auto y = s.infer(p1, p2, p3);
      }
      const auto stop = std::chrono::high_resolution_clock::now();
      const auto micros = std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
      elapsedMs += micros / 1.E3; // in milliseconds
      ++iterations;
   }

   state.counters["time/evt(ms)"] = elapsedMs / double(iterations * nevts);
}
#endif
| 153 | + |
| 154 | +BENCHMARK_CAPTURE(BM_SOFIE_Inference,higgs_model_dense,"higgs_model_dense.onnx")->Args({7, 1})->Unit(benchmark::kMillisecond); |
| 155 | +BENCHMARK_CAPTURE(BM_SOFIE_Inference,Conv2DTranspose_Relu_Sigmoid,"Conv2DTranspose_Relu_Sigmoid.onnx")->Args({15,1})->Unit(benchmark::kMillisecond); |
| 156 | +BENCHMARK_CAPTURE(BM_SOFIE_Inference, ConvTrans2dModel_B1,"ConvTrans2dModel_B1.onnx")->Args({4*4*4,1})->Unit(benchmark::kMillisecond); |
| 157 | + |
| 158 | +BENCHMARK_CAPTURE(BM_SOFIE_Inference, SimpleNN_Alice,"SimpleNN_Alice.onnx")->Args({16,1})->Unit(benchmark::kMillisecond); |
| 159 | + |
| 160 | +BENCHMARK_CAPTURE(BM_SOFIE_Inference, Linear_16,"Linear_16.onnx")->Args({100, 16})->Unit(benchmark::kMillisecond); |
| 161 | +BENCHMARK_CAPTURE(BM_SOFIE_Inference, Linear_32,"Linear_32.onnx")->Args({100, 32})->Unit(benchmark::kMillisecond); |
| 162 | +BENCHMARK_CAPTURE(BM_SOFIE_Inference, Linear_64,"Linear_64.onnx")->Args({100, 64})->Unit(benchmark::kMillisecond); |
| 163 | +BENCHMARK_CAPTURE(BM_SOFIE_Inference, Linear_event,"Linear_event.onnx")->Args({100, 1})->Unit(benchmark::kMillisecond); |
| 164 | +BENCHMARK_CAPTURE(BM_SOFIE_Inference, Generator_B1,"Generator_B1.onnx")->Args({14, 1})->Unit(benchmark::kMillisecond); |
| 165 | +BENCHMARK_CAPTURE(BM_SOFIE_Inference, Generator_B64,"Generator_B64.onnx")->Args({14, 64})->Unit(benchmark::kMillisecond); |
| 166 | + |
| 167 | +BENCHMARK_CAPTURE(BM_SOFIE_Inference, Conv_d100_L14_B1,"Conv_d100_L14_B1.onnx")->Args({100*100, 1})->Unit(benchmark::kMillisecond); |
| 168 | +BENCHMARK_CAPTURE(BM_SOFIE_Inference, Conv_d100_L14_B32,"Conv_d100_L14_B32.onnx")->Args({100*100, 32})->Unit(benchmark::kMillisecond); |
| 169 | +BENCHMARK_CAPTURE(BM_SOFIE_Inference, Conv_d100_L1_B1,"Conv_d100_L1_B1.onnx")->Args({100*100, 1})->Unit(benchmark::kMillisecond); |
| 170 | + |
| 171 | +BENCHMARK_CAPTURE(BM_SOFIE_Inference, Conv3d_d32_L4_B1,"Conv3d_d32_L4_B1.onnx")->Args({32*32*32, 1})->Unit(benchmark::kMillisecond); |
| 172 | + |
| 173 | +BENCHMARK_CAPTURE(BM_SOFIE_Inference, resnet18v1,"resnet18v1.onnx")->Args({3 * 224 * 224, 1})->Unit(benchmark::kMillisecond); |
| 174 | + |
| 175 | +//Recurrent benchmark |
| 176 | +BENCHMARK_CAPTURE(BM_SOFIE_Inference, RNN_d10_L20_h8_B1,"RNN_d10_L20_h8_B1.onnx")->Args({3 * 5, 1})->Unit(benchmark::kMillisecond); |
| 177 | +BENCHMARK_CAPTURE(BM_SOFIE_Inference, GRU_d10_L20_h8_B1,"GRU_d10_L20_h8_B1.onnx")->Args({3 * 5, 1})->Unit(benchmark::kMillisecond); |
| 178 | +BENCHMARK_CAPTURE(BM_SOFIE_Inference, LSTM_d10_L20_h8_B1,"LSTM_d10_L20_h8_B1.onnx")->Args({1 * 1, 1})->Unit(benchmark::kMillisecond); |
| 179 | + |
| 180 | + |
| 181 | +BENCHMARK_MAIN(); |
0 commit comments