From cdf415f0d2cc8e84337975dc49b03ead377b8d5e Mon Sep 17 00:00:00 2001
From: Abhigyan Acherjee <abhiacherjee@Abhigyans-MacBook-Air.local>
Date: Tue, 12 Dec 2023 17:34:34 +0530
Subject: [PATCH] Add benchmarks for the Codegen and CodegenNoGrad backends

---
 root/CMakeLists.txt                           |  4 ++--
 .../roofit/RooFitUnBinnedBenchmarks.cxx       | 12 +++++++++++
 root/roofit/roofit/benchRooFitBackends.cxx    | 20 ++++++++++++++++---
 3 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/root/CMakeLists.txt b/root/CMakeLists.txt
index a25eb8f3..27aa635c 100644
--- a/root/CMakeLists.txt
+++ b/root/CMakeLists.txt
@@ -1,10 +1,10 @@
 add_subdirectory(interpreter)
-add_subdirectory(io)
+#add_subdirectory(io)  # NOTE(review): disabled by this patch; confirm this is intentional and not a local build workaround
 add_subdirectory(hist)
 add_subdirectory(math)
 add_subdirectory(pyroot)
 if (roofit)
   add_subdirectory(roofit)
 endif()
-add_subdirectory(tree)
+#add_subdirectory(tree)  # NOTE(review): disabled by this patch; confirm this is intentional and not a local build workaround
 add_subdirectory(tmva)
diff --git a/root/roofit/roofit/RooFitUnBinnedBenchmarks.cxx b/root/roofit/roofit/RooFitUnBinnedBenchmarks.cxx
index 58f8ecf5..698b976a 100644
--- a/root/roofit/roofit/RooFitUnBinnedBenchmarks.cxx
+++ b/root/roofit/roofit/RooFitUnBinnedBenchmarks.cxx
@@ -239,6 +239,10 @@ const auto unit = benchmark::kMillisecond;
 auto Legacy = static_cast<int>(RooFit::EvalBackend::Value::Legacy);
 auto Cpu = static_cast<int>(RooFit::EvalBackend::Value::Cpu);
 auto Cuda = static_cast<int>(RooFit::EvalBackend::Value::Cuda);
+// Backends for the Codegen and CodegenNoGrad (codegen without clad) benchmarks:
+auto Codegen = static_cast<int>(RooFit::EvalBackend::Value::Codegen);
+auto CodegenNoGrad = static_cast<int>(RooFit::EvalBackend::Value::CodegenNoGrad);
+
 
 #define ARGS UseRealTime()->Unit(unit)
 
@@ -246,6 +250,9 @@ BENCHMARK(BDecayWithMixing)->Name("BDecayWithMixing_FitLegacy")->Args({nEvents,
 BENCHMARK(BDecayWithMixing)->Name("BDecayWithMixing_FitLegacyNumCPU2")->Args({nEvents, Legacy, 2})->ARGS;
 BENCHMARK(BDecayWithMixing)->Name("BDecayWithMixing_FitLegacyNumCPU4")->Args({nEvents, Legacy, 4})->ARGS;
 BENCHMARK(BDecayWithMixing)->Name("BDecayWithMixing_FitCPU")->Args({nEvents, Cpu, 1})->ARGS;
+// Benchmarks for the Codegen and CodegenNoGrad backends
+BENCHMARK(BDecayWithMixing)->Name("BDecayWithMixing_FitCodegen")->Args({nEvents, Codegen, 1})->ARGS;
+BENCHMARK(BDecayWithMixing)->Name("BDecayWithMixing_FitCodegenNoGrad")->Args({nEvents, CodegenNoGrad, 1})->ARGS;
 #ifdef DO_BENCH_ROOFIT_CUDA
 BENCHMARK(BDecayWithMixing)->Name("BDecayWithMixing_FitCUDA")->Args({nEvents, Cuda, 1})->ARGS;
 #endif
@@ -254,6 +261,9 @@ BENCHMARK(BDecayGaussResolution)->Name("BDecayGaussResolution_FitLegacy")->Args(
 BENCHMARK(BDecayGaussResolution)->Name("BDecayGaussResolution_FitLegacyNumCPU2")->Args({nEvents, Legacy, 2})->ARGS;
 BENCHMARK(BDecayGaussResolution)->Name("BDecayGaussResolution_FitLegacyNumCPU4")->Args({nEvents, Legacy, 4})->ARGS;
 BENCHMARK(BDecayGaussResolution)->Name("BDecayGaussResolution_FitCPU")->Args({nEvents, Cpu, 1})->ARGS;
+// Benchmarks for the Codegen and CodegenNoGrad backends
+BENCHMARK(BDecayGaussResolution)->Name("BDecayGaussResolution_FitCodegen")->Args({nEvents, Codegen, 1})->ARGS;
+BENCHMARK(BDecayGaussResolution)->Name("BDecayGaussResolution_FitCodegenNoGrad")->Args({nEvents, CodegenNoGrad, 1})->ARGS;
 #ifdef DO_BENCH_ROOFIT_CUDA
 BENCHMARK(BDecayGaussResolution)->Name("BDecayGaussResolution_FitCUDA")->Args({nEvents, Cuda, 1})->ARGS;
 #endif
@@ -262,6 +272,8 @@ BENCHMARK(BDecayDoubleGauss)->Name("BDecayDoubleGauss_FitLegacy")->Args({nEvents
 BENCHMARK(BDecayDoubleGauss)->Name("BDecayDoubleGauss_FitLegacyNumCPU2")->Args({nEvents, Legacy, 2})->ARGS;
 BENCHMARK(BDecayDoubleGauss)->Name("BDecayDoubleGauss_FitLegacyNumCPU4")->Args({nEvents, Legacy, 4})->ARGS;
 BENCHMARK(BDecayDoubleGauss)->Name("BDecayDoubleGauss_FitCPU")->Args({nEvents, Cpu, 1})->ARGS;
+BENCHMARK(BDecayDoubleGauss)->Name("BDecayDoubleGauss_FitCodegen")->Args({nEvents, Codegen, 1})->ARGS;
+BENCHMARK(BDecayDoubleGauss)->Name("BDecayDoubleGauss_FitCodegenNoGrad")->Args({nEvents, CodegenNoGrad, 1})->ARGS;
 #ifdef DO_BENCH_ROOFIT_CUDA
 BENCHMARK(BDecayDoubleGauss)->Name("BDecayDoubleGauss_FitCUDA")->Args({nEvents, Cuda, 1})->ARGS;
 #endif
diff --git a/root/roofit/roofit/benchRooFitBackends.cxx b/root/roofit/roofit/benchRooFitBackends.cxx
index 4513baee..afe8c169 100644
--- a/root/roofit/roofit/benchRooFitBackends.cxx
+++ b/root/roofit/roofit/benchRooFitBackends.cxx
@@ -22,11 +22,11 @@
 #include <benchmark/benchmark.h>
 
 int printLevel = 0;
-size_t nEvents = 100000;
+size_t nEvents = 10000;
 const auto minimizerName = "Minuit2";
 constexpr bool verbose = false;
 
-enum RunConfig_t { runScalar, runCpu, fitScalar, fitCpu, fitCuda };
+enum RunConfig_t { runScalar, runCpu, fitScalar, fitCpu, fitCuda, fitCodegen, fitCodegenNoGrad };
 
 void runFitBenchmark(benchmark::State &state, RooAbsPdf &pdf, RooAbsData &data)
 {
@@ -45,7 +45,11 @@ void runFitBenchmark(benchmark::State &state, RooAbsPdf &pdf, RooAbsData &data)
          pdf.fitTo(data, EvalBackend::Cpu(), Minimizer(minimizerName), PrintLevel(printLevel - 1), PrintEvalErrors(-1));
       } else if (runConfig == fitCuda) {
          pdf.fitTo(data, EvalBackend::Cuda(), Minimizer(minimizerName), PrintLevel(printLevel - 1), PrintEvalErrors(-1));
-      }
+      } else if (runConfig == fitCodegen) { // code-generation backend
+         pdf.fitTo(data, EvalBackend::Codegen(), Minimizer(minimizerName), PrintLevel(printLevel - 1), PrintEvalErrors(-1));
+      } else if (runConfig == fitCodegenNoGrad) { // code-generation backend without clad
+         pdf.fitTo(data, EvalBackend::CodegenNoGrad(), Minimizer(minimizerName), PrintLevel(printLevel - 1), PrintEvalErrors(-1));
+      }
       state.PauseTiming();
       params.assign(paramsInitial);
       state.ResumeTiming();
@@ -332,18 +336,26 @@ auto const unit = benchmark::kMillisecond;
 BENCHMARK(benchFitGauss)->Unit(unit)->Name("Gaus_FitLegacy")->Args({fitScalar});
 BENCHMARK(benchFitGauss)->Unit(unit)->Name("Gaus_FitCPU")->Args({fitCpu});
 CUDA_ONLY(BENCHMARK(benchFitGauss)->Unit(unit)->Name("Gaus_FitCUDA")->Args({fitCuda}));
+BENCHMARK(benchFitGauss)->Unit(unit)->Name("Gaus_FitCodegen")->Args({fitCodegen});
+BENCHMARK(benchFitGauss)->Unit(unit)->Name("Gaus_FitCodegenNoGrad")->Args({fitCodegenNoGrad});
 
 BENCHMARK(benchFitGaussXSigma)->Unit(unit)->Name("GausXS_FitLegacy")->Args({fitScalar});
 BENCHMARK(benchFitGaussXSigma)->Unit(unit)->Name("GausXS_FitCPU")->Args({fitCpu});
 CUDA_ONLY(BENCHMARK(benchFitGaussXSigma)->Unit(unit)->Name("GausXS_FitCUDA")->Args({fitCuda}));
+BENCHMARK(benchFitGaussXSigma)->Unit(unit)->Name("GausXS_FitCodegen")->Args({fitCodegen});
+BENCHMARK(benchFitGaussXSigma)->Unit(unit)->Name("GausXS_FitCodegenNoGrad")->Args({fitCodegenNoGrad});
 
 BENCHMARK(benchFit)->Unit(unit)->Name("AddPdf_FitLegacy")->Args({fitScalar});
 BENCHMARK(benchFit)->Unit(unit)->Name("AddPdf_FitCPU")->Args({fitCpu});
 CUDA_ONLY(BENCHMARK(benchFit)->Unit(unit)->Name("AddPdf_FitCUDA")->Args({fitCuda}));
+BENCHMARK(benchFit)->Unit(unit)->Name("AddPdf_FitCodegen")->Args({fitCodegen});
+BENCHMARK(benchFit)->Unit(unit)->Name("AddPdf_FitCodegenNoGrad")->Args({fitCodegenNoGrad});
 
 BENCHMARK(benchProdPdf)->Unit(unit)->Name("ProdPdf_FitLegacy")->Unit(benchmark::kMillisecond)->Args({fitScalar});
 BENCHMARK(benchProdPdf)->Unit(unit)->Name("ProdPdf_FitCPU")->Unit(benchmark::kMillisecond)->Args({fitCpu});
 CUDA_ONLY(BENCHMARK(benchProdPdf)->Unit(unit)->Name("ProdPdf_FitCUDA")->Unit(benchmark::kMillisecond)->Args({fitCuda}));
+BENCHMARK(benchProdPdf)->Unit(unit)->Name("ProdPdf_FitCodegen")->Unit(benchmark::kMillisecond)->Args({fitCodegen});
+BENCHMARK(benchProdPdf)->Unit(unit)->Name("ProdPdf_FitCodegenNoGrad")->Unit(benchmark::kMillisecond)->Args({fitCodegenNoGrad});
 
 // Watch out with the result from these benchmarks: if there are evaluation
 // errors during the minimization, the time differences will mostly come from
@@ -351,6 +363,8 @@ CUDA_ONLY(BENCHMARK(benchProdPdf)->Unit(unit)->Name("ProdPdf_FitCUDA")->Unit(ben
 BENCHMARK(benchModel)->Unit(unit)->Name("FitModel_FitLegacy")->Unit(benchmark::kMillisecond)->Args({fitScalar});
 BENCHMARK(benchModel)->Unit(unit)->Name("FitModel_FitCPU")->Unit(benchmark::kMillisecond)->Args({fitCpu});
 CUDA_ONLY(BENCHMARK(benchModel)->Unit(unit)->Name("FitModel_FitCUDA")->Unit(benchmark::kMillisecond)->Args({fitCuda}));
+BENCHMARK(benchModel)->Unit(unit)->Name("FitModel_FitCodegen")->Unit(benchmark::kMillisecond)->Args({fitCodegen});
+BENCHMARK(benchModel)->Unit(unit)->Name("FitModel_FitCodegenNoGrad")->Unit(benchmark::kMillisecond)->Args({fitCodegenNoGrad});
 
 int main(int argc, char **argv)
 {