TensorRT #285

Open · wants to merge 55 commits into master

Commits (55):
564aece
Files for tensort rt pose detection, for now nothing done.
bushibushi Sep 21, 2017
dfc1f82
Adding timer in new demo and checking build before replacing inference.
bushibushi Sep 21, 2017
a4885e0
PoseExtractorTensorRT changed names for build conflicts but still per…
bushibushi Sep 21, 2017
c05580d
Started modifying tutorial pose 3.
bushibushi Sep 21, 2017
9a97e93
More precise timing.
bushibushi Sep 21, 2017
4778ed6
More precise timings before replacing inference.
bushibushi Sep 21, 2017
9c258b7
Clearer timing display.
bushibushi Sep 21, 2017
e6fbd25
Replaced poseExtractorCaffe with poseExtractorTensorRT
bushibushi Sep 21, 2017
f290fc5
Added inference sample code at end of poseExtractorTensorRT to work o…
bushibushi Sep 21, 2017
ddc2396
First code adaptation trial. Will not compile, still loads to replace.
bushibushi Sep 22, 2017
f09f27b
New netTensorRT version, cleaner, ready for debug, loads of questions.
bushibushi Sep 22, 2017
ba2b435
Fixed everything to compile, runs, reads network and convert but then…
bushibushi Sep 22, 2017
97bbc05
Debug logs.
bushibushi Sep 22, 2017
c666163
First try on tensorRT inference with caffe Blobs.
bushibushi Sep 25, 2017
1c77534
Running, but not pose recognition. Find a way to copy memory correctly.
bushibushi Sep 25, 2017
1380b14
pose.sh script
bushibushi Sep 25, 2017
32f5387
Timing in original pose demo
bushibushi Sep 26, 2017
d2310db
Did not take into account forwardPass input data !
bushibushi Sep 26, 2017
576c055
Data copied to cuda memory. Correct sizes hardcoded, no CUDA error an…
bushibushi Sep 26, 2017
e5d27fe
Tutorial pose 3 working !!!! Gaining x2 inference time, now time for …
bushibushi Sep 26, 2017
7d37095
TensorRT Net input and output dimensions at runtime.
bushibushi Sep 26, 2017
f3a898c
NetTensorRT cleaning.
bushibushi Sep 26, 2017
5c630b5
NetTensorRT cleaning bis.
bushibushi Sep 26, 2017
a617583
Cleaning compilation fix.
bushibushi Sep 26, 2017
d3a31e0
caffeToGIE needs fixed input size and cannot be determined at runtime…
bushibushi Sep 26, 2017
f6df326
Engine serialization and deserialization.
bushibushi Sep 26, 2017
404077a
Targetting highest possible FPS in demo.
bushibushi Sep 26, 2017
1971baa
Asynchronous inference.
bushibushi Sep 26, 2017
330d4bb
Way simpler inference code, a lot was useless.
bushibushi Sep 26, 2017
c2be9aa
Removing log to speedup inference.
bushibushi Sep 26, 2017
89e3b44
ResizeAndMergeBase CPU version.
bushibushi Sep 27, 2017
b54ae11
Inference model for pose net size 256x192
bushibushi Sep 27, 2017
7808f89
Detailed poseExtractor Timings.
bushibushi Sep 27, 2017
8023fb1
Faster Resize and Merge.
bushibushi Sep 27, 2017
b3ae8ec
Merge branch 'master' into TensorRT_PR
bushibushi Oct 11, 2017
ec58a48
TENSORRT precompiler guards
bushibushi Oct 11, 2017
33aa099
TENSORRT compilation is still partly using caffe
bushibushi Oct 11, 2017
359b601
Missing guards for TensorRT
bushibushi Oct 11, 2017
d4a89d0
PIMPL version of poseExtractorTensorRT, still having template compila…
bushibushi Nov 13, 2017
766c44a
Spot the differences part 1.
bushibushi Nov 13, 2017
c12dd28
Spot the differences part 2
bushibushi Nov 13, 2017
047d18b
Spot the differences 3
bushibushi Nov 13, 2017
9e4d903
Fixed compilation without TensorRT
bushibushi Nov 14, 2017
b3655d0
Fix attempt
bushibushi Nov 14, 2017
e76dc71
Wrong variable name
bushibushi Nov 14, 2017
bbe83e9
Merge branch 'master' into TensorRT_PR
bushibushi Nov 14, 2017
6456dff
Too much changed in poseExtractorCaffe, need to rewrite TensorRT one …
bushibushi Nov 14, 2017
b3673e6
PIMPL for netTensorRT
bushibushi Nov 15, 2017
273a351
Fix source issues, example remains.
bushibushi Nov 15, 2017
ca682c4
Fix samples
bushibushi Nov 15, 2017
cb0d440
Compilation fixed, TensorRT net optimisation works, segfault on infer…
bushibushi Nov 15, 2017
827510b
Code kind of work, not full pipeline lead to no shape displayed, size…
bushibushi Nov 15, 2017
a1619fa
Useless preproc macros
bushibushi Nov 15, 2017
344ab67
NetTensorRT modifs
bushibushi Nov 15, 2017
b9c33c9
Merge branch 'master' into TensorRT_PR
bushibushi Nov 15, 2017
9 changes: 9 additions & 0 deletions Makefile
@@ -68,6 +68,10 @@ else
LDFLAGS += -Wl,-rpath=$(CAFFE_DIR)/lib
INCLUDE_DIRS += $(CAFFE_DIR)/include
LIBRARY_DIRS += $(CAFFE_DIR)/lib

ifeq ($(DEEP_NET), tensorrt)
COMMON_FLAGS += -DUSE_TENSORRT
endif
endif

Review comment:

If DEEP_NET is tensorrt, the else clause is never reached, so lines 73-76 are not needed. It looks like the libraries and dirs that are part of the else clause will still be needed for tensorrt, however, so those should be moved up.

Contributor Author:

For now TensorRT is used for the main inference in the middle of a pipeline that still uses Caffe; for example, I use Caffe blobs for input and output. I think it's lines 64-65 that should be removed.
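One way to read the suggestion, sketched with the fragments visible in this diff (the exact placement is an assumption, not verified against the full Makefile): the Caffe paths are hoisted above the backend check so both branches see them, and only the TensorRT-specific flags stay conditional.

```makefile
# Hypothetical restructure of the reviewer's suggestion: the Caffe
# include/library paths are needed by both backends, so they sit
# outside the conditional; only the backend switch stays guarded.
LDFLAGS      += -Wl,-rpath=$(CAFFE_DIR)/lib
INCLUDE_DIRS += $(CAFFE_DIR)/include
LIBRARY_DIRS += $(CAFFE_DIR)/lib

ifeq ($(DEEP_NET), tensorrt)
    COMMON_FLAGS += -DUSE_TENSORRT
    LIBRARIES    += nvinfer nvcaffe_parser
endif
```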


##############################
@@ -145,6 +149,11 @@ ifeq ($(USE_CUDA), 1)
LIBRARIES += cudart cublas curand
endif

# TensorRT
ifeq ($(DEEP_NET), tensorrt)
LIBRARIES += nvinfer nvcaffe_parser
endif

# LIBRARIES += glog gflags boost_system boost_filesystem m hdf5_hl hdf5 caffe
LIBRARIES += glog gflags boost_system boost_filesystem m hdf5_hl hdf5

37 changes: 35 additions & 2 deletions examples/tutorial_pose/1_extract_from_image.cpp
@@ -59,10 +59,29 @@ DEFINE_double(render_threshold, 0.05, "Only estimated keypoint
DEFINE_double(alpha_pose, 0.6, "Blending factor (range 0-1) for the body part rendering. 1 will show it completely, 0 will"
" hide it. Only valid for GPU rendering.");

typedef std::vector<std::pair<std::string, std::chrono::high_resolution_clock::time_point>> OpTimings;

static OpTimings timings;

static void timeNow(const std::string& label){
const auto now = std::chrono::high_resolution_clock::now();
const auto timing = std::make_pair(label, now);
timings.push_back(timing);
}

static std::string timeDiffToString(const std::chrono::high_resolution_clock::time_point& t1,
const std::chrono::high_resolution_clock::time_point& t2 ) {
return std::to_string((double)std::chrono::duration_cast<std::chrono::duration<double>>(t1 - t2).count() * 1e3) + " ms";
}


int openPoseTutorialPose1()
{
op::log("OpenPose Library Tutorial - Example 1.", op::Priority::High);
// ------------------------- INITIALIZATION -------------------------

timeNow("Start");

// Step 1 - Set logging level
// - 0 will output all the logging messages
// - 255 will output nothing
@@ -80,7 +99,7 @@ int openPoseTutorialPose1()
// Check no contradictory flags enabled
if (FLAGS_alpha_pose < 0. || FLAGS_alpha_pose > 1.)
op::error("Alpha value for blending must be in the range [0,1].", __LINE__, __FUNCTION__, __FILE__);
if (FLAGS_scale_gap <= 0. && FLAGS_scale_number > 1)
if (FLAGS_scale_gap <= 0. && FLAGS_scale_number > 1.)
op::error("Incompatible flag configuration: scale_gap must be greater than 0 or scale_number = 1.",
__LINE__, __FUNCTION__, __FILE__);
// Enabling Google Logging
@@ -101,6 +120,8 @@ int openPoseTutorialPose1()
poseExtractorCaffe.initializationOnThread();
poseRenderer.initializationOnThread();

timeNow("Initialization");

// ------------------------- POSE ESTIMATION AND RENDERING -------------------------
// Step 1 - Read and load image, error if empty (possibly wrong path)
// Alternative: cv::imread(FLAGS_image_path, CV_LOAD_IMAGE_COLOR);
@@ -125,13 +146,25 @@ int openPoseTutorialPose1()
poseRenderer.renderPose(outputArray, poseKeypoints, scaleInputToOutput);
// Step 6 - OpenPose output format to cv::Mat
auto outputImage = opOutputToCvMat.formatToCvMat(outputArray);

timeNow("Step 5");

// ------------------------- SHOWING RESULT AND CLOSING -------------------------
// Step 1 - Show results
frameDisplayer.displayFrame(outputImage, 0); // Alternative: cv::imshow(outputImage) + cv::waitKey(0)
// Step 2 - Logging information message
op::log("Example 1 successfully finished.", op::Priority::High);
// Return successful message

const auto totalTime = timeDiffToString(timings.back().second, timings.front().second);
const auto message = "Pose estimation successfully finished. Total time: " + totalTime + ".";
op::log(message, op::Priority::High);

for(OpTimings::iterator timing = timings.begin()+1; timing != timings.end(); ++timing) {
const auto log_time = (*timing).first + " - " + timeDiffToString((*timing).second, (*(timing-1)).second);
op::log(log_time, op::Priority::High);
}


return 0;
}

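Both tutorial examples add the same label/timestamp instrumentation. A standalone sketch of the pattern, compilable outside OpenPose (the millisecond formatting here uses `duration<double, std::milli>` instead of the diff's manual `* 1e3`, which is behavior-equivalent):

```cpp
#include <chrono>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

using Clock = std::chrono::high_resolution_clock;
using OpTimings = std::vector<std::pair<std::string, Clock::time_point>>;

static OpTimings timings;

// Record a named timestamp at the current point in the program.
static void timeNow(const std::string& label)
{
    timings.emplace_back(label, Clock::now());
}

// Difference between two time points, formatted in milliseconds.
static std::string timeDiffToString(const Clock::time_point& t1,
                                    const Clock::time_point& t2)
{
    const auto ms = std::chrono::duration<double, std::milli>(t1 - t2).count();
    return std::to_string(ms) + " ms";
}

// Print each step's duration relative to the previous timestamp,
// mirroring the loop at the end of the examples.
static void printTimings()
{
    for (auto it = timings.begin() + 1; it != timings.end(); ++it)
        std::cout << it->first << " - "
                  << timeDiffToString(it->second, (it - 1)->second) << "\n";
}
```

Calling `timeNow("Start")`, `timeNow("Initialization")`, and so on at each stage, then `printTimings()` at the end, reproduces the per-step report the PR adds.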
180 changes: 180 additions & 0 deletions examples/tutorial_pose/3_extract_from_image_TensorRT.cpp
@@ -0,0 +1,180 @@
// ------------------------- OpenPose Library Tutorial - Pose - Example 3 - Extract from Image with TensorRT -------------------------
// This first example shows the user how to:
// 1. Load an image (`filestream` module)
// 2. Extract the pose of that image (`pose` module)
// 3. Render the pose on a resized copy of the input image (`pose` module)
// 4. Display the rendered pose (`gui` module)
// In addition to the previous OpenPose modules, we also need to use:
// 1. `core` module: for the Array<float> class that the `pose` module needs
// 2. `utilities` module: for the error & logging functions, i.e. op::error & op::log respectively

// 3rdparty dependencies
// GFlags: DEFINE_bool, _int32, _int64, _uint64, _double, _string
#include <gflags/gflags.h>
// Allow Google Flags in Ubuntu 14
#ifndef GFLAGS_GFLAGS_H_
namespace gflags = google;
#endif
// OpenPose dependencies
#include <openpose/core/headers.hpp>
#include <openpose/filestream/headers.hpp>
#include <openpose/gui/headers.hpp>
#include <openpose/pose/headers.hpp>
#include <openpose/utilities/headers.hpp>

// See all the available parameter options with the `--help` flag. E.g. `./build/examples/openpose/openpose.bin --help`.
// Note: This command will show you flags for other unnecessary 3rdparty files. Check only the flags for the OpenPose
// executable. E.g. for `openpose.bin`, look for `Flags from examples/openpose/openpose.cpp:`.
// Debugging/Other
DEFINE_int32(logging_level, 3, "The logging level. Integer in the range [0, 255]. 0 will output any log() message, while"
" 255 will not output any. Current OpenPose library messages are in the range 0-4: 1 for"
" low priority messages and 4 for important ones.");
// Producer
DEFINE_string(image_path, "examples/media/COCO_val2014_000000000192.jpg", "Process the desired image.");
// OpenPose
DEFINE_string(model_pose, "COCO", "Model to be used. E.g. `COCO` (18 keypoints), `MPI` (15 keypoints, ~10% faster), "
"`MPI_4_layers` (15 keypoints, even faster but less accurate).");
DEFINE_string(model_folder, "models/", "Folder path (absolute or relative) where the models (pose, face, ...) are located.");
DEFINE_string(net_resolution, "-1x368", "Multiples of 16. If it is increased, the accuracy potentially increases. If it is"
" decreased, the speed increases. For maximum speed-accuracy balance, it should keep the"
" closest aspect ratio possible to the images or videos to be processed. Using `-1` in"
" any of the dimensions, OP will choose the optimal aspect ratio depending on the user's"
" input value. E.g. the default `-1x368` is equivalent to `656x368` in 16:9 resolutions,"
" e.g. full HD (1980x1080) and HD (1280x720) resolutions.");
DEFINE_string(output_resolution, "-1x-1", "The image resolution (display and output). Use \"-1x-1\" to force the program to use the"
" input image resolution.");
DEFINE_int32(num_gpu_start, 0, "GPU device start number.");
DEFINE_double(scale_gap, 0.3, "Scale gap between scales. No effect unless scale_number > 1. Initial scale is always 1."
" If you want to change the initial scale, you actually want to multiply the"
" `net_resolution` by your desired initial scale.");
DEFINE_int32(scale_number, 1, "Number of scales to average.");
// OpenPose Rendering
DEFINE_bool(disable_blending, false, "If enabled, it will render the results (keypoint skeletons or heatmaps) on a black"
" background, instead of being rendered into the original image. Related: `part_to_show`,"
" `alpha_pose`, and `alpha_heatmap`.");
DEFINE_double(render_threshold, 0.05, "Only estimated keypoints whose score confidences are higher than this threshold will be"
" rendered. Generally, a high threshold (> 0.5) will only render very clear body parts;"
" while small thresholds (~0.1) will also output guessed and occluded keypoints, but also"
" more false positives (i.e. wrong detections).");
DEFINE_double(alpha_pose, 0.6, "Blending factor (range 0-1) for the body part rendering. 1 will show it completely, 0 will"
" hide it. Only valid for GPU rendering.");


typedef std::vector<std::pair<std::string, std::chrono::high_resolution_clock::time_point>> OpTimings;

static OpTimings timings;

static void timeNow(const std::string& label){
const auto now = std::chrono::high_resolution_clock::now();
const auto timing = std::make_pair(label, now);
timings.push_back(timing);
}

static std::string timeDiffToString(const std::chrono::high_resolution_clock::time_point& t1,
const std::chrono::high_resolution_clock::time_point& t2 ) {
return std::to_string((double)std::chrono::duration_cast<std::chrono::duration<double>>(t1 - t2).count() * 1e3) + " ms";
}

int openPoseTutorialPose3()
{
#ifdef USE_TENSORRT
op::log("Starting pose estimation.", op::Priority::High);

timeNow("Start");

op::log("OpenPose Library Tutorial - Pose Example 3.", op::Priority::High);
// ------------------------- INITIALIZATION -------------------------
// Step 1 - Set logging level
// - 0 will output all the logging messages
// - 255 will output nothing
op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.", __LINE__, __FUNCTION__, __FILE__);
op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level);
op::log("", op::Priority::Low, __LINE__, __FUNCTION__, __FILE__);
// Step 2 - Read Google flags (user defined configuration)
// outputSize
const auto outputSize = op::flagsToPoint(FLAGS_output_resolution, "-1x-1");
// netInputSize
const auto netInputSize = op::flagsToPoint(FLAGS_net_resolution, "-1x368");
// poseModel
const auto poseModel = op::flagsToPoseModel(FLAGS_model_pose);
// Check no contradictory flags enabled
if (FLAGS_alpha_pose < 0. || FLAGS_alpha_pose > 1.)
op::error("Alpha value for blending must be in the range [0,1].", __LINE__, __FUNCTION__, __FILE__);
if (FLAGS_scale_gap <= 0. && FLAGS_scale_number > 1)
op::error("Incompatible flag configuration: scale_gap must be greater than 0 or scale_number = 1.", __LINE__, __FUNCTION__, __FILE__);
// Enabling Google Logging
const bool enableGoogleLogging = true;
// Logging
op::log("", op::Priority::Low, __LINE__, __FUNCTION__, __FILE__);
// Step 3 - Initialize all required classes
op::ScaleAndSizeExtractor scaleAndSizeExtractor(netInputSize, outputSize, FLAGS_scale_number, FLAGS_scale_gap);
op::CvMatToOpInput cvMatToOpInput;
op::CvMatToOpOutput cvMatToOpOutput;
op::PoseExtractorTensorRT poseExtractorTensorRT{poseModel, FLAGS_model_folder,
FLAGS_num_gpu_start, {}, op::ScaleMode::ZeroToOne, enableGoogleLogging};
op::PoseCpuRenderer poseRenderer{poseModel, (float)FLAGS_render_threshold, !FLAGS_disable_blending,
(float)FLAGS_alpha_pose};
op::OpOutputToCvMat opOutputToCvMat;
op::FrameDisplayer frameDisplayer{"OpenPose Tutorial - Example 3", outputSize};
// Step 4 - Initialize resources on desired thread (in this case single thread, i.e. we init resources here)
poseExtractorTensorRT.initializationOnThread();
poseRenderer.initializationOnThread();

timeNow("Initialization");

// ------------------------- POSE ESTIMATION AND RENDERING -------------------------
// Step 1 - Read and load image, error if empty (possibly wrong path)
// Alternative: cv::imread(FLAGS_image_path, CV_LOAD_IMAGE_COLOR);
cv::Mat inputImage = op::loadImage(FLAGS_image_path, CV_LOAD_IMAGE_COLOR);
if(inputImage.empty())
op::error("Could not open or find the image: " + FLAGS_image_path, __LINE__, __FUNCTION__, __FILE__);
const op::Point<int> imageSize{inputImage.cols, inputImage.rows};
// Step 2 - Get desired scale sizes
std::vector<double> scaleInputToNetInputs;
std::vector<op::Point<int>> netInputSizes;
double scaleInputToOutput;
op::Point<int> outputResolution;
std::tie(scaleInputToNetInputs, netInputSizes, scaleInputToOutput, outputResolution)
= scaleAndSizeExtractor.extract(imageSize);
// Step 3 - Format input image to OpenPose input and output formats
const auto netInputArray = cvMatToOpInput.createArray(inputImage, scaleInputToNetInputs, netInputSizes);
auto outputArray = cvMatToOpOutput.createArray(inputImage, scaleInputToOutput, outputResolution);
// Step 4 - Estimate poseKeypoints
poseExtractorTensorRT.forwardPass(netInputArray, imageSize, scaleInputToNetInputs);
const auto poseKeypoints = poseExtractorTensorRT.getPoseKeypoints();
// Step 5 - Render poseKeypoints
poseRenderer.renderPose(outputArray, poseKeypoints, scaleInputToOutput);
// Step 6 - OpenPose output format to cv::Mat
auto outputImage = opOutputToCvMat.formatToCvMat(outputArray);
timeNow("Step 5");

// ------------------------- SHOWING RESULT AND CLOSING -------------------------
// Step 1 - Show results
frameDisplayer.displayFrame(outputImage, 0); // Alternative: cv::imshow(outputImage) + cv::waitKey(0)
// Step 2 - Logging information message
op::log("Example 3 successfully finished.", op::Priority::High);

const auto totalTime = timeDiffToString(timings.back().second, timings.front().second);
const auto message = "Pose estimation successfully finished. Total time: " + totalTime + ".";
op::log(message, op::Priority::High);

for(OpTimings::iterator timing = timings.begin()+1; timing != timings.end(); ++timing) {
const auto log_time = (*timing).first + " - " + timeDiffToString((*timing).second, (*(timing-1)).second);
op::log(log_time, op::Priority::High);
}

#endif // USE_TENSORRT

// Return successful message
return 0;
}

int main(int argc, char *argv[])
{
// Parsing command line flags
gflags::ParseCommandLineFlags(&argc, &argv, true);

// Running openPoseTutorialPose1
return openPoseTutorialPose3();
}

45 changes: 45 additions & 0 deletions include/openpose/core/netTensorRT.hpp
@@ -0,0 +1,45 @@
#ifndef OPENPOSE_CORE_NET_TENSORRT_HPP
#define OPENPOSE_CORE_NET_TENSORRT_HPP

#include <openpose/core/common.hpp>
#include <openpose/core/net.hpp>


#ifdef USE_TENSORRT
#include "NvInfer.h"
#endif

namespace op
{
class OP_API NetTensorRT : public Net
{
public:
NetTensorRT(const std::string& caffeProto, const std::string& caffeTrainedModel, const int gpuId = 0, const bool enableGoogleLogging = true,
const std::string& lastBlobName = "net_output");

virtual ~NetTensorRT();

void initializationOnThread();

void forwardPass(const Array<float>& inputNetData) const;

boost::shared_ptr<caffe::Blob<float>> getOutputBlob() const;

private:
#ifdef USE_TENSORRT
nvinfer1::ICudaEngine* caffeToGIEModel();

nvinfer1::ICudaEngine* createEngine();
#endif
// PIMPL idiom
// http://www.cppsamples.com/common-tasks/pimpl.html
struct ImplNetTensorRT;
std::unique_ptr<ImplNetTensorRT> upImpl;

// PIMPL requires DELETE_COPY & destructor, or extra code
// http://oliora.github.io/2015/12/29/pimpl-and-rule-of-zero.html
DELETE_COPY(NetTensorRT);
};
}

#endif // OPENPOSE_CORE_NET_TENSORRT_HPP
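`NetTensorRT` hides its TensorRT-specific members behind the PIMPL idiom linked in its comments, which keeps `NvInfer.h` types out of the public header. A minimal self-contained sketch of that pattern (the `EngineWrapper` name is hypothetical, not part of OpenPose; in real code the `Impl` definition lives in the `.cpp` file):

```cpp
#include <memory>
#include <string>

// Public header side: only a forward declaration of the implementation.
class EngineWrapper
{
public:
    explicit EngineWrapper(const std::string& modelPath);
    ~EngineWrapper();  // must be defined where Impl is complete
    EngineWrapper(const EngineWrapper&) = delete;
    EngineWrapper& operator=(const EngineWrapper&) = delete;

    const std::string& modelPath() const;

private:
    struct Impl;                 // opaque to users of the header
    std::unique_ptr<Impl> upImpl;
};

// Implementation side (normally in the .cpp file).
struct EngineWrapper::Impl
{
    std::string path;            // stand-in for engine/runtime handles
};

EngineWrapper::EngineWrapper(const std::string& modelPath)
    : upImpl{new Impl{modelPath}} {}

EngineWrapper::~EngineWrapper() = default;  // Impl is complete here

const std::string& EngineWrapper::modelPath() const
{
    return upImpl->path;
}
```

The out-of-line destructor is the key detail: `std::unique_ptr<Impl>` needs the complete `Impl` type to delete it, which is why the header deletes copying and the diff's comment mentions needing `DELETE_COPY` and a destructor.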
1 change: 1 addition & 0 deletions include/openpose/pose/headers.hpp
@@ -8,6 +8,7 @@
#include <openpose/pose/poseCpuRenderer.hpp>
#include <openpose/pose/poseExtractor.hpp>
#include <openpose/pose/poseExtractorCaffe.hpp>
#include <openpose/pose/poseExtractorTensorRT.hpp>
#include <openpose/pose/poseGpuRenderer.hpp>
#include <openpose/pose/poseParameters.hpp>
#include <openpose/pose/poseRenderer.hpp>
45 changes: 45 additions & 0 deletions include/openpose/pose/poseExtractorTensorRT.hpp
@@ -0,0 +1,45 @@
#ifndef OPENPOSE_POSE_POSE_EXTRACTOR_TENSORRT_HPP
#define OPENPOSE_POSE_POSE_EXTRACTOR_TENSORRT_HPP

#include <openpose/core/common.hpp>
#include <openpose/pose/enumClasses.hpp>
#include <openpose/pose/poseExtractor.hpp>

namespace op
{
class OP_API PoseExtractorTensorRT : public PoseExtractor
{
public:
PoseExtractorTensorRT(const PoseModel poseModel, const std::string& modelFolder, const int gpuId,
const std::vector<HeatMapType>& heatMapTypes = {},
const ScaleMode heatMapScale = ScaleMode::ZeroToOne,
const bool enableGoogleLogging = true);

virtual ~PoseExtractorTensorRT();

void netInitializationOnThread();

void forwardPass(const std::vector<Array<float>>& inputNetData, const Point<int>& inputDataSize,
const std::vector<double>& scaleInputToNetInputs = {1.f});

const float* getHeatMapCpuConstPtr() const;

const float* getHeatMapGpuConstPtr() const;

std::vector<int> getHeatMapSize() const;

const float* getPoseGpuConstPtr() const;

private:
// PIMPL idiom
// http://www.cppsamples.com/common-tasks/pimpl.html
struct ImplPoseExtractorTensorRT;
std::unique_ptr<ImplPoseExtractorTensorRT> upImpl;

// PIMPL requires DELETE_COPY & destructor, or extra code
// http://oliora.github.io/2015/12/29/pimpl-and-rule-of-zero.html
DELETE_COPY(PoseExtractorTensorRT);
};
}

#endif // OPENPOSE_POSE_POSE_EXTRACTOR_TENSORRT_HPP
4 changes: 4 additions & 0 deletions include/openpose/wrapper/wrapper.hpp
@@ -619,7 +619,11 @@ namespace op
{
// Pose estimators
for (auto gpuId = 0; gpuId < gpuNumber; gpuId++)
#ifdef USE_TENSORRT
poseExtractors.emplace_back(std::make_shared<PoseExtractorTensorRT>(
#else
poseExtractors.emplace_back(std::make_shared<PoseExtractorCaffe>(
#endif
wrapperStructPose.poseModel, modelFolder, gpuId + gpuNumberStart,
wrapperStructPose.heatMapTypes, wrapperStructPose.heatMapScale,
wrapperStructPose.enableGoogleLogging
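The wrapper hunk picks the extractor class with an `#ifdef` at the `emplace_back` call site, so the backend is fixed at compile time. A minimal sketch of the same switch (the classes here are hypothetical stand-ins, not the OpenPose API):

```cpp
#include <memory>
#include <string>
#include <vector>

struct PoseBackend
{
    virtual ~PoseBackend() = default;
    virtual std::string name() const = 0;
};

struct CaffeBackend : PoseBackend
{
    std::string name() const override { return "caffe"; }
};

struct TensorRTBackend : PoseBackend
{
    std::string name() const override { return "tensorrt"; }
};

// One extractor per GPU, with the concrete type chosen at compile
// time, mirroring the #ifdef in the wrapper.hpp hunk.
std::vector<std::shared_ptr<PoseBackend>> makeExtractors(const int gpuNumber)
{
    std::vector<std::shared_ptr<PoseBackend>> extractors;
    for (int gpuId = 0; gpuId < gpuNumber; gpuId++)
#ifdef USE_TENSORRT
        extractors.emplace_back(std::make_shared<TensorRTBackend>());
#else
        extractors.emplace_back(std::make_shared<CaffeBackend>());
#endif
    return extractors;
}
```

Because the choice is a preprocessor branch rather than a runtime flag, a single binary can only ever use one backend, which matches how the PR gates everything on `-DUSE_TENSORRT` from the Makefile.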