51 support qnn context binaries #52

Merged · 28 commits · Jul 27, 2024

Commits
2eccb57
Add qnn::ModelImpl method to save context binary
ciaranbor Jul 19, 2024
e746150
Save context binary if loaded a shared library model
ciaranbor Jul 19, 2024
3638281
Pull resultant context binary from adb script
ciaranbor Jul 19, 2024
e75ac0d
Add qnn::Backend method to get system interface
ciaranbor Jul 19, 2024
bd204f2
Backend needs to load system library for context binary models
ciaranbor Jul 20, 2024
5c199fa
Add Backend method to get device handle
ciaranbor Jul 20, 2024
54bc346
Move QnnTensor operations to dedicated file, add more operations
ciaranbor Jul 20, 2024
7f95896
Add qnn::ModelImpl method to load model from context binary
ciaranbor Jul 20, 2024
23f6dcb
Modify qnn::ModelImpl constructor to load .bin files as context binaries
ciaranbor Jul 20, 2024
cfc10a1
Add .bin models to createModel functions
ciaranbor Jul 20, 2024
e01d77d
Move graph types to dedicated header
ciaranbor Jul 20, 2024
090463c
Move further QnnTensor operations to dedicated file
ciaranbor Jul 20, 2024
e2038c8
Move graph functions to graph helpers
ciaranbor Jul 20, 2024
0932630
Encapsulate all graph logic in GraphInfoHelper class
ciaranbor Jul 21, 2024
f49a786
Manage graphs memory with containers
ciaranbor Jul 21, 2024
b5c29e7
Move complex graph methods to source file
ciaranbor Jul 21, 2024
ca5387a
Manage graph memory cleanup
ciaranbor Jul 21, 2024
82b628f
Move more graph logic to graph.cpp
ciaranbor Jul 25, 2024
6c02403
Bump version
ciaranbor Jul 25, 2024
c9b1456
Generalise run_with_adb beyond examples
ciaranbor Jul 26, 2024
124a220
Move run_with_adb to scripts directory
ciaranbor Jul 26, 2024
cad7518
Support running tests on android devices using adb
ciaranbor Jul 26, 2024
556839b
Add tflite NPU test
ciaranbor Jul 26, 2024
b6eeb20
Add QNN shared library test
ciaranbor Jul 26, 2024
6c682f9
Fix setting delegate in QNN backend
ciaranbor Jul 26, 2024
02efdc0
Add QNN context binary test
ciaranbor Jul 26, 2024
04751dd
Move QNN tensorOps function implementations to source file
ciaranbor Jul 27, 2024
1d43380
Move GPU and NPU configuration instructions to HACKING.md
ciaranbor Jul 27, 2024
3 changes: 2 additions & 1 deletion CMakeLists.txt
@@ -81,7 +81,8 @@ if(edgerunner_ENABLE_NPU)
    target_sources(
        edgerunner_edgerunner
        PRIVATE source/qnn/model.cpp source/qnn/tensor.cpp
-               source/qnn/backend.cpp
+               source/qnn/backend.cpp source/qnn/graph.cpp
+               source/qnn/tensorOps.cpp
    )

find_package(qnn REQUIRED)
46 changes: 46 additions & 0 deletions HACKING.md
@@ -113,6 +113,43 @@ cause issues. See the link above for profiles documentation.
[conan]: https://conan.io/
[profile]: https://docs.conan.io/2/reference/config_files/profiles.html

#### Android

An example Android profile is bundled with this repository. It can be installed
into your local Conan home using:

```sh
conan config install profiles -tf profiles
```

Use it by adding `-pr android` to your `conan install` invocation.

#### GPU

For GPU support, add `-o gpu=True` to the `conan install` invocation.

> [!NOTE]
> The tensorflow-lite conan package disables GPU by default, so these steps
> will not currently work. I have patched the recipe locally to enable GPU
> support and will make this available on Conan Center or another repository
> soon. In the meantime, my custom recipe can be used as outlined
> [here](https://github.com/neuralize-ai/tensorflow-lite-conan). If you have
> previously `conan install`ed, remove the existing TFLite package(s) using
> `conan remove "tensorflow-lite"`. Make sure to create the TFLite package
> version required in [conanfile](/conanfile.py).

GPU support requires a functioning OpenCL installation. Refer to your OS
documentation for the steps to set this up correctly for your GPU vendor.

#### NPU

There is support for executing on Qualcomm NPUs (more hardware support is
upcoming). Since this involves Qualcomm's pre-compiled shared libraries, you
must use the Conan recipe I have created, available
[here](https://github.com/neuralize-ai/qnn-conan). Follow the instructions on
that repository, then follow the steps above with `-o with_npu=True` supplied
to the `conan install` invocation. Make sure to create the package version
required in [conanfile](/conanfile.py).

### Configure, build and test

If you followed the above instructions, then you can configure, build and test
@@ -134,6 +171,15 @@ the number of jobs to use, which should ideally be set to the number of
threads your CPU has. You may also want to add that to your preset using the
`jobs` property, see the [presets documentation][1] for more details.

For Android, the above `ctest` approach does not work. Instead, provided that `conan install` was invoked with an appropriate Android profile and Android-compatible presets are used, an additional `test-android` target is available and can be executed with:

```sh
cmake --build --preset=<preset> -t test-android
```

Ensure [adb](https://developer.android.com/tools/adb) is configured and a device
with USB debugging enabled is connected.

### Developer mode targets

These are targets you may invoke using the build command from above, with an
6 changes: 2 additions & 4 deletions example/CMakeLists.txt
@@ -28,8 +28,8 @@ function(add_example NAME)
if(ANDROID)
add_custom_target(
"run_${NAME}"
COMMAND "${CMAKE_SOURCE_DIR}/example/run_with_adb.sh" -b
"${CMAKE_BINARY_DIR}" -e "${NAME}"
COMMAND "${CMAKE_SOURCE_DIR}/scripts/run_with_adb.sh" -b
"${CMAKE_CURRENT_BINARY_DIR}" -e "${NAME}"
VERBATIM
)
else()
@@ -45,8 +45,6 @@ endfunction()

# NOTE: for Android, adb push fails on symlinks, push directly manually instead
if(ANDROID)
-   # file(COPY "${CMAKE_BINARY_DIR}/../runtimeLibs/" DESTINATION
-   # ${CMAKE_CURRENT_BINARY_DIR} )
foreach(dir ${CONAN_RUNTIME_LIB_DIRS})
file(GLOB_RECURSE shared_libs "${dir}/*.so")
file(COPY ${shared_libs} DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
22 changes: 1 addition & 21 deletions example/README.md
@@ -39,19 +39,7 @@ For MacOS, replace "Unix Makefiles" with "Xcode".
> Examples require additional dependencies beyond the main library. As such, it
> is required to supply `-o examples=True` to the `conan install` command.

- In the examples below, for GPU support add `-o gpu=True` to the `conan install` command.
- > [!NOTE]
- > The tensorflow-lite conan package disables GPU by default and as such these
- steps will not work currently. I have patched the recipe locally to enable GPU
- support and will make this available on Conan Center or another repository
- soon. In the mean time, my custom recipe can be be used as outlined
- [here](https://github.com/neuralize-ai/tensorflow-lite-conan). If you have
- previously `conan install`ed, remove the existing TFLite package(s) using
- `conan remove "tensorflow-lite"`. Make sure to create the TFLite package
- version that is required in [conanfile](/conanfile.py).
-
- GPU support requires a functioning OpenCL installation. Refer to your OS
- documentation for the steps for setting this up correctly for your GPU vendor.
+ Refer to [HACKING](/HACKING.md) for further configuration options.

## Unix

@@ -118,11 +106,3 @@ cmake --build --preset=rel -t run_<example_name>
```

where `example_name` is the example filename without the extension (eg. `mobilenet_v3_small`).

- There is support for executing on Qualcomm NPUs (more hardware support is
- upcoming). Since this involves using Qualcomm's pre-compiled shared libraries,
- I have created a Conan recipe that must be used
- [here](https://github.com/neuralize-ai/qnn-conan). Follow the instructions on
- that repository and the steps above with `-o with_npu=True` supplied to the
- `conan install` invocation. Make sure to create the package version required
- in [conanfile](/conanfile.py).
19 changes: 18 additions & 1 deletion include/edgerunner/qnn/backend.hpp
@@ -35,8 +35,10 @@ class Backend {
/**
* @brief Constructor for the Backend class.
* @param delegate The delegate type for the backend (CPU, GPU, NPU).
+    * @param isContextBinary Whether the model will be loaded from a context
+    * binary.
     */
-    explicit Backend(DELEGATE delegate);
+    explicit Backend(DELEGATE delegate, bool isContextBinary);

Backend(const Backend&) = default;
Backend(Backend&&) = delete;
@@ -53,6 +55,15 @@
* @return Reference to the backend handle.
*/
auto getHandle() -> auto& { return m_backendHandle; }
+    /**
+     * @brief Returns a reference to the device handle.
+     *
+     * This function returns a reference to the device handle, allowing access
+     * to the underlying device handle object.
+     *
+     * @return Reference to the device handle.
+     */
+    auto getDeviceHandle() -> auto& { return m_deviceHandle; }

/**
* @brief Get the context for the backend.
@@ -66,6 +77,12 @@
*/
auto getInterface() -> auto& { return m_qnnInterface; }

+    /**
+     * @brief Get the QNN system interface.
+     * @return Reference to the QNN system interface.
+     */
+    auto getSystemInterface() -> auto& { return m_qnnSystemInterface; }

/**
* @brief Get the delegate type for the backend.
* @return The delegate type.
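The added `isContextBinary` flag and the new accessors together define how a backend is brought up for context-binary models. Below is a minimal sketch of the intended call pattern; this is hypothetical driver code, not part of the PR, and it assumes the `DELEGATE` enum (with an `NPU` member) lives in namespace `edge` in `edgerunner/model.hpp`:

```cpp
#include "edgerunner/model.hpp"  // assumed to provide edge::DELEGATE
#include "edgerunner/qnn/backend.hpp"

// Hypothetical bring-up for a model shipped as a QNN context binary.
// Passing isContextBinary = true tells the Backend to also load the
// QNN system library, which is required to deserialize context binaries.
inline void setUpContextBinaryBackend() {
    edge::qnn::Backend backend(edge::DELEGATE::NPU,
                               /* isContextBinary= */ true);

    // Accessors added in this PR: the context-binary load path needs
    // the device handle and the system interface in addition to the
    // regular QNN interface from getInterface().
    auto& deviceHandle = backend.getDeviceHandle();
    auto& systemInterface = backend.getSystemInterface();

    (void)deviceHandle;     // consumed by context creation in the loader
    (void)systemInterface;  // consumed when parsing context-binary metadata
}
```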
151 changes: 151 additions & 0 deletions include/edgerunner/qnn/graph.hpp
@@ -0,0 +1,151 @@
#pragma once

#include <cstdint>     // uint32_t
#include <cstring>
#include <filesystem>  // std::filesystem::path
#include <memory>      // std::unique_ptr
#include <vector>      // std::vector

#include <QnnCommon.h>
#include <QnnGraph.h>
#include <QnnInterface.h>
#include <QnnTypes.h>
#include <System/QnnSystemContext.h>
#include <dlfcn.h>
#include <nonstd/span.hpp>

#include "edgerunner/model.hpp"

namespace edge::qnn {

using GraphErrorT = enum GraphError {
GRAPH_NO_ERROR = 0,
GRAPH_TENSOR_ERROR = 1,
GRAPH_PARAMS_ERROR = 2,
GRAPH_NODES_ERROR = 3,
GRAPH_GRAPH_ERROR = 4,
GRAPH_CONTEXT_ERROR = 5,
GRAPH_GENERATION_ERROR = 6,
GRAPH_SETUP_ERROR = 7,
GRAPH_INVALID_ARGUMENT_ERROR = 8,
GRAPH_FILE_ERROR = 9,
GRAPH_MEMORY_ALLOCATE_ERROR = 10,
// Value selected to ensure 32 bits.
GRAPH_UNKNOWN_ERROR = 0x7FFFFFFF
};

using GraphInfoT = struct GraphInfo {
Qnn_GraphHandle_t graph;
char* graphName;
Qnn_Tensor_t* inputTensors;
uint32_t numInputTensors;
Qnn_Tensor_t* outputTensors;
uint32_t numOutputTensors;
};

using GraphConfigInfoT = struct GraphConfigInfo {
char* graphName;
const QnnGraph_Config_t** graphConfigs;
};

using ComposeGraphsFnHandleTypeT =
GraphErrorT (*)(Qnn_BackendHandle_t,
QnnInterface_ImplementationV2_16_t,
Qnn_ContextHandle_t,
const GraphConfigInfoT**,
const uint32_t,
GraphInfoT***,
uint32_t*,
bool,
QnnLog_Callback_t,
QnnLog_Level_t);

using FreeGraphInfoFnHandleTypeT = GraphErrorT (*)(GraphInfoT***, uint32_t);

class GraphsInfo {
public:
GraphsInfo() = default;

GraphsInfo(const GraphsInfo&) = delete;
GraphsInfo(GraphsInfo&&) = delete;
auto operator=(const GraphsInfo&) -> GraphsInfo& = delete;
auto operator=(GraphsInfo&&) -> GraphsInfo& = delete;

~GraphsInfo();

auto getPtr() -> GraphInfoT*** { return &m_graphsInfo; }

auto accessGraphs() -> auto& { return m_graphsInfo; }

auto setGraph() {
m_graphInfo = std::unique_ptr<GraphInfoT>(m_graphsInfo[0] /* NOLINT */);
}

auto getGraphsCountPtr() -> uint32_t* { return &m_graphsCount; }

auto getGraphCount() const { return m_graphsCount; }

auto accessGraphCount() -> auto& { return m_graphsCount; }

auto getGraph() -> auto& { return m_graphInfo->graph; }

auto accessGraph() -> auto& { return m_graphInfo; }

auto getInputs() -> nonstd::span<Qnn_Tensor_t> {
return {m_graphInfo->inputTensors, m_graphInfo->numInputTensors};
}

auto getOutputs() -> nonstd::span<Qnn_Tensor_t> {
return {m_graphInfo->outputTensors, m_graphInfo->numOutputTensors};
}

auto getNumInputs() const { return m_graphInfo->numInputTensors; }

auto getNumOutputs() const { return m_graphInfo->numOutputTensors; }

auto operator[](const size_t index) -> auto& {
return (*m_graphsInfo)[index] /* NOLINT */;
}

auto loadFromSharedLibrary(const std::filesystem::path& modelPath)
-> STATUS;

auto setComposeGraphsFnHandle(
ComposeGraphsFnHandleTypeT composeGraphsFnHandle) -> STATUS;

auto setFreeGraphInfoFnHandle(
FreeGraphInfoFnHandleTypeT freeGraphInfoFnHandle) -> STATUS;

auto composeGraphs(Qnn_BackendHandle_t& qnnBackendHandle,
QnnInterface_ImplementationV2_16_t& qnnInterface,
Qnn_ContextHandle_t& qnnContext) -> STATUS;

auto retrieveGraphFromContext(
QnnInterface_ImplementationV2_16_t& qnnInterface,
Qnn_ContextHandle_t& qnnContext) -> STATUS;

auto copyGraphsInfoV1(const QnnSystemContext_GraphInfoV1_t* graphInfoSrc,
GraphInfoT* graphInfoDst) -> bool;

auto copyGraphsInfo(const QnnSystemContext_GraphInfo_t* graphsInput,
uint32_t numGraphs) -> bool;

auto copyMetadataToGraphsInfo(
const QnnSystemContext_BinaryInfo_t* binaryInfo) -> bool;

private:
std::vector<GraphInfoT> m_graphs;
std::vector<GraphInfoT*> m_graphPtrs;

GraphInfoT** m_graphsInfo {};
uint32_t m_graphsCount {};

std::unique_ptr<GraphInfoT> m_graphInfo;

ComposeGraphsFnHandleTypeT m_composeGraphsFnHandle {};
FreeGraphInfoFnHandleTypeT m_freeGraphInfoFnHandle {};

void* m_libModelHandle {};

std::vector<Qnn_Tensor_t> m_inputTensors;
std::vector<Qnn_Tensor_t> m_outputTensors;
};

} // namespace edge::qnn
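To tie the pieces together, here is a rough sketch of how a caller such as `qnn::ModelImpl` is expected to drive `GraphsInfo` for the two load paths. This is assumed driver code, not part of the PR, and `edge::STATUS::SUCCESS`/`edge::STATUS::FAIL` are assumed members of the `STATUS` enum from `edgerunner/model.hpp`:

```cpp
#include <filesystem>

#include "edgerunner/qnn/graph.hpp"

// Sketch of the two load paths. A .so model is dlopen'ed and its
// compose entry point builds the graphs; a .bin model's graph metadata
// is recovered from the already-deserialized QNN context instead.
inline auto loadGraphs(edge::qnn::GraphsInfo& graphs,
                       const std::filesystem::path& modelPath,
                       Qnn_BackendHandle_t& backendHandle,
                       QnnInterface_ImplementationV2_16_t& qnnInterface,
                       Qnn_ContextHandle_t& qnnContext,
                       bool isContextBinary) -> edge::STATUS {
    if (isContextBinary) {
        // Context-binary path: no generated model library is involved.
        return graphs.retrieveGraphFromContext(qnnInterface, qnnContext);
    }

    // Shared-library path: resolve the compose/free entry points first,
    // then let the library compose the graphs into the QNN context.
    if (graphs.loadFromSharedLibrary(modelPath) != edge::STATUS::SUCCESS) {
        return edge::STATUS::FAIL;
    }
    return graphs.composeGraphs(backendHandle, qnnInterface, qnnContext);
}
```

Either way, the caller then reads model I/O through `getInputs()`/`getOutputs()`, which wrap the raw `Qnn_Tensor_t` arrays in `nonstd::span`s, and graph memory is released when the `GraphsInfo` object is destroyed.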