Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

51 support qnn context binaries #52

Merged
merged 28 commits into from
Jul 27, 2024
Merged
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
2eccb57
Add qnn::ModelImpl method to save context binary
ciaranbor Jul 19, 2024
e746150
Save context binary if loaded a shared library model
ciaranbor Jul 19, 2024
3638281
Pull resultant context binary from adb script
ciaranbor Jul 19, 2024
e75ac0d
Add qnn::Backend method to get system interface
ciaranbor Jul 19, 2024
bd204f2
Backend needs to load system library for context binary models
ciaranbor Jul 20, 2024
5c199fa
Add Backend method to get device handle
ciaranbor Jul 20, 2024
54bc346
Move QnnTensor operations to dedicated file, add more operations
ciaranbor Jul 20, 2024
7f95896
Add qnn::ModelImpl method to load model from context binary
ciaranbor Jul 20, 2024
23f6dcb
Modify qnn::ModelImpl constructor to load .bin files as context binaries
ciaranbor Jul 20, 2024
cfc10a1
Add .bin models to createModel functions
ciaranbor Jul 20, 2024
e01d77d
Move graph types to dedicated header
ciaranbor Jul 20, 2024
090463c
Move further QnnTensor operations to dedicated file
ciaranbor Jul 20, 2024
e2038c8
Move graph functions to graph helpers
ciaranbor Jul 20, 2024
0932630
Encapsulate all graph logic in GraphInfoHelper class
ciaranbor Jul 21, 2024
f49a786
Manage graphs memory with containers
ciaranbor Jul 21, 2024
b5c29e7
Move complex graph methods to source file
ciaranbor Jul 21, 2024
ca5387a
Manage graph memory cleanup
ciaranbor Jul 21, 2024
82b628f
Move more graph logic to graph.cpp
ciaranbor Jul 25, 2024
6c02403
Bump version
ciaranbor Jul 25, 2024
c9b1456
Generalise run_with_adb beyond examples
ciaranbor Jul 26, 2024
124a220
Move run_with_adb to scripts directory
ciaranbor Jul 26, 2024
cad7518
Support running tests on android devices using adb
ciaranbor Jul 26, 2024
556839b
Add tflite NPU test
ciaranbor Jul 26, 2024
b6eeb20
Add QNN shared library test
ciaranbor Jul 26, 2024
6c682f9
Fix setting delegate in QNN backend
ciaranbor Jul 26, 2024
02efdc0
Add QNN context binary test
ciaranbor Jul 26, 2024
04751dd
Move QNN tensorOps function implementations to source file
ciaranbor Jul 27, 2024
1d43380
Move GPU and NPU configuration instructions to HACKING.md
ciaranbor Jul 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add QNN context binary test
ciaranbor committed Jul 26, 2024
commit 02efdc0043bb6bf3760bbcdd24fd94994f0947aa
Binary file added models/qnn/mobilenet_v3_small.bin
Binary file not shown.
1 change: 1 addition & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -49,6 +49,7 @@ endif()
# NPU tests need NPU-capable hardware; presumably run on-device via the
# adb helper script mentioned in this PR — confirm against scripts/run_with_adb
if(edgerunner_ENABLE_NPU)
list(APPEND TEST_SOURCES source/tflite_npu_test.cpp
source/qnn_shared_library_npu_test.cpp
source/qnn_context_binary_npu_test.cpp
)
endif()

79 changes: 79 additions & 0 deletions test/source/qnn_context_binary_npu_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <string>
#include <vector>

#include <catch2/benchmark/catch_benchmark.hpp>
#include <catch2/catch_message.hpp>
#include <catch2/catch_test_macros.hpp>

#include "edgerunner/edgerunner.hpp"
#include "edgerunner/model.hpp"
#include "edgerunner/tensor.hpp"
#include "utils.hpp"

TEST_CASE("QNN context binary NPU runtime", "[qnn][context][npu]") {
    /* A .bin QNN context binary should load successfully and default to the
     * CPU delegate */
    const std::string modelPath = "models/qnn/mobilenet_v3_small.bin";

    auto model = edge::createModel(modelPath);
    REQUIRE(model != nullptr);
    REQUIRE(std::string {"mobilenet_v3_small"} == model->name());
    REQUIRE(model->getDelegate() == edge::DELEGATE::CPU);

    /* zero the input so the CPU reference run and the NPU run see identical
     * data */
    auto referenceInput = model->getInput(0)->getTensorAs<float>();
    std::fill(referenceInput.begin(), referenceInput.end(), 0);

    auto executionStatus = model->execute();
    CHECK(executionStatus == edge::STATUS::SUCCESS);

    /* applying a new delegate releases tensor memory, so snapshot the CPU
     * output before switching delegates */
    const auto referenceOutput = model->getOutput(0)->getTensorAs<float>();
    const std::vector<float> cpuResult(referenceOutput.cbegin(),
                                       referenceOutput.cend());

    const auto delegateStatus = model->applyDelegate(edge::DELEGATE::NPU);
    REQUIRE(delegateStatus == edge::STATUS::SUCCESS);
    REQUIRE(model->getDelegate() == edge::DELEGATE::NPU);

    REQUIRE(model->getNumInputs() == 1);
    REQUIRE(model->getNumOutputs() == 1);

    /* verify the input tensor metadata survived the delegate switch */
    auto input = model->getInput(0);
    REQUIRE(input->getName() == "image_tensor");
    REQUIRE(input->getDimensions() == std::vector<size_t> {1, 224, 224, 3});
    REQUIRE(input->getType() == edge::TensorType::FLOAT32);

    auto npuInputData = input->getTensorAs<float>();
    REQUIRE(npuInputData.size() == input->getSize());

    /* same zeroed input as the CPU reference run */
    std::fill(npuInputData.begin(), npuInputData.end(), 0);

    executionStatus = model->execute();
    REQUIRE(executionStatus == edge::STATUS::SUCCESS);

    BENCHMARK("execution") {
        return model->execute();
    };

    /* verify the output tensor metadata */
    auto output = model->getOutput(0);
    REQUIRE(output->getName() == "class_logits");
    REQUIRE(output->getDimensions() == std::vector<size_t> {1, 1000});
    REQUIRE(output->getType() == edge::TensorType::FLOAT32);

    auto npuOutputData = output->getTensorAs<float>();
    REQUIRE(npuOutputData.size() == output->getSize());

    /* NPU output should match the CPU reference to within the MSE tolerance */
    const auto mse = meanSquaredError(cpuResult, npuOutputData);
    CAPTURE(mse);
    REQUIRE(mse < MseThreshold);
}
4 changes: 2 additions & 2 deletions test/source/qnn_shared_library_npu_test.cpp
Original file line number Diff line number Diff line change
@@ -13,7 +13,7 @@
#include "edgerunner/tensor.hpp"
#include "utils.hpp"

TEST_CASE("QNN NPU runtime", "[qnn][npu]") {
TEST_CASE("QNN shared library NPU runtime", "[qnn][shared][npu]") {
const std::string modelPath = "models/qnn/mobilenet_v3_small.so";

auto model = edge::createModel(modelPath);
@@ -66,7 +66,7 @@ TEST_CASE("QNN NPU runtime", "[qnn][npu]") {
};

auto output = model->getOutput(0);
REQUIRE(output->getName() == "output_0");
REQUIRE(output->getName() == "class_logits");
REQUIRE(output->getDimensions() == std::vector<size_t> {1, 1000});
REQUIRE(output->getType() == edge::TensorType::FLOAT32);