Commit 4c16542: MNN:Sync: Sync Internal 2.9.2
xiaying committed Jul 4, 2024
1 parent a980dba commit 4c16542
Showing 108 changed files with 33,412 additions and 4,848 deletions.
13 changes: 8 additions & 5 deletions CMakeLists.txt
@@ -636,11 +636,6 @@ IF(MNN_BUILD_CODEGEN)
include(${CMAKE_CURRENT_LIST_DIR}/codegen/CMakeLists.txt)
ENDIF()

IF(MNN_BUILD_LLM)
# add_definitions(-DMNN_BUILD_LLM)
include(${CMAKE_CURRENT_LIST_DIR}/transformers/llm/engine/CMakeLists.txt)
ENDIF()

# NPU
IF(MNN_NPU)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/hiai/)
@@ -735,6 +730,14 @@ IF(MNN_BUILD_OPENCV AND NOT MNN_SEP_BUILD)
target_sources(MNN PRIVATE $<TARGET_OBJECTS:MNNOpenCV>)
ENDIF()

IF(MNN_BUILD_LLM)
# add_definitions(-DMNN_BUILD_LLM)
include(${CMAKE_CURRENT_LIST_DIR}/transformers/llm/engine/CMakeLists.txt)
IF(NOT MNN_SEP_BUILD)
target_sources(MNN PRIVATE $<TARGET_OBJECTS:llm>)
ENDIF()
ENDIF()

if(CMAKE_SYSTEM_NAME MATCHES "^Linux")
# Using -pthread, needed by the thread-safe implementation of glibc, is better than only using -lpthread
# https://stackoverflow.com/questions/23250863/difference-between-pthread-and-lpthread-while-compiling
20 changes: 0 additions & 20 deletions docs/Makefile

This file was deleted.

2 changes: 1 addition & 1 deletion docs/inference/module.md
@@ -225,7 +225,7 @@ MNN::TensorCallBackWithInfo callBack = [&](const std::vector<MNN::Tensor*>& nten
return true;
};

// Set the callback functions; they must be set via the executor that created this Module. When not using multiple instances, the global executor suffices:
// Set the callback functions; use the executor from when this Module was created. When not using multiple instances, the global executor suffices:
Express::Executor::getGlobalExecutor()->setCallBack(std::move(beforeCallBack), std::move(callBack));

// forward would trigger callback
2 changes: 1 addition & 1 deletion docs/tools/convert.md
@@ -48,7 +48,7 @@ Usage:
--weightQuantAsymmetric    Used together with weightQuantBits; decides whether asymmetric quantization is used. Default: `true`

--compressionParamsFile arg
Model compression information file generated by the MNN model compression toolkit
Model compression information file generated by the MNN model compression toolkit, or user-provided quantization parameters from which the corresponding quantized model is generated; see tools/converter/user_provide_quant_params.json for the parameter file format

--saveStaticModel    Fix the input shapes and save a static model. Default: false

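As an illustration, a conversion driven by user-provided quantization parameters might be invoked as `./MNNConvert -f ONNX --modelFile model.onnx --MNNModel model_quant.mnn --compressionParamsFile my_quant_params.json --bizCode biz`; the file names here are placeholders, and the parameter JSON can follow the tools/converter/user_provide_quant_params.json template referenced above.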
44 changes: 43 additions & 1 deletion docs/transformers/diffusion.md
@@ -1,3 +1,45 @@
# Diffusion Models

TODO
## Supported Models and Downloads

[Download-runwayml/stable-diffusion-v1-5]:
https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main
[Download-IDEA-CCNL/Taiyi-Stable-Diffusion-1B-Chinese-v0.1]:
https://huggingface.co/IDEA-CCNL/Taiyi-Stable-Diffusion-1B-Chinese-v0.1/tree/main

## Model Conversion
### Convert a Hugging Face Stable Diffusion model to ONNX
python export/onnx_export.py \
--model_path hf_sd_load_path \
--output_path onnx_save_path

### Convert the ONNX models to MNN
Create a folder for the diffusion MNN models and place the converted .mnn files in it.
./MNNConvert -f ONNX --modelFile onnx_save_path/text_encoder/model.onnx --MNNModel mnn_save_path/text_encoder.mnn --weightQuantBits 8 --bizCode biz
./MNNConvert -f ONNX --modelFile onnx_save_path/unet/model.onnx --MNNModel mnn_save_path/unet.mnn --transformerFuse --weightQuantBits 8 --bizCode biz
./MNNConvert -f ONNX --modelFile onnx_save_path/vae_decoder/model.onnx --keepInputFormat --MNNModel mnn_save_path/vae_decoder.mnn --weightQuantBits 8 --bizCode biz

## Building the Diffusion Demo
### On Linux/macOS/Windows
cmake .. -DMNN_BUILD_DIFFUSION=ON -DMNN_BUILD_OPENCV=ON -DMNN_IMGCODECS=ON -DMNN_OPENCL=ON -DMNN_SEP_BUILD=OFF -DMNN_SUPPORT_TRANSFORMER_FUSE=ON

### On Android
cd project/android/build
../build_64.sh -DMNN_BUILD_DIFFUSION=ON -DMNN_BUILD_OPENCV=ON -DMNN_IMGCODECS=ON -DMNN_OPENCL=ON -DMNN_SEP_BUILD=OFF -DMNN_SUPPORT_TRANSFORMER_FUSE=ON

## Running the Diffusion Demo
./diffusion_demo <resource_path> <model_type> <output_image_name> <input_text>
Here resource_path is the folder holding the MNN model files. Besides the .mnn files, it also needs:
(1) the file transformers/diffusion/scheduler/alphas.txt from the MNN source tree, copied into that folder;
(2) for the stable-diffusion-v1-5 model, merges.txt and vocab.json from the Hugging Face tokenizer directory, copied into that folder; for the Taiyi-Stable-Diffusion model, vocab.txt from the Hugging Face tokenizer directory, copied into that folder.

model_type selects between the two supported diffusion model families: set it to 0 for stable-diffusion-v1-5 and to 1 for Taiyi-Stable-Diffusion.

output_image_name is the file name of the generated image; by default it is written to the current working directory.

input_text is the text-to-image prompt; an English prompt is recommended for stable-diffusion-v1-5 and a Chinese prompt for Taiyi-Stable-Diffusion.

Example invocations:
./diffusion_demo mnn_save_path 0 demo.jpg "a cute cat"
./diffusion_demo mnn_save_path 1 demo.jpg "一只可爱的猫"

2 changes: 1 addition & 1 deletion include/MNN/MNNDefine.h
@@ -69,6 +69,6 @@ MNN_ERROR("Check failed: %s ==> %s\n", #success, #log); \
#define STR(x) STR_IMP(x)
#define MNN_VERSION_MAJOR 2
#define MNN_VERSION_MINOR 9
#define MNN_VERSION_PATCH 1
#define MNN_VERSION_PATCH 2
#define MNN_VERSION STR(MNN_VERSION_MAJOR) "." STR(MNN_VERSION_MINOR) "." STR(MNN_VERSION_PATCH)
#endif /* MNNDefine_h */
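With this bump, MNN_VERSION expands to the string "2.9.2" via the STR macros above.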
1 change: 1 addition & 0 deletions project/android/updateTest.sh
@@ -4,6 +4,7 @@ DIR=MNN
make -j16
adb push ./libllm.so /data/local/tmp/MNN/libllm.so
adb push ./llm_demo /data/local/tmp/MNN/llm_demo
adb push ./diffusion_demo /data/local/tmp/MNN/diffusion_demo
adb push ./libMNN.so /data/local/tmp/$DIR/libMNN.so
adb push ./libMNN_CL.so /data/local/tmp/$DIR/libMNN_CL.so
adb push ./libMNN_Vulkan.so /data/local/tmp/$DIR/libMNN_Vulkan.so
19 changes: 19 additions & 0 deletions pymnn/examples/MNNLlm/llm_example.py
@@ -0,0 +1,19 @@
import MNN.llm as llm
import sys

if len(sys.argv) < 2:
print('usage: python llm_example.py <path_to_model_config>')
sys.exit(1)

config_path = sys.argv[1]
# create model
qwen = llm.create(config_path)
# load model
qwen.load()

# respond to a prompt with streaming enabled
out = qwen.response('你好', True)
print(out)

# generate directly from token ids (here an id-encoded Qwen chat prompt; the ids depend on the model's tokenizer)
out_ids = qwen.generate([151644, 872, 198, 108386, 151645, 198, 151644, 77091])
print(out_ids)
76 changes: 76 additions & 0 deletions pymnn/pip_package/MNN/llm/__init__.py
@@ -0,0 +1,76 @@
import _mnncengine.llm as _F

class LLM(_F.LLM):
def load(self, model_dir):
'''
load model from model_dir
Parameters
----------
model_dir : model directory (split files) or model file path (single file)
Returns
-------
None
Example:
-------
>>> llm.load('../qwen-1.8b-int4/config.json')
'''
super().load(model_dir)

def generate(self, input_ids):
'''
generate output token ids from input token ids
Parameters
----------
input_ids : input token ids, list of int
Returns
-------
output_ids : output token ids, list of int
Example:
-------
>>> input_ids = [151644, 872, 198, 108386, 151645, 198, 151644, 77091]
>>> output_ids = qwen.generate(input_ids)
'''
return super().generate(input_ids)

def response(self, prompt, stream = False):
'''
generate a response for a prompt
Parameters
----------
prompt : input prompt
stream : whether to stream the output as it is generated, default is False
Returns
-------
res : output string
Example:
-------
>>> res = qwen.response('Hello', True)
'''
return super().response(prompt, stream)

def create(config_path):
'''
create an LLM instance from `config.json`
Parameters
----------
config_path : config path or model path
Returns
-------
llm : LLM instance
Example:
-------
>>> qwen = llm.create('./qwen-1.8b-int4/config.json')
'''
return _F.create(config_path)
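Put together, the wrapper supports a small interactive loop. The sketch below mirrors the create/load/response flow of pymnn/examples/MNNLlm/llm_example.py above (the no-argument load() and two-argument response() follow that example; the chat loop itself is illustrative):

import sys
import MNN.llm as llm

qwen = llm.create(sys.argv[1])  # path to the model's config.json
qwen.load()                     # load weights once, then serve many prompts

while True:
    prompt = input('user> ')
    if prompt in ('exit', 'quit'):
        break
    print(qwen.response(prompt, False))  # full string; pass True to stream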
12 changes: 12 additions & 0 deletions pymnn/pip_package/build_deps.py
@@ -29,6 +29,8 @@
USE_RENDER = False
USE_SSE = True
USE_OPENMP = False
USE_LLM = False
USE_ARM82 = False

if len(sys.argv) > 1 and sys.argv[1] is not None:
if "trt" in sys.argv[1]:
@@ -51,6 +53,10 @@
USE_SSE = False
if "openmp" in sys.argv[1]:
USE_OPENMP = True
if "llm" in sys.argv[1]:
USE_LLM = True
if "arm82" in sys.argv[1]:
USE_ARM82 = True

print ("USE_INTERNAL:", USE_INTERNAL)
print ("USE_TRT:", USE_TRT)
@@ -62,6 +68,8 @@
print ("USE_RENDER:", USE_RENDER)
print ("USE_SSE:", USE_SSE)
print ("USE_OPENMP:", USE_OPENMP)
print ("USE_LLM:", USE_LLM)
print ("USE_ARM82:", USE_ARM82)

def build_deps():
""" build depency """
@@ -79,6 +87,10 @@ def build_deps():
extra_opts += ' -DMNN_VULKAN=ON -DMNN_VULKAN_IMAGE=OFF'
if USE_OPENCL:
extra_opts += ' -DMNN_OPENCL=ON'
if USE_LLM:
extra_opts += ' -DMNN_BUILD_LLM=ON -DMNN_LOW_MEMORY=ON -DMNN_SUPPORT_TRANSFORMER_FUSE=ON'
if USE_ARM82:
extra_opts += ' -DMNN_ARM82=ON'
extra_opts += ' -DMNN_USE_THREAD_POOL=OFF -DMNN_OPENMP=ON' if USE_OPENMP else ' -DMNN_USE_THREAD_POOL=ON -DMNN_OPENMP=OFF'

if IS_WINDOWS:
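Both new switches are matched as substrings of the first command-line argument, so a pip build that bundles the LLM API can be requested with, for example, `python build_deps.py llm`, or `python build_deps.py llm,arm82` to also enable the ARMv8.2 kernels; these invocations are inferred from the parsing above rather than from a documented CLI.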
9 changes: 9 additions & 0 deletions pymnn/pip_package/setup.py
@@ -214,6 +214,9 @@ def configure_extension_build():
engine_include_dirs += [os.path.join(root_dir, "3rd_party", "rapidjson")]
# cv include
engine_include_dirs += [os.path.join(root_dir, "tools", "cv", "include")]
# llm include
engine_include_dirs += [os.path.join(root_dir, "transformers", "llm", "engine", "include")]
engine_include_dirs += [os.path.join(root_dir, "3rd_party")]
engine_include_dirs += [np.get_include()]

lib_files = []
@@ -247,6 +250,12 @@ def configure_extension_build():
# add libTorch dependency
torch_lib = None
cmakecache = os.path.join(root_dir, BUILD_DIR, 'CMakeCache.txt')
# llm
for line in open(cmakecache, 'rt').readlines():
if 'MNN_BUILD_LLM' in line:
if 'ON' in line:
extra_compile_args += ['-DPYMNN_LLM_API']
# torch lib
for line in open(cmakecache, 'rt').readlines():
if 'TORCH_LIBRARY' in line:
torch_lib = os.path.dirname(line[line.find('=')+1:])
22 changes: 20 additions & 2 deletions pymnn/src/MNN.cc
@@ -66,6 +66,10 @@ using RegularizationMethod = ParameterOptimizer::RegularizationMethod;
#endif
#endif

#ifdef PYMNN_LLM_API
#include "llm.h"
#endif

#ifdef PYMNN_INTERNAL_SERVING
#include <MNN/AutoTime.hpp>
#include "internal/monitor_service.h"
@@ -1610,7 +1614,7 @@ static PyObject* PyMNNTensor_fromNumpy(PyMNNTensor *self, PyObject *args) {
return NULL;
}
DType dtype = htype2dtype(self->tensor->getType());
int npy_type = PyArray_TYPE(data);
int npy_type = PyArray_TYPE((const PyArrayObject*)data);
int itemsize = getitemsize(dtype, npy_type);
PyArrayObject *data_cont= PyArray_GETCONTIGUOUS((PyArrayObject*)data);
auto tmpBuffer = PyArray_DATA(data_cont);
@@ -1946,7 +1950,7 @@ static PyObject* PyMNNCVImageProcess_convert(PyMNNCVImageProcess *self, PyObject
#ifdef PYMNN_NUMPY_USABLE
else if(gNumpyValid && PyArray_Check(source)) {
// Array Data
int npy_type = PyArray_TYPE(source);
int npy_type = PyArray_TYPE((const PyArrayObject*)source);
if(npy_type != NPY_UINT8) {
PyErr_SetString(PyExc_Exception,
"PyMNNCVImageProcess_convert: only numpy.uint8 is supported for numpy");
@@ -2710,6 +2714,20 @@ PyMODINIT_FUNC MOD_INIT_FUNC(void) {
}
#endif
#endif
#ifdef PYMNN_LLM_API
// llm submodule
auto llm_module = def_submodule(m, "llm");
if (PyType_Ready(&PyMNNLLM) < 0) {
PyErr_SetString(PyExc_Exception, "initMNN.llm: PyType_Ready PyMNNLLM failed");
ERROR_RETURN
}
PyModule_AddObject(llm_module, "LLM", (PyObject *)PyType_FindTLSType(&PyMNNLLM));
// add methods of llm
constexpr int llm_method_num = sizeof(PyMNNLLM_static_methods) / sizeof(PyMethodDef);
for (int i = 0; i < llm_method_num; i++) {
def_method(llm_module, &PyMNNLLM_static_methods[i]);
}
#endif

#if PY_MAJOR_VERSION >= 3
return m;
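Once this block runs, the C extension exposes an llm submodule whose LLM type and module-level helpers back the MNN.llm wrapper added earlier in this commit. A quick sanity check from Python (a sketch; it assumes a build with MNN_BUILD_LLM=ON and a valid config path):

import _mnncengine.llm as _llm  # the C extension submodule registered above

print(_llm.LLM)  # type object behind MNN.llm.LLM
qwen = _llm.create('./qwen-1.8b-int4/config.json')  # helper registered from PyMNNLLM_static_methods
qwen.load()
print(qwen.response('Hello', False))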