Merge branch 'develop' into add_batch_size_for_uie
joey12300 authored Dec 28, 2022
2 parents e36ee2e + 866d044 commit a906ddd
Showing 253 changed files with 6,560 additions and 2,579 deletions.
6 changes: 3 additions & 3 deletions CMakeLists.txt
@@ -66,7 +66,7 @@ option(ENABLE_TEXT "Whether to enable text models usage." OFF)
option(ENABLE_FLYCV "Whether to enable flycv to boost image preprocess." OFF)
option(WITH_ASCEND "Whether to compile for Huawei Ascend deploy." OFF)
option(WITH_TIMVX "Whether to compile for TIMVX deploy." OFF)
- option(WITH_XPU "Whether to compile for KunlunXin XPU deploy." OFF)
+ option(WITH_KUNLUNXIN "Whether to compile for KunlunXin XPU deploy." OFF)
option(WITH_TESTING "Whether to compile with unittest." OFF)
############################# Options for Android cross compiling #########################
option(WITH_OPENCV_STATIC "Use OpenCV static lib for Android." OFF)
@@ -148,12 +148,12 @@ if (WITH_ASCEND)
include(${PROJECT_SOURCE_DIR}/cmake/ascend.cmake)
endif()

- if (WITH_XPU)
+ if (WITH_KUNLUNXIN)
if(NOT ENABLE_LITE_BACKEND)
set(ENABLE_LITE_BACKEND ON)
endif()
if(NOT CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64")
- message(FATAL_ERROR "XPU is only supported on Linux x64 platform")
+ message(FATAL_ERROR "KunlunXin XPU is only supported on Linux x64 platform")
endif()
if(NOT PADDLELITE_URL)
set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20221215.tgz")
4 changes: 2 additions & 2 deletions FastDeploy.cmake.in
@@ -27,7 +27,7 @@ set(OPENCV_DIRECTORY "@OPENCV_DIRECTORY@")
set(ORT_DIRECTORY "@ORT_DIRECTORY@")
set(OPENVINO_DIRECTORY "@OPENVINO_DIRECTORY@")
set(RKNN2_TARGET_SOC "@RKNN2_TARGET_SOC@")
- set(WITH_XPU @WITH_XPU@)
+ set(WITH_KUNLUNXIN @WITH_KUNLUNXIN@)

set(FASTDEPLOY_LIBS "")
set(FASTDEPLOY_INCS "")
@@ -246,7 +246,7 @@ if(ENABLE_PADDLE_FRONTEND)
list(APPEND FASTDEPLOY_LIBS ${PADDLE2ONNX_LIB})
endif()

- if(WITH_XPU)
+ if(WITH_KUNLUNXIN)
list(APPEND FASTDEPLOY_LIBS -lpthread -lrt -ldl)
endif()

2 changes: 2 additions & 0 deletions cmake/flycv.cmake
@@ -84,6 +84,8 @@ else()
else()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
set(FLYCV_FILE "flycv-linux-aarch64-${FLYCV_VERSION}.tgz")
+ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
+   set(FLYCV_FILE "flycv-linux-armhf-${FLYCV_VERSION}.tgz")
else()
set(FLYCV_FILE "flycv-linux-x64-${FLYCV_VERSION}.tgz")
endif()
6 changes: 0 additions & 6 deletions cmake/rknpu2.cmake
@@ -10,12 +10,6 @@ download_and_decompress(${RKNPU2_URL} ${CMAKE_CURRENT_BINARY_DIR}/${RKNPU2_FILE}
# set path
set(RKNPU_RUNTIME_PATH ${THIRD_PARTY_PATH}/install/rknpu2_runtime)

- if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
- else ()
-   message(FATAL_ERROR "[rknpu2.cmake] Only support build rknpu2 in Linux")
- endif ()


if (EXISTS ${RKNPU_RUNTIME_PATH})
set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/lib/librknnrt.so)
include_directories(${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/include)
2 changes: 1 addition & 1 deletion cmake/summary.cmake
@@ -39,7 +39,7 @@ function(fastdeploy_summary)
message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}")
message(STATUS " WITH_ASCEND : ${WITH_ASCEND}")
message(STATUS " WITH_TIMVX : ${WITH_TIMVX}")
- message(STATUS " WITH_XPU : ${WITH_XPU}")
+ message(STATUS " WITH_KUNLUNXIN : ${WITH_KUNLUNXIN}")
if(ENABLE_ORT_BACKEND)
message(STATUS " ONNXRuntime version : ${ONNXRUNTIME_VERSION}")
endif()
2 changes: 1 addition & 1 deletion docs/README.md
@@ -8,7 +8,7 @@
- [Build and Install FastDeploy Library on GPU Platform](en/build_and_install/gpu.md)
- [Build and Install FastDeploy Library on CPU Platform](en/build_and_install/cpu.md)
- [Build and Install FastDeploy Library on IPU Platform](en/build_and_install/ipu.md)
- - [Build and Install FastDeploy Library on KunlunXin XPU Platform](en/build_and_install/xpu.md)
+ - [Build and Install FastDeploy Library on KunlunXin XPU Platform](en/build_and_install/kunlunxin.md)
- [Build and Install on RV1126 Platform](en/build_and_install/rv1126.md)
- [Build and Install on RK3588 and RK356X Platform](en/build_and_install/rknpu2.md)
- [Build and Install on A311D Platform](en/build_and_install/a311d.md)
2 changes: 1 addition & 1 deletion docs/README_CN.md
@@ -8,7 +8,7 @@
- [Build and install the GPU deployment environment](cn/build_and_install/gpu.md)
- [Build and install the CPU deployment environment](cn/build_and_install/cpu.md)
- [Build and install the IPU deployment environment](cn/build_and_install/ipu.md)
- - [Build and install the KunlunXin XPU deployment environment](cn/build_and_install/xpu.md)
+ - [Build and install the KunlunXin XPU deployment environment](cn/build_and_install/kunlunxin.md)
- [Build and install the Rockchip RV1126 deployment environment](cn/build_and_install/rv1126.md)
- [Build and install the Rockchip RK3588 deployment environment](cn/build_and_install/rknpu2.md)
- [Build and install the Amlogic A311D deployment environment](cn/build_and_install/a311d.md)
2 changes: 1 addition & 1 deletion docs/README_EN.md
@@ -8,7 +8,7 @@
- [Build and Install FastDeploy Library on GPU Platform](en/build_and_install/gpu.md)
- [Build and Install FastDeploy Library on CPU Platform](en/build_and_install/cpu.md)
- [Build and Install FastDeploy Library on IPU Platform](en/build_and_install/ipu.md)
- - [Build and Install FastDeploy Library on KunlunXin XPU Platform](en/build_and_install/xpu.md)
+ - [Build and Install FastDeploy Library on KunlunXin XPU Platform](en/build_and_install/kunlunxin.md)
- [Build and Install on RV1126 Platform](en/build_and_install/rv1126.md)
- [Build and Install on RK3588 Platform](en/build_and_install/rknpu2.md)
- [Build and Install on A311D Platform](en/build_and_install/a311d.md)
4 changes: 2 additions & 2 deletions docs/cn/build_and_install/README.md
@@ -14,7 +14,7 @@
- [Rockchip RV1126 deployment environment](rv1126.md)
- [Rockchip RK3588 deployment environment](rknpu2.md)
- [Amlogic A311D deployment environment](a311d.md)
- - [KunlunXin XPU deployment environment](xpu.md)
+ - [KunlunXin XPU deployment environment](kunlunxin.md)
- [Huawei Ascend deployment environment](huawei_ascend.md)


@@ -27,7 +27,7 @@
| ENABLE_LITE_BACKEND | OFF by default; whether to build the integrated Paddle Lite backend (set to ON when building the Android library) |
| ENABLE_RKNPU2_BACKEND | OFF by default; whether to build the integrated RKNPU2 backend (recommended ON on RK3588/RK3568/RK3566) |
| WITH_ASCEND | OFF by default; set to ON when deploying on Huawei Ascend NPUs |
- | WITH_XPU | OFF by default; set to ON when deploying on KunlunXin XPUs |
+ | WITH_KUNLUNXIN | OFF by default; set to ON when deploying on KunlunXin XPUs |
| WITH_TIMVX | OFF by default; set to ON when deploying on RV1126/RV1109/A311D |
| ENABLE_TRT_BACKEND | OFF by default; whether to build the integrated TensorRT backend (recommended ON on GPUs) |
| ENABLE_OPENVINO_BACKEND | OFF by default; whether to build the integrated OpenVINO backend (recommended ON on CPUs) |
docs/cn/build_and_install/{xpu.md → kunlunxin.md} (renamed)
@@ -1,4 +1,4 @@
- [English](../../en/build_and_install/xpu.md) | Simplified Chinese
+ [English](../../en/build_and_install/kunlunxin.md) | Simplified Chinese

# Building and Installing the KunlunXin XPU Deployment Environment

@@ -10,7 +10,7 @@ FastDeploy supports deployment and inference on KunlunXin XPUs via the Paddle Lite backend
The relevant build options are described below:
|Build Option|Default|Description|Notes|
|:---|:---|:---|:---|
- | WITH_XPU| OFF | Set to ON when deploying on XPUs | - |
+ | WITH_KUNLUNXIN| OFF | Set to ON when deploying on KunlunXin XPUs | - |
| ENABLE_ORT_BACKEND | OFF | Whether to integrate the ONNX Runtime backend | - |
| ENABLE_PADDLE_BACKEND | OFF | Whether to integrate the Paddle Inference backend | - |
| ENABLE_OPENVINO_BACKEND | OFF | Whether to integrate the OpenVINO backend | - |
@@ -41,11 +41,11 @@ cd FastDeploy
mkdir build && cd build

# CMake configuration with KunlunXin xpu toolchain
- cmake -DWITH_XPU=ON \
+ cmake -DWITH_KUNLUNXIN=ON \
       -DWITH_GPU=OFF \ # do not build for GPU
       -DENABLE_ORT_BACKEND=ON \ # optionally enable the ONNX Runtime backend
       -DENABLE_PADDLE_BACKEND=ON \ # optionally enable the Paddle Inference backend
-      -DCMAKE_INSTALL_PREFIX=fastdeploy-xpu \
+      -DCMAKE_INSTALL_PREFIX=fastdeploy-kunlunxin \
       -DENABLE_VISION=ON \ # whether to build the vision model deployment module; optional
       -DOPENCV_DIRECTORY=/usr/lib/x86_64-linux-gnu/cmake/opencv4 \
..
@@ -54,14 +54,14 @@
make -j8
make install
```
- After the build finishes, a fastdeploy-xpu directory is generated, indicating that the Paddle Lite based FastDeploy library has been built.
+ After the build finishes, a fastdeploy-kunlunxin directory is generated, indicating that the Paddle Lite based FastDeploy library has been built.

## Python Build
The build commands are as follows:
```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/python
- export WITH_XPU=ON
+ export WITH_KUNLUNXIN=ON
export WITH_GPU=OFF
export ENABLE_ORT_BACKEND=ON
export ENABLE_PADDLE_BACKEND=ON
```
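Once the wheel is built and installed, a quick sanity check can confirm the KunlunXin build is usable. This is a minimal sketch under stated assumptions: the Python package name `fastdeploy` and the option names `use_kunlunxin()` / `use_lite_backend()` follow the WITH_KUNLUNXIN rename in this commit and are not confirmed API here.

```python
# Minimal sketch: confirm the KunlunXin-enabled wheel imports and that a
# runtime option can be pointed at the XPU. The option names below are
# assumptions mirroring the WITH_KUNLUNXIN rename, not documented API.
import fastdeploy as fd

option = fd.RuntimeOption()
option.use_kunlunxin()     # assumed switch: run inference on the KunlunXin XPU
option.use_lite_backend()  # assumed: Paddle Lite backend, which this doc builds
print("RuntimeOption configured for KunlunXin XPU")
```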
1 change: 1 addition & 0 deletions docs/cn/build_and_install/rv1126.md
@@ -61,6 +61,7 @@ mkdir build && cd build
cmake -DCMAKE_TOOLCHAIN_FILE=./../cmake/toolchain.cmake \
-DWITH_TIMVX=ON \
-DTARGET_ABI=armhf \
+ -DENABLE_FLYCV=ON \ # whether to enable FlyCV to accelerate pre/post-processing; optional
-DCMAKE_INSTALL_PREFIX=fastdeploy-timvx \
-DENABLE_VISION=ON \ # whether to build the vision model deployment module; optional
-Wno-dev ..
78 changes: 62 additions & 16 deletions docs/cn/faq/rknpu2/export.md
@@ -4,7 +4,10 @@

## Introduction

- Fastdeploy already provides a simple integration of the onnx->rknn conversion flow. This tutorial uses tools/export.py to export models; a yaml config file must be written before exporting.
+ Fastdeploy already provides a simple integration of the onnx->rknn conversion flow.
+ This tutorial uses tools/rknpu2/export.py to export models; a yaml config file must be written before exporting.

+ ## Environment Requirements
Before converting, follow the [rknn_toolkit2 installation guide](./install_rknn_toolkit2.md) to verify that the environment is set up correctly.


@@ -14,29 +17,72 @@
|-----------------|------------|--------------------|
| verbose | Yes; defaults to True | Whether to print detailed information while converting the model |
| config_path || Path to the config file |
- | target_platform || Chip model |

## The config File Explained

### config yaml template

```yaml
- model_path: ./portrait_pp_humansegv2_lite_256x144_pretrained.onnx
- output_folder: ./
- target_platform: RK3588
- normalize:
-   mean: [[0.5,0.5,0.5]]
-   std: [[0.5,0.5,0.5]]
- outputs: None
+ mean:
+   -
+     - 128.5
+     - 128.5
+     - 128.5
+ std:
+   -
+     - 128.5
+     - 128.5
+     - 128.5
+ model_path: "./scrfd_500m_bnkps_shape640x640.onnx"
+ outputs_nodes:
+ do_quantization: True
+ dataset: "./datasets.txt"
+ output_folder: "./"
```
### config Parameters Explained
- * model_path: path of the stored model
- * output_folder: name of the folder the converted model is written to
- * target_platform: the device the model runs on; only RK3588 or RK3568
- * normalize: configures the normalize op on the NPU; it has two parameters, std and mean
-   * std: if normalize is done externally, set this to [1/255,1/255,1/255]
-   * mean: if normalize is done externally, set this to [0,0,0]
- * outputs: list of output nodes; set to None to use the default output nodes
#### model_path
The path of the ONNX-format model to be converted to RKNN.
```yaml
model_path: "./scrfd_500m_bnkps_shape640x640.onnx"
```
#### output_folder
The folder where the resulting RKNN model file is written.
```yaml
output_folder: "./"
```
#### std and mean
Configure these parameters if normalize should run on the NPU, and multiply your original values by 255 yourself: for example, if the mean of your normalize step is [0.5,0.5,0.5], the mean in the config file should be 0.5 × 255 = [127.5,127.5,127.5] (the template above uses 128.5). Convert the list into yaml format as follows:
```yaml
mean:
  -
    - 128.5
    - 128.5
    - 128.5
std:
  -
    - 128.5
    - 128.5
    - 128.5
```
Of course, if normalize and permute are performed externally, these two parameters do not need to be configured.
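A short sketch of why the config values are the original normalize parameters times 255: an external pipeline normalizes pixels scaled to [0, 1], while the NPU applies normalize to raw 0-255 pixels, and the two forms are algebraically identical.

```python
# External pipeline:  y = (x / 255 - mean) / std
# NPU-side normalize: y = (x - mean * 255) / (std * 255)
# Both yield the same result, so mean 0.5 becomes 0.5 * 255 = 127.5.
import numpy as np

x = np.random.randint(0, 256, size=(4, 4, 3)).astype(np.float32)
mean, std = 0.5, 0.5

external = (x / 255.0 - mean) / std
on_npu = (x - mean * 255.0) / (std * 255.0)
assert np.allclose(external, on_npu)
```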
#### outputs_nodes
The names of the output nodes. When the whole model is exported, this parameter does not need to be configured.
```yaml
outputs_nodes:
```
#### do_quantization and dataset
do_quantization controls whether static quantization is performed, and dataset points to the image dataset used for static quantization.
The two parameters work together: dataset only takes effect when do_quantization is enabled.
```yaml
do_quantization: True
dataset: "./datasets.txt"
```
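A hedged helper for producing the dataset file: rknn-toolkit2 reads a plain-text list with one calibration image path per line. The image folder below is an assumption for illustration; the file name just mirrors the config above.

```python
# Sketch: generate the list file that `dataset` points to.
from pathlib import Path

image_dir = Path("./calib_images")  # hypothetical folder of calibration images
paths = sorted(str(p) for p in image_dir.glob("*.jpg"))
Path("datasets.txt").write_text("\n".join(paths) + "\n")
print(f"wrote {len(paths)} image paths to datasets.txt")
```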
## How to Convert the Model
Run the following from the repository root:
@@ -47,4 +93,4 @@

## 模型导出要注意的事项

- * Do not export models that contain softmax or argmax; these two operators have bugs, so run them externally.
+ * Exporting the softmax and argmax operators is not recommended.
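Since the exported graph should not contain these operators, here is a minimal sketch of computing them on the host from the raw model output:

```python
# Sketch: run softmax/argmax on the host instead of inside the RKNN graph.
import numpy as np

def softmax(logits: np.ndarray, axis: int = -1) -> np.ndarray:
    e = np.exp(logits - logits.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

logits = np.array([[1.0, 2.0, 0.5]])  # stand-in for the raw RKNN output tensor
probs = softmax(logits)
pred = probs.argmax(axis=-1)
print(probs, pred)
```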
4 changes: 2 additions & 2 deletions docs/en/build_and_install/README.md
@@ -15,7 +15,7 @@ English | [中文](../../cn/build_and_install/README.md)
- [Build and Install on RV1126 Platform](rv1126.md)
- [Build and Install on RK3588 Platform](rknpu2.md)
- [Build and Install on A311D Platform](a311d.md)
- - [Build and Install on KunlunXin XPU Platform](xpu.md)
+ - [Build and Install on KunlunXin XPU Platform](kunlunxin.md)


## Build options
@@ -29,7 +29,7 @@
| ENABLE_VISION | Default OFF, whether to enable vision models deployment module |
| ENABLE_TEXT | Default OFF, whether to enable text models deployment module |
| WITH_GPU | Default OFF, if build on GPU, this needs to be ON |
- | WITH_XPU | Default OFF, if deploy on KunlunXin XPU, this needs to be ON |
+ | WITH_KUNLUNXIN | Default OFF, if deploy on KunlunXin XPU, this needs to be ON |
| WITH_TIMVX | Default OFF, if deploy on RV1126/RV1109/A311D, this needs to be ON |
| WITH_ASCEND | Default OFF, if deploy on Huawei Ascend, this needs to be ON |
| CUDA_DIRECTORY | Default /usr/local/cuda, if build on GPU, this defines the path of CUDA(>=11.2) |
docs/en/build_and_install/{xpu.md → kunlunxin.md} (renamed)
@@ -1,4 +1,4 @@
- English | [中文](../../cn/build_and_install/xpu.md)
+ English | [中文](../../cn/build_and_install/kunlunxin.md)

# How to Build KunlunXin XPU Deployment Environment

@@ -10,7 +10,7 @@ The relevant compilation options are described as follows:
|Compile Options|Default Values|Description|Remarks|
|:---|:---|:---|:---|
| ENABLE_LITE_BACKEND | OFF | It needs to be set to ON when compiling the RK library| - |
- | WITH_XPU | OFF | It needs to be set to ON when compiling the KunlunXin XPU library| - |
+ | WITH_KUNLUNXIN | OFF | It needs to be set to ON when compiling the KunlunXin XPU library| - |
| ENABLE_ORT_BACKEND | OFF | whether to integrate the ONNX Runtime backend | - |
| ENABLE_PADDLE_BACKEND | OFF | whether to integrate the Paddle Inference backend | - |
| ENABLE_OPENVINO_BACKEND | OFF | whether to integrate the OpenVINO backend | - |
@@ -44,11 +44,11 @@ cd FastDeploy
mkdir build && cd build

# CMake configuration with KunlunXin xpu toolchain
- cmake -DWITH_XPU=ON \
+ cmake -DWITH_KUNLUNXIN=ON \
-DWITH_GPU=OFF \
-DENABLE_ORT_BACKEND=ON \
-DENABLE_PADDLE_BACKEND=ON \
- -DCMAKE_INSTALL_PREFIX=fastdeploy-xpu \
+ -DCMAKE_INSTALL_PREFIX=fastdeploy-kunlunxin \
-DENABLE_VISION=ON \
-DOPENCV_DIRECTORY=/usr/lib/x86_64-linux-gnu/cmake/opencv4 \
..
@@ -57,14 +57,14 @@
make -j8
make install
```
- After the compilation is complete, the fastdeploy-xpu directory will be generated, indicating that the Paddle Lite based FastDeploy library has been compiled.
+ After the compilation is complete, the fastdeploy-kunlunxin directory will be generated, indicating that the Paddle Lite based FastDeploy library has been compiled.

## Python compile
The compilation command is as follows:
```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/python
- export WITH_XPU=ON
+ export WITH_KUNLUNXIN=ON
export WITH_GPU=OFF
export ENABLE_ORT_BACKEND=ON
export ENABLE_PADDLE_BACKEND=ON
```
1 change: 1 addition & 0 deletions docs/en/build_and_install/rv1126.md
@@ -60,6 +60,7 @@ mkdir build && cd build
cmake -DCMAKE_TOOLCHAIN_FILE=./../cmake/toolchain.cmake \
-DWITH_TIMVX=ON \
-DTARGET_ABI=armhf \
+ -DENABLE_FLYCV=ON \ # Whether to enable FlyCV optimization
-DCMAKE_INSTALL_PREFIX=fastdeploy-timvx \
-DENABLE_VISION=ON \ # Whether to compile the vision module
-Wno-dev ..
4 changes: 2 additions & 2 deletions examples/multimodal/stable_diffusion/README.md
@@ -41,7 +41,7 @@ python infer.py --model_dir stable-diffusion-v1-4/ --scheduler "pndm" --backend
python infer.py --model_dir stable-diffusion-v1-5/ --scheduler "euler_ancestral" --backend paddle
# Run inference on KunlunXin XPU
- python infer.py --model_dir stable-diffusion-v1-5/ --scheduler "euler_ancestral" --backend paddle-xpu
+ python infer.py --model_dir stable-diffusion-v1-5/ --scheduler "euler_ancestral" --backend paddle-kunlunxin
```

#### Parameters
@@ -52,7 +52,7 @@ python infer.py --model_dir stable-diffusion-v1-5/ --scheduler "euler_ancestral"
|----------|--------------|
| --model_dir | Directory of the exported model. |
| --model_format | Model format. Defaults to `'paddle'`; options: `['paddle', 'onnx']` |
- | --backend | Inference engine backend. Defaults to `paddle`; options: `['onnx_runtime', 'paddle', 'paddle-xpu']`. When the model format is `onnx`, the only option is `['onnx_runtime']` |
+ | --backend | Inference engine backend. Defaults to `paddle`; options: `['onnx_runtime', 'paddle', 'paddle-kunlunxin']`. When the model format is `onnx`, the only option is `['onnx_runtime']` |
| --scheduler | Scheduler of the StableDiffusion model. Defaults to `'pndm'`; options: `['pndm', 'euler_ancestral']`. For the scheduler matching each StableDiffusion model, see the [ppdiffusers model list](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/ppdiffusers/examples/textual_inversion) |
| --unet_model_prefix | UNet model prefix. Defaults to `unet` |
| --vae_model_prefix | VAE model prefix. Defaults to `vae_decoder` |