[Backend] Add KunlunXin XPU deploy support (PaddlePaddle#747)
* add xpu support

* fix docs

* update code

* update doc

* update code

* update yolov5

* update cmake

* add int64_t data support

* fix

* update download links

* add en doc

* update code

* update xpu options

* update doc

* update doc

* update doc

* update lib links

* update doc

* update code

* update lite xpu link

* update xpu lib

* update doc

* update en doc
yeliang2258 authored Dec 15, 2022
1 parent 6e79df4 commit 5be839b
Showing 39 changed files with 870 additions and 58 deletions.
20 changes: 17 additions & 3 deletions CMakeLists.txt
@@ -40,7 +40,7 @@ if(NOT MSVC)
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=1)
endif(NOT MSVC)

if(UNIX AND (NOT APPLE) AND (NOT ANDROID) AND (NOT ENABLE_TIMVX))
if(UNIX AND (NOT APPLE) AND (NOT ANDROID) AND (NOT WITH_TIMVX))
include(${PROJECT_SOURCE_DIR}/cmake/patchelf.cmake)
endif()

@@ -64,7 +64,8 @@ option(ENABLE_LITE_BACKEND "Whether to enable paddle lite backend." OFF)
option(ENABLE_VISION "Whether to enable vision models usage." OFF)
option(ENABLE_TEXT "Whether to enable text models usage." OFF)
option(ENABLE_FLYCV "Whether to enable flycv to boost image preprocess." OFF)
option(ENABLE_TIMVX "Whether to compile for TIMVX deploy." OFF)
option(WITH_TIMVX "Whether to compile for TIMVX deploy." OFF)
option(WITH_XPU "Whether to compile for KunlunXin XPU deploy." OFF)
option(WITH_TESTING "Whether to compile with unittest." OFF)
############################# Options for Android cross compiling #########################
option(WITH_OPENCV_STATIC "Use OpenCV static lib for Android." OFF)
@@ -138,10 +139,23 @@ set(HEAD_DIR "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}")
include_directories(${HEAD_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})

if (ENABLE_TIMVX)
if (WITH_TIMVX)
include(${PROJECT_SOURCE_DIR}/cmake/timvx.cmake)
endif()

if (WITH_XPU)
if(NOT ENABLE_LITE_BACKEND)
message(WARNING "While compiling with -DWITH_XPU=ON, will force to set -DENABLE_LITE_BACKEND=ON")
set(ENABLE_LITE_BACKEND ON)
endif()
if(NOT CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64")
message(FATAL_ERROR "XPU is only supported on Linux x64 platform")
endif()
if(NOT PADDLELITE_URL)
set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20221215.tgz")
endif()
endif()
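The new option logic can be restated in plain shell for readers who do not speak CMake. This is only an illustrative sketch; the real logic runs inside CMake, and `HOST_ARCH` stands in for `CMAKE_HOST_SYSTEM_PROCESSOR`:

```shell
# Illustrative restatement of the WITH_XPU guard above (not the real build logic).
WITH_XPU=ON
ENABLE_LITE_BACKEND=OFF
HOST_ARCH="x86_64"   # stands in for CMAKE_HOST_SYSTEM_PROCESSOR

if [ "$WITH_XPU" = "ON" ]; then
  # -DWITH_XPU=ON forces the Paddle Lite backend on, with a warning.
  if [ "$ENABLE_LITE_BACKEND" != "ON" ]; then
    echo "WARNING: -DWITH_XPU=ON forces -DENABLE_LITE_BACKEND=ON" >&2
    ENABLE_LITE_BACKEND=ON
  fi
  # XPU builds are only supported on Linux x86_64 hosts.
  if [ "$HOST_ARCH" != "x86_64" ]; then
    echo "FATAL: XPU is only supported on Linux x64 platform" >&2
    exit 1
  fi
  # A default Paddle Lite XPU package is used unless PADDLELITE_URL is preset.
  PADDLELITE_URL="${PADDLELITE_URL:-https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20221215.tgz}"
fi
echo "ENABLE_LITE_BACKEND=$ENABLE_LITE_BACKEND"
```

Net effect: requesting XPU quietly pulls in the Paddle Lite backend and pins a default Paddle Lite package unless the caller supplies one.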


if(ANDROID OR IOS)
if(ENABLE_ORT_BACKEND)
5 changes: 5 additions & 0 deletions FastDeploy.cmake.in
@@ -27,6 +27,7 @@ set(OPENCV_DIRECTORY "@OPENCV_DIRECTORY@")
set(ORT_DIRECTORY "@ORT_DIRECTORY@")
set(OPENVINO_DIRECTORY "@OPENVINO_DIRECTORY@")
set(RKNN2_TARGET_SOC "@RKNN2_TARGET_SOC@")
set(WITH_XPU @WITH_XPU@)

set(FASTDEPLOY_LIBS "")
set(FASTDEPLOY_INCS "")
@@ -237,6 +238,10 @@ if(ENABLE_PADDLE_FRONTEND)
list(APPEND FASTDEPLOY_LIBS ${PADDLE2ONNX_LIB})
endif()

if(WITH_XPU)
list(APPEND FASTDEPLOY_LIBS -lpthread -lrt -ldl)
endif()

remove_duplicate_libraries(FASTDEPLOY_LIBS)

# Print compiler information
2 changes: 2 additions & 0 deletions cmake/summary.cmake
@@ -37,6 +37,8 @@ function(fastdeploy_summary)
message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}")
message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}")
message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}")
message(STATUS " WITH_TIMVX : ${WITH_TIMVX}")
message(STATUS " WITH_XPU : ${WITH_XPU}")
if(ENABLE_ORT_BACKEND)
message(STATUS " ONNXRuntime version : ${ONNXRUNTIME_VERSION}")
endif()
1 change: 1 addition & 0 deletions docs/README_CN.md
100644 → 100755
@@ -8,6 +8,7 @@
- [Building and Installing the GPU Deployment Environment](cn/build_and_install/gpu.md)
- [Building and Installing the CPU Deployment Environment](cn/build_and_install/cpu.md)
- [Building and Installing the IPU Deployment Environment](cn/build_and_install/ipu.md)
- [Building and Installing the KunlunXin XPU Deployment Environment](cn/build_and_install/xpu.md)
- [Building and Installing the Jetson Deployment Environment](cn/build_and_install/jetson.md)
- [Building and Installing the Android Deployment Environment](cn/build_and_install/android.md)
- [Building and Installing the Serving Deployment Image](../serving/docs/zh_CN/compile.md)
5 changes: 3 additions & 2 deletions docs/README_EN.md
100644 → 100755
@@ -8,6 +8,7 @@
- [Build and Install FastDeploy Library on GPU Platform](en/build_and_install/gpu.md)
- [Build and Install FastDeploy Library on CPU Platform](en/build_and_install/cpu.md)
- [Build and Install FastDeploy Library on IPU Platform](en/build_and_install/ipu.md)
- [Build and Install FastDeploy Library on KunlunXin XPU Platform](en/build_and_install/xpu.md)
- [Build and Install FastDeploy Library on Nvidia Jetson Platform](en/build_and_install/jetson.md)
- [Build and Install FastDeploy Library on Android Platform](en/build_and_install/android.md)
- [Build and Install FastDeploy Serving Deployment Image](../serving/docs/EN/compile-en.md)
@@ -19,10 +20,10 @@
- [A Quick Start on Runtime Python](en/quick_start/runtime/python.md)
- [A Quick Start on Runtime C++](en/quick_start/runtime/cpp.md)

## API

- [Python API](https://baidu-paddle.github.io/fastdeploy-api/python/html/)
- [C++ API](https://baidu-paddle.github.io/fastdeploy-api/cpp/html/)
- [Android Java API](../java/android)

## Performance Optimization
8 changes: 5 additions & 3 deletions docs/cn/build_and_install/README.md
@@ -13,17 +13,19 @@
- [Android Platform Deployment Environment](android.md)
- [Rockchip RV1126 Deployment Environment](rv1126.md)
- [Amlogic A311D Deployment Environment](a311d.md)
- [KunlunXin XPU Deployment Environment](xpu.md)


## Description of FastDeploy Compilation Options

| Option | Description |
|:------------------------|:--------------------------------------------------------------------------|
| ENABLE_ORT_BACKEND | Default OFF; whether to build the ONNX Runtime backend (recommended ON for CPU/GPU) |
| ENABLE_PADDLE_BACKEND | Default OFF; whether to build the Paddle Inference backend (recommended ON for CPU/GPU) |
| ENABLE_LITE_BACKEND | Default OFF; whether to build the Paddle Lite backend (must be ON when building the Android library) |
| ENABLE_RKNPU2_BACKEND | Default OFF; whether to build the RKNPU2 backend (recommended ON for RK3588/RK3568/RK3566) |
| WITH_XPU | Default OFF; set to ON when deploying on KunlunXin XPU |
| WITH_TIMVX | Default OFF; set to ON when deploying on RV1126/RV1109/A311D |
| ENABLE_TRT_BACKEND | Default OFF; whether to build the TensorRT backend (recommended ON for GPU) |
| ENABLE_OPENVINO_BACKEND | Default OFF; whether to build the OpenVINO backend (recommended ON for CPU) |
| ENABLE_VISION | Default OFF; whether to build the vision model deployment module |
3 changes: 2 additions & 1 deletion docs/cn/build_and_install/a311d.md
@@ -9,7 +9,8 @@ FastDeploy supports inference deployment on Amlogic NPUs based on the Paddle-Lite backend.
|Compile Option|Default|Description|Remarks|
|:---|:---|:---|:---|
|ENABLE_LITE_BACKEND|OFF|Set to ON when compiling the A311D library| - |
|WITH_TIMVX|OFF|Set to ON when compiling the A311D library| - |
|TARGET_ABI|NONE|Set to arm64 when compiling the A311D library| - |

For more compilation options, please refer to the [Description of FastDeploy compilation options](./README.md)

7 changes: 4 additions & 3 deletions docs/cn/build_and_install/rv1126.md
@@ -8,8 +8,9 @@ FastDeploy supports inference deployment on Rockchip SoCs based on the Paddle-Lite backend.
The relevant compilation options are described as follows:
|Compile Option|Default|Description|Remarks|
|:---|:---|:---|:---|
|ENABLE_LITE_BACKEND|OFF|Set to ON when compiling the RK library| - |
|WITH_TIMVX|OFF|Set to ON when compiling the RK library| - |
|TARGET_ABI|NONE|Set to armhf when compiling the RK library| - |

For more compilation options, please refer to the [Description of FastDeploy compilation options](./README.md)

@@ -86,7 +87,7 @@ dmesg | grep Galcore
wget https://paddlelite-demo.bj.bcebos.com/devices/generic/PaddleLite-generic-demo.tar.gz
tar -xf PaddleLite-generic-demo.tar.gz
```
2. Use `uname -a` to check the `Linux Kernel` version; it should be Linux 4.19.111.
3. Upload the `galcore.ko` found under `PaddleLite-generic-demo/libs/PaddleLite/linux/armhf/lib/verisilicon_timvx/viv_sdk_6_4_6_5/lib/1126/4.19.111/` to the development board.

4. Log in to the development board, run `sudo rmmod galcore` on the command line to unload the original driver, then run `sudo insmod galcore.ko` to load the uploaded driver. (Whether sudo is needed depends on the board; for some adb-connected devices, run adb root first.) If this step fails, go to method 2.
75 changes: 75 additions & 0 deletions docs/cn/build_and_install/xpu.md
@@ -0,0 +1,75 @@
# How to Build the KunlunXin XPU Deployment Environment

FastDeploy supports inference deployment on KunlunXin XPU based on the Paddle-Lite backend.
For more detailed information, please refer to: [PaddleLite Deployment Example](https://www.paddlepaddle.org.cn/lite/develop/demo_guides/kunlunxin_xpu.html#xpu)

This document describes how to compile the PaddleLite-based C++ FastDeploy library.

The relevant compilation options are described as follows:
|Compile Option|Default|Description|Remarks|
|:---|:---|:---|:---|
| WITH_XPU | OFF | Set to ON when deploying on KunlunXin XPU | - |
| ENABLE_ORT_BACKEND | OFF | Whether to build the ONNX Runtime backend | - |
| ENABLE_PADDLE_BACKEND | OFF | Whether to build the Paddle Inference backend | - |
| ENABLE_OPENVINO_BACKEND | OFF | Whether to build the OpenVINO backend | - |
| ENABLE_VISION | OFF | Whether to build the vision model deployment module | - |
| ENABLE_TEXT | OFF | Whether to build the text (NLP) model deployment module | - |

Third-party dependency paths (if the following options are not set, prebuilt libraries are downloaded automatically):
| Option | Description |
| :---------------------- | :--------------------------------------------------------------------------------------------- |
| ORT_DIRECTORY | When the ONNX Runtime backend is enabled, specifies a local ONNX Runtime library path; if not set, one is downloaded automatically |
| OPENCV_DIRECTORY | When ENABLE_VISION=ON, specifies a local OpenCV library path; if not set, one is downloaded automatically |
| OPENVINO_DIRECTORY | When the OpenVINO backend is enabled, specifies a local OpenVINO library path; if not set, one is downloaded automatically |

For more compilation options, please refer to the [Description of FastDeploy compilation options](./README.md)

## Building the PaddleLite-based C++ FastDeploy library
- OS: Linux
- gcc/g++: version >= 8.2
- cmake: version >= 3.15

In addition, it is recommended that developers install OpenCV themselves and specify it at build time with `-DOPENCV_DIRECTORY`. If `-DOPENCV_DIRECTORY` is not specified, a prebuilt OpenCV provided by FastDeploy is downloaded automatically, but on **Linux** it cannot read video or use visualization functions such as imshow:
```
sudo apt-get install libopencv-dev
```
The compilation commands are as follows:
```bash
# Download the latest source code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
mkdir build && cd build

# CMake configuration with the KunlunXin XPU toolchain.
# -DWITH_GPU=OFF             : do not build GPU support
# -DENABLE_ORT_BACKEND=ON    : optionally enable the ONNX Runtime backend
# -DENABLE_PADDLE_BACKEND=ON : optionally enable the Paddle Inference backend
# -DENABLE_VISION=ON         : optionally build the vision deployment module
cmake -DWITH_XPU=ON \
      -DWITH_GPU=OFF \
      -DENABLE_ORT_BACKEND=ON \
      -DENABLE_PADDLE_BACKEND=ON \
      -DCMAKE_INSTALL_PREFIX=fastdeploy-xpu \
      -DENABLE_VISION=ON \
      -DOPENCV_DIRECTORY=/usr/lib/x86_64-linux-gnu/cmake/opencv4 \
      ..

# Build FastDeploy KunlunXin XPU C++ SDK
make -j8
make install
```
After compilation completes, a fastdeploy-xpu directory is generated, containing the PaddleLite-based FastDeploy library.
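Before running a compiled demo against the installed SDK, the dynamic loader typically needs to see the SDK's shared libraries. A hypothetical sketch follows; the `lib/` subdirectory layout is an assumption about the installed SDK, not something this commit documents:

```shell
# Hypothetical post-install step: expose the SDK's shared libraries to the
# dynamic loader. The lib/ layout is an assumption.
SDK_DIR="$PWD/fastdeploy-xpu"
export LD_LIBRARY_PATH="$SDK_DIR/lib:${LD_LIBRARY_PATH:-}"
echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
```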

## Building the Python wheel
The compilation commands are as follows:
```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/python
export WITH_XPU=ON
export WITH_GPU=OFF
export ENABLE_ORT_BACKEND=ON
export ENABLE_PADDLE_BACKEND=ON
export ENABLE_VISION=ON
# OPENCV_DIRECTORY is optional; if not set, the prebuilt OpenCV provided by FastDeploy is downloaded automatically
export OPENCV_DIRECTORY=/usr/lib/x86_64-linux-gnu/cmake/opencv4

python setup.py build
python setup.py bdist_wheel
```
After compilation, the built `wheel` package is generated under `FastDeploy/python/dist` and can be installed directly with pip install.

If you change any compilation options, delete the `build` and `.setuptools-cmake-build` subdirectories under `FastDeploy/python` before recompiling, to avoid stale build caches.
5 changes: 5 additions & 0 deletions docs/en/build_and_install/README.md
100644 → 100755
@@ -12,6 +12,9 @@ English | [中文](../../cn/build_and_install/README.md)
- [Build and Install on IPU Platform](ipu.md)
- [Build and Install on Nvidia Jetson Platform](jetson.md)
- [Build and Install on Android Platform](android.md)
- [Build and Install on RV1126 Platform](rv1126.md)
- [Build and Install on A311D Platform](a311d.md)
- [Build and Install on KunlunXin XPU Platform](xpu.md)


## Build options
@@ -25,6 +28,8 @@ English | [中文](../../cn/build_and_install/README.md)
| ENABLE_VISION | Default OFF, whether to enable vision models deployment module |
| ENABLE_TEXT | Default OFF, whether to enable text models deployment module |
| WITH_GPU | Default OFF, if build on GPU, this needs to be ON |
| WITH_XPU | Default OFF, if deploy on KunlunXin XPU, this needs to be ON |
| WITH_TIMVX | Default OFF, if deploy on RV1126/RV1109/A311D, this needs to be ON |
| CUDA_DIRECTORY | Default /usr/local/cuda, if build on GPU, this defines the path of CUDA(>=11.2) |
| TRT_DIRECTORY | If build with ENABLE_TRT_BACKEND=ON, this defines the path of TensorRT(>=8.4) |
| ORT_DIRECTORY | [Optional] If build with ENABLE_ORT_BACKEND=ON, this flag defines the path of ONNX Runtime, but if this flag is not set, it will download ONNX Runtime library automatically |
105 changes: 105 additions & 0 deletions docs/en/build_and_install/a311d.md
@@ -0,0 +1,105 @@
# How to Build A311D Deployment Environment

FastDeploy supports AI deployment on the Amlogic A311D SoC based on the Paddle-Lite backend. For more detailed information, please refer to: [PaddleLite Deployment Example](https://www.paddlepaddle.org.cn/lite/develop/demo_guides/verisilicon_timvx.html).

This document describes how to compile the PaddleLite-based C++ FastDeploy cross-compilation library.

The relevant compilation options are described as follows:
|Compile Options|Default Values|Description|Remarks|
|:---|:---|:---|:---|
|ENABLE_LITE_BACKEND|OFF|It needs to be set to ON when compiling the A311D library| - |
|WITH_TIMVX|OFF|It needs to be set to ON when compiling the A311D library| - |
|TARGET_ABI|NONE|It needs to be set to arm64 when compiling the A311D library| - |

For more compilation options, please refer to [Description of FastDeploy compilation options](./README.md)

## Cross-compilation environment construction

### Host Environment Requirements
- OS: Ubuntu == 16.04
- cmake: version >= 3.10.0

### Building the compilation environment
You can enter the FastDeploy/tools/timvx directory and use the following command to install:
```bash
cd FastDeploy/tools/timvx
bash install.sh
```
You can also install it with the following commands:
```bash
# 1. Install basic software
apt update
apt-get install -y --no-install-recommends \
gcc g++ git make wget python unzip

# 2. Install arm gcc toolchains
apt-get install -y --no-install-recommends \
g++-arm-linux-gnueabi gcc-arm-linux-gnueabi \
g++-arm-linux-gnueabihf gcc-arm-linux-gnueabihf \
gcc-aarch64-linux-gnu g++-aarch64-linux-gnu

# 3. Install cmake 3.10 or above
wget -c https://mms-res.cdn.bcebos.com/cmake-3.10.3-Linux-x86_64.tar.gz && \
tar xzf cmake-3.10.3-Linux-x86_64.tar.gz && \
mv cmake-3.10.3-Linux-x86_64 /opt/cmake-3.10 && \
ln -s /opt/cmake-3.10/bin/cmake /usr/bin/cmake && \
ln -s /opt/cmake-3.10/bin/ccmake /usr/bin/ccmake
```
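The >= 3.10 requirement above can be checked with a version-sort comparison. A sketch follows; `have` is hard-coded here for illustration, and on a real host you would fill it from `cmake --version`:

```shell
# Sketch: verify that a cmake version string meets the >= 3.10 requirement.
# On a real host: have=$(cmake --version | sed -n '1s/.*version \([0-9.]*\).*/\1/p')
req="3.10.0"
have="3.10.3"   # hard-coded sample value for illustration

# sort -V orders version strings numerically; if req sorts first, have >= req.
oldest=$(printf '%s\n%s\n' "$req" "$have" | sort -V | head -n 1)
if [ "$oldest" = "$req" ]; then
  echo "cmake $have satisfies >= $req"
else
  echo "cmake $have is too old (need >= $req)"
fi
```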

## FastDeploy cross-compilation library compilation based on PaddleLite
After setting up the cross-compilation environment, the compilation command is as follows:
```bash
# Download the latest source code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
mkdir build && cd build

# CMake configuration with the A311D toolchain.
# -DENABLE_VISION=ON: optionally build the vision deployment module
cmake -DCMAKE_TOOLCHAIN_FILE=./../cmake/toolchain.cmake \
      -DWITH_TIMVX=ON \
      -DTARGET_ABI=arm64 \
      -DCMAKE_INSTALL_PREFIX=fastdeploy-tmivx \
      -DENABLE_VISION=ON \
      -Wno-dev ..

# Build FastDeploy A311D C++ SDK
make -j8
make install
```
After the compilation is complete, the fastdeploy-tmivx directory will be generated, indicating that the PaddleLite TIM-VX based FastDeploy library has been compiled.

## Prepare the SoC environment
Before deployment, make sure the version of the Verisilicon Linux Kernel NPU driver galcore.so meets the requirement. Log in to the development board and run the following command to query the NPU driver version. The recommended driver version for the A311D is 6.4.4.3:
```bash
dmesg | grep Galcore
```
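The version check can be scripted. The sketch below parses a sample `dmesg` line and compares it against the recommended 6.4.4.3; the sample line is an assumption about the log format, and on a real board you would fill `line` from `dmesg | grep Galcore | head -1`:

```shell
# Sketch: compare the Galcore version reported by dmesg against 6.4.4.3.
# On a real board: line=$(dmesg | grep Galcore | head -1)
line="Galcore version 6.4.4.3.310723a"   # sample line, an assumption

# Extract the dotted version number after the word "version".
ver=$(printf '%s\n' "$line" | sed -n 's/.*version \([0-9][0-9.]*[0-9]\).*/\1/p')
case "$ver" in
  6.4.4.3*) echo "NPU driver OK: $ver" ;;
  *)        echo "NPU driver mismatch: $ver (want 6.4.4.3)" ;;
esac
```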
If the current version does not comply with the above, please read the following content carefully to ensure that the underlying NPU driver environment is correct.

There are two ways to modify the current NPU driver version:
1. Manually replace the NPU driver version (recommended).
2. Reflash the device with firmware that meets the NPU driver version requirement.

### Manually replace the NPU driver version
1. Use the following command to download and decompress the PaddleLite demo, which provides ready-made driver files
```bash
wget https://paddlelite-demo.bj.bcebos.com/devices/generic/PaddleLite-generic-demo.tar.gz
tar -xf PaddleLite-generic-demo.tar.gz
```
2. Use `uname -a` to check the `Linux Kernel` version; it should be version 4.9.113.
3. Upload `galcore.ko` under `PaddleLite-generic-demo/libs/PaddleLite/linux/arm64/lib/verisilicon_timvx/viv_sdk_6_4_4_3/lib/a311d/4.9.113` path to the development board.
4. Log in to the development board, enter `sudo rmmod galcore` on the command line to uninstall the original driver, and enter `sudo insmod galcore.ko` to load the uploaded device driver. (Whether sudo is needed depends on the actual situation of the development board. For some adb-linked devices, please adb root in advance). If this step fails, go to method 2.
5. Enter `dmesg | grep Galcore` in the development board to query the NPU driver version, and it is determined to be: 6.4.4.3

### Flash the firmware
Depending on the development board model, ask the board vendor or the official customer service for the firmware and flashing method corresponding to NPU driver version 6.4.4.3.

For more details, please refer to: [PaddleLite prepares the device environment](https://www.paddlepaddle.org.cn/lite/develop/demo_guides/verisilicon_timvx.html#zhunbeishebeihuanjing)

## Deployment example based on FastDeploy on A311D
1. For deploying the PaddleClas classification model on A311D, please refer to: [C++ deployment example of PaddleClas classification model on A311D](../../../examples/vision/classification/paddleclas/a311d/README.md)

2. For deploying PPYOLOE detection model on A311D, please refer to: [C++ deployment example of PPYOLOE detection model on A311D](../../../examples/vision/detection/paddledetection/a311d/README.md)

3. For deploying YOLOv5 detection model on A311D, please refer to: [C++ Deployment Example of YOLOv5 Detection Model on A311D](../../../examples/vision/detection/yolov5/a311d/README.md)

4. For deploying PP-LiteSeg segmentation model on A311D, please refer to: [C++ Deployment Example of PP-LiteSeg Segmentation Model on A311D](../../../examples/vision/segmentation/paddleseg/a311d/README.md)
