Skip to content

Commit

Permalink
support build cpu images (PaddlePaddle#341)
Browse files Browse the repository at this point in the history
  • Loading branch information
heliqi authored Oct 11, 2022
1 parent ce0e3fc commit 5328fbc
Show file tree
Hide file tree
Showing 8 changed files with 133 additions and 13 deletions.
33 changes: 28 additions & 5 deletions serving/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ cmake_minimum_required(VERSION 3.17)

project(trironpaddlebackend LANGUAGES C CXX)

option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
set(FASTDEPLOY_DIR "" CACHE PATH "Paths to FastDeploy Directory. Multiple paths may be specified by sparating them with a semicolon.")
set(FASTDEPLOY_INCLUDE_PATHS "${FASTDEPLOY_DIR}/include"
CACHE PATH "Paths to FastDeploy includes. Multiple paths may be specified by sparating them with a semicolon.")
Expand All @@ -39,6 +40,10 @@ set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo")

# Default to an optimized build when the user did not pick one.
# Multi-config generators (Visual Studio, Xcode, Ninja Multi-Config)
# ignore CMAKE_BUILD_TYPE, so only default it for single-config
# generators.
get_property(_fd_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT _fd_is_multi_config AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

include(FetchContent)

FetchContent_Declare(
Expand All @@ -61,6 +66,13 @@ FetchContent_Declare(
)
FetchContent_MakeAvailable(repo-common repo-core repo-backend)

#
# CUDA
#
# Pull in the CUDA runtime only for GPU-enabled builds; CPU-only images
# must be configurable without a CUDA toolkit installed.
# NOTE: if(VAR) tests the variable directly; if(${VAR}) would expand
# first and produce a malformed if() when the variable is empty/unset.
if(TRITON_ENABLE_GPU)
  find_package(CUDAToolkit REQUIRED)
endif() # TRITON_ENABLE_GPU

configure_file(src/libtriton_fastdeploy.ldscript libtriton_fastdeploy.ldscript COPYONLY)

add_library(
Expand All @@ -73,11 +85,7 @@ target_include_directories(
triton-fastdeploy-backend
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src
)

target_include_directories(
triton-fastdeploy-backend
PRIVATE ${FASTDEPLOY_INCLUDE_PATHS}
${FASTDEPLOY_INCLUDE_PATHS}
)

target_link_libraries(
Expand All @@ -92,6 +100,13 @@ target_compile_options(
-Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
)

# Expose TRITON_ENABLE_GPU=1 to the backend sources so GPU-only code
# paths can be compiled in conditionally. Test the variable by name
# (if(VAR)) rather than dereferencing it, which breaks when empty.
if(TRITON_ENABLE_GPU)
  target_compile_definitions(
    triton-fastdeploy-backend
    PRIVATE TRITON_ENABLE_GPU=1
  )
endif() # TRITON_ENABLE_GPU

set_target_properties(
triton-fastdeploy-backend PROPERTIES
POSITION_INDEPENDENT_CODE ON
Expand All @@ -107,3 +122,11 @@ target_link_libraries(
triton-backend-utils # from repo-backend
triton-core-serverstub # from repo-core
)

# Link the CUDA runtime via the imported target from CUDAToolkit
# (found above) only when GPU support is enabled. Test the variable by
# name rather than dereferencing it, which breaks when empty/unset.
if(TRITON_ENABLE_GPU)
  target_link_libraries(
    triton-fastdeploy-backend
    PRIVATE
      CUDA::cudart
  )
endif() # TRITON_ENABLE_GPU
32 changes: 32 additions & 0 deletions serving/Dockfile_cpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# CPU-only serving image: built on the minimal FastDeploy CPU base
# (no CUDA / GPU runtime inside).
FROM paddlepaddle/fastdeploy:22.09-cpu-only-min

# Set the container timezone and keep apt from prompting during build.
ENV TZ=Asia/Shanghai \
DEBIAN_FRONTEND=noninteractive

# libgomp1 supplies the OpenMP runtime needed by the CPU inference
# backends; paddlepaddle + faster_tokenizer provide the Python-side
# model tooling.
RUN apt-get update && apt-get install -y --no-install-recommends apt-utils libgomp1 \
&& python3 -m pip install -U pip \
&& python3 -m pip install paddlepaddle faster_tokenizer

# Install the FastDeploy Python wheel produced by the build scripts
# (see scripts/build.sh), then remove the wheel to keep the layer small.
COPY python/dist/*.whl /opt/fastdeploy/
RUN python3 -m pip install /opt/fastdeploy/*.whl \
&& rm -rf /opt/fastdeploy/*.whl

# Copy the compiled Triton backend and the FastDeploy C++ runtime tree
# into the locations the server expects.
COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/
COPY build/fastdeploy-0.0.3 /opt/fastdeploy/

# Rebrand the Triton server binary as fastdeployserver.
RUN mv /opt/tritonserver/bin/tritonserver /opt/tritonserver/bin/fastdeployserver
# Make FastDeploy and its bundled third-party backends (ONNX Runtime,
# Paddle2ONNX, Paddle Inference + MKL, OpenVINO) resolvable at runtime.
ENV LD_LIBRARY_PATH="/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH"
17 changes: 14 additions & 3 deletions serving/scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Build all FastDeploy serving components (vision wheel, runtime,
# Triton backend) for either a GPU or a CPU-only image.
#
# Usage: build.sh [WITH_GPU]
#   WITH_GPU  "ON" (default) builds GPU images; any other value builds
#             CPU-only images.
WITH_GPU=${1:-ON}

# Normalize the switch: anything other than "ON" means CPU-only, which
# matches the original if/else behavior. Quote the expansion and use
# POSIX '=' — the unquoted '==' form breaks under dash and when the
# argument is an empty string.
if [ "$WITH_GPU" != "ON" ]; then
    WITH_GPU=OFF
fi

# Each stage takes the GPU switch as its first argument.
sh build_fd_vison.sh "$WITH_GPU"
sh build_fd_runtime.sh "$WITH_GPU"
sh build_fd_backend.sh "$WITH_GPU"
15 changes: 15 additions & 0 deletions serving/scripts/build_fd_backend.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

WITH_GPU=${1:-ON}

if [ $WITH_GPU == "ON" ]; then

if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
Expand All @@ -27,3 +32,13 @@ docker run -it --rm --name build_fd_backend \
apt-get update; apt-get install -y --no-install-recommends rapidjson-dev;
export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH;
cmake .. -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r21.10 -DTRITON_CORE_REPO_TAG=r21.10 -DTRITON_BACKEND_REPO_TAG=r21.10; make -j`nproc`'
else
docker run -it --rm --name build_fd_backend \
-v`pwd`:/workspace/fastdeploy \
paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
bash -c \
'cd /workspace/fastdeploy/serving;
rm -rf build; mkdir build; cd build;
apt-get update; apt-get install -y --no-install-recommends rapidjson-dev;
cmake .. -DTRITON_ENABLE_GPU=OFF -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r22.09 -DTRITON_CORE_REPO_TAG=r22.09 -DTRITON_BACKEND_REPO_TAG=r22.09; make -j`nproc`'
fi
18 changes: 18 additions & 0 deletions serving/scripts/build_fd_runtime.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
WITH_GPU=${1:-ON}

if [ $WITH_GPU == "ON" ]; then

if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
Expand All @@ -37,3 +41,17 @@ docker run -it --rm --name build_fd_runtime \
cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=ON -DTRT_DIRECTORY=${PWD}/../TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
make -j`nproc`;
make install'

else

docker run -it --rm --name build_fd_runtime \
-v`pwd`:/workspace/fastdeploy \
paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
bash -c \
'cd /workspace/fastdeploy;
rm -rf build; mkdir build; cd build;
cmake .. -DENABLE_TRT_BACKEND=OFF -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=OFF -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
make -j`nproc`;
make install'

fi
20 changes: 20 additions & 0 deletions serving/scripts/build_fd_vison.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
WITH_GPU=${1:-ON}

if [ $WITH_GPU == "ON" ]; then

if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
Expand All @@ -34,3 +38,19 @@ docker run -it --rm --name build_fd_vison \
export ENABLE_TEXT=ON;
python setup.py build;
python setup.py bdist_wheel'

else

docker run -it --rm --name build_fd_vison \
-v`pwd`:/workspace/fastdeploy \
paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
bash -c \
'cd /workspace/fastdeploy/python;
rm -rf .setuptools-cmake-build dist;
export WITH_GPU=OFF;
export ENABLE_VISION=ON;
export ENABLE_TEXT=ON;
python setup.py build;
python setup.py bdist_wheel'

fi
3 changes: 2 additions & 1 deletion serving/src/fastdeploy_backend_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ namespace triton {
namespace backend {
namespace fastdeploy_runtime {

#define RESPOND_ALL_AND_SET_TRUE_IF_ERROR(RESPONSES, RESPONSES_COUNT, BOOL, X) \
#define FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR( \
RESPONSES, RESPONSES_COUNT, BOOL, X) \
do { \
TRITONSERVER_Error* raasnie_err__ = (X); \
if (raasnie_err__ != nullptr) { \
Expand Down
8 changes: 4 additions & 4 deletions serving/src/fastdeploy_runtime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -918,7 +918,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
requests, request_count, &responses, model_state_->TritonMemoryManager(),
model_state_->EnablePinnedInput(), CudaStream(), nullptr, nullptr, 0,
HostPolicyName().c_str());
RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
responses, request_count, all_response_failed,
SetInputTensors(total_batch_size, requests, request_count, &responses,
&collector, &cuda_copy));
Expand All @@ -934,7 +934,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
SET_TIMESTAMP(compute_start_ns);

if (!all_response_failed) {
RESPOND_ALL_AND_SET_TRUE_IF_ERROR(responses, request_count,
FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(responses, request_count,
all_response_failed,
Run(&responses, request_count));
}
Expand All @@ -943,7 +943,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
SET_TIMESTAMP(compute_end_ns);

if (!all_response_failed) {
RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
responses, request_count, all_response_failed,
ReadOutputTensors(total_batch_size, requests, request_count,
&responses));
Expand Down Expand Up @@ -1096,7 +1096,7 @@ TRITONSERVER_Error* ModelInstanceState::ReadOutputTensors(
// BackendOutputResponder responder(
// requests, request_count, responses,
// model_state_->TritonMemoryManager(), model_state_->MaxBatchSize() > 0,
// model_state_->EnablePinnedInput(), CudaStream());
// model_state_->EnablePinnedOutput(), CudaStream());
// r21.10
BackendOutputResponder responder(
requests, request_count, responses, StateForModel()->MaxBatchSize(),
Expand Down

0 comments on commit 5328fbc

Please sign in to comment.