Skip to content

Commit

Permalink
[FastTokenizer] Support FastTokenizer on Android (PaddlePaddle#783)
Browse files Browse the repository at this point in the history
* [FastTokenizer] Support FastTokenizer on Android

* [OMP] Add OMP bind_proc(close) policy

* [Android] Add Lite support for UIE

* [Android] Add VIS_SEG_OMP_NUM_THREADS
  • Loading branch information
DefTruth authored Dec 5, 2022
1 parent 8b2f231 commit 65f270d
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 43 deletions.
9 changes: 1 addition & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -425,13 +425,6 @@ if(ENABLE_VISION)
endif()
endif()

if(ANDROID OR IOS)
if(ENABLE_TEXT)
set(ENABLE_TEXT OFF CACHE BOOL "Force ENABLE_TEXT OFF" FORCE)
message(STATUS "Found Android or IOS, force ENABLE_TEXT OFF. We do not support fast_tokenizer with Android/IOS now.")
endif()
endif()

if(ENABLE_TEXT)
add_definitions(-DENABLE_TEXT)
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TEXT_SRCS})
Expand Down Expand Up @@ -488,7 +481,7 @@ set_target_properties(${LIBRARY_NAME} PROPERTIES VERSION ${FASTDEPLOY_VERSION})
if(MSVC)
# disable warnings for dll export
target_compile_options(${LIBRARY_NAME} PRIVATE "$<$<BUILD_INTERFACE:$<COMPILE_LANGUAGE:CXX>>:/wd4251>$<$<BUILD_INTERFACE:$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=/wd4251>")
file(GLOB FD_FILES_REQUIRE_BIGOBJ ${CSRCS_DIR_NAME}/fastdeploy/function/reduce.cc)
file(GLOB FD_FILES_REQUIRE_BIGOBJ ${CSRCS_DIR_NAME}/fastdeploy/function/reduce.cc)
set_source_files_properties(${FD_FILES_REQUIRE_BIGOBJ} PROPERTIES COMPILE_FLAGS "/bigobj")
endif()

Expand Down
15 changes: 11 additions & 4 deletions FastDeploy.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -210,11 +210,18 @@ endif()

if (ENABLE_TEXT)
if(ANDROID)
message(FATAL_ERROR "Not support fastdeploy text APIs with Android now!")
if(NOT ANDROID_TOOLCHAIN MATCHES "clang")
message(FATAL_ERROR "Currently, only support clang toolchain while cross compiling FastDeploy for Android with FastTokenizer, but found ${ANDROID_TOOLCHAIN}.")
endif()
add_library(core_tokenizers STATIC IMPORTED GLOBAL)
set_property(TARGET core_tokenizers PROPERTY IMPORTED_LOCATION
${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/lib/${ANDROID_ABI}/libcore_tokenizers.so)
list(APPEND FASTDEPLOY_LIBS core_tokenizers)
else()
# Add dependency libs later: Linux/Mac/Win/...
find_library(FAST_TOKENIZER_LIB core_tokenizers ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/lib NO_DEFAULT_PATH)
list(APPEND FASTDEPLOY_LIBS ${FAST_TOKENIZER_LIB})
endif()
# Add dependency libs later
find_library(FAST_TOKENIZER_LIB core_tokenizers ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/lib NO_DEFAULT_PATH)
list(APPEND FASTDEPLOY_LIBS ${FAST_TOKENIZER_LIB})
list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/include)
list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/third_party/include)
endif()
Expand Down
91 changes: 64 additions & 27 deletions cmake/fast_tokenizer.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,16 @@ set(FASTTOKENIZER_INC_DIR
"${FASTTOKENIZER_INSTALL_DIR}/include"
"${FASTTOKENIZER_INSTALL_DIR}/third_party/include"
CACHE PATH "fast_tokenizer include directory." FORCE)
set(FASTTOKENIZER_LIB_DIR
"${FASTTOKENIZER_INSTALL_DIR}/lib/"
CACHE PATH "fast_tokenizer lib directory." FORCE)
if(ANDROID)
set(FASTTOKENIZER_LIB_DIR
"${FASTTOKENIZER_INSTALL_DIR}/lib/${ANDROID_ABI}"
CACHE PATH "fast_tokenizer lib directory." FORCE)
else()
set(FASTTOKENIZER_LIB_DIR
"${FASTTOKENIZER_INSTALL_DIR}/lib/"
CACHE PATH "fast_tokenizer lib directory." FORCE)
endif()

set(FASTTOKENIZER_THIRD_LIB_DIR
"${FASTTOKENIZER_INSTALL_DIR}/third_party/lib/"
CACHE PATH "fast_tokenizer lib directory." FORCE)
Expand All @@ -37,21 +44,21 @@ include_directories(${FASTTOKENIZER_INC_DIR})

# Set lib path
if(WIN32)
set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/core_tokenizers.lib"
CACHE FILEPATH "fast_tokenizer compile library." FORCE)
message("FASTTOKENIZER_COMPILE_LIB = ${FASTTOKENIZER_COMPILE_LIB}")
set(ICUDT_LIB "${FASTTOKENIZER_THIRD_LIB_DIR}/icudt.lib")
set(ICUUC_LIB "${FASTTOKENIZER_THIRD_LIB_DIR}/icuuc.lib")

set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/core_tokenizers.lib"
CACHE FILEPATH "fast_tokenizer compile library." FORCE)
set(ICUDT_LIB "${FASTTOKENIZER_THIRD_LIB_DIR}/icudt.lib")
set(ICUUC_LIB "${FASTTOKENIZER_THIRD_LIB_DIR}/icuuc.lib")
elseif(APPLE)
set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/libcore_tokenizers.dylib"
CACHE FILEPATH "fast_tokenizer compile library." FORCE)
set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/libcore_tokenizers.dylib"
CACHE FILEPATH "fast_tokenizer compile library." FORCE)
elseif(ANDROID)
set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/libcore_tokenizers.so"
CACHE FILEPATH "fast_tokenizer compile library." FORCE)
else()

set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/libcore_tokenizers.so"
CACHE FILEPATH "fast_tokenizer compile library." FORCE)
message("FASTTOKENIZER_COMPILE_LIB = ${FASTTOKENIZER_COMPILE_LIB}")
set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/libcore_tokenizers.so"
CACHE FILEPATH "fast_tokenizer compile library." FORCE)
endif(WIN32)
message("FASTTOKENIZER_COMPILE_LIB = ${FASTTOKENIZER_COMPILE_LIB}")

set(FASTTOKENIZER_URL_BASE "https://bj.bcebos.com/paddlenlp/fast_tokenizer/")
set(FASTTOKENIZER_VERSION "1.0.0")
Expand All @@ -68,6 +75,15 @@ elseif(APPLE)
else()
set(FASTTOKENIZER_FILE "fast_tokenizer-osx-x86_64-${FASTTOKENIZER_VERSION}.tgz")
endif()
elseif(ANDROID)
# check ABI, toolchain
if((NOT ANDROID_ABI MATCHES "armeabi-v7a") AND (NOT ANDROID_ABI MATCHES "arm64-v8a"))
message(FATAL_ERROR "FastDeploy with FastTokenizer on Android only support armeabi-v7a, arm64-v8a now.")
endif()
if(NOT ANDROID_TOOLCHAIN MATCHES "clang")
message(FATAL_ERROR "Currently, only support clang toolchain while cross compiling FastDeploy for Android with FastTokenizer, but found ${ANDROID_TOOLCHAIN}.")
endif()
set(FASTTOKENIZER_FILE "fast_tokenizer-android-${ANDROID_ABI}-${FASTTOKENIZER_VERSION}.tgz")
else()
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
set(FASTTOKENIZER_FILE "fast_tokenizer-linux-aarch64-${FASTTOKENIZER_VERSION}.tgz")
Expand All @@ -77,18 +93,39 @@ else()
endif()
set(FASTTOKENIZER_URL "${FASTTOKENIZER_URL_BASE}${FASTTOKENIZER_FILE}")

ExternalProject_Add(
${FASTTOKENIZER_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
URL ${FASTTOKENIZER_URL}
PREFIX ${FASTTOKENIZER_PREFIX_DIR}
DOWNLOAD_NO_PROGRESS 1
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND
${CMAKE_COMMAND} -E copy_directory ${FASTTOKENIZER_SOURCE_DIR} ${FASTTOKENIZER_INSTALL_DIR}
BUILD_BYPRODUCTS ${FASTTOKENIZER_COMPILE_LIB})
if(ANDROID)
ExternalProject_Add(
${FASTTOKENIZER_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
URL ${FASTTOKENIZER_URL}
PREFIX ${FASTTOKENIZER_PREFIX_DIR}
DOWNLOAD_NO_PROGRESS 1
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND
${CMAKE_COMMAND} -E remove_directory ${FASTTOKENIZER_INSTALL_DIR} &&
${CMAKE_COMMAND} -E make_directory ${FASTTOKENIZER_INSTALL_DIR} &&
${CMAKE_COMMAND} -E make_directory ${FASTTOKENIZER_INSTALL_DIR}/lib &&
${CMAKE_COMMAND} -E make_directory ${FASTTOKENIZER_INSTALL_DIR}/third_party &&
${CMAKE_COMMAND} -E rename ${FASTTOKENIZER_SOURCE_DIR}/lib/ ${FASTTOKENIZER_INSTALL_DIR}/lib/${ANDROID_ABI} &&
${CMAKE_COMMAND} -E copy_directory ${FASTTOKENIZER_SOURCE_DIR}/include ${FASTTOKENIZER_INSTALL_DIR}/include &&
${CMAKE_COMMAND} -E copy_directory ${FASTTOKENIZER_SOURCE_DIR}/third_party/include ${FASTTOKENIZER_INSTALL_DIR}/third_party/include
BUILD_BYPRODUCTS ${FASTTOKENIZER_COMPILE_LIB})
else()
ExternalProject_Add(
${FASTTOKENIZER_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
URL ${FASTTOKENIZER_URL}
PREFIX ${FASTTOKENIZER_PREFIX_DIR}
DOWNLOAD_NO_PROGRESS 1
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND
${CMAKE_COMMAND} -E copy_directory ${FASTTOKENIZER_SOURCE_DIR} ${FASTTOKENIZER_INSTALL_DIR}
BUILD_BYPRODUCTS ${FASTTOKENIZER_COMPILE_LIB})
endif()

add_library(fast_tokenizer STATIC IMPORTED GLOBAL)
set_property(TARGET fast_tokenizer PROPERTY IMPORTED_LOCATION ${FASTTOKENIZER_COMPILE_LIB})
Expand Down
4 changes: 4 additions & 0 deletions fastdeploy/backends/lite/lite_backend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,10 @@ bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
tensor->CopyFromCpu<uint8_t, paddle::lite_api::TargetType::kARM>(
reinterpret_cast<const uint8_t*>(const_cast<void*>(
inputs[i].CpuData())));
} else if (inputs[i].dtype == FDDataType::INT64) {
tensor->CopyFromCpu<int64_t, paddle::lite_api::TargetType::kARM>(
reinterpret_cast<const int64_t*>(const_cast<void*>(
inputs[i].CpuData())));
} else {
FDASSERT(false, "Unexpected data type of %d.", inputs[i].dtype);
}
Expand Down
2 changes: 1 addition & 1 deletion fastdeploy/text/uie/model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ bool UIEModel::Initialize() {

void UIEModel::SetValidBackend() {
// TODO(zhoushunjie): Add lite backend in future
valid_cpu_backends = {Backend::ORT, Backend::OPENVINO, Backend::PDINFER};
valid_cpu_backends = {Backend::ORT, Backend::OPENVINO, Backend::PDINFER, Backend::LITE};
valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
}

Expand Down
11 changes: 8 additions & 3 deletions fastdeploy/vision/visualize/segmentation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ namespace fastdeploy {
namespace vision {

#ifdef __ARM_NEON
static constexpr int VIS_SEG_OMP_NUM_THREADS=2;

static inline void QuantizeBlendingWeight8(
float weight, uint8_t* old_multi_factor, uint8_t* new_multi_factor) {
// Quantize the weight to boost blending performance.
Expand Down Expand Up @@ -53,7 +55,8 @@ static cv::Mat FastVisSegmentationNEON(
const uint8_t *im_ptr = static_cast<const uint8_t*>(im.data);

if (!quantize_weight) {
#pragma omp parallel for num_threads(2) schedule(static)
#pragma omp parallel for proc_bind(close) \
num_threads(VIS_SEG_OMP_NUM_THREADS) schedule(static)
for (int i = 0; i < size - 15; i += 16) {
uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes
// e.g 0b00000001 << 7 -> 0b10000000 128;
Expand Down Expand Up @@ -87,7 +90,8 @@ static cv::Mat FastVisSegmentationNEON(

if (new_multi_factor == 8) {
// Only keep mask, no need to blending with origin image.
#pragma omp parallel for num_threads(2) schedule(static)
#pragma omp parallel for proc_bind(close) \
num_threads(VIS_SEG_OMP_NUM_THREADS) schedule(static)
for (int i = 0; i < size - 15; i += 16) {
uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes
// e.g 0b00000001 << 7 -> 0b10000000 128;
Expand All @@ -112,7 +116,8 @@ static cv::Mat FastVisSegmentationNEON(
uint8x16_t old_mulx16 = vdupq_n_u8(old_multi_factor);
uint8x16_t new_mulx16 = vdupq_n_u8(new_multi_factor);
// Blend the two colors together with quantize 'weight'.
#pragma omp parallel for num_threads(2) schedule(static)
#pragma omp parallel for proc_bind(close) \
num_threads(VIS_SEG_OMP_NUM_THREADS) schedule(static)
for (int i = 0; i < size - 15; i += 16) {
uint8x16x3_t bgrx16x3 = vld3q_u8(im_ptr + i * 3); // 48 bytes
uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes
Expand Down

0 comments on commit 65f270d

Please sign in to comment.