Skip to content

Commit

Permalink
Main merged release 1.10.0 (#395)
Browse files Browse the repository at this point in the history
* Changes for TVM 1.10

Fix TRT define

* Initial changes for treelite 1.x apis

* Update versions

* Update TVM submodule and cmake dlr version

* Fixes after TVM update

* Use MLAS, OPENMP, MKL

* Allows user to configure RelayVM allocator via env var or metadata file (#361)

* Allows user to configure RelayVM allocator via env var or metadata file

* Add unit tests. Rename env var

* Fix lint

* Update TVM submodule

* Update TVM submodule

* Update: bring new commits from TVM 1.10.0-rc (#364)

* Update TVM submodule (#368)

* Update: make dmlc build statically into dlr

* Update: bring new commits from TVM 1.10.0-rc

* Fix invalid operands to binary expression in shape comparison

* Use gtest EXPECT_EQ instead of TVM CHECK_EQ in dlr_data_transform_test.cc

* Use std::equal for tvm::runtime::ShapeTuple comparison (#386)

* Fix: update vm artifacts to pass unit test. (#389)

* Use branch release-1.10.0 for git-clang-format.sh (#390)

* Use branch release-1.10.0 for git-clang-format.sh

* Upgrade cmake for CI builds to 3.21.4

* Fix: treelite unit test (#391)

* Temporarily disable dlr_pipeline_skl_xgb_test (#392)

* Fix: integration test: load_and_run_treelite_model.py (#393)

* Enable treelite integration test for mnist-1.10.0 model (#394)

Co-authored-by: Trevor Morris <[email protected]>
Co-authored-by: Alexander Pivovarov <[email protected]>
  • Loading branch information
3 people authored Jan 28, 2022
1 parent f5df876 commit 95c0559
Show file tree
Hide file tree
Showing 32 changed files with 282 additions and 244 deletions.
2 changes: 1 addition & 1 deletion 3rdparty/treelite
Submodule treelite updated 236 files
2 changes: 1 addition & 1 deletion 3rdparty/tvm
Submodule tvm updated 1569 files
68 changes: 44 additions & 24 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ if(ANDROID_BUILD)
endif()
endif(ANDROID_BUILD)

project(dlr VERSION 1.9.0 LANGUAGES C CXX)
project(dlr VERSION 1.10.0 LANGUAGES C CXX)

message(STATUS "dlr version: ${dlr_VERSION}")

Expand Down Expand Up @@ -77,7 +77,14 @@ option(TEST_COVERAGE "C++ test coverage" OFF)
# Compiler flags
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# --exclude-libs is not available on Windows and macOS. As such, Windows and
# Mac do not support the creation of multiple DLRModel instances (in Python) in
# case model folders have their own libdlr.so.
if (WIN32 OR APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--exclude-libs,ALL")
endif()
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funroll-loops")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}")
Expand All @@ -102,7 +109,6 @@ include_directories("${TVM_SRC}/src/runtime")
include_directories("${DLPACK_SRC}/include")
include_directories("${DMLC_CORE_SRC}/include")
include_directories("${TREELITE_SRC}/include")
include_directories("${TREELITE_SRC}/runtime/native/include")
include_directories("${PROJECT_SOURCE_DIR}/include")
include_directories("${JSON_SRC}")

Expand Down Expand Up @@ -141,7 +147,7 @@ endif(USE_OPENCL)

if(USE_CUDA)
message("USING CUDA")
find_cuda(${USE_CUDA})
find_cuda(${USE_CUDA} ${USE_CUDNN})
if(NOT CUDA_FOUND)
message(FATAL_ERROR "CUDA not found, please specify CUDA location with -DUSE_CUDA=/path/to/cuda/")
endif(NOT CUDA_FOUND)
Expand Down Expand Up @@ -223,7 +229,7 @@ if(USE_TENSORRT)
list(APPEND DLR_SRC ${RUNTIME_TENSORRT_SRCS})

# Set defines
add_definitions(-DTVM_GRAPH_RUNTIME_TENSORRT)
add_definitions(-DTVM_GRAPH_EXECUTOR_TENSORRT)

set(USE_TENSORRT OFF)
endif()
Expand All @@ -233,7 +239,7 @@ if(WITH_HEXAGON)
endif()

if(ENABLE_DATATRANSFORM)
add_definitions(-DENABLE_DATATRANSFORM)
add_definitions(-DENABLE_DATATRANSFORM)
endif()

if(AAR_BUILD)
Expand All @@ -259,23 +265,32 @@ if(USE_MKL)
endif()

if(USE_MLAS)
message(STATUS "Build with MLAS library")
if (NOT (USE_OPENMP STREQUAL "gnu" OR USE_OPENMP STREQUAL "intel"))
message(FATAL_ERROR "MLAS library must be built with USE_OPENMP=gnu or USE_OPENMP=intel")
endif()
file(GLOB RUNTIME_MLAS_SRCS ${TVM_SRC}/src/runtime/contrib/mlas/*.cc)
list(APPEND DLR_SRC ${RUNTIME_MLAS_SRCS})
list(APPEND DLR_LINKER_LIBS onnxruntime_mlas_static)
include_directories(${TVM_SRC}/3rdparty/mlas/inc)
message(STATUS "Build with MLAS library")
if (NOT (USE_OPENMP STREQUAL "gnu" OR USE_OPENMP STREQUAL "intel"))
message(FATAL_ERROR "MLAS library must be built with USE_OPENMP=gnu or USE_OPENMP=intel")
endif()
file(GLOB RUNTIME_MLAS_SRCS ${TVM_SRC}/src/runtime/contrib/mlas/*.cc)
list(APPEND DLR_SRC ${RUNTIME_MLAS_SRCS})
list(APPEND DLR_LINKER_LIBS onnxruntime_mlas_static)
include_directories(${TVM_SRC}/3rdparty/mlas/inc)
endif()

set(MAIN_EXEC "")
FILE(GLOB MAIN_SRC src/*.cc)

set(USE_LIBBACKTRACE OFF)
add_subdirectory(${TVM_SRC} EXCLUDE_FROM_ALL)
set(BUILD_SHARED_LIBS_SAVED "${BUILD_SHARED_LIBS}") # Save BUILD_SHARED_LIBS
set(BUILD_STATIC_LIBS ON)
set(BUILD_SHARED_LIBS OFF)
add_subdirectory(${TREELITE_SRC} EXCLUDE_FROM_ALL)
set(BUILD_SHARED_LIBS "${BUILD_SHARED_LIBS_SAVED}") # Restore BUILD_SHARED_LIBS
set(BUILD_STATIC_LIBS OFF)
add_library(objdlr OBJECT ${DLR_SRC})

target_compile_definitions(objdlr PUBLIC DMLC_USE_LOGGING_LIBRARY=<tvm/runtime/logging.h>)
target_compile_definitions(objdlr PRIVATE TVM_USE_LIBBACKTRACE=0)

#shared_library
find_package(Threads)
set(THREADS_PREFER_PTHREAD_FLAG TRUE)
Expand Down Expand Up @@ -333,6 +348,9 @@ install(EXPORT dlrTargets
FILE dlrTargets.cmake
DESTINATION lib/cmake/dlr
)
if(USE_MLAS)
install(TARGETS onnxruntime_mlas_static EXPORT dlrTargets)
endif()

include(CMakePackageConfigHelpers)
# generate the config file that includes the exports
Expand Down Expand Up @@ -373,7 +391,7 @@ if(DLR_BUILD_TESTS AND NOT(AAR_BUILD))
file(GLOB TEST_SRCS tests/cpp/*.cc)

if(NOT(ENABLE_DATATRANSFORM))
list(REMOVE_ITEM TEST_SRCS
list(REMOVE_ITEM TEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/tests/cpp/dlr_data_transform_test.cc
${CMAKE_CURRENT_SOURCE_DIR}/tests/cpp/dlr_pipeline_skl_xgb_test.cc)
endif()
Expand All @@ -386,6 +404,8 @@ if(DLR_BUILD_TESTS AND NOT(AAR_BUILD))
get_filename_component(__srcname ${__srcpath} NAME)
string(REPLACE ".cc" "" __execname ${__srcname})
add_executable(${__execname} ${__srcpath})
target_compile_definitions(${__execname} PUBLIC DMLC_USE_LOGGING_LIBRARY=<tvm/runtime/logging.h>)
target_compile_definitions(${__execname} PRIVATE TVM_USE_LIBBACKTRACE=0)
target_link_libraries(${__execname} dlr gtest_main)
set_output_directory(${__execname} ${CMAKE_BINARY_DIR})
add_test(NAME ${__execname} COMMAND ${__execname})
Expand All @@ -397,7 +417,7 @@ if(DLR_BUILD_TESTS AND NOT(AAR_BUILD))
get_filename_component(__srcname ${__srcpath} NAME)
string(REPLACE ".cc" "" __execname ${__srcname})
add_executable(${__execname} ${__srcpath})
target_link_libraries(${__execname} gtest_main dl)
target_link_libraries(${__execname} gtest_main dl)
set_output_directory(${__execname} ${CMAKE_BINARY_DIR})
add_test(NAME ${__execname} COMMAND ${__execname})
message(STATUS "Added dlsym Test: " ${__execname})
Expand All @@ -412,7 +432,7 @@ if(DLR_BUILD_TESTS AND NOT(AAR_BUILD))
e35e82f3371bed37caa7ecece417f50876414077
)
endif()

set(STREET_IMAGE ${CMAKE_CURRENT_BINARY_DIR}/street_small.npy)
if(NOT EXISTS ${STREET_IMAGE})
download_file(
Expand All @@ -422,7 +442,7 @@ if(DLR_BUILD_TESTS AND NOT(AAR_BUILD))
206d1a12646a5fe43d0877d38bd137c70a5adc3b
)
endif()

# Download compiled model for unit tests
set(RESNET_MODEL ${CMAKE_CURRENT_BINARY_DIR}/resnet_v1_5_50)
if(NOT IS_DIRECTORY ${RESNET_MODEL})
Expand All @@ -443,26 +463,26 @@ if(DLR_BUILD_TESTS AND NOT(AAR_BUILD))
if(NOT IS_DIRECTORY ${XGBOOST_TEST_MODEL})
file(MAKE_DIRECTORY ${XGBOOST_TEST_MODEL})
download_file(
https://neo-ai-dlr-test-artifacts.s3-us-west-2.amazonaws.com/compiled-models/xgboost_test.tar.gz
/tmp/xgboost_test.tar.gz
https://neo-ai-dlr-test-artifacts.s3-us-west-2.amazonaws.com/compiled-models/release-1.10.0/xgboost-ml_m5.tar.gz
/tmp/xgboost-ml_m5.tar.gz
SHA1
4c8ac5f20db6c7c6d674dd2ac9d9e14ce887281e
1e45bb9d6108d70ac4ff37855cf13061d61ef742
)
# this is OS-agnostic
execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xf /tmp/xgboost_test.tar.gz
execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xf /tmp/xgboost-ml_m5.tar.gz
WORKING_DIRECTORY ${XGBOOST_TEST_MODEL})
file(REMOVE /tmp/xgboost_test.tar.gz)
file(REMOVE /tmp/xgboost-ml_m5.tar.gz)
endif()

set(RELAYVM_MODEL ssd_mobilenet_v1_ppn_shared_box_predictor_300x300_coco14_sync_2018_07_03-LINUX_X86_64.tar.gz)
set(RELAYVM_MODEL_DIR ${CMAKE_CURRENT_BINARY_DIR}/ssd_mobilenet_v1)
if(NOT IS_DIRECTORY ${RELAYVM_MODEL_DIR})
file(MAKE_DIRECTORY ${RELAYVM_MODEL_DIR})
download_file(
https://neo-ai-dlr-test-artifacts.s3-us-west-2.amazonaws.com/compiled-models/release-1.5.0/${RELAYVM_MODEL}
https://neo-ai-dlr-test-artifacts.s3-us-west-2.amazonaws.com/compiled-models/release-1.10.0/${RELAYVM_MODEL}
/tmp/${RELAYVM_MODEL}
SHA1
49ddd9e815c6cc14ef0e9a594c8c2d0d129e5e91
38111e6432d643122ebf6ed5493e415871dc3fa5
)
# this is OS-agnostic
execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xf /tmp/${RELAYVM_MODEL}
Expand Down
2 changes: 1 addition & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ pipeline {
unstash name: 'srcs'
sh """
tests/ci_build/git-clang-format.sh HEAD~1
tests/ci_build/git-clang-format.sh origin/main
tests/ci_build/git-clang-format.sh origin/$CHANGE_TARGET
"""
}
}
Expand Down
10 changes: 5 additions & 5 deletions include/dlr.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ extern "C" { // Open extern "C" block
/*! \brief major version */
#define DLR_MAJOR 1
/*! \brief minor version */
#define DLR_MINOR 9
#define DLR_MINOR 10
/*! \brief patch version */
#define DLR_PATCH 1
/*! \brief DLR version */
Expand Down Expand Up @@ -198,7 +198,7 @@ int SetDLRInput(DLRModelHandle* handle, const char* name, const int64_t* shape,

/*!
* \brief Sets the input according to the node name from existing DLTensor. Can only be
* used with TVM models (GraphRuntime and VMRuntime)
* used with TVM models (GraphExecutor and VMRuntime)
* \param handle The model handle returned from CreateDLRModel().
* \param name The input node name.
* \param tensor The input DLTensor.
Expand All @@ -209,7 +209,7 @@ int SetDLRInputTensor(DLRModelHandle* handle, const char* name, void* tensor);

/*!
* \brief Sets the input from existing DLTensor without copying data. Can only be
* used with TVM models (GraphRuntime). Input tensor device must match the device of the
* used with TVM models (GraphExecutor). Input tensor device must match the device of the
* model, and data must be aligned to 128 bytes. GetDLRInput cannot be used for inputs set
* via SetDLRInputZeroCopy.
* \param handle The model handle returned from CreateDLRModel().
Expand Down Expand Up @@ -289,7 +289,7 @@ int GetDLROutputPtr(DLRModelHandle* handle, int index, const void** out);

/*!
* \brief Gets the index-th output from the model and copies it into the given DLTensor.
* Can only be used with TVM models (GraphRuntime and VMRuntime)
* Can only be used with TVM models (GraphExecutor and VMRuntime)
* \param handle The model handle returned from CreateDLRModel().
* \param index The index-th output.
* \param tensor The pointer to an existing/allocated DLTensor to copy the output into.
Expand All @@ -300,7 +300,7 @@ int GetDLROutputTensor(DLRModelHandle* handle, int index, void* tensor);

/*!
* \brief Gets the index-th output from the model and sets the pointer to it.
* Can only be used with TVM models (GraphRuntime and VMRuntime)
* Can only be used with TVM models (GraphExecutor and VMRuntime)
* \param handle The model handle returned from CreateDLRModel().
* \param index The index-th output.
* \param tensor The pointer to an unallocated DLManagedTensor pointer, will be
Expand Down
4 changes: 2 additions & 2 deletions include/dlr_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,15 +128,15 @@ class DLR_DLL DLRModel {
size_t num_inputs_ = 1;
size_t num_weights_ = 0;
size_t num_outputs_ = 1;
DLContext ctx_;
DLDevice dev_;
std::vector<std::string> input_names_;
std::vector<std::string> input_types_;
std::vector<std::vector<int64_t>> input_shapes_;
virtual void ValidateDeviceTypeIfExists();

public:
nlohmann::json metadata_ = nullptr;
DLRModel(const DLContext& ctx, const DLRBackend& backend) : ctx_(ctx), backend_(backend) {}
DLRModel(const DLDevice& dev, const DLRBackend& backend) : dev_(dev), backend_(backend) {}
virtual ~DLRModel() {}

/* Input related functions */
Expand Down
8 changes: 4 additions & 4 deletions include/dlr_data_transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class DLR_DLL Transformer {
/*! \brief Helper function for TransformInput. Allocates NDArray to store
* mapped input data. */
virtual void InitNDArray(const nlohmann::json& input_json, const nlohmann::json& transform,
DLDataType dtype, DLContext ctx,
DLDataType dtype, DLDevice dev,
tvm::runtime::NDArray& input_array) const;
};

Expand Down Expand Up @@ -79,7 +79,7 @@ class DLR_DLL DateTimeTransformer : public Transformer {
tvm::runtime::NDArray& input_array) const;

void InitNDArray(const nlohmann::json& input_json, const nlohmann::json& transform,
DLDataType dtype, DLContext ctx, tvm::runtime::NDArray& input_array) const;
DLDataType dtype, DLDevice dev, tvm::runtime::NDArray& input_array) const;
};

class DLR_DLL TextTransformer : public Transformer {
Expand All @@ -90,7 +90,7 @@ class DLR_DLL TextTransformer : public Transformer {
tvm::runtime::NDArray& input_array) const override;

virtual void InitNDArray(const nlohmann::json& input_json, const nlohmann::json& transform,
DLDataType dtype, DLContext ctx,
DLDataType dtype, DLDevice dev,
tvm::runtime::NDArray& input_array) const override;

inline void SetIndex(int idx) const { column_idx_ = idx; };
Expand Down Expand Up @@ -150,7 +150,7 @@ class DLR_DLL DataTransform {
* model input.
*/
void TransformInput(const nlohmann::json& metadata, const int64_t* shape, const void* input,
int dim, const std::vector<DLDataType>& dtypes, DLContext ctx,
int dim, const std::vector<DLDataType>& dtypes, DLDevice dev,
std::vector<tvm::runtime::NDArray>* tvm_inputs) const;

/*! \brief Transform integer output using CategoricalString output
Expand Down
2 changes: 1 addition & 1 deletion include/dlr_hexagon/dlr_hexagon.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class HexagonModel : public DLRModel {
public:
/*! \brief Load model files from given folder path.
*/
explicit HexagonModel(const std::vector<std::string>& files, const DLContext& ctx,
explicit HexagonModel(const std::vector<std::string>& files, const DLDevice& dev,
const int debug_level);
~HexagonModel();

Expand Down
5 changes: 2 additions & 3 deletions include/dlr_pipeline.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#ifndef DLR_PIPELINE_H_
#define DLR_PIPELINE_H_

#include <graph/graph_runtime.h>
#include <tvm/runtime/memory.h>

#include "dlr_common.h"
Expand All @@ -27,8 +26,8 @@ class DLR_DLL PipelineModel : public DLRModel {
public:
/*! \brief Load model files from given folder path.
*/
explicit PipelineModel(const std::vector<DLRModelPtr>& dlr_models, const DLContext& ctx)
: DLRModel(ctx, DLRBackend::kPIPELINE), dlr_models_(dlr_models) {
explicit PipelineModel(const std::vector<DLRModelPtr>& dlr_models, const DLDevice& dev)
: DLRModel(dev, DLRBackend::kPIPELINE), dlr_models_(dlr_models) {
SetupPipelineModel();
}

Expand Down
11 changes: 6 additions & 5 deletions include/dlr_relayvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
#define DLR_RELAYVM_H_

#include <dlpack/dlpack.h>
#include <tvm/runtime/container.h>
#include <tvm/runtime/container/adt.h>
#include <tvm/runtime/container/shape_tuple.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/ndarray.h>
#include <tvm/runtime/object.h>
Expand Down Expand Up @@ -62,15 +63,15 @@ class DLR_DLL RelayVMModel : public DLRModel {
DLDataType GetInputDLDataType(int index);

public:
explicit RelayVMModel(const std::vector<std::string>& files, const DLContext& ctx)
: DLRModel(ctx, DLRBackend::kRELAYVM),
explicit RelayVMModel(const std::vector<std::string>& files, const DLDevice& dev)
: DLRModel(dev, DLRBackend::kRELAYVM),
allocator_type_(tvm::runtime::vm::AllocatorType::kPooled) {
SetupVMModule(files);
FetchInputNodesData();
FetchOutputNodesData();
}
explicit RelayVMModel(std::vector<DLRModelElem> model_elems, const DLContext& ctx)
: DLRModel(ctx, DLRBackend::kRELAYVM),
explicit RelayVMModel(std::vector<DLRModelElem> model_elems, const DLDevice& dev)
: DLRModel(dev, DLRBackend::kRELAYVM),
allocator_type_(tvm::runtime::vm::AllocatorType::kPooled) {
SetupVMModule(model_elems);
FetchInputNodesData();
Expand Down
6 changes: 3 additions & 3 deletions include/dlr_treelite.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ struct TreeliteInput {
std::vector<size_t, DLRAllocator<size_t>> row_ptr;
size_t num_row;
size_t num_col;
CSRBatchHandle handle = nullptr;
DMatrixHandle handle = nullptr;
~TreeliteInput();
};

Expand Down Expand Up @@ -58,8 +58,8 @@ class DLR_DLL TreeliteModel : public DLRModel {
public:
/*! \brief Load model files from given folder path.
*/
explicit TreeliteModel(const std::vector<std::string>& files, const DLContext& ctx)
: DLRModel(ctx, DLRBackend::kTREELITE) {
explicit TreeliteModel(const std::vector<std::string>& files, const DLDevice& dev)
: DLRModel(dev, DLRBackend::kTREELITE) {
SetupTreeliteModule(files);
}

Expand Down
Loading

0 comments on commit 95c0559

Please sign in to comment.