Main merged release 1.10.0 (#395)

* Changes for TVM 1.10 Fix TRT define
* Initial changes for treelite 1.x apis
* Update versions
* Update TVM submodule and cmake dlr version
* Fixes after TVM update
* Use MLAS, OPENMP, MKL
* Allows user to configure RelayVM allocator via env var or metadata file (#361)
* Allows user to configure RelayVM allocator via env var or metadata file
* Add unit tests. Rename env var
* Fix lint
* Update TVM submodule
* Update TVM submodule
* Update: bring new commits from TVM 1.10.0-rc (#364)
* Update TVM submodule (#368)
* Update: make dmlc build statically into dlr
* Update: bring new commits from TVM 1.10.0-rc
* Fix invalid operands to binary expression in shape comparison
* Use gtest EXPECT_EQ instead of TVM CHECK_EQ in dlr_data_transform_test.cc
* Use std::equal for tvm::runtime::ShapeTuple comparison (#386)
* Fix: update vm artifacts to pass unit test. (#389)
* Use branch release-1.10.0 for git-clang-format.sh (#390)
* Use branch release-1.10.0 for git-clang-format.sh
* Upgrade cmake for CI builds to 3.21.4
* Fix: treelite unit test (#391)
* Temporarily disable dlr_pipeline_skl_xgb_test (#392)
* Fix: integration test: load_and_run_treelite_model.py (#393)
* Enable treelite integration test for mnist-1.10.0 model (#394)

Co-authored-by: Trevor Morris <[email protected]>
Co-authored-by: Alexander Pivovarov <[email protected]>
neo-ai · Jan 28, 2022 · 95c0559 · 95c0559
1 parent f5df876
commit 95c0559
Show file tree

Hide file tree

Showing 32 changed files with 282 additions and 244 deletions.
diff --git a/3rdparty/treelite b/3rdparty/treelite
diff --git a/3rdparty/tvm b/3rdparty/tvm
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -17,7 +17,7 @@ if(ANDROID_BUILD)
     endif()
 endif(ANDROID_BUILD)
 
-project(dlr VERSION 1.9.0 LANGUAGES C CXX)
+project(dlr VERSION 1.10.0 LANGUAGES C CXX)
 
 message(STATUS "dlr version: ${dlr_VERSION}")
 
@@ -77,7 +77,14 @@ option(TEST_COVERAGE "C++ test coverage" OFF)
 # Compiler flags
 set(CMAKE_CXX_STANDARD 14)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
+# --exclude-libs is not available on Windows and macOS. As such, Windows and
+# Mac do not support the creation of multiple DLRModel instances (in Python) in
+# case model folders have their own libdlr.so.
+if (WIN32 OR APPLE)
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+else()
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--exclude-libs,ALL")
+endif()
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funroll-loops")
 set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}")
@@ -102,7 +109,6 @@ include_directories("${TVM_SRC}/src/runtime")
 include_directories("${DLPACK_SRC}/include")
 include_directories("${DMLC_CORE_SRC}/include")
 include_directories("${TREELITE_SRC}/include")
-include_directories("${TREELITE_SRC}/runtime/native/include")
 include_directories("${PROJECT_SOURCE_DIR}/include")
 include_directories("${JSON_SRC}")
 
@@ -141,7 +147,7 @@ endif(USE_OPENCL)
 
 if(USE_CUDA)
     message("USING CUDA")
-    find_cuda(${USE_CUDA})
+    find_cuda(${USE_CUDA} ${USE_CUDNN})
     if(NOT CUDA_FOUND)
         message(FATAL_ERROR "CUDA not found, please specify CUDA location with -DUSE_CUDA=/path/to/cuda/")
     endif(NOT CUDA_FOUND)
@@ -223,7 +229,7 @@ if(USE_TENSORRT)
     list(APPEND DLR_SRC ${RUNTIME_TENSORRT_SRCS})
 
     # Set defines
-    add_definitions(-DTVM_GRAPH_RUNTIME_TENSORRT)
+    add_definitions(-DTVM_GRAPH_EXECUTOR_TENSORRT)
 
     set(USE_TENSORRT OFF)
 endif()
@@ -233,7 +239,7 @@ if(WITH_HEXAGON)
 endif()
 
 if(ENABLE_DATATRANSFORM)
-    add_definitions(-DENABLE_DATATRANSFORM) 
+    add_definitions(-DENABLE_DATATRANSFORM)
 endif()
 
 if(AAR_BUILD)
@@ -259,23 +265,32 @@ if(USE_MKL)
 endif()
 
 if(USE_MLAS)
-    message(STATUS "Build with MLAS library")
-    if (NOT (USE_OPENMP STREQUAL "gnu" OR USE_OPENMP STREQUAL "intel"))
-        message(FATAL_ERROR "MLAS library must be built with USE_OPENMP=gnu or USE_OPENMP=intel")
-    endif()
-    file(GLOB RUNTIME_MLAS_SRCS ${TVM_SRC}/src/runtime/contrib/mlas/*.cc)
-    list(APPEND DLR_SRC ${RUNTIME_MLAS_SRCS})
-    list(APPEND DLR_LINKER_LIBS onnxruntime_mlas_static)
-    include_directories(${TVM_SRC}/3rdparty/mlas/inc)
+  message(STATUS "Build with MLAS library")
+  if (NOT (USE_OPENMP STREQUAL "gnu" OR USE_OPENMP STREQUAL "intel"))
+    message(FATAL_ERROR "MLAS library must be built with USE_OPENMP=gnu or USE_OPENMP=intel")
+  endif()
+  file(GLOB RUNTIME_MLAS_SRCS ${TVM_SRC}/src/runtime/contrib/mlas/*.cc)
+  list(APPEND DLR_SRC ${RUNTIME_MLAS_SRCS})
+  list(APPEND DLR_LINKER_LIBS onnxruntime_mlas_static)
+  include_directories(${TVM_SRC}/3rdparty/mlas/inc)
 endif()
 
 set(MAIN_EXEC "")
 FILE(GLOB MAIN_SRC src/*.cc)
 
+set(USE_LIBBACKTRACE OFF)
 add_subdirectory(${TVM_SRC} EXCLUDE_FROM_ALL)
+set(BUILD_SHARED_LIBS_SAVED "${BUILD_SHARED_LIBS}") # Save BUILD_SHARED_LIBS
+set(BUILD_STATIC_LIBS ON)
+set(BUILD_SHARED_LIBS OFF)
 add_subdirectory(${TREELITE_SRC} EXCLUDE_FROM_ALL)
+set(BUILD_SHARED_LIBS "${BUILD_SHARED_LIBS_SAVED}")  # Restore BUILD_SHARED_LIBS
+set(BUILD_STATIC_LIBS OFF)
 add_library(objdlr OBJECT ${DLR_SRC})
 
+target_compile_definitions(objdlr PUBLIC DMLC_USE_LOGGING_LIBRARY=<tvm/runtime/logging.h>)
+target_compile_definitions(objdlr PRIVATE TVM_USE_LIBBACKTRACE=0)
+
 #shared_library
 find_package(Threads)
 set(THREADS_PREFER_PTHREAD_FLAG TRUE)
@@ -333,6 +348,9 @@ install(EXPORT dlrTargets
   FILE dlrTargets.cmake
   DESTINATION lib/cmake/dlr
 )
+if(USE_MLAS)
+  install(TARGETS onnxruntime_mlas_static EXPORT dlrTargets)
+endif()
 
 include(CMakePackageConfigHelpers)
 # generate the config file that includes the exports
@@ -373,7 +391,7 @@ if(DLR_BUILD_TESTS AND NOT(AAR_BUILD))
   file(GLOB TEST_SRCS tests/cpp/*.cc)
 
   if(NOT(ENABLE_DATATRANSFORM))
-    list(REMOVE_ITEM TEST_SRCS 
+    list(REMOVE_ITEM TEST_SRCS
         ${CMAKE_CURRENT_SOURCE_DIR}/tests/cpp/dlr_data_transform_test.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/tests/cpp/dlr_pipeline_skl_xgb_test.cc)
   endif()
@@ -386,6 +404,8 @@ if(DLR_BUILD_TESTS AND NOT(AAR_BUILD))
     get_filename_component(__srcname ${__srcpath} NAME)
     string(REPLACE ".cc" "" __execname ${__srcname})
     add_executable(${__execname} ${__srcpath})
+    target_compile_definitions(${__execname} PUBLIC DMLC_USE_LOGGING_LIBRARY=<tvm/runtime/logging.h>)
+    target_compile_definitions(${__execname} PRIVATE TVM_USE_LIBBACKTRACE=0)
     target_link_libraries(${__execname} dlr gtest_main)
     set_output_directory(${__execname} ${CMAKE_BINARY_DIR})
     add_test(NAME ${__execname} COMMAND ${__execname})
@@ -397,7 +417,7 @@ if(DLR_BUILD_TESTS AND NOT(AAR_BUILD))
     get_filename_component(__srcname ${__srcpath} NAME)
     string(REPLACE ".cc" "" __execname ${__srcname})
     add_executable(${__execname} ${__srcpath})
-    target_link_libraries(${__execname} gtest_main dl) 
+    target_link_libraries(${__execname} gtest_main dl)
     set_output_directory(${__execname} ${CMAKE_BINARY_DIR})
     add_test(NAME ${__execname} COMMAND ${__execname})
     message(STATUS "Added dlsym Test: " ${__execname})
@@ -412,7 +432,7 @@ if(DLR_BUILD_TESTS AND NOT(AAR_BUILD))
     e35e82f3371bed37caa7ecece417f50876414077
   )
   endif()
-  
+
   set(STREET_IMAGE ${CMAKE_CURRENT_BINARY_DIR}/street_small.npy)
   if(NOT EXISTS ${STREET_IMAGE})
   download_file(
@@ -422,7 +442,7 @@ if(DLR_BUILD_TESTS AND NOT(AAR_BUILD))
     206d1a12646a5fe43d0877d38bd137c70a5adc3b
   )
   endif()
-  
+
   # Download compiled model for unit tests
   set(RESNET_MODEL ${CMAKE_CURRENT_BINARY_DIR}/resnet_v1_5_50)
   if(NOT IS_DIRECTORY ${RESNET_MODEL})
@@ -443,26 +463,26 @@ if(DLR_BUILD_TESTS AND NOT(AAR_BUILD))
   if(NOT IS_DIRECTORY ${XGBOOST_TEST_MODEL})
     file(MAKE_DIRECTORY ${XGBOOST_TEST_MODEL})
     download_file(
-      https://neo-ai-dlr-test-artifacts.s3-us-west-2.amazonaws.com/compiled-models/xgboost_test.tar.gz
-      /tmp/xgboost_test.tar.gz
+      https://neo-ai-dlr-test-artifacts.s3-us-west-2.amazonaws.com/compiled-models/release-1.10.0/xgboost-ml_m5.tar.gz
+      /tmp/xgboost-ml_m5.tar.gz
       SHA1
-      4c8ac5f20db6c7c6d674dd2ac9d9e14ce887281e
+      1e45bb9d6108d70ac4ff37855cf13061d61ef742
     )
     # this is OS-agnostic
-    execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xf /tmp/xgboost_test.tar.gz
+    execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xf /tmp/xgboost-ml_m5.tar.gz
                     WORKING_DIRECTORY ${XGBOOST_TEST_MODEL})
-    file(REMOVE /tmp/xgboost_test.tar.gz)
+    file(REMOVE /tmp/xgboost-ml_m5.tar.gz)
   endif()
 
   set(RELAYVM_MODEL ssd_mobilenet_v1_ppn_shared_box_predictor_300x300_coco14_sync_2018_07_03-LINUX_X86_64.tar.gz)
   set(RELAYVM_MODEL_DIR ${CMAKE_CURRENT_BINARY_DIR}/ssd_mobilenet_v1)
   if(NOT IS_DIRECTORY ${RELAYVM_MODEL_DIR})
     file(MAKE_DIRECTORY ${RELAYVM_MODEL_DIR})
     download_file(
-      https://neo-ai-dlr-test-artifacts.s3-us-west-2.amazonaws.com/compiled-models/release-1.5.0/${RELAYVM_MODEL}
+      https://neo-ai-dlr-test-artifacts.s3-us-west-2.amazonaws.com/compiled-models/release-1.10.0/${RELAYVM_MODEL}
       /tmp/${RELAYVM_MODEL}
       SHA1
-      49ddd9e815c6cc14ef0e9a594c8c2d0d129e5e91
+      38111e6432d643122ebf6ed5493e415871dc3fa5
     )
     # this is OS-agnostic
     execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xf /tmp/${RELAYVM_MODEL}

diff --git a/Jenkinsfile b/Jenkinsfile
@@ -52,7 +52,7 @@ pipeline {
         unstash name: 'srcs'
         sh """
         tests/ci_build/git-clang-format.sh HEAD~1
-        tests/ci_build/git-clang-format.sh origin/main
+        tests/ci_build/git-clang-format.sh origin/$CHANGE_TARGET
         """
       }
     }

diff --git a/include/dlr.h b/include/dlr.h
@@ -19,7 +19,7 @@ extern "C" {  // Open extern "C" block
 /*! \brief major version */
 #define DLR_MAJOR 1
 /*! \brief minor version */
-#define DLR_MINOR 9
+#define DLR_MINOR 10
 /*! \brief patch version */
 #define DLR_PATCH 1
 /*! \brief DLR version */
@@ -198,7 +198,7 @@ int SetDLRInput(DLRModelHandle* handle, const char* name, const int64_t* shape,
 
 /*!
  * \brief Sets the input according to the node name from existing DLTensor. Can only be
- *        used with TVM models (GraphRuntime and VMRuntime)
+ *        used with TVM models (GraphExecutor and VMRuntime)
  * \param handle The model handle returned from CreateDLRModel().
  * \param name The input node name.
  * \param tensor The input DLTensor.
@@ -209,7 +209,7 @@ int SetDLRInputTensor(DLRModelHandle* handle, const char* name, void* tensor);
 
 /*!
  * \brief Sets the input from existing DLTensor without copying data. Can only be
- *        used with TVM models (GraphRuntime). Input tensor device must match the device of the
+ *        used with TVM models (GraphExecutor). Input tensor device must match the device of the
  *        model, and data must be aligned to 128 bytes. GetDLRInput cannot be used for inputs set
  *        via SetDLRInputZeroCopy.
  * \param handle The model handle returned from CreateDLRModel().
@@ -289,7 +289,7 @@ int GetDLROutputPtr(DLRModelHandle* handle, int index, const void** out);
 
 /*!
  * \brief Gets the index-th output from the model and copies it into the given DLTensor.
- *        Can only be used with TVM models (GraphRuntime and VMRuntime)
+ *        Can only be used with TVM models (GraphExecutor and VMRuntime)
  * \param handle The model handle returned from CreateDLRModel().
  * \param index The index-th output.
  * \param tensor The pointer to an existing/allocated DLTensor to copy the output into.
@@ -300,7 +300,7 @@ int GetDLROutputTensor(DLRModelHandle* handle, int index, void* tensor);
 
 /*!
  * \brief Gets the index-th output from the model and sets the pointer to it.
- *        Can only be used with TVM models (GraphRuntime and VMRuntime)
+ *        Can only be used with TVM models (GraphExecutor and VMRuntime)
  * \param handle The model handle returned from CreateDLRModel().
  * \param index The index-th output.
  * \param tensor The pointer to an unallocated DLManagedTensor pointer, will be

diff --git a/include/dlr_common.h b/include/dlr_common.h
@@ -128,15 +128,15 @@ class DLR_DLL DLRModel {
   size_t num_inputs_ = 1;
   size_t num_weights_ = 0;
   size_t num_outputs_ = 1;
-  DLContext ctx_;
+  DLDevice dev_;
   std::vector<std::string> input_names_;
   std::vector<std::string> input_types_;
   std::vector<std::vector<int64_t>> input_shapes_;
   virtual void ValidateDeviceTypeIfExists();
 
  public:
   nlohmann::json metadata_ = nullptr;
-  DLRModel(const DLContext& ctx, const DLRBackend& backend) : ctx_(ctx), backend_(backend) {}
+  DLRModel(const DLDevice& dev, const DLRBackend& backend) : dev_(dev), backend_(backend) {}
   virtual ~DLRModel() {}
 
   /* Input related functions */

diff --git a/include/dlr_data_transform.h b/include/dlr_data_transform.h
@@ -23,7 +23,7 @@ class DLR_DLL Transformer {
   /*! \brief Helper function for TransformInput. Allocates NDArray to store
    * mapped input data. */
   virtual void InitNDArray(const nlohmann::json& input_json, const nlohmann::json& transform,
-                           DLDataType dtype, DLContext ctx,
+                           DLDataType dtype, DLDevice dev,
                            tvm::runtime::NDArray& input_array) const;
 };
 
@@ -79,7 +79,7 @@ class DLR_DLL DateTimeTransformer : public Transformer {
                     tvm::runtime::NDArray& input_array) const;
 
   void InitNDArray(const nlohmann::json& input_json, const nlohmann::json& transform,
-                   DLDataType dtype, DLContext ctx, tvm::runtime::NDArray& input_array) const;
+                   DLDataType dtype, DLDevice dev, tvm::runtime::NDArray& input_array) const;
 };
 
 class DLR_DLL TextTransformer : public Transformer {
@@ -90,7 +90,7 @@ class DLR_DLL TextTransformer : public Transformer {
                             tvm::runtime::NDArray& input_array) const override;
 
   virtual void InitNDArray(const nlohmann::json& input_json, const nlohmann::json& transform,
-                           DLDataType dtype, DLContext ctx,
+                           DLDataType dtype, DLDevice dev,
                            tvm::runtime::NDArray& input_array) const override;
 
   inline void SetIndex(int idx) const { column_idx_ = idx; };
@@ -150,7 +150,7 @@ class DLR_DLL DataTransform {
    * model input.
    */
   void TransformInput(const nlohmann::json& metadata, const int64_t* shape, const void* input,
-                      int dim, const std::vector<DLDataType>& dtypes, DLContext ctx,
+                      int dim, const std::vector<DLDataType>& dtypes, DLDevice dev,
                       std::vector<tvm::runtime::NDArray>* tvm_inputs) const;
 
   /*! \brief Transform integer output using CategoricalString output

diff --git a/include/dlr_hexagon/dlr_hexagon.h b/include/dlr_hexagon/dlr_hexagon.h
@@ -46,7 +46,7 @@ class HexagonModel : public DLRModel {
  public:
   /*! \brief Load model files from given folder path.
    */
-  explicit HexagonModel(const std::vector<std::string>& files, const DLContext& ctx,
+  explicit HexagonModel(const std::vector<std::string>& files, const DLDevice& dev,
                         const int debug_level);
   ~HexagonModel();
 

diff --git a/include/dlr_pipeline.h b/include/dlr_pipeline.h
@@ -1,7 +1,6 @@
 #ifndef DLR_PIPELINE_H_
 #define DLR_PIPELINE_H_
 
-#include <graph/graph_runtime.h>
 #include <tvm/runtime/memory.h>
 
 #include "dlr_common.h"
@@ -27,8 +26,8 @@ class DLR_DLL PipelineModel : public DLRModel {
  public:
   /*! \brief Load model files from given folder path.
    */
-  explicit PipelineModel(const std::vector<DLRModelPtr>& dlr_models, const DLContext& ctx)
-      : DLRModel(ctx, DLRBackend::kPIPELINE), dlr_models_(dlr_models) {
+  explicit PipelineModel(const std::vector<DLRModelPtr>& dlr_models, const DLDevice& dev)
+      : DLRModel(dev, DLRBackend::kPIPELINE), dlr_models_(dlr_models) {
     SetupPipelineModel();
   }
 

diff --git a/include/dlr_relayvm.h b/include/dlr_relayvm.h
@@ -2,7 +2,8 @@
 #define DLR_RELAYVM_H_
 
 #include <dlpack/dlpack.h>
-#include <tvm/runtime/container.h>
+#include <tvm/runtime/container/adt.h>
+#include <tvm/runtime/container/shape_tuple.h>
 #include <tvm/runtime/module.h>
 #include <tvm/runtime/ndarray.h>
 #include <tvm/runtime/object.h>
@@ -62,15 +63,15 @@ class DLR_DLL RelayVMModel : public DLRModel {
   DLDataType GetInputDLDataType(int index);
 
  public:
-  explicit RelayVMModel(const std::vector<std::string>& files, const DLContext& ctx)
-      : DLRModel(ctx, DLRBackend::kRELAYVM),
+  explicit RelayVMModel(const std::vector<std::string>& files, const DLDevice& dev)
+      : DLRModel(dev, DLRBackend::kRELAYVM),
         allocator_type_(tvm::runtime::vm::AllocatorType::kPooled) {
     SetupVMModule(files);
     FetchInputNodesData();
     FetchOutputNodesData();
   }
-  explicit RelayVMModel(std::vector<DLRModelElem> model_elems, const DLContext& ctx)
-      : DLRModel(ctx, DLRBackend::kRELAYVM),
+  explicit RelayVMModel(std::vector<DLRModelElem> model_elems, const DLDevice& dev)
+      : DLRModel(dev, DLRBackend::kRELAYVM),
         allocator_type_(tvm::runtime::vm::AllocatorType::kPooled) {
     SetupVMModule(model_elems);
     FetchInputNodesData();

diff --git a/include/dlr_treelite.h b/include/dlr_treelite.h
@@ -22,7 +22,7 @@ struct TreeliteInput {
   std::vector<size_t, DLRAllocator<size_t>> row_ptr;
   size_t num_row;
   size_t num_col;
-  CSRBatchHandle handle = nullptr;
+  DMatrixHandle handle = nullptr;
   ~TreeliteInput();
 };
 
@@ -58,8 +58,8 @@ class DLR_DLL TreeliteModel : public DLRModel {
  public:
   /*! \brief Load model files from given folder path.
    */
-  explicit TreeliteModel(const std::vector<std::string>& files, const DLContext& ctx)
-      : DLRModel(ctx, DLRBackend::kTREELITE) {
+  explicit TreeliteModel(const std::vector<std::string>& files, const DLDevice& dev)
+      : DLRModel(dev, DLRBackend::kTREELITE) {
     SetupTreeliteModule(files);
   }