Skip to content

Commit

Permalink
Add Python API for specifying device options. (microsoft#4205)
Browse files Browse the repository at this point in the history
* Add python API for specifying CUDA device id

* Modification for providing session based python api for specifying
device id

* When the header file pybind11/stl.h is included, conversion between C++
containers and Python list, vector and dict data structures is
automatically enabled.

https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html#

Therefore, refactor the code to better leverage this advantage.

* Make struct CudaDeviceOptions as default cuda device options

* Implement sess.set_providers(list_of_providers, list_of_provider_option_dicts)

But still stay consistent with existing sess.set_providers(list_of_provider)

* Add cuda provider option default setting

* Add support for setting cuda cuda_mem_limit and arena_extend_strategy.
Also resolved the merge conflict on session.py

* Use python ctypes to call cuda library to help python unittest

* Refine the code with reviewer's suggestions

* Add the capability of getting execution provider's configuration

- Once we introduced the capability to set an execution provider's
configuration, it makes sense to add the capability of getting the EP's configuration.

* Modify the code with reviewer's suggestions.

* Use stoull() or stoul() depending on whether the architecture is 32- or 64-bit.

* Rewrite the testcases for testing setting CUDA device id

Note: We need to make sure every ORT process be run on one CUDA device
at a time.

* Make sure old session object is destroyed by python gc before new
session object is being created

* Move testcases to original onnxruntime_test_python.py

* Fix bugs to pass CI build

* Make it pass CI build (cont.)

* Make it pass CI build (cont.)
  • Loading branch information
chilo-ms authored Jul 21, 2020
1 parent e11629d commit affdeb5
Show file tree
Hide file tree
Showing 11 changed files with 526 additions and 7 deletions.
3 changes: 3 additions & 0 deletions cmake/onnxruntime_python.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ if (MSVC AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
target_compile_options(onnxruntime_pybind11_state PRIVATE "/wd4244")
endif()
target_include_directories(onnxruntime_pybind11_state PRIVATE ${ONNXRUNTIME_ROOT} ${PYTHON_INCLUDE_DIR} ${NUMPY_INCLUDE_DIR} ${pybind11_INCLUDE_DIRS})
if(onnxruntime_USE_CUDA)
# Expose the cuDNN headers to the pybind11 state module so CUDA-EP-related
# includes resolve when the bindings are built with CUDA enabled.
target_include_directories(onnxruntime_pybind11_state PRIVATE ${onnxruntime_CUDNN_HOME}/include)
endif()
if (onnxruntime_ENABLE_TRAINING)
target_include_directories(onnxruntime_pybind11_state PRIVATE ${ORTTRAINING_ROOT})
endif()
Expand Down
21 changes: 21 additions & 0 deletions include/onnxruntime/core/framework/execution_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ using CreateFunctionStateFunc = std::function<int(ComputeContext*, FunctionState
using ComputeFunc = std::function<Status(FunctionState, const OrtApi*, OrtKernelContext*)>;
using DestroyFunctionStateFunc = std::function<void(FunctionState)>;

//unordered maps
// String key/value map used to carry a single provider's configuration options.
using UnorderedMapStringToString = std::unordered_map<std::string, std::string>;

//data types for execution provider options
// A list of option maps, one entry per provider.
using ProviderOptionsVector = std::vector<UnorderedMapStringToString>;
// Provider id (e.g. "CUDAExecutionProvider") -> that provider's options map.
using ProviderOptionsMap = std::unordered_map<std::string, UnorderedMapStringToString>;

struct NodeComputeInfo {
CreateFunctionStateFunc create_state_func;
ComputeFunc compute_func;
Expand Down Expand Up @@ -98,6 +105,18 @@ class IExecutionProvider {
*/
virtual int GetDeviceId() const { return -1; };

/**
Get execution provider's configurations.
Returns the key/value options stored via SetProviderOptions;
empty until a provider populates them (typically in its constructor).
*/
const UnorderedMapStringToString& GetProviderOptions() const { return provider_options_; }

/**
Store execution provider's configurations.
*/
void SetProviderOptions(UnorderedMapStringToString& options) {
provider_options_ = options;
}

/**
Returns an opaque handle whose exact type varies based on the provider
and is interpreted accordingly by the corresponding kernel implementation.
Expand Down Expand Up @@ -179,5 +198,7 @@ class IExecutionProvider {
// convenience list of the allocators so GetAllocatorList doesn't have to build a new vector each time
// contains the same instances as allocators_
std::vector<AllocatorPtr> allocator_list_;
// It will be set when constructor is being called
UnorderedMapStringToString provider_options_;
};
} // namespace onnxruntime
5 changes: 5 additions & 0 deletions onnxruntime/core/framework/execution_providers.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ class ExecutionProviders {

ORT_IGNORE_RETURN_VALUE(provider_idx_map_.insert({provider_id, new_provider_idx}));

// update execution provider options
exec_provider_options_[provider_id] = p_exec_provider->GetProviderOptions();

exec_provider_ids_.push_back(provider_id);
exec_providers_.push_back(std::move(p_exec_provider));
return Status::OK();
Expand Down Expand Up @@ -66,6 +69,7 @@ class ExecutionProviders {
}

const std::vector<std::string>& GetIds() const { return exec_provider_ids_; }
// Options of every registered provider, keyed by provider id;
// populated when a provider is added to this collection.
const ProviderOptionsMap& GetAllProviderOptions() const { return exec_provider_options_; }

private:
// Some compilers emit incomprehensive output if this is allowed
Expand All @@ -74,6 +78,7 @@ class ExecutionProviders {

std::vector<std::unique_ptr<IExecutionProvider>> exec_providers_;
std::vector<std::string> exec_provider_ids_;
ProviderOptionsMap exec_provider_options_;

// maps for fast lookup of an index into exec_providers_
std::unordered_map<std::string, size_t> provider_idx_map_;
Expand Down
27 changes: 27 additions & 0 deletions onnxruntime/core/providers/cuda/cuda_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,31 @@ CUDAExecutionProvider::PerThreadContext::~PerThreadContext() {
}
}

/*
 * This method should be called within the constructor,
 * so that the configuration of provider related setting can be updated
 * and kept at IExecutionProvider level.
 */
void CUDAExecutionProvider::UpdateProviderOptionsInfo() {
  UnorderedMapStringToString options;

  options["device_id"] = std::to_string(device_id_);
  options["cuda_mem_limit"] = std::to_string(cuda_mem_limit_);

  // Map the arena extend strategy enum to its string name.
  std::string strategy;
  switch (arena_extend_strategy_) {
    case ArenaExtendStrategy::kNextPowerOfTwo:
      strategy = "kNextPowerOfTwo";
      break;
    case ArenaExtendStrategy::kSameAsRequested:
      strategy = "kSameAsRequested";
      break;
    default:
      strategy = "unknown";
      break;
  }
  options["arena_extend_strategy"] = strategy;

  IExecutionProvider::SetProviderOptions(options);
}

CUDAExecutionProvider::CUDAExecutionProvider(const CUDAExecutionProviderInfo& info)
: IExecutionProvider{onnxruntime::kCudaExecutionProvider},
device_id_(info.device_id),
Expand Down Expand Up @@ -142,6 +167,8 @@ CUDAExecutionProvider::CUDAExecutionProvider(const CUDAExecutionProviderInfo& in
std::numeric_limits<size_t>::max()});

InsertAllocator(CreateAllocator(cpu_memory_info, CPU_ALLOCATOR_DEVICE_ID));

UpdateProviderOptionsInfo();
}

CUDAExecutionProvider::~CUDAExecutionProvider() {
Expand Down
1 change: 1 addition & 0 deletions onnxruntime/core/providers/cuda/cuda_execution_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class CUDAExecutionProvider : public IExecutionProvider {

int GetDeviceId() const { return device_id_; }
const cudaDeviceProp& GetDeviceProp() const { return device_prop_; };
void UpdateProviderOptionsInfo();

private:
OrtDevice::DeviceId device_id_;
Expand Down
25 changes: 25 additions & 0 deletions onnxruntime/core/providers/cuda/cuda_provider_options.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include <cstddef>  // size_t
#include <limits>   // std::numeric_limits

// NOTE(review): OrtDevice and ArenaExtendStrategy declarations are assumed to be
// provided by the includer; confirm whether their headers should be included here.

namespace onnxruntime {

/**
 * Configuration information for a cuda provider.
 *
 * Note: This struct is currently for internal use for Python API,
 * not for C/C++/C#...APIs.
 */
struct CudaProviderOptions {

  // use cuda device with id=0 as default device.
  OrtDevice::DeviceId device_id = 0;

  // set default cuda memory limitation to maximum finite value of size_t.
  size_t cuda_mem_limit = std::numeric_limits<size_t>::max();

  // set default arena extend strategy to kNextPowerOfTwo.
  onnxruntime::ArenaExtendStrategy arena_extend_strategy = onnxruntime::ArenaExtendStrategy::kNextPowerOfTwo;
};
}  // namespace onnxruntime
4 changes: 4 additions & 0 deletions onnxruntime/core/session/inference_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -987,6 +987,10 @@ const std::vector<std::string>& InferenceSession::GetRegisteredProviderTypes() c
return execution_providers_.GetIds();
}

// Returns the per-provider option maps of all providers registered with this session.
const ProviderOptionsMap& InferenceSession::GetAllProviderOptions() const {
return execution_providers_.GetAllProviderOptions();
}

// Returns the options this session was constructed with.
const SessionOptions& InferenceSession::GetSessionOptions() const {
return session_options_;
}
Expand Down
12 changes: 12 additions & 0 deletions onnxruntime/core/session/inference_session.h
Original file line number Diff line number Diff line change
Expand Up @@ -319,10 +319,17 @@ class InferenceSession {
*/
const SessionOptions& GetSessionOptions() const;


/*
* Get the DataTransferManager associated with this session
*/
const DataTransferManager& GetDataTransferManager() const;

/*
* Get all the providers' options this session was initialized with.
*/
const ProviderOptionsMap& GetAllProviderOptions() const;


/**
* Start profiling on this inference session. This simply turns on profiling events to be
Expand All @@ -346,6 +353,11 @@ class InferenceSession {
*/
std::string EndProfiling();

/*
 * Get the logger associated with this inference session.
 * May be null if logging has not been set up for the session.
 */
const logging::Logger* GetLogger() const { return session_logger_; }

protected:
/**
* Load an ONNX model.
Expand Down
Loading

0 comments on commit affdeb5

Please sign in to comment.