Skip to content

Commit

Permalink
Add Python API for specifying device options. (microsoft#4205)
Browse files Browse the repository at this point in the history
* Add python API for specifying CUDA device id

* Modification for providing session based python api for specifying
device id

* When the header file pybind11/stl.h is included, conversion between C++
containers and Python list, vector and dict data structures is
automatically enabled.

https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html#

Therefore, refactor the code to better leverage this advantage.

* Make struct CudaDeviceOptions as default cuda device options

* Implement sess.set_providers(list_of_providers, list_of_provider_option_dicts)

But still stay consistent with existing sess.set_providers(list_of_provider)

* Add cuda provider option default setting

* Add support for setting cuda cuda_mem_limit and arena_extend_strategy.
Also resolved the merge conflict on session.py

* Use python ctypes to call cuda library to help python unittest

* Refine the code with reviewer's suggestions

* Add the capability of getting execution provider's configuration

- Once we introduced the capability to set an execution provider's
configuration, it makes sense to add the capability of getting the EP's configuration.

* Modify the code with reviewer's suggestions.

* Use stoull() or stoul() depending on whether the architecture is 32- or 64-bit.

* Rewrite the testcases for testing setting CUDA device id

Note: We need to make sure every ORT process be run on one CUDA device
at a time.

* Make sure old session object is destroyed by python gc before new
session object is being created

* Move testcases to original onnxruntime_test_python.py

* Fix bugs to pass CI build

* Make it pass CI build (cont.)

* Make it pass CI build (cont.)
  • Loading branch information
chilo-ms authored Jul 21, 2020
1 parent e11629d commit affdeb5
Show file tree
Hide file tree
Showing 11 changed files with 526 additions and 7 deletions.
3 changes: 3 additions & 0 deletions cmake/onnxruntime_python.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ if (MSVC AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
target_compile_options(onnxruntime_pybind11_state PRIVATE "/wd4244")
endif()
target_include_directories(onnxruntime_pybind11_state PRIVATE ${ONNXRUNTIME_ROOT} ${PYTHON_INCLUDE_DIR} ${NUMPY_INCLUDE_DIR} ${pybind11_INCLUDE_DIRS})
if(onnxruntime_USE_CUDA)
# Expose the cuDNN headers to the pybind11 state module so CUDA-EP-related
# includes resolve when the bindings are built with CUDA enabled.
target_include_directories(onnxruntime_pybind11_state PRIVATE ${onnxruntime_CUDNN_HOME}/include)
endif()
if (onnxruntime_ENABLE_TRAINING)
target_include_directories(onnxruntime_pybind11_state PRIVATE ${ORTTRAINING_ROOT})
endif()
Expand Down
21 changes: 21 additions & 0 deletions include/onnxruntime/core/framework/execution_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ using CreateFunctionStateFunc = std::function<int(ComputeContext*, FunctionState
using ComputeFunc = std::function<Status(FunctionState, const OrtApi*, OrtKernelContext*)>;
using DestroyFunctionStateFunc = std::function<void(FunctionState)>;

//unordered maps
// String key/value map used to carry a single provider's configuration options.
using UnorderedMapStringToString = std::unordered_map<std::string, std::string>;

//data types for execution provider options
// A list of option maps, one entry per provider.
using ProviderOptionsVector = std::vector<UnorderedMapStringToString>;
// Provider id (e.g. "CUDAExecutionProvider") -> that provider's options map.
using ProviderOptionsMap = std::unordered_map<std::string, UnorderedMapStringToString>;

struct NodeComputeInfo {
CreateFunctionStateFunc create_state_func;
ComputeFunc compute_func;
Expand Down Expand Up @@ -98,6 +105,18 @@ class IExecutionProvider {
*/
virtual int GetDeviceId() const { return -1; };

/**
Get execution provider's configurations.
Returns the key/value options stored via SetProviderOptions;
empty until a provider populates them (typically in its constructor).
*/
const UnorderedMapStringToString& GetProviderOptions() const { return provider_options_; }

/**
Store execution provider's configurations.
*/
void SetProviderOptions(UnorderedMapStringToString& options) {
provider_options_ = options;
}

/**
Returns an opaque handle whose exact type varies based on the provider
and is interpreted accordingly by the corresponding kernel implementation.
Expand Down Expand Up @@ -179,5 +198,7 @@ class IExecutionProvider {
// convenience list of the allocators so GetAllocatorList doesn't have to build a new vector each time
// contains the same instances as allocators_
std::vector<AllocatorPtr> allocator_list_;
// It will be set when constructor is being called
UnorderedMapStringToString provider_options_;
};
} // namespace onnxruntime
5 changes: 5 additions & 0 deletions onnxruntime/core/framework/execution_providers.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ class ExecutionProviders {

ORT_IGNORE_RETURN_VALUE(provider_idx_map_.insert({provider_id, new_provider_idx}));

// update execution provider options
exec_provider_options_[provider_id] = p_exec_provider->GetProviderOptions();

exec_provider_ids_.push_back(provider_id);
exec_providers_.push_back(std::move(p_exec_provider));
return Status::OK();
Expand Down Expand Up @@ -66,6 +69,7 @@ class ExecutionProviders {
}

const std::vector<std::string>& GetIds() const { return exec_provider_ids_; }
// Options of every registered provider, keyed by provider id;
// populated when a provider is added to this collection.
const ProviderOptionsMap& GetAllProviderOptions() const { return exec_provider_options_; }

private:
// Some compilers emit incomprehensive output if this is allowed
Expand All @@ -74,6 +78,7 @@ class ExecutionProviders {

std::vector<std::unique_ptr<IExecutionProvider>> exec_providers_;
std::vector<std::string> exec_provider_ids_;
ProviderOptionsMap exec_provider_options_;

// maps for fast lookup of an index into exec_providers_
std::unordered_map<std::string, size_t> provider_idx_map_;
Expand Down
27 changes: 27 additions & 0 deletions onnxruntime/core/providers/cuda/cuda_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,31 @@ CUDAExecutionProvider::PerThreadContext::~PerThreadContext() {
}
}

/*
 * This method should be called within the constructor,
 * so that the configuration of provider related setting can be updated
 * and kept at IExecutionProvider level.
 */
void CUDAExecutionProvider::UpdateProviderOptionsInfo() {
  UnorderedMapStringToString options;

  options["device_id"] = std::to_string(device_id_);
  options["cuda_mem_limit"] = std::to_string(cuda_mem_limit_);

  // Map the arena extend strategy enum to its string name.
  std::string strategy;
  switch (arena_extend_strategy_) {
    case ArenaExtendStrategy::kNextPowerOfTwo:
      strategy = "kNextPowerOfTwo";
      break;
    case ArenaExtendStrategy::kSameAsRequested:
      strategy = "kSameAsRequested";
      break;
    default:
      strategy = "unknown";
      break;
  }
  options["arena_extend_strategy"] = strategy;

  IExecutionProvider::SetProviderOptions(options);
}

CUDAExecutionProvider::CUDAExecutionProvider(const CUDAExecutionProviderInfo& info)
: IExecutionProvider{onnxruntime::kCudaExecutionProvider},
device_id_(info.device_id),
Expand Down Expand Up @@ -142,6 +167,8 @@ CUDAExecutionProvider::CUDAExecutionProvider(const CUDAExecutionProviderInfo& in
std::numeric_limits<size_t>::max()});

InsertAllocator(CreateAllocator(cpu_memory_info, CPU_ALLOCATOR_DEVICE_ID));

UpdateProviderOptionsInfo();
}

CUDAExecutionProvider::~CUDAExecutionProvider() {
Expand Down
1 change: 1 addition & 0 deletions onnxruntime/core/providers/cuda/cuda_execution_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class CUDAExecutionProvider : public IExecutionProvider {

int GetDeviceId() const { return device_id_; }
const cudaDeviceProp& GetDeviceProp() const { return device_prop_; };
void UpdateProviderOptionsInfo();

private:
OrtDevice::DeviceId device_id_;
Expand Down
25 changes: 25 additions & 0 deletions onnxruntime/core/providers/cuda/cuda_provider_options.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include <cstddef>  // size_t
#include <limits>   // std::numeric_limits

// NOTE(review): OrtDevice and ArenaExtendStrategy declarations are assumed to be
// provided by the includer; confirm whether their headers should be included here.

namespace onnxruntime {

/**
 * Configuration information for a cuda provider.
 *
 * Note: This struct is currently for internal use for Python API,
 * not for C/C++/C#...APIs.
 */
struct CudaProviderOptions {

  // use cuda device with id=0 as default device.
  OrtDevice::DeviceId device_id = 0;

  // set default cuda memory limitation to maximum finite value of size_t.
  size_t cuda_mem_limit = std::numeric_limits<size_t>::max();

  // set default arena extend strategy to kNextPowerOfTwo.
  onnxruntime::ArenaExtendStrategy arena_extend_strategy = onnxruntime::ArenaExtendStrategy::kNextPowerOfTwo;
};
}  // namespace onnxruntime
4 changes: 4 additions & 0 deletions onnxruntime/core/session/inference_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -987,6 +987,10 @@ const std::vector<std::string>& InferenceSession::GetRegisteredProviderTypes() c
return execution_providers_.GetIds();
}

// Returns the per-provider option maps of all providers registered with this session.
const ProviderOptionsMap& InferenceSession::GetAllProviderOptions() const {
return execution_providers_.GetAllProviderOptions();
}

// Returns the options this session was constructed with.
const SessionOptions& InferenceSession::GetSessionOptions() const {
return session_options_;
}
Expand Down
12 changes: 12 additions & 0 deletions onnxruntime/core/session/inference_session.h
Original file line number Diff line number Diff line change
Expand Up @@ -319,10 +319,17 @@ class InferenceSession {
*/
const SessionOptions& GetSessionOptions() const;


/*
* Get the DataTransferManager associated with this session
*/
const DataTransferManager& GetDataTransferManager() const;

/*
* Get all the providers' options this session was initialized with.
*/
const ProviderOptionsMap& GetAllProviderOptions() const;


/**
* Start profiling on this inference session. This simply turns on profiling events to be
Expand All @@ -346,6 +353,11 @@ class InferenceSession {
*/
std::string EndProfiling();

/*
 * Get the logger associated with this inference session.
 * May be null if logging has not been set up for the session.
 */
const logging::Logger* GetLogger() const { return session_logger_; }

protected:
/**
* Load an ONNX model.
Expand Down
Loading

0 comments on commit affdeb5

Please sign in to comment.