Skip to content

Commit ff608a9

Browse files
bddppq authored and facebook-github-bot committed
Back out "Revert D10123245: Back out "codemod cuda_gpu_id to device_id"" (pytorch#12232)
Summary:
Pull Request resolved: pytorch#12232
Original commit changeset: fca91fea58b7
This adds proper modifications to the DeviceType <-> DeviceOption conversion code added in D10033396.
Reviewed By: jerryzh168
Differential Revision: D10132473
fbshipit-source-id: 801ef777e2950982cb47b48051b1471a0a91e64b
1 parent 696498d commit ff608a9

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed, with 163 additions (+163) and 121 deletions (-121).

caffe2/contrib/nccl/cuda_nccl_op_gpu.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ nccl::NCCLExecution getNCCLElements(
1111
// We either do an N-N op, or an N-1 op.
1212
CAFFE_ENFORCE(op->InputSize() == op->OutputSize() || op->OutputSize() == 1);
1313
nccl::NCCLExecution ex;
14-
ex.stream_gpu_id = context.device_id();
14+
ex.stream_gpu_id = context.cuda_gpu_id();
1515
ex.stream = context.cuda_stream();
1616
ex.root = op->template GetSingleArgument<int>("root", 0);
1717
ex.elements.resize(op->InputSize());
@@ -204,7 +204,7 @@ std::pair<std::vector<DeviceOption>, std::vector<DeviceOption>> ncclOpDevInfer(
204204
for (int i = 0; i < def.input().size(); ++i) {
205205
DeviceOption dev;
206206
dev.set_device_type(1);
207-
dev.set_device_id(i);
207+
dev.set_cuda_gpu_id(i);
208208
opt.push_back(dev);
209209
}
210210
return std::make_pair(opt, opt);

caffe2/contrib/nccl/nccl_ops_test.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
def gpu_device(i):
2222
device_option = caffe2_pb2.DeviceOption()
2323
device_option.device_type = caffe2_pb2.CUDA
24-
device_option.device_id = i
24+
device_option.cuda_gpu_id = i
2525
return device_option
2626

2727

caffe2/contrib/prof/prof_dag_net.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ void ProfDAGNet::ValidateOpTensorDevices() {
3333
had_mismatches = true;
3434
LOG(INFO) << "== PERFORMANCE WARNING == \n"
3535
<< " Operator " << node.operator_->debug_def().type()
36-
<< " expects GPU " << mismatch.second.first.device_id()
36+
<< " expects GPU " << mismatch.second.first.cuda_gpu_id()
3737
<< " but tensor [" << mismatch.first << "] is on GPU "
38-
<< mismatch.second.second.device_id();
38+
<< mismatch.second.second.cuda_gpu_id();
3939
}
4040
}
4141
if (!had_mismatches) {

caffe2/contrib/tensorboard/tensorboard_exporter.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def _tf_device(device_option):
177177
if device_option.device_type == caffe2_pb2.CPU:
178178
return "/cpu:*"
179179
if device_option.device_type == caffe2_pb2.CUDA:
180-
return "/gpu:{}".format(device_option.device_id)
180+
return "/gpu:{}".format(device_option.cuda_gpu_id)
181181
raise Exception("Unhandled device", device_option)
182182

183183

caffe2/contrib/warpctc/ctc_ops_test.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,11 @@ def test_ctc_cost_cpu(self):
7979
def test_ctc_cost_gpu(self):
8080
self.verify_cost(
8181
caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA,
82-
device_id=0),
82+
cuda_gpu_id=0),
8383
is_test=False)
8484
self.verify_cost(
8585
caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA,
86-
device_id=0),
86+
cuda_gpu_id=0),
8787
is_test=False,
8888
skip_input_lengths=True)
8989

@@ -99,10 +99,10 @@ def test_ctc_forward_only_cpu(self):
9999
def test_ctc_forward_only_gpu(self):
100100
self.verify_cost(
101101
caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA,
102-
device_id=0),
102+
cuda_gpu_id=0),
103103
is_test=True)
104104
self.verify_cost(
105105
caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA,
106-
device_id=0),
106+
cuda_gpu_id=0),
107107
is_test=True,
108108
skip_input_lengths=True)

caffe2/core/blob_gpu_test.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ TEST(TensorTest, TensorSerializationMultiDevices) {
195195
}
196196
EXPECT_TRUE(tensor_proto.has_device_detail());
197197
EXPECT_EQ(tensor_proto.device_detail().device_type(), PROTO_CUDA);
198-
EXPECT_EQ(tensor_proto.device_detail().device_id(), gpu_id);
198+
EXPECT_EQ(tensor_proto.device_detail().cuda_gpu_id(), gpu_id);
199199
// Test if the restored blob is still of the same device.
200200
blob.Reset();
201201
EXPECT_NO_THROW(DeserializeBlob(serialized, &blob));
@@ -205,7 +205,7 @@ TEST(TensorTest, TensorSerializationMultiDevices) {
205205
// Test if we force the restored blob on a different device, we
206206
// can still get so.
207207
blob.Reset();
208-
proto.mutable_tensor()->mutable_device_detail()->set_device_id(0);
208+
proto.mutable_tensor()->mutable_device_detail()->set_cuda_gpu_id(0);
209209
EXPECT_NO_THROW(DeserializeBlob(proto.SerializeAsString(), &blob));
210210
EXPECT_TRUE(BlobIsTensorType(blob, CUDA));
211211
EXPECT_EQ(GetGPUIDForPointer(blob.Get<TensorCUDA>().data<float>()), 0);

caffe2/core/context_gpu.cu

+1-1
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ CUDAContext::CUDAContext(const int gpu_id)
256256

257257
CUDAContext::CUDAContext(const DeviceOption& option)
258258
: gpu_id_(
259-
option.has_device_id() ? RectifyGPUID(option.device_id())
259+
option.has_cuda_gpu_id() ? RectifyGPUID(option.cuda_gpu_id())
260260
: CaffeCudaGetDevice()),
261261
random_seed_(
262262
option.has_random_seed() ? option.random_seed()

caffe2/core/context_gpu.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ class CAFFE2_CUDA_API CUDAContext final : public BaseContext {
184184
}
185185
}
186186

187-
inline int device_id() const {
187+
inline int cuda_gpu_id() const {
188188
return gpu_id_;
189189
}
190190

@@ -283,7 +283,7 @@ class CAFFE2_CUDA_API CUDAContext final : public BaseContext {
283283
}
284284

285285
static bool IsStreamFree(const DeviceOption& option, int stream_id) {
286-
auto stream = CUDAContext::cuda_stream(option.device_id(), stream_id);
286+
auto stream = CUDAContext::cuda_stream(option.cuda_gpu_id(), stream_id);
287287
return cudaStreamQuery(stream) == cudaSuccess;
288288
}
289289

@@ -393,7 +393,7 @@ class CAFFE2_CUDA_API CUDAStaticContext final : public BaseStaticContext {
393393

394394
void ExtractDeviceOption(DeviceOption* device, const void* data) override {
395395
device->set_device_type(TypeToProto(GetDeviceType()));
396-
device->set_device_id(GetGPUIDForPointer(data));
396+
device->set_cuda_gpu_id(GetGPUIDForPointer(data));
397397
}
398398

399399
protected:

caffe2/core/cudnn_wrappers.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -122,17 +122,17 @@ class CuDNNWrapper {
122122
void with_cudnn_state(size_t state_idx, F&& f) {
123123
CAFFE_ENFORCE(
124124
state_idx < CAFFE2_COMPILE_TIME_MAX_CUDNN_STATES, "Invalid state_idx");
125-
auto& sync_state = cudnn_states()[context_->device_id()][state_idx];
125+
auto& sync_state = cudnn_states()[context_->cuda_gpu_id()][state_idx];
126126

127-
DeviceGuard dg(context_->device_id());
127+
DeviceGuard dg(context_->cuda_gpu_id());
128128

129129
// We need to serialize execution on the CuDNNState as we can't
130130
// allow multiple threads to race through the cudaEventRecord
131131
// calls (so a worker thread might wait on another worker thread's
132132
// execution)
133133
std::lock_guard<std::mutex> g(sync_state.mutex);
134134
if (!sync_state.state.get()) {
135-
sync_state.state.reset(new CuDNNState(context_->device_id()));
135+
sync_state.state.reset(new CuDNNState(context_->cuda_gpu_id()));
136136
}
137137
CHECK_NOTNULL(sync_state.state.get())->execute(context_->cuda_stream(), f);
138138
}

caffe2/core/event_gpu.cc

+8-8
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,21 @@ namespace caffe2 {
99
struct CudaEventWrapper {
1010
explicit CudaEventWrapper(const DeviceOption& option)
1111
: cuda_stream_(nullptr),
12-
device_id_(option.device_id()),
12+
cuda_gpu_id_(option.cuda_gpu_id()),
1313
status_(EventStatus::EVENT_INITIALIZED) {
1414
CAFFE_ENFORCE(option.device_type(), PROTO_CUDA);
15-
DeviceGuard g(device_id_);
15+
DeviceGuard g(cuda_gpu_id_);
1616
CUDA_ENFORCE(cudaEventCreate(
1717
&cuda_event_, cudaEventDefault | cudaEventDisableTiming));
1818
}
1919
~CudaEventWrapper() {
20-
DeviceGuard g(device_id_);
20+
DeviceGuard g(cuda_gpu_id_);
2121
CUDA_CHECK(cudaEventDestroy(cuda_event_));
2222
}
2323

2424
cudaEvent_t cuda_event_;
2525
cudaStream_t cuda_stream_;
26-
int device_id_;
26+
int cuda_gpu_id_;
2727

2828
std::atomic<int> status_;
2929
std::mutex mutex_recorded_;
@@ -65,12 +65,12 @@ void EventRecordCUDA(Event* event, const void* context, const char* err_msg) {
6565
const auto& current_device = CaffeCudaGetDevice();
6666
CAFFE_ENFORCE_EQ(
6767
current_device,
68-
wrapper->device_id_,
68+
wrapper->cuda_gpu_id_,
6969
"When you call EventRecordCUDA, your current device should be the same "
7070
"as the device specified by the event.");
7171
CAFFE_ENFORCE_EQ(
7272
current_device,
73-
static_cast<const CUDAContext*>(context)->device_id());
73+
static_cast<const CUDAContext*>(context)->cuda_gpu_id());
7474
CUDA_ENFORCE(cudaEventRecord(
7575
wrapper->cuda_event_,
7676
static_cast<const CUDAContext*>(context)->cuda_stream()));
@@ -96,7 +96,7 @@ void EventFinishCUDA(const Event* event) {
9696

9797
if (wrapper->status_ == EventStatus::EVENT_SCHEDULED) {
9898
// ok, even if event is already completed and status was not yet updated
99-
DeviceGuard g(wrapper->device_id_);
99+
DeviceGuard g(wrapper->cuda_gpu_id_);
100100
auto cudaResult = cudaEventSynchronize(wrapper->cuda_event_);
101101
if (cudaResult == cudaSuccess) {
102102
wrapper->status_ = EventStatus::EVENT_SUCCESS;
@@ -127,7 +127,7 @@ void EventWaitCUDACUDA(const Event* event, void* context) {
127127
if (context_stream != event_stream) {
128128
// CAFFE_ENFORCE_EQ(
129129
// CaffeCudaGetDevice(),
130-
// static_cast<const CUDAContext*>(context)->device_id());
130+
// static_cast<const CUDAContext*>(context)->cuda_gpu_id());
131131
CUDA_CHECK(cudaStreamWaitEvent(context_stream, wrapper->cuda_event_, 0));
132132
}
133133
}

caffe2/core/hip/event_hip.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ void EventWaitHIPHIP(const Event* event, void* context)
138138
{
139139
// CAFFE_ENFORCE_EQ(
140140
// CaffeCudaGetDevice(),
141-
// static_cast<const CUDAContext*>(context)->device_id());
141+
// static_cast<const CUDAContext*>(context)->cuda_gpu_id());
142142
HIP_CHECK(hipStreamWaitEvent(context_stream, wrapper->hip_event_, 0));
143143
}
144144
}

caffe2/core/memonger.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ class ComputeBlobRecyclingForDag {
176176
// cuda device option but whose inputs/outputs are on CPU
177177
if (net.op(op_index).type() == "CopyGPUToCPU") {
178178
blob_device_[output].set_device_type(0);
179-
blob_device_[output].set_device_id(0);
179+
blob_device_[output].set_cuda_gpu_id(0);
180180
}
181181
}
182182
}
@@ -478,7 +478,7 @@ class ComputeBlobRecyclingForDag {
478478
const DeviceOption& device_option) {
479479
const DeviceOption& blob_device = blob_device_[blob_name];
480480
if (device_option.device_type() != blob_device.device_type() ||
481-
device_option.device_id() != blob_device.device_id()) {
481+
device_option.cuda_gpu_id() != blob_device.cuda_gpu_id()) {
482482
return false;
483483
}
484484
for (const int token : req_tokens_[blob_name]) {

caffe2/core/net_async_base.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ TaskThreadPool* AsyncNetBase::pool(const DeviceOption& device_option) {
157157
numa_node_id);
158158
return poolGetter(cpu_pools_, PROTO_CPU, numa_node_id, num_workers_);
159159
} else if (device_option.device_type() == PROTO_CUDA) {
160-
auto gpu_id = device_option.device_id();
160+
auto gpu_id = device_option.cuda_gpu_id();
161161
CAFFE_ENFORCE(
162162
gpu_id >= 0 && gpu_id < FLAGS_caffe2_net_async_max_gpus,
163163
"Invalid GPU id: " + caffe2::to_string(gpu_id));
@@ -173,7 +173,7 @@ int AsyncNetBase::stream(int task_id) {
173173
const auto& device_option = event(task_id).GetDeviceOption();
174174
int stream_id = 0;
175175
if (device_option.device_type() == PROTO_CUDA) {
176-
int gpu_id = device_option.device_id();
176+
int gpu_id = device_option.cuda_gpu_id();
177177
CAFFE_ENFORCE_GE(gpu_id, 0, "Invalid gpu id: " + caffe2::to_string(gpu_id));
178178
if ((unsigned)gpu_id >= getStreamCounters().size()) {
179179
getStreamCounters().resize(gpu_id + 1, 0);

caffe2/core/net_async_dag_gpu.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ AsyncDAGNet::AsyncDAGNet(
112112
int AsyncDAGNet::stream(const DeviceOption& device_option) {
113113
int stream_id = 0;
114114
if (device_option.device_type() == PROTO_CUDA) {
115-
int gpu_id = device_option.device_id();
115+
int gpu_id = device_option.cuda_gpu_id();
116116
CAFFE_ENFORCE_GE(gpu_id, 0, "Invalid gpu id: " + caffe2::to_string(gpu_id));
117117
if ((unsigned)gpu_id >= stream_counters_.size()) {
118118
stream_counters_.resize(gpu_id + 1, 0);

caffe2/core/net_gpu_test.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ TEST(NetTest, DISABLED_ChainingForDifferentDevices) {
124124
type: "NetTestDummy"
125125
device_option {
126126
device_type: 1
127-
device_id: 1
127+
cuda_gpu_id: 1
128128
}
129129
}
130130
)DOC";

caffe2/core/operator.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -649,7 +649,7 @@ std::map<string, std::pair<DeviceOption, DeviceOption>> ValidateTensorDevices(
649649
&blob_device);
650650

651651
if (blob_device.device_type() == PROTO_CUDA &&
652-
blob_device.device_id() != op_device.device_id()) {
652+
blob_device.cuda_gpu_id() != op_device.cuda_gpu_id()) {
653653
mismatches[blob_name] = std::make_pair(op_device, blob_device);
654654
} else if (
655655
blob_device.device_type() == PROTO_HIP &&

caffe2/mkl/utils/mkl_memory.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ static vector<int64_t> GetMKLTensorInfo(
2626
const mkl::MKLMemory<T>* tc = static_cast<const mkl::MKLMemory<T>*>(c);
2727
*capacity = tc->size() * sizeof(T);
2828
device->set_device_type(PROTO_MKLDNN);
29-
device->set_device_id(0);
29+
device->set_cuda_gpu_id(0);
3030
return tc->dims();
3131
}
3232

caffe2/observers/profile_observer_gpu.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ void ProfileOperatorObserver::Start() {
7070
int device;
7171
cudaGetDevice(&device);
7272

73-
cudaSetDevice(context->device_id());
73+
cudaSetDevice(context->cuda_gpu_id());
7474
cudaEventCreate(&start_);
7575
cudaEventRecord(start_, context->cuda_stream());
7676

@@ -92,7 +92,7 @@ void ProfileOperatorObserver::Stop() {
9292
int device;
9393
cudaGetDevice(&device);
9494

95-
cudaSetDevice(context->device_id());
95+
cudaSetDevice(context->cuda_gpu_id());
9696
cudaEventCreate(&stop_);
9797
cudaEventRecord(stop_, context->cuda_stream());
9898
cudaEventSynchronize(stop_);

caffe2/onnx/backend.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ caffe2::DeviceOption GetDeviceOption(const Device& onnx_device) {
6565
{DeviceType::CUDA, caffe2::DeviceType::CUDA}};
6666
caffe2::DeviceOption d;
6767
d.set_device_type(static_cast<int32_t>(m.at(onnx_device.type)));
68-
d.set_device_id(onnx_device.device_id);
68+
d.set_cuda_gpu_id(onnx_device.device_id);
6969
return d;
7070
}
7171

caffe2/operators/load_save_op_gpu.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ void LoadOp<CUDAContext>::SetCurrentDevice(BlobProto* proto) {
99
proto->mutable_tensor()->clear_device_detail();
1010
auto* device_detail = proto->mutable_tensor()->mutable_device_detail();
1111
device_detail->set_device_type(PROTO_CUDA);
12-
device_detail->set_device_id(CaffeCudaGetDevice());
12+
device_detail->set_cuda_gpu_id(CaffeCudaGetDevice());
1313
}
1414
}
1515

caffe2/operators/rnn/recurrent_network_executor_gpu.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,11 @@ void CUDARecurrentNetworkExecutor::_ExecRange(int from, int to) {
7272
if (gpu_id == -1 &&
7373
rnn_op.op->device_option().device_type() ==
7474
DeviceTypeProto::PROTO_CUDA) {
75-
gpu_id = rnn_op.op->device_option().device_id();
75+
gpu_id = rnn_op.op->device_option().cuda_gpu_id();
7676
} else {
7777
CAFFE_ENFORCE(
7878
rnn_op.op->device_option().device_type() == 0 ||
79-
rnn_op.op->device_option().device_id() == gpu_id,
79+
rnn_op.op->device_option().cuda_gpu_id() == gpu_id,
8080
"RNN Executor only supports ops on one GPU");
8181
}
8282

caffe2/proto/caffe2.proto

+1-1
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ message DeviceOption {
183183
// optional DeviceType device_type = 1 [ default = CPU ];
184184
optional int32 device_type = 1 [ default = 0 ]; // 0 is CPU.
185185
// [CUDA specific] the cuda gpu id.
186-
optional int32 device_id = 2;
186+
optional int32 cuda_gpu_id = 2;
187187
// [general] The random seed to start the device random number generator with.
188188
optional uint32 random_seed = 3;
189189
// [general] What node this op should execute on.

0 commit comments

Comments
 (0)