Skip to content

Commit

Permalink
Move c10d to libtorch(_cuda) (pytorch#59563)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: pytorch#59563

ghstack-source-id: 131331264

Test Plan: CI

Reviewed By: malfet

Differential Revision: D28932239

fbshipit-source-id: 5df6cdfa5253b15cbbc97039fe672d6d97321e34
  • Loading branch information
lw authored and facebook-github-bot committed Jun 15, 2021
1 parent 8d50a4e commit a178043
Show file tree
Hide file tree
Showing 29 changed files with 183 additions and 239 deletions.
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,12 @@ cmake_dependent_option(
cmake_dependent_option(
USE_GLOO_WITH_OPENSSL "Use Gloo with OpenSSL. Only available if USE_GLOO is on." OFF
"USE_GLOO AND LINUX AND NOT INTERN_BUILD_MOBILE" OFF)
# Per-backend switches for c10d. Each option defaults to ON when both
# USE_DISTRIBUTED and the matching communication library are enabled, and is
# forced to OFF otherwise.
cmake_dependent_option(
    USE_C10D_GLOO "Build the c10d Gloo backend. Only available if USE_DISTRIBUTED and USE_GLOO are on." ON
    "USE_DISTRIBUTED;USE_GLOO" OFF)
cmake_dependent_option(
    USE_C10D_NCCL "Build the c10d NCCL backend. Only available if USE_DISTRIBUTED and USE_NCCL are on." ON
    "USE_DISTRIBUTED;USE_NCCL" OFF)
cmake_dependent_option(
    USE_C10D_MPI "Build the c10d MPI backend. Only available if USE_DISTRIBUTED and USE_MPI are on." ON
    "USE_DISTRIBUTED;USE_MPI" OFF)
cmake_dependent_option(
USE_TENSORPIPE "Use TensorPipe. Only available if USE_DISTRIBUTED is on." ON
"USE_DISTRIBUTED" OFF)
Expand Down
63 changes: 54 additions & 9 deletions caffe2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -357,8 +357,8 @@ if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
"${TORCH_SRC_DIR}/csrc/distributed/rpc/process_group_agent.cpp"
"${TORCH_SRC_DIR}/csrc/distributed/rpc/process_group_agent.h"
)
target_link_libraries(process_group_agent PRIVATE torch c10d fmt::fmt-header-only)
add_dependencies(process_group_agent torch c10d)
target_link_libraries(process_group_agent PRIVATE torch fmt::fmt-header-only)
add_dependencies(process_group_agent torch)

if(USE_TENSORPIPE)
add_library(tensorpipe_agent
Expand All @@ -370,8 +370,8 @@ if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
"${TORCH_SRC_DIR}/csrc/distributed/rpc/tensorpipe_utils.cpp"
"${TORCH_SRC_DIR}/csrc/distributed/rpc/tensorpipe_utils.h"
)
target_link_libraries(tensorpipe_agent PRIVATE torch c10d tensorpipe fmt::fmt-header-only)
add_dependencies(tensorpipe_agent torch c10d)
target_link_libraries(tensorpipe_agent PRIVATE torch tensorpipe fmt::fmt-header-only)
add_dependencies(tensorpipe_agent torch)
if(USE_CUDA)
target_compile_definitions(tensorpipe_agent PUBLIC USE_CUDA)
endif()
Expand Down Expand Up @@ -621,8 +621,11 @@ if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
PROPERTIES COMPILE_FLAGS "-DC10_DISABLE_LEGACY_IMPORT"
)
endif()
if(USE_DISTRIBUTED AND NOT WIN32)
append_filelist("libtorch_distributed_sources" TORCH_SRCS)
if(USE_DISTRIBUTED)
append_filelist("libtorch_distributed_base_sources" TORCH_SRCS)
if(NOT WIN32)
append_filelist("libtorch_distributed_extra_sources" TORCH_SRCS)
endif()
endif()
endif()

Expand Down Expand Up @@ -653,6 +656,17 @@ if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
list(APPEND Caffe2_GPU_SRCS
${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp)
endif()
if(USE_DISTRIBUTED)
  # Name of the source list that receives the CUDA distributed files:
  # Caffe2_GPU_SRCS_CPP when BUILD_SPLIT_CUDA is on, Caffe2_GPU_SRCS otherwise.
  set(_target "Caffe2_GPU_SRCS")
  if(BUILD_SPLIT_CUDA)
    set(_target "Caffe2_GPU_SRCS_CPP")
  endif()

  append_filelist("libtorch_cuda_distributed_base_sources" ${_target})
  # The "extra" file list is left out of Windows builds.
  if(NOT WIN32)
    append_filelist("libtorch_cuda_distributed_extra_sources" ${_target})
  endif()
endif()
set_source_files_properties(
${TORCH_ROOT}/aten/src/ATen/cuda/detail/LazyNVRTC.cpp
PROPERTIES COMPILE_DEFINITIONS "NVRTC_SHORTHASH=${CUDA_NVRTC_SHORTHASH}"
Expand All @@ -670,6 +684,12 @@ if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
list(APPEND Caffe2_HIP_SRCS
${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp)
endif()
# Append the CUDA distributed file lists to Caffe2_HIP_SRCS. The "base" list
# is added whenever USE_DISTRIBUTED is on; the "extra" list is skipped on
# Windows.
if(USE_DISTRIBUTED)
  append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_HIP_SRCS)
endif()
if(USE_DISTRIBUTED AND NOT WIN32)
  append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_HIP_SRCS)
endif()
# caffe2_nvrtc's stubs to driver APIs are useful for HIP.
# See NOTE [ ATen NVRTC Stub and HIP ]
add_library(caffe2_nvrtc SHARED ${ATen_NVRTC_STUB_SRCS})
Expand Down Expand Up @@ -1047,6 +1067,9 @@ endif()
install(DIRECTORY "${TORCH_SRC_DIR}/csrc"
DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch
FILES_MATCHING PATTERN "*.h")
# Install the *.hpp headers found under torch/lib/c10d. The DIRECTORY path has
# no trailing slash, so its last component is kept and the headers end up in
# ${TORCH_INSTALL_INCLUDE_DIR}/c10d.
install(DIRECTORY "${TORCH_SRC_DIR}/lib/c10d"
DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}
FILES_MATCHING PATTERN "*.hpp")
install(FILES
"${TORCH_SRC_DIR}/script.h"
"${TORCH_SRC_DIR}/extension.h"
Expand Down Expand Up @@ -1210,9 +1233,31 @@ endif()
# Pass USE_DISTRIBUTED to torch_cpu, because some code in jit/pickler.cpp and
# jit/unpickler.cpp must be compiled only when USE_DISTRIBUTED is set.
if(USE_DISTRIBUTED)
target_compile_definitions(torch_cpu PRIVATE
USE_DISTRIBUTED
)
# Needed to support the inclusion of c10d/Foo.hpp headers.
target_include_directories(torch_cpu PUBLIC ${TORCH_SRC_DIR}/lib)
target_compile_definitions(torch_cpu PRIVATE USE_DISTRIBUTED)
# Gloo: publish USE_C10D_GLOO on torch_cpu (PUBLIC, so consumers see it too).
if(USE_GLOO AND USE_C10D_GLOO)
  target_compile_definitions(torch_cpu PUBLIC USE_C10D_GLOO)
endif()

# NCCL: publish USE_C10D_NCCL on the GPU library this configuration uses:
# torch_hip for ROCm, torch_cuda_cpp for split-CUDA builds, torch_cuda
# otherwise.
if(USE_NCCL AND USE_C10D_NCCL)
  if(USE_ROCM)
    target_compile_definitions(torch_hip PUBLIC USE_C10D_NCCL)
  elseif(BUILD_SPLIT_CUDA)
    target_compile_definitions(torch_cuda_cpp PUBLIC USE_C10D_NCCL)
  else()
    target_compile_definitions(torch_cuda PUBLIC USE_C10D_NCCL)
  endif()
endif()

# MPI: publish USE_C10D_MPI on torch_cpu.
if(USE_MPI AND USE_C10D_MPI)
  target_compile_definitions(torch_cpu PUBLIC USE_C10D_MPI)
  # Compile ProcessGroupMPI.cpp with deprecated-declaration warnings turned
  # off; the flag is only passed to GCC and Clang-family compilers.
  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
    set_source_files_properties(
      "${TORCH_SRC_DIR}/lib/c10d/ProcessGroupMPI.cpp"
      PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations)
  endif()
endif()
# Pass USE_RPC in order to reduce use of
# #if defined(USE_DISTRIBUTED) && !defined(_WIN32)
# need to be removed when RPC is supported
Expand Down
2 changes: 1 addition & 1 deletion test/cpp/rpc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set(TORCH_RPC_TEST_SOURCES
${TORCH_RPC_TEST_DIR}/test_wire_serialization.cpp
)
set(TORCH_RPC_TEST_DEPENDENCY_LIBS
torch c10d gtest process_group_agent
torch gtest process_group_agent
)

if(USE_GLOO)
Expand Down
48 changes: 41 additions & 7 deletions tools/build_variables.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,28 @@ core_sources_full = core_sources_full_mobile + [

libtorch_core_sources = sorted(core_sources_common + core_sources_full + core_trainer_sources)

libtorch_distributed_sources = [
# Distributed sources that are built on every platform; these are the only
# distributed files supported on Windows. caffe2/CMakeLists.txt appends this
# list to TORCH_SRCS whenever USE_DISTRIBUTED is on.
libtorch_distributed_base_sources = [
"torch/lib/c10d/comm.cpp",
"torch/lib/c10d/default_comm_hooks.cpp",
"torch/lib/c10d/FileStore.cpp",
"torch/lib/c10d/GlooDeviceFactory.cpp",
"torch/lib/c10d/logger.cpp",
"torch/lib/c10d/ParamCommsUtils.cpp",
"torch/lib/c10d/PrefixStore.cpp",
"torch/lib/c10d/ProcessGroup.cpp",
"torch/lib/c10d/ProcessGroupGloo.cpp",
"torch/lib/c10d/ProcessGroupMPI.cpp",
"torch/lib/c10d/ProcessGroupWrapper.cpp",
"torch/lib/c10d/reducer.cpp",
"torch/lib/c10d/sequence_num.cpp",
"torch/lib/c10d/Store.cpp",
"torch/lib/c10d/TCPStore.cpp",
"torch/lib/c10d/Utils.cpp",
]

# These files are supported on Linux and other non-Windows platforms, but not on Windows.
libtorch_distributed_extra_sources = [
"torch/csrc/distributed/autograd/autograd.cpp",
"torch/csrc/distributed/autograd/utils.cpp",
"torch/csrc/distributed/autograd/context/container.cpp",
Expand Down Expand Up @@ -350,8 +371,12 @@ libtorch_distributed_sources = [
"torch/csrc/distributed/rpc/types.cpp",
"torch/csrc/distributed/rpc/utils.cpp",
"torch/csrc/distributed/rpc/metrics/registry.cpp",
"torch/lib/c10d/HashStore.cpp",
"torch/lib/c10d/ProcessGroupRoundRobin.cpp",
]

libtorch_distributed_sources = libtorch_distributed_base_sources + libtorch_distributed_extra_sources

jit_sources_full = [
"torch/csrc/jit/codegen/cuda/interface.cpp",
"torch/csrc/jit/passes/lower_graph.cpp",
Expand Down Expand Up @@ -490,7 +515,20 @@ libtorch_cuda_core_sources = [
"torch/csrc/jit/runtime/register_cuda_ops.cpp",
]

libtorch_cuda_sources = libtorch_cuda_core_sources + [
# CUDA distributed sources that are built on every platform; these are the
# only CUDA distributed files supported on Windows.
libtorch_cuda_distributed_base_sources = [
"torch/lib/c10d/reducer_cuda.cpp",
]

# CUDA distributed sources that are supported on Linux and other non-Windows
# platforms, but not on Windows.
libtorch_cuda_distributed_extra_sources = [
"torch/lib/c10d/NCCLUtils.cpp",
"torch/lib/c10d/ProcessGroupNCCL.cpp",
]

# Full list (base + extra) of CUDA distributed sources.
libtorch_cuda_distributed_sources = libtorch_cuda_distributed_base_sources + libtorch_cuda_distributed_extra_sources

libtorch_cuda_sources = libtorch_cuda_core_sources + libtorch_cuda_distributed_sources + [
"torch/csrc/cuda/nccl.cpp",
]

Expand Down Expand Up @@ -665,13 +703,9 @@ libtorch_python_core_sources = [
]

libtorch_python_distributed_core_sources = [
"torch/lib/c10d/comm.cpp",
"torch/lib/c10d/default_comm_hooks.cpp",
"torch/lib/c10d/reducer.cpp",
"torch/lib/c10d/reducer_cuda.cpp",
"torch/lib/c10d/logger.cpp",
"torch/csrc/distributed/c10d/python_comm_hook.cpp",
"torch/csrc/distributed/c10d/init.cpp",
"torch/lib/c10d/frontend.cpp",
]

libtorch_python_distributed_sources = libtorch_python_distributed_core_sources + [
Expand Down
12 changes: 11 additions & 1 deletion torch/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,17 @@ if(USE_DISTRIBUTED)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES tensorpipe)
list(APPEND TORCH_PYTHON_PUBLIC_COMPILE_DEFINITIONS USE_TENSORPIPE)
endif()
list(APPEND TORCH_PYTHON_LINK_LIBRARIES c10d)
# NCCL is a private dependency of libtorch, but libtorch_python includes
# some private headers of libtorch, which in turn include NCCL. As a hacky
# alternative to making NCCL a public dependency of libtorch, we make it
# a private dependency of libtorch_python as well.
if(USE_NCCL)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES __caffe2_nccl)
endif()
# MPI gets the same treatment as NCCL: its libraries are added directly to
# libtorch_python's link libraries rather than being made a public
# dependency of libtorch.
if(USE_MPI)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${MPI_CXX_LIBRARIES})
endif()
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_C10D)
endif()

Expand Down
1 change: 0 additions & 1 deletion torch/csrc/cuda/nccl.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <THC/THC.h>
#include <c10/cuda/CUDACachingAllocator.h>
#include <c10/util/Optional.h>

Expand Down
Loading

0 comments on commit a178043

Please sign in to comment.