
Commit

add ipu device p1 (PaddlePaddle#37841)
jianghaicheng authored Dec 7, 2021
1 parent de874cd commit c9a3c66
Showing 15 changed files with 214 additions and 14 deletions.
9 changes: 9 additions & 0 deletions paddle/fluid/framework/garbage_collector.cc
@@ -53,6 +53,15 @@ void XPUGarbageCollector::ClearCallback(const std::function<void()> &callback) {
}
#endif

#ifdef PADDLE_WITH_IPU
IPUGarbageCollector::IPUGarbageCollector(const platform::IPUPlace &place,
size_t max_memory_size)
: GarbageCollector(place, max_memory_size) {}
void IPUGarbageCollector::ClearCallback(const std::function<void()> &callback) {
callback();
}
#endif

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
UnsafeFastGPUGarbageCollector::UnsafeFastGPUGarbageCollector(
const platform::CUDAPlace &place, size_t max_memory_size)
10 changes: 10 additions & 0 deletions paddle/fluid/framework/garbage_collector.h
@@ -80,6 +80,16 @@ class XPUGarbageCollector : public GarbageCollector {
};
#endif

#ifdef PADDLE_WITH_IPU
class IPUGarbageCollector : public GarbageCollector {
public:
IPUGarbageCollector(const platform::IPUPlace &place, size_t max_memory_size);

protected:
void ClearCallback(const std::function<void()> &callback) override;
};
#endif

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
class UnsafeFastGPUGarbageCollector : public GarbageCollector {
public:
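For orientation, a minimal usage sketch of the new collector — not part of the commit, and it assumes a WITH_IPU build plus the DirectClearCallback wrapper on the GarbageCollector base:

    #include "paddle/fluid/framework/garbage_collector.h"

    void IPUGarbageCollectorSketch() {
      paddle::platform::IPUPlace place(0);   // device id 0, assumed available
      size_t max_memory_size = 1 << 20;      // bytes accumulated before a sweep
      paddle::framework::IPUGarbageCollector gc(place, max_memory_size);
      // Unlike the stream-based GPU collectors, the IPU ClearCallback runs the
      // callback synchronously, so this executes immediately.
      gc.DirectClearCallback([] { /* release tensor buffers here */ });
    }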
4 changes: 3 additions & 1 deletion paddle/fluid/framework/library_type.h
@@ -61,14 +61,16 @@ inline LibraryType StringToLibraryType(const char* ctype) {
return LibraryType::kPlain;
} else if (s == std::string("XPU")) {
return LibraryType::kPlain;
} else if (s == std::string("IPU")) {
return LibraryType::kPlain;
} else if (s == std::string("NPU")) {
return LibraryType::kPlain;
} else if (s == std::string("CUDA")) {
return LibraryType::kPlain;
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unknown LibraryType string (%s), only support library type string "
"include PLAIN, MKLDNN, CUDNN, CPU and CUDA.",
"include PLAIN, MKLDNN, CUDNN, CPU, CUDA and IPU.",
s.c_str()));
}
}
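A small sketch of what this hunk buys — hypothetical caller code; the point is that "IPU", like "CPU", "XPU" and "NPU", maps to the plain library type rather than a specialized kernel library:

    auto lib = paddle::framework::StringToLibraryType("IPU");
    // lib == paddle::framework::LibraryType::kPlain; only "MKLDNN" and "CUDNN"
    // select specialized libraries, and unknown strings throw Unimplemented.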
2 changes: 2 additions & 0 deletions paddle/fluid/memory/allocation/CMakeLists.txt
@@ -48,6 +48,8 @@ if (WITH_GPU OR WITH_ROCM)
endif()
elseif(WITH_XPU)
set(AllocatorFacadeDeps xpu_info)
elseif(WITH_IPU)
set(AllocatorFacadeDeps ipu_info)
elseif(WITH_ASCEND)
set(AllocatorFacadeDeps ascend_npu_info)
else ()
38 changes: 38 additions & 0 deletions paddle/fluid/memory/allocation/allocator_facade.cc
@@ -51,6 +51,10 @@
#include "paddle/fluid/memory/allocation/npu_pinned_allocator.h"
#endif

#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/platform/device/ipu/ipu_info.h"
#endif

PADDLE_DEFINE_EXPORTED_int64(
gpu_allocator_retry_time, 10000,
"The retry time (milliseconds) when allocator fails "
@@ -136,6 +140,11 @@ class AllocatorFacadePrivate {
switch (strategy_) {
case AllocatorStrategy::kNaiveBestFit: {
InitNaiveBestFitCPUAllocator();
#ifdef PADDLE_WITH_IPU
for (int dev_id = 0; dev_id < platform::GetIPUDeviceCount(); ++dev_id) {
InitNaiveBestFitIPUAllocator(platform::IPUPlace(dev_id));
}
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (FLAGS_use_stream_safe_cuda_allocator) {
LOG(WARNING) << "FLAGS_use_stream_safe_cuda_allocator is invalid for "
@@ -186,6 +195,11 @@ class AllocatorFacadePrivate {
for (int dev_id = 0; dev_id < platform::GetXPUDeviceCount(); ++dev_id) {
InitNaiveBestFitXPUAllocator(platform::XPUPlace(dev_id));
}
#endif
#ifdef PADDLE_WITH_IPU
for (int dev_id = 0; dev_id < platform::GetIPUDeviceCount(); ++dev_id) {
InitNaiveBestFitIPUAllocator(platform::IPUPlace(dev_id));
}
#endif
break;
}
@@ -197,6 +211,11 @@
InitNaiveBestFitXPUAllocator(platform::XPUPlace(dev_id));
}
#endif
#ifdef PADDLE_WITH_IPU
for (int dev_id = 0; dev_id < platform::GetIPUDeviceCount(); ++dev_id) {
InitNaiveBestFitIPUAllocator(platform::IPUPlace(dev_id));
}
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (FLAGS_use_stream_safe_cuda_allocator) {
LOG(WARNING) << "FLAGS_use_stream_safe_cuda_allocator is invalid for "
@@ -570,6 +589,12 @@ class AllocatorFacadePrivate {
}
#endif

#ifdef PADDLE_WITH_IPU
void InitNaiveBestFitIPUAllocator(platform::IPUPlace p) {
allocators_[p] = std::make_shared<NaiveBestFitAllocator>(p);
}
#endif

#ifdef PADDLE_WITH_ASCEND_CL
void InitNaiveBestFitNPUAllocator(platform::NPUPlace p) {
allocators_[p] = std::make_shared<NaiveBestFitAllocator>(p);
@@ -591,6 +616,13 @@
system_allocators_[p] = std::make_shared<NaiveBestFitAllocator>(p);
}
#endif
#ifdef PADDLE_WITH_IPU
int device_count = platform::GetIPUDeviceCount();
for (int i = 0; i < device_count; ++i) {
platform::IPUPlace p(i);
system_allocators_[p] = std::make_shared<NaiveBestFitAllocator>(p);
}
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
system_allocators_[platform::CUDAPinnedPlace()] =
std::make_shared<CPUPinnedAllocator>();
@@ -625,6 +657,12 @@ class AllocatorFacadePrivate {
places.emplace_back(platform::NPUPlace(dev_id));
}
#endif
#ifdef PADDLE_WITH_IPU
int device_count = platform::GetIPUDeviceCount();
for (int dev_id = 0; dev_id < device_count; ++dev_id) {
places.emplace_back(platform::IPUPlace(dev_id));
}
#endif

for (auto& p : places) {
zero_size_allocators_[p] = std::make_shared<ZeroSizeAllocator>(p);
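Taken together, these hunks register a NaiveBestFitAllocator per IPU device under every allocator strategy, plus matching system and zero-size allocators. A hedged sketch of the resulting caller-side behavior, assuming the usual memory::Alloc entry point that routes through AllocatorFacade:

    #include "paddle/fluid/memory/malloc.h"

    void IPUAllocSketch() {
      paddle::platform::IPUPlace place(0);       // assumed device id
      // Routed to the NaiveBestFitAllocator registered for IPUPlace above.
      auto allocation = paddle::memory::Alloc(place, 4096);
      void* ptr = allocation->ptr();             // 4 KB tied to the IPU place
      (void)ptr;  // freed when `allocation` goes out of scope
    }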
26 changes: 26 additions & 0 deletions paddle/fluid/memory/memcpy.cc
@@ -33,6 +33,32 @@ void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
VLOG(4) << "src: " << src << ", dst: " << dst << ", num: " << num;
std::memcpy(dst, src, num);
}
#ifdef PADDLE_WITH_IPU
template <>
void Copy<platform::IPUPlace, platform::CPUPlace>(platform::IPUPlace dst_place,
void* dst,
platform::CPUPlace src_place,
const void* src, size_t num) {
if (UNLIKELY(num == 0)) return;
std::memcpy(dst, src, num);
}
template <>
void Copy<platform::CPUPlace, platform::IPUPlace>(platform::CPUPlace dst_place,
void* dst,
platform::IPUPlace src_place,
const void* src, size_t num) {
if (UNLIKELY(num == 0)) return;
std::memcpy(dst, src, num);
}
template <>
void Copy<platform::IPUPlace, platform::IPUPlace>(platform::IPUPlace dst_place,
void* dst,
platform::IPUPlace src_place,
const void* src, size_t num) {
if (UNLIKELY(num == 0)) return;
std::memcpy(dst, src, num);
}
#endif

#ifdef PADDLE_WITH_XPU
template <>
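All three IPU specializations reduce to std::memcpy, i.e. this first patch treats IPU buffers as host-addressable and leaves device transfers to the PopART layer. A minimal caller sketch (illustrative only; the Copy signature is the one defined above):

    #include "paddle/fluid/memory/memcpy.h"

    void IPUCopySketch() {
      float src[16] = {};                    // host staging buffer
      float dst[16];
      paddle::platform::CPUPlace cpu;
      paddle::platform::IPUPlace ipu(0);     // assumed device id
      // Resolves to Copy<IPUPlace, CPUPlace>, which is a plain memcpy here.
      paddle::memory::Copy(ipu, dst, cpu, src, sizeof(src));
    }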
8 changes: 7 additions & 1 deletion paddle/fluid/platform/CMakeLists.txt
@@ -71,6 +71,12 @@ IF(WITH_GPU OR WITH_ROCM)
set(GPU_CTX_DEPS dynload_cuda dynamic_loader cuda_stream)
ENDIF()

IF(WITH_IPU)
set(IPU_CTX_DEPS ipu_backend)
ELSE()
set(IPU_CTX_DEPS)
ENDIF(WITH_IPU)

IF(WITH_ASCEND_CL)
set(NPU_CTX_DEPS npu_stream npu_info)
ENDIF()
@@ -109,7 +115,7 @@ cc_library(cudnn_workspace_helper SRCS cudnn_workspace_helper.cc DEPS boost)
# memcpy depends on device_context, here add deps individually for
# avoiding cycle dependencies
cc_library(device_context SRCS device_context.cc init.cc DEPS simple_threadpool malloc xxhash ${STREAM_CALLBACK_DEPS}
place eigen3 stringpiece cpu_helper cpu_info framework_proto ${GPU_CTX_DEPS} ${NPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}
place eigen3 stringpiece cpu_helper cpu_info framework_proto ${IPU_CTX_DEPS} ${GPU_CTX_DEPS} ${NPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}
${dgc_deps} dlpack cudnn_workspace_helper ${XPU_CTX_DEPS})

cc_library(collective_helper SRCS collective_helper.cc gen_comm_id_helper.cc DEPS framework_proto device_context enforce)
20 changes: 12 additions & 8 deletions paddle/fluid/platform/device/ipu/CMakeLists.txt
@@ -1,8 +1,12 @@
cc_library(ipu_device SRCS device.cc DEPS enforce popart)
cc_library(ipu_utils SRCS ipu_utils.cc DEPS memory framework_proto popart)
cc_library(ipu_strategy SRCS ipu_strategy.cc DEPS popart graph framework_proto enforce)
cc_library(ipu_optimizer SRCS ipu_optimizer.cc DEPS popart enforce)
cc_library(ipu_executor SRCS ipu_executor.cc DEPS ipu_optimizer ipu_utils popart graph framework_proto)
cc_library(popart_canonicalization_utils SRCS ${POPART_CANONICALIZATION_SRC} DEPS framework_proto enforce ipu_utils)
cc_library(ipu_compiler SRCS ipu_compiler.cc DEPS popart graph ipu_utils graph_helper)
cc_library(ipu_backend SRCS ipu_backend.cc DEPS popart ipu_compiler graph framework_proto enforce ipu_utils ipu_strategy ipu_device ipu_executor graph_helper)
# IPU
IF(WITH_IPU)
cc_library(ipu_device SRCS device.cc DEPS enforce popart)
cc_library(ipu_utils SRCS ipu_utils.cc DEPS memory framework_proto popart)
cc_library(ipu_strategy SRCS ipu_strategy.cc DEPS popart graph framework_proto enforce)
cc_library(ipu_optimizer SRCS ipu_optimizer.cc DEPS popart enforce)
cc_library(ipu_executor SRCS ipu_executor.cc DEPS ipu_optimizer ipu_utils popart graph framework_proto)
cc_library(popart_canonicalization_utils SRCS ${POPART_CANONICALIZATION_SRC} DEPS framework_proto enforce ipu_utils)
cc_library(ipu_compiler SRCS ipu_compiler.cc DEPS popart graph ipu_utils graph_helper)
cc_library(ipu_backend SRCS ipu_backend.cc DEPS popart ipu_compiler graph framework_proto enforce ipu_utils ipu_strategy ipu_device ipu_executor graph_helper)
cc_library(ipu_info SRCS ipu_info.cc DEPS ipu_backend)
ENDIF()
2 changes: 1 addition & 1 deletion paddle/fluid/platform/device/ipu/device.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/platform/ipu/device.h"
#include "paddle/fluid/platform/device/ipu/device.h"

namespace paddle {
namespace platform {
32 changes: 32 additions & 0 deletions paddle/fluid/platform/device/ipu/ipu_info.cc
@@ -0,0 +1,32 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/platform/device/ipu/ipu_info.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"

namespace paddle {
namespace platform {

//! Get a list of device ids from environment variable or use all.
std::vector<int> GetSelectedIPUDevices() {
std::shared_ptr<platform::ipu::IpuBackend> ipu_backend =
platform::ipu::IpuBackend::GetInstance();
return ipu_backend->GetDeviceIds();
}

//! Get the total number of IPU devices in system.
int GetIPUDeviceCount() {
std::shared_ptr<platform::ipu::IpuBackend> ipu_backend =
platform::ipu::IpuBackend::GetInstance();
return ipu_backend->GetNumDevices();
}
} // namespace platform
} // namespace paddle
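Note that, unlike the CUDA/XPU equivalents that query a driver directly, both helpers defer to the IpuBackend singleton. A hedged usage sketch:

    #include <vector>
    #include "paddle/fluid/platform/device/ipu/ipu_info.h"

    void IPUDeviceQuerySketch() {
      // Both calls route through IpuBackend::GetInstance() internally.
      int count = paddle::platform::GetIPUDeviceCount();
      std::vector<int> ids = paddle::platform::GetSelectedIPUDevices();
      // Assumed invariant: ids holds the selected subset, so ids.size() <= count.
    }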
24 changes: 24 additions & 0 deletions paddle/fluid/platform/device/ipu/ipu_info.h
@@ -0,0 +1,24 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once

#ifdef PADDLE_WITH_IPU
#include <memory>
#include <vector>
#include "glog/logging.h"

namespace paddle {
namespace platform {
std::vector<int> GetSelectedIPUDevices();
int GetIPUDeviceCount();
} // namespace platform
} // namespace paddle
#endif
2 changes: 1 addition & 1 deletion paddle/fluid/platform/device/ipu/ipu_optimizer.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/platform/ipu/ipu_optimizer.h"
#include "paddle/fluid/platform/device/ipu/ipu_optimizer.h"

namespace paddle {
namespace platform {
32 changes: 30 additions & 2 deletions paddle/fluid/platform/device_context.h
@@ -62,6 +62,9 @@ limitations under the License. */
#include "paddle/fluid/platform/device/npu/enforce_npu.h"
#include "paddle/fluid/platform/device/npu/npu_stream.h"
#endif
#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/platform/device/ipu/device.h"
#endif
#include "unsupported/Eigen/CXX11/Tensor"

namespace Eigen {
@@ -99,8 +102,8 @@ enum DeviceType {
CUDA = 1,
XPU = 2,
NPU = 3,

MAX_DEVICE_TYPES = 4,
IPU = 4,
MAX_DEVICE_TYPES = 5,
};

DeviceType Place2DeviceType(const platform::Place& place);
@@ -109,6 +112,7 @@ constexpr DeviceType kCPU = DeviceType::CPU;
constexpr DeviceType kCUDA = DeviceType::CUDA;
constexpr DeviceType kXPU = DeviceType::XPU;
constexpr DeviceType kNPU = DeviceType::NPU;
constexpr DeviceType kIPU = DeviceType::IPU;

class DeviceContext {
public:
@@ -140,6 +144,30 @@ struct DefaultDeviceContextType<platform::CPUPlace> {
using TYPE = CPUDeviceContext;
};

// Graphcore IPU
#ifdef PADDLE_WITH_IPU
class IPUDeviceContext : public DeviceContext {
public:
IPUDeviceContext() = delete;
explicit IPUDeviceContext(IPUPlace place);
virtual ~IPUDeviceContext();
Eigen::DefaultDevice* eigen_device() const { return nullptr; }
Place GetPlace() const override;
/*! \brief Wait for all operations completion in the stream. */
void Wait() const override;
int DeviceId() const { return device_.getId(); }

private:
IPUPlace place_;
platform::ipu::Device device_;
};
template <>
struct DefaultDeviceContextType<platform::IPUPlace> {
using TYPE = IPUDeviceContext;
};

#endif

#ifdef PADDLE_WITH_XPU
namespace xpu = baidu::xpu::api;
class XPUDeviceContext : public DeviceContext {
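A sketch of the usual pool lookup against the new context — hypothetical, and it assumes the companion device_context.cc change in this commit registers IPUDeviceContext for IPUPlace in DeviceContextPool:

    #include "paddle/fluid/platform/device_context.h"

    void IPUContextSketch() {
      paddle::platform::IPUPlace place(0);   // assumed device id
      auto& pool = paddle::platform::DeviceContextPool::Instance();
      // DefaultDeviceContextType<IPUPlace> maps this lookup to IPUDeviceContext.
      auto* ctx = static_cast<paddle::platform::IPUDeviceContext*>(pool.Get(place));
      ctx->Wait();                           // block until queued work completes
    }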
16 changes: 16 additions & 0 deletions paddle/fluid/platform/init.cc
@@ -45,6 +45,10 @@ limitations under the License. */
#include "DbgHelp.h"
#endif

#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/platform/device/ipu/ipu_info.h"
#endif

DECLARE_int32(paddle_num_threads);
PADDLE_DEFINE_EXPORTED_int32(
multiple_of_cupti_buffer_size, 1,
@@ -164,6 +168,15 @@ void InitDevices() {
LOG(WARNING)
<< "Compiled with PADDLE_WITH_ASCEND_CL, but no NPU found in runtime.";
}
#endif
#ifdef PADDLE_WITH_IPU
try {
// use user specified IPUs.
devices = platform::GetSelectedIPUDevices();
} catch (const std::exception &exp) {
LOG(WARNING)
<< "Compiled with PADDLE_WITH_IPU, but no IPU found in runtime.";
}
#endif
InitDevices(devices);
}
@@ -185,6 +198,9 @@ void InitDevices(const std::vector<int> devices) {
#ifdef PADDLE_WITH_XPU
places.emplace_back(platform::XPUPlace(devices[i]));
#endif
#ifdef PADDLE_WITH_IPU
places.emplace_back(platform::IPUPlace(devices[i]));
#endif
#ifdef PADDLE_WITH_ASCEND_CL
places.emplace_back(platform::NPUPlace(devices[i]));
#endif
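Net effect at startup, condensed into a hedged sketch; InitDevices lives in paddle::framework (per init.h), and the comments mirror the control flow added above:

    #include "paddle/fluid/platform/init.h"

    void IPUInitSketch() {
      // On a WITH_IPU build, InitDevices() roughly does:
      //   devices = platform::GetSelectedIPUDevices();  // warns if no IPU found
      //   for each id: places.emplace_back(platform::IPUPlace(id));
      // before handing all places to the framework.
      paddle::framework::InitDevices();
    }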
