add ipu device p2 (PaddlePaddle#37840)
jianghaicheng authored Dec 9, 2021
1 parent 890638c commit cb636a4
Showing 19 changed files with 599 additions and 14 deletions.
16 changes: 16 additions & 0 deletions paddle/fluid/eager/accumulation/gradient_accumulation.cc
@@ -116,6 +116,22 @@ class TensorAddFunctor : public boost::static_visitor<> {
}
#endif

void operator()(const paddle::platform::IPUPlace& place) {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"Gradient accumulation on place (%s) "
"is not supported in imperative mode",
place));
}

void operator()(const paddle::platform::NPUPinnedPlace& place) {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"Gradient accumulation on place (%s) "
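These operator() overloads are dispatched through boost's visitor machinery over the place variant. Below is a minimal standalone sketch of that dispatch pattern, offered only as an illustration; CPUPlace, IPUPlace, and AddFunctor here are stand-in names, not Paddle's types.

#include <iostream>
#include <stdexcept>
#include <boost/variant.hpp>

// Stand-in place types for illustration (not paddle::platform's classes).
struct CPUPlace {};
struct IPUPlace {};

using Place = boost::variant<CPUPlace, IPUPlace>;

// Mirrors the shape of TensorAddFunctor above: one operator() per place type;
// unsupported places raise an error instead of accumulating.
struct AddFunctor : public boost::static_visitor<> {
  void operator()(const CPUPlace&) const { std::cout << "accumulate on CPU\n"; }
  void operator()(const IPUPlace&) const {
    throw std::runtime_error(
        "Gradient accumulation on IPUPlace is not supported");
  }
};

int main() {
  Place place = CPUPlace{};
  AddFunctor functor;
  boost::apply_visitor(functor, place);  // selects the CPU overload
  return 0;
}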
5 changes: 5 additions & 0 deletions paddle/fluid/framework/dlpack_tensor.cc
@@ -81,6 +81,11 @@ struct DLDeviceVisitor : public boost::static_visitor<::DLDevice> {
return device;
}

inline ::DLDevice operator()(const platform::IPUPlace &place) const {
PADDLE_THROW(
platform::errors::Unimplemented("platform::IPUPlace is not supported"));
}

inline ::DLDevice operator()(const platform::XPUPlace &place) const {
PADDLE_THROW(
platform::errors::Unimplemented("platform::XPUPlace is not supported"));
8 changes: 8 additions & 0 deletions paddle/fluid/framework/executor.cc
@@ -463,6 +463,14 @@ void Executor::RunPartialPreparedContext(ExecutorPrepareContext* ctx,
#else
PADDLE_THROW(
platform::errors::Unimplemented("No XPU gc found in CPU/GPU paddle"));
#endif
} else if (platform::is_ipu_place(place_)) {
#ifdef PADDLE_WITH_IPU
gc.reset(new IPUGarbageCollector(
BOOST_GET_CONST(platform::IPUPlace, place_), max_memory_size));
#else
PADDLE_THROW(
platform::errors::Unimplemented("No IPU gc found in CPU/IPU paddle"));
#endif
} else if (platform::is_npu_place(place_)) {
#ifdef PADDLE_WITH_ASCEND_CL
3 changes: 3 additions & 0 deletions paddle/fluid/framework/op_registry.h
@@ -327,6 +327,9 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
#define REGISTER_OP_CPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)

#define REGISTER_OP_IPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, IPU, ::paddle::platform::IPUPlace, __VA_ARGS__)

#define REGISTER_OP_XPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, XPU, ::paddle::platform::XPUPlace, __VA_ARGS__)

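As a hedged illustration of how the new macro would be used (this snippet is not part of the commit; the op name my_relu and MyReluKernel are hypothetical, and it only compiles inside a Paddle source tree built with IPU support), registration mirrors the existing CPU/XPU variants; compare the real use in ipu_runtime_op.cc further below.

#include "paddle/fluid/framework/op_registry.h"

// MyReluKernel<T> is assumed to be a framework::OpKernel<T> defined elsewhere;
// it is a placeholder, not something this commit adds.
namespace ops = paddle::operators;
REGISTER_OP_IPU_KERNEL(my_relu, ops::MyReluKernel<float>,
                       ops::MyReluKernel<paddle::platform::float16>);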
51 changes: 46 additions & 5 deletions paddle/fluid/framework/tensor_util.cc
@@ -76,6 +76,22 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
}
#ifdef PADDLE_WITH_IPU
else if (platform::is_ipu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::IPUPlace, src_place), src_ptr, size);
} else if (platform::is_cpu_place(src_place) &&
platform::is_ipu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::IPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
} else if (platform::is_ipu_place(src_place) &&
platform::is_ipu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::IPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::IPUPlace, src_place), src_ptr, size);
}
#endif

#ifdef PADDLE_WITH_XPU
else if (platform::is_xpu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
@@ -386,25 +402,42 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
}
#ifdef PADDLE_WITH_IPU
else if (platform::is_ipu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::IPUPlace, src_place), src_ptr, size);
} else if (platform::is_cpu_place(src_place) && // NOLINT
platform::is_ipu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::IPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
} else { // NOLINT
PADDLE_THROW(platform::errors::Unimplemented(
"Copy from %s to %s is not supported.", src_place, dst_place));
}
#endif
#ifdef PADDLE_WITH_XPU
else if (platform::is_xpu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::XPUPlace, src_place), src_ptr, size);
}
else if (platform::is_cpu_place(src_place) && // NOLINT
platform::is_xpu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::XPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
}
else if (platform::is_xpu_place(src_place) && // NOLINT
platform::is_xpu_place(dst_place)) {
if (src_ptr == dst_ptr) {
VLOG(3) << "Skip copy the same data async from " << src_place << " to "
<< dst_place;
return;
}
memory::Copy(BOOST_GET_CONST(platform::XPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::XPUPlace, src_place), src_ptr, size);
}
else { // NOLINT
PADDLE_THROW(platform::errors::Unimplemented(
"Copy from %s to %s is not supported.", src_place, dst_place));
}
@@ -571,6 +604,11 @@ class AnyVisitor : public boost::static_visitor<bool> {
platform::errors::Unimplemented("Not supported on place (%s) ", npu));
// return GetResultHelper(out, npu);
}
bool GetResult(const framework::Tensor& out,
const platform::IPUPlace& ipu) const {
PADDLE_THROW(
platform::errors::Unimplemented("Not supported on place (%s) ", ipu));
}

bool GetResult(const framework::Tensor& out,
const platform::NPUPinnedPlace& cpu) const {
@@ -762,6 +800,9 @@ struct BothFalseVisitor : public boost::static_visitor<> {
void VisitorImpl(const platform::XPUPlace& xpu) const {
PADDLE_THROW(platform::errors::Unimplemented("XPUPlace is not supported"));
}
void VisitorImpl(const platform::IPUPlace& ipu) const {
PADDLE_THROW(platform::errors::Unimplemented("IPUPlace is not supported"));
}

void VisitorImpl(const platform::CUDAPlace& gpu) const {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
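A hedged usage sketch of the new copy branches follows; it is only meaningful inside a Paddle tree compiled with PADDLE_WITH_IPU, and the helper name, tensor shape, and the assumption that IPUPlace is default-constructible are illustrative rather than taken from this commit.

#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/place.h"

// Illustrative helper, not part of the commit: stage a host tensor on the IPU
// place and copy it back through the TensorCopySync branches added above.
void IpuCopyRoundTrip() {
  paddle::framework::Tensor cpu_t, ipu_t, back_t;
  cpu_t.Resize(paddle::framework::make_ddim({8}));
  cpu_t.mutable_data<float>(paddle::platform::CPUPlace());
  // CPU -> IPU, handled by the PADDLE_WITH_IPU branch.
  paddle::framework::TensorCopySync(cpu_t, paddle::platform::IPUPlace(), &ipu_t);
  // IPU -> CPU, the mirror branch.
  paddle::framework::TensorCopySync(ipu_t, paddle::platform::CPUPlace(), &back_t);
}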
7 changes: 7 additions & 0 deletions paddle/fluid/imperative/gradient_accumulator.cc
@@ -155,6 +155,13 @@ class TensorAddFunctor : public boost::static_visitor<> {
"is not supported in imperative mode",
place));
}
// Gradient accumulation is not supported on IPUPlace.
void operator()(const platform::IPUPlace& place) {
PADDLE_THROW(platform::errors::PermissionDenied(
"Gradient accumulation on place (%s) "
"is not supported in imperative mode",
place));
}

private:
int64_t numel_;
28 changes: 28 additions & 0 deletions paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
@@ -116,6 +116,34 @@ size_t Used<platform::CPUPlace>(const platform::CPUPlace &place) {
return GetCPUBuddyAllocator()->Used();
}

// For Graphcore IPU
template <>
void *Alloc<platform::IPUPlace>(const platform::IPUPlace &place, size_t size) {
VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
VLOG(10) << "IPUPlace, Allocate on cpu.";

void *p = GetCPUBuddyAllocator()->Alloc(size);
if (FLAGS_init_allocated_mem) {
memset(p, 0xEF, size);
}
VLOG(10) << " pointer=" << p;
return p;
}
template <>
void Free<platform::IPUPlace>(const platform::IPUPlace &place, void *p,
size_t size) {
VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place);
GetCPUBuddyAllocator()->Free(p);
}
template <>
uint64_t Release<platform::IPUPlace>(const platform::IPUPlace &place) {
return GetCPUBuddyAllocator()->Release();
}
template <>
size_t Used<platform::IPUPlace>(const platform::IPUPlace &place) {
return GetCPUBuddyAllocator()->Used();
}

// For kunlun XPU
template <>
void *Alloc<platform::XPUPlace>(const platform::XPUPlace &place, size_t size) {
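The IPU specializations above simply stage IPU allocations in ordinary host memory through the CPU buddy allocator. Here is a minimal standalone sketch of that delegation pattern; every name in it is illustrative and none of it is Paddle code.

#include <cstdlib>
#include <iostream>

struct CPUPlace {};
struct IPUPlace {};

// Primary template, specialized once per place type.
template <typename Place>
void* Alloc(const Place& place, std::size_t size);

template <>
void* Alloc<CPUPlace>(const CPUPlace&, std::size_t size) {
  return std::malloc(size);  // stand-in for the CPU buddy allocator
}

// The IPU specialization defers to the host path, as in the diff above.
template <>
void* Alloc<IPUPlace>(const IPUPlace&, std::size_t size) {
  return Alloc(CPUPlace{}, size);
}

int main() {
  void* p = Alloc(IPUPlace{}, 64);
  std::cout << "host-staged IPU buffer at " << p << "\n";
  std::free(p);
  return 0;
}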
62 changes: 62 additions & 0 deletions paddle/fluid/operators/ipu_runtime_op.cc
@@ -0,0 +1,62 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/ipu_runtime_op.h"

namespace paddle {
namespace operators {

class IpuRuntimeOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {}

protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
framework::proto::VarType::Type(ctx.Attr<int>("dtype")),
ctx.device_context());
}
};

class IpuRuntimeOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("FeedList", "FeedList of Graph").AsDuplicable();
AddOutput("FetchList", "FetchList of Graph").AsDuplicable();
AddAttr<int>("dtype",
"(int, default 5 (FP32)) "
"Output data type")
.SetDefault(framework::proto::VarType::FP32);
AddComment(R"DOC(
Run graph by PopART runtime.
)DOC");
}
};

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(ipu_runtime, ops::IpuRuntimeOp, ops::IpuRuntimeOpMaker);

REGISTER_OP_IPU_KERNEL(ipu_runtime, ops::IpuRuntimeKernel<float>,
ops::IpuRuntimeKernel<double>,
ops::IpuRuntimeKernel<int>,
ops::IpuRuntimeKernel<int64_t>,
ops::IpuRuntimeKernel<bool>,
ops::IpuRuntimeKernel<int8_t>,
ops::IpuRuntimeKernel<paddle::platform::float16>);
69 changes: 69 additions & 0 deletions paddle/fluid/operators/ipu_runtime_op.h
@@ -0,0 +1,69 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <memory>
#include <vector>

#include "paddle/fluid/framework/op_registry.h"
#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/framework/ipu/ipu_backend.h"
#include "paddle/fluid/framework/tensor.h"
#endif

namespace paddle {
namespace operators {

template <typename T>
class IpuRuntimeKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
#ifdef PADDLE_WITH_IPU
auto ipu_backend = framework::ipu::IpuBackend::GetInstance();
if (!ipu_backend->DeviceIsAttached()) {
const platform::IPUDeviceContext& ipu_ctx =
reinterpret_cast<const platform::IPUDeviceContext&>(
ctx.device_context());
ipu_backend->AttachDevice(ipu_ctx.DeviceId());
}

auto inputs = ctx.MultiInput<framework::Tensor>("FeedList");
auto outputs = ctx.MultiOutput<framework::Tensor>("FetchList");
auto output_names = ctx.OutputNames("FetchList");
VLOG(4) << "IpuRuntime Kernel, begin to run graph";
ipu_backend->Run(inputs, outputs, ctx);

// post-run
// resize tensor when tensor.dims() is empty
for (size_t i = 0; i < outputs.size(); ++i) {
auto* out = outputs[i];
if (out->dims().size() == 0) {
auto tensor_dtype = out->type();
auto sizeof_dtype = framework::SizeOfType(tensor_dtype);
int64_t dim = out->memory_size() / sizeof_dtype;
out->Resize({dim});
VLOG(10) << "set ipu_runtime_op output: " << output_names[i]
<< " dims from () to: "
<< "(" << dim << ")";
}
}
#else
PADDLE_THROW(platform::errors::PreconditionNotMet(
"Please compile WITH_IPU option to enable ipu_runtime op"));
#endif
}
};

} // namespace operators
} // namespace paddle
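The post-run block above recovers a 1-D shape for any fetched tensor whose dims() came back empty by dividing the raw buffer size by the element size. A tiny standalone sketch of that arithmetic, with a made-up byte count:

#include <cstddef>
#include <cstdint>
#include <iostream>

int main() {
  const std::size_t memory_size = 4096;            // bytes held by the fetched tensor
  const std::size_t sizeof_dtype = sizeof(float);  // FP32 elements, 4 bytes each
  const std::int64_t dim = static_cast<std::int64_t>(memory_size / sizeof_dtype);
  std::cout << "resize () -> (" << dim << ")\n";   // prints: resize () -> (1024)
  return 0;
}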
7 changes: 7 additions & 0 deletions paddle/fluid/operators/math/math_function.cc
@@ -173,6 +173,13 @@ void set_constant_with_place<platform::NPUPinnedPlace>(
platform::errors::Unimplemented("NPUPinnedPlace is not supported"));
}

template <>
void set_constant_with_place<platform::IPUPlace>(
const platform::DeviceContext& context, framework::Tensor* tensor,
float value) {
PADDLE_THROW(platform::errors::Unimplemented("IPUPlace is not supported"));
}

template <>
void set_constant_with_place<platform::CPUPlace>(
const platform::DeviceContext& context, framework::Tensor* tensor,