diff --git a/paddle/fluid/operators/collective/c_broadcast_op.cc b/paddle/fluid/operators/collective/c_broadcast_op.cc
index e53794444ecc8..f1672f6dd04b0 100644
--- a/paddle/fluid/operators/collective/c_broadcast_op.cc
+++ b/paddle/fluid/operators/collective/c_broadcast_op.cc
@@ -57,13 +57,3 @@ namespace ops = paddle::operators;
 REGISTER_OP_WITHOUT_GRADIENT(c_broadcast,
                              ops::CBroadcastOp,
                              ops::CBroadcastOpMaker);
-
-PD_REGISTER_STRUCT_KERNEL(c_broadcast,
-                          CPU,
-                          ALL_LAYOUT,
-                          ops::CBroadcastOpCPUKernel,
-                          float,
-                          double,
-                          int,
-                          int64_t,
-                          phi::dtype::float16) {}
diff --git a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc
deleted file mode 100644
index 069de3efe0d6c..0000000000000
--- a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc
+++ /dev/null
@@ -1,117 +0,0 @@
-/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/collective/c_broadcast_op.h"
-#include "paddle/phi/core/distributed/comm_context_manager.h"
-#include "paddle/phi/core/distributed/nccl_comm_context.h"
-
-#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-#include "paddle/fluid/distributed/collective/process_group.h"
-#include "paddle/fluid/platform/device/gpu/nccl_helper.h"
-#include "paddle/phi/core/platform/collective_helper.h"
-#endif
-#include "paddle/phi/api/include/tensor.h"
-
-namespace paddle {
-namespace operators {
-
-template <typename T, typename DeviceContext>
-class CBroadcastOpCUDAKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-    auto x = ctx.Input<phi::DenseTensor>("X");
-    auto out = ctx.Output<phi::DenseTensor>("Out");
-
-    int rid = ctx.Attr<int>("ring_id");
-    const auto& place = ctx.GetPlace();
-    ctx.device_context().Alloc<T>(out);
-
-    int root = ctx.Attr<int>("root");
-
-    auto map = distributed::ProcessGroupMapFromGid::getInstance();
-    if (map->has(rid)) {
-      distributed::ProcessGroup* pg = map->get(rid);
-      auto b_opts = distributed::BroadcastOptions();
-      b_opts.source_rank = rid;
-      b_opts.source_root = root;
-      auto task = pg->Broadcast(out, *x, b_opts, false);
-      task->Wait();
-      return;
-    }
-
-    gpuStream_t stream = ctx.cuda_device_context().stream();
-    const auto& comm_context_manager =
-        phi::distributed::CommContextManager::GetInstance();
-    if (comm_context_manager.Has(std::to_string(rid))) {
-      auto* comm_context = static_cast<phi::distributed::NCCLCommContext*>(
-          comm_context_manager.Get(std::to_string(rid)));
-
-      comm_context->Broadcast(out, *x, root, stream);
-    } else {
-      // NOTE(liyurui): This will be removed after moving this operator to phi.
-      int numel = x->numel();
-      ncclDataType_t dtype = phi::ToNCCLDataType(x->dtype());
-      auto comm = platform::NCCLCommContext::Instance().Get(rid, place);
-      if (root == comm->rank()) {
-        PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::ncclBcast(
-            reinterpret_cast<void*>(const_cast<T*>(x->data<T>())),
-            numel,
-            dtype,
-            root,
-            comm->comm(),
-            stream));
-        VLOG(3) << "rank " << comm->rank() << " invoke Bcast. sent "
-                << x->numel();
-
-        if (out != x) {
-          framework::TensorCopy(*static_cast<const phi::DenseTensor*>(x),
-                                place,
-                                *phi::DeviceContextPool::Instance().Get(place),
-                                static_cast<phi::DenseTensor*>(out));
-        }
-      } else {
-        PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::ncclBcast(
-            out->data<T>(), numel, dtype, root, comm->comm(), stream));
-        VLOG(3) << "rank " << comm->rank() << " invoke Bcast. received "
-                << common::product(out->dims());
-      }
-    }
-
-    out->set_lod(x->lod());
-#else
-    PADDLE_THROW(common::errors::PreconditionNotMet(
-        "PaddlePaddle should compile with GPU."));
-#endif
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-
-PD_REGISTER_STRUCT_KERNEL(c_broadcast,
-                          GPU,
-                          ALL_LAYOUT,
-                          ops::CBroadcastOpCUDAKernel,
-                          int,
-                          int64_t,
-                          float,
-                          double,
-#if (NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000) || \
-    defined(PADDLE_WITH_HIP)
-                          phi::dtype::bfloat16,
-#endif
-                          phi::dtype::float16) {
-}
diff --git a/paddle/fluid/operators/collective/c_broadcast_op_xpu.cc b/paddle/fluid/operators/collective/c_broadcast_op_xpu.cc
deleted file mode 100644
index 3192e122f3791..0000000000000
--- a/paddle/fluid/operators/collective/c_broadcast_op_xpu.cc
+++ /dev/null
@@ -1,132 +0,0 @@
-/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/collective/c_broadcast_op.h"
-#include "paddle/phi/core/distributed/comm_context_manager.h"
-
-#ifdef PADDLE_WITH_XPU_BKCL
-#include "paddle/common/flags.h"
-#include "paddle/fluid/platform/device/xpu/bkcl_helper.h"
-#include "paddle/phi/core/distributed/bkcl_comm_context.h"
-#include "paddle/phi/core/platform/collective_helper.h"
-COMMON_DECLARE_bool(dynamic_static_unified_comm);
-#endif
-#include "paddle/fluid/distributed/collective/process_group.h"
-
-namespace paddle {
-namespace operators {
-
-template <typename T, typename DeviceContext>
-class CBroadcastOpXPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-#if defined(PADDLE_WITH_XPU_BKCL)
-    auto x = ctx.Input<phi::DenseTensor>("X");
-    auto out = ctx.Output<phi::DenseTensor>("Out");
-    size_t numel = x->numel();
-
-    BKCLDataType dtype = phi::ToBKCLDataType(x->dtype());
-    int ring_id = ctx.Attr<int>("ring_id");
-    auto place = ctx.GetPlace();
-    int root = ctx.Attr<int>("root");
-
-    platform::BKCLComm* comm = nullptr;
-    phi::distributed::BKCLCommContext* comm_ctx = nullptr;
-    XPUStream stream = nullptr;
-    const auto& comm_context_manager =
-        phi::distributed::CommContextManager::GetInstance();
-    if (FLAGS_dynamic_static_unified_comm) {
-      PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)),
-                        true,
-                        common::errors::InvalidArgument(
-                            "You choose to use new communication library by "
-                            "setting environment "
-                            "variable FLAGS_dynamic_static_unified_comm True. "
-                            "But ring_id(%d) is "
-                            "not found in comm_context_manager.",
-                            std::to_string(ring_id)));
-      comm_ctx = static_cast<phi::distributed::BKCLCommContext*>(
-          comm_context_manager.Get(std::to_string(ring_id)));
-      PADDLE_ENFORCE_NE(comm_ctx,
-                        nullptr,
-                        common::errors::Unavailable(
-                            "BKCLCommContext is nullptr, collective op should "
-                            "has ring_id attr."));
-      stream = comm_ctx->GetStream();
-      VLOG(3) << "new comm_context_manager has rid " << ring_id;
-    } else {  // old comm_context
-      comm = paddle::platform::BKCLCommContext::Instance().Get(ring_id, place);
-      stream = comm->stream();
-      VLOG(3) << "old BKCLCommContext has rid " << ring_id;
-    }
-    if (comm_ctx) {
-      comm_ctx->Broadcast(out, *x, root, stream);
-    } else {
-      void* send_recv_buffer = nullptr;
-      if (root == comm->rank()) {
-        send_recv_buffer =
-            reinterpret_cast<void*>(const_cast<T*>(x->data<T>()));
-        PADDLE_ENFORCE_XPU_SUCCESS(bkcl_broadcast(comm->comm(),
-                                                  send_recv_buffer,
-                                                  send_recv_buffer,
-                                                  numel,
-                                                  dtype,
-                                                  root,
-                                                  stream));
-        VLOG(3) << "rank " << comm->rank() << " invoke Bcast. sent "
-                << x->numel();
-        if (out != x) {
-          framework::TensorCopy(*static_cast<const phi::DenseTensor*>(x),
-                                place,
-                                *phi::DeviceContextPool::Instance().Get(place),
-                                static_cast<phi::DenseTensor*>(out));
-        }
-      } else {
-        auto& dev_ctx = ctx.template device_context<phi::XPUContext>();
-        dev_ctx.template Alloc<T>(out);
-        send_recv_buffer = out->data<T>();
-        PADDLE_ENFORCE_XPU_SUCCESS(bkcl_broadcast(comm->comm(),
-                                                  send_recv_buffer,
-                                                  send_recv_buffer,
-                                                  numel,
-                                                  dtype,
-                                                  root,
-                                                  stream));
-        VLOG(3) << "rank " << comm->rank() << " invoke Bcast. received "
-                << phi::product(out->dims());
-      }
-    }
-    out->Resize(x->dims());
-    out->set_lod(x->lod());
-#else
-    PADDLE_THROW(common::errors::PreconditionNotMet(
-        "PaddlePaddle should be compiled with XPU and BKCL."));
-#endif
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-
-PD_REGISTER_STRUCT_KERNEL(c_broadcast,
-                          XPU,
-                          ALL_LAYOUT,
-                          ops::CBroadcastOpXPUKernel,
-                          float,
-                          double,
-                          phi::dtype::float16,
-                          int,
-                          int64_t) {}